npm - @zhouchangui/math-ati - Versions diffs - 0.1.2 → 0.1.4 - Mend

@zhouchangui/math-ati 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/AGENTS.md +4 -1
package/README.md +11 -0
package/bin/math-ati.js +136 -5
package/dist/assets/{index-CGZslJ0a.css → index-DOg8CQsE.css} +1 -1
package/dist/assets/index-DyfeTKmg.js +22 -0
package/dist/index.html +3 -3
package/package.json +9 -5
package/prompts/geometry-practice-experience.md +44 -0
package/prompts/grading.system.md +3 -1
package/prompts/knowledge-extract.system.md +35 -54
package/prompts/knowledge-structure.system.md +75 -0
package/prompts/knowledge-summarize.system.md +21 -7
package/prompts/pdf-grading.system.md +4 -1
package/prompts/pdf-recheck.system.md +2 -0
package/prompts/practice-answers.system.md +154 -0
package/prompts/practice-coverage-repair.system.md +112 -0
package/prompts/practice-generate.system.md +51 -9
package/prompts/practice-review.system.md +4 -2
package/prompts/practice-revise.system.md +5 -4
package/prompts/practice-rules.md +61 -0
package/prompts/svg-figure-review.system.md +13 -0
package/prompts/svg-figure-revise.system.md +21 -0
package/server/agentClient.js +179 -10
package/server/coveragePlanner.js +174 -0
package/server/fileStore.js +49 -9
package/server/index.js +78 -1
package/server/knowledgeExtractor.js +717 -120
package/server/knowledgeFeedback.js +69 -0
package/server/practiceGenerator.js +637 -116
package/server/practicePaperHtml.js +105 -35
package/server/practiceService.js +27 -2
package/server/promptStore.js +14 -0
package/server/submissionService.js +1 -1
package/server/svgFigureVerifier.js +315 -0
package/dist/assets/index-CGfjl7nO.js +0 -22

package/server/knowledgeExtractor.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { readdir } from 'node:fs/promises';
+import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
 import path from 'node:path';
-import { callChatAgent, callVisionAgent } from './agentClient.js';
+import { callChatAgent, callChatTextAgent, callVisionTextAgent } from './agentClient.js';
 import {
   chapterDataPaths,
   ensureChapterDataDirs,
@@ -13,17 +13,28 @@ import {
 } from './fileStore.js';
 import { promptPayload, readPrompt } from './promptStore.js';
-const KNOWLEDGE_PAGE_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_TIMEOUT_MS || 180000);
-const KNOWLEDGE_SUMMARY_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS || 120000);
-const KNOWLEDGE_PAGE_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_RETRIES || 2);
-const KNOWLEDGE_SUMMARY_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_RETRIES || 2);
+const KNOWLEDGE_PAGE_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_TIMEOUT_MS || 300000);
+const KNOWLEDGE_SUMMARY_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS || 600000);
+const KNOWLEDGE_PAGE_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_RETRIES || 3);
+const KNOWLEDGE_SUMMARY_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_RETRIES || 3);
+const KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE = Number(process.env.KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE || 4);
+const KNOWLEDGE_MAX_CORE_POINTS = Number(process.env.KNOWLEDGE_MAX_CORE_POINTS || 24);
+const KNOWLEDGE_MAX_MISTAKE_POINTS = Number(process.env.KNOWLEDGE_MAX_MISTAKE_POINTS || 8);
 function extractionDir(chapterId) {
   return chapterDataPaths(chapterId).pageExtracts;
 }
 function pageExtractPath(chapterId, imageFile) {
-  return path.join(extractionDir(chapterId), `${path.basename(imageFile, path.extname(imageFile))}.json`);
+  return path.join(extractionDir(chapterId), `${path.basename(imageFile, path.extname(imageFile))}.md`);
+}
+/** Directory under the chapter's knowledge folder that holds per-chunk Markdown summaries. */
+function chunkSummaryDir(chapterId) {
+  const { knowledge: knowledgeDir } = chapterDataPaths(chapterId);
+  return path.join(knowledgeDir, 'chunk_summaries');
+}
+function chunkSummaryPath(chapterId, chunkIndex) {
+  return path.join(chunkSummaryDir(chapterId), `chunk-${String(chunkIndex + 1).padStart(2, '0')}.md`);
 }
 function summaryPath(chapterId) {
@@ -46,17 +57,15 @@ async function chapterImages(chapter) {
 }
 function normalizeExtractProfile(profile = {}) {
-  const detailLevels = new Set(['exam_focus', 'balanced', 'fine_grained']);
   const baselines = new Set(['strong', 'normal', 'weak']);
-  const allowedFocus = new Set(['exam_points', 'error_prone', 'prerequisite_gaps', 'calculation_links']);
-  const focus = Array.isArray(profile.focus)
-    ? profile.focus.filter((item) => allowedFocus.has(item))
-    : [];
   return {
-    detailLevel: detailLevels.has(profile.detailLevel) ? profile.detailLevel : 'exam_focus',
+    detailLevel: 'exam_focus',
     studentBaseline: baselines.has(profile.studentBaseline) ? profile.studentBaseline : 'strong',
-    focus: focus.length ? focus : ['exam_points', 'error_prone'],
-    displayLayer: profile.displayLayer === 'fine_grained_wall' ? 'fine_grained_wall' : 'exam_point_wall'
+    focus: ['exam_points', 'error_prone'],
+    displayLayer: 'exam_point_wall',
+    maxCorePointCount: Math.max(8, Math.min(40, Number(profile.maxCorePointCount || KNOWLEDGE_MAX_CORE_POINTS))),
+    maxMistakePointCount: Math.max(0, Math.min(16, Number(profile.maxMistakePointCount || KNOWLEDGE_MAX_MISTAKE_POINTS))),
+    summaryChunkSize: Math.max(2, Math.min(8, Number(profile.summaryChunkSize || KNOWLEDGE_SUMMARY_PAGE_CHUNK_SIZE)))
   };
 }
@@ -90,6 +99,97 @@ function normalizePageExtract(chapter, imagePath, pageIndex, data, source = 'age
   };
 }
+/**
+ * Returns the trimmed body of the level-2 Markdown section headed `## <title>`.
+ *
+ * The body runs from the line after the heading up to (not including) the next
+ * level-2 heading, or to the end of the document when there is none.
+ *
+ * @param {string} markdown - Markdown document to search; nullish is treated as empty.
+ * @param {string} title - Exact heading text; matched literally, not as a regex.
+ * @returns {string} Section body, or '' when the heading is not found.
+ */
+function markdownSection(markdown, title) {
+  const lines = String(markdown || '').split(/\r?\n/);
+  // Escape the title so characters such as `+`, `.`, or `(` cannot break or widen the match.
+  const escapedTitle = String(title).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  // Build the heading matcher once instead of once per line.
+  const headingPattern = new RegExp(`^##\\s+${escapedTitle}\\s*$`);
+  const start = lines.findIndex((line) => headingPattern.test(line.trim()));
+  if (start < 0) return '';
+  // `##` must be followed by whitespace here, so `###` sub-headings do not end the section.
+  const end = lines.findIndex((line, index) => index > start && /^##\s+/.test(line.trim()));
+  return lines.slice(start + 1, end < 0 ? undefined : end).join('\n').trim();
+}
+/**
+ * Splits a section's text into `### <title>` items.
+ *
+ * Each item collects the raw lines that follow its heading until the next
+ * `###` heading. Lines before the first heading are discarded.
+ *
+ * @param {string} sectionText - Text of one Markdown section; nullish is treated as empty.
+ * @returns {{title: string, body: string[]}[]} Items in document order.
+ */
+function markdownHeadingItems(sectionText) {
+  const headingPattern = /^###\s+(.+?)\s*$/;
+  return String(sectionText || '')
+    .split(/\r?\n/)
+    .reduce((collected, rawLine) => {
+      const matched = headingPattern.exec(rawLine);
+      if (matched) {
+        collected.push({ title: matched[1].trim(), body: [] });
+      } else if (collected.length > 0) {
+        collected[collected.length - 1].body.push(rawLine);
+      }
+      return collected;
+    }, []);
+}
+/**
+ * Reads the value of a single `- <label>：<value>` bullet from an item's body lines.
+ *
+ * The label may be wrapped in `**bold**`, and either a full-width or an ASCII
+ * colon is accepted. Only the first matching line is used.
+ *
+ * @param {string[]} body - Raw lines of one `###` item; a non-array is treated as empty.
+ * @param {string} label - Field label; matched literally, not as a regex.
+ * @returns {string} Trimmed value, or '' when the field is absent.
+ */
+function fieldFromMarkdownBody(body, label) {
+  if (!Array.isArray(body)) return '';
+  // Escape the label so characters such as `(` or `.` are matched literally.
+  const escapedLabel = String(label).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${escapedLabel}(?:\\*\\*)?[：:]\\s*`);
+  const line = body.find((item) => pattern.test(item.trim()));
+  return line ? line.replace(pattern, '').trim() : '';
+}
+/**
+ * Reads a multi-value field from an item's body lines.
+ *
+ * The result starts with the inline value on the `- <label>：` line (if any),
+ * followed by every `- <item>` bullet below it, and stops at the first line
+ * that looks like another `- <label>：` field.
+ *
+ * @param {string[]} body - Raw lines of one `###` item; a non-array is treated as empty.
+ * @param {string} label - Field label; matched literally, not as a regex.
+ * @returns {string[]} Non-empty trimmed values in document order.
+ */
+function markdownFieldList(body, label) {
+  if (!Array.isArray(body)) return [];
+  // Escape the label so characters such as `(` or `.` are matched literally.
+  const escapedLabel = String(label).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const pattern = new RegExp(`^\\s*-\\s*(?:\\*\\*)?${escapedLabel}(?:\\*\\*)?[：:]\\s*`);
+  const nextFieldPattern = /^\s*-\s*(?:\*\*)?[\u4e00-\u9fa5A-Za-z0-9 /_-]+(?:\*\*)?[：:]/;
+  const start = body.findIndex((line) => pattern.test(line.trim()));
+  if (start < 0) return [];
+  const firstValue = body[start].replace(pattern, '').trim();
+  const values = firstValue ? [firstValue] : [];
+  for (let index = start + 1; index < body.length; index += 1) {
+    const line = body[index];
+    // A bullet of the form `- label:` starts the next field, so the list ends here.
+    if (nextFieldPattern.test(line.trim())) break;
+    const listItem = line.match(/^\s*-\s+(.+?)\s*$/);
+    if (listItem?.[1]) values.push(listItem[1].trim());
+  }
+  return values.filter(Boolean);
+}
+/**
+ * Parses one page's Markdown extract into the structured page-extract object.
+ *
+ * Reads the `页面标题`, `原文结构`, `知识点`, `易错点`, and `出题方向` sections,
+ * and keeps the raw Markdown on the result under `markdown`. The page title
+ * falls back to the image file name when the title section is empty.
+ *
+ * @param {object} args
+ * @param {object} args.chapter - Chapter record; `id` and `fullTitle` are read.
+ * @param {string} args.imagePath - Path of the page image the Markdown was extracted from.
+ * @param {number} args.pageIndex - Page position, copied to the result.
+ * @param {number} args.pageCount - Page total, copied to the result.
+ * @param {string} args.markdown - Markdown text to parse.
+ * @returns {object} Page extract with `knowledgePoints`, `easyMistakes`, and `exerciseHints`.
+ */
+function parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown }) {
+  const sectionOf = (title) => markdownSection(markdown, title);
+  // Section body as a list of bullet texts, with the leading "- " removed.
+  const bulletLines = (title) => sectionOf(title)
+    .split(/\r?\n/)
+    .map((line) => line.replace(/^\s*-\s*/, '').trim())
+    .filter(Boolean);
+  // Single-line field whose values are separated by full-width or ASCII semicolons.
+  const semicolonList = (body, label) => fieldFromMarkdownBody(body, label)
+    .split(/[；;]/)
+    .map((value) => value.trim())
+    .filter(Boolean);
+
+  const imageFile = path.basename(imagePath);
+  const firstTitleLine = sectionOf('页面标题').split(/\r?\n/).find(Boolean);
+  const pageTitle = firstTitleLine || imageFile;
+
+  const knowledgePoints = markdownHeadingItems(sectionOf('知识点')).map(({ title, body }) => ({
+    title,
+    summary: fieldFromMarkdownBody(body, '摘要'),
+    formulas: semicolonList(body, '公式'),
+    examples: semicolonList(body, '例子'),
+    prerequisite: fieldFromMarkdownBody(body, '前置'),
+    difficulty: fieldFromMarkdownBody(body, '难度') || 'basic'
+  }));
+  const easyMistakes = markdownHeadingItems(sectionOf('易错点')).map(({ title, body }) => ({
+    title,
+    errorType: fieldFromMarkdownBody(body, '错因') || title,
+    description: fieldFromMarkdownBody(body, '说明'),
+    correction: fieldFromMarkdownBody(body, '纠正')
+  }));
+
+  return {
+    chapterId: chapter.id,
+    chapterTitle: chapter.fullTitle,
+    imageFile,
+    pageIndex,
+    pageCount,
+    source: 'agent_markdown',
+    extractedAt: new Date().toISOString(),
+    pageTitle: pageTitle.replace(/^#+\s*/, '').trim(),
+    rawOutline: bulletLines('原文结构'),
+    knowledgePoints,
+    easyMistakes,
+    exerciseHints: bulletLines('出题方向'),
+    markdown
+  };
+}
 function knowledgeExtractionError(reason, detail = '') {
   const error = new Error(`knowledge_extraction_failed:${reason}`);
   error.status = 502;
@@ -113,13 +213,15 @@ export async function extractChapterPage({
   pageCount = 0,
   force = false,
   extractProfile = null,
+  chapterStructure = '',
   onProgress = null
 }) {
   await ensureChapterWorkspace(chapter);
   const outputPath = pageExtractPath(chapter.id, imagePath);
   if (!force) {
-    const existing = await readJson(outputPath, null);
+    const existing = await readFile(outputPath, 'utf8').catch(() => '');
     if (existing) {
+      const parsed = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown: existing });
       onProgress?.({
         step: 'knowledge_extract.page.cached',
         message: `第 ${pageIndex}/${pageCount || '?'} 页已有提取缓存，直接复用。`,
@@ -127,7 +229,7 @@ export async function extractChapterPage({
         pageCount,
         imageFile: path.basename(imagePath)
       });
-      return existing;
+      return parsed;
     }
   }
   const systemPrompt = await readPrompt('knowledge-extract.system.md');
@@ -138,46 +240,37 @@ export async function extractChapterPage({
     pageCount,
     imageFile: path.basename(imagePath)
   });
-  const agent = await callVisionAgent({
+  const agent = await callVisionTextAgent({
     timeoutMs: KNOWLEDGE_PAGE_TIMEOUT_MS,
     retries: KNOWLEDGE_PAGE_RETRIES,
     system: systemPrompt,
-    text: promptPayload({
-      task: '从这一页提分笔记图片中逐项提取知识点、公式、例题线索和易错点。',
-      context: {
-        chapter: {
-          id: chapter.id,
-          title: chapter.fullTitle,
-          track: chapter.track
-        },
-        extractProfile: normalizeExtractProfile(extractProfile || {})
-      },
-      requirements: [
-        'knowledgePoints 要覆盖页面出现的每个概念、性质、公式、方法或题型。',
-        'easyMistakes 要提取页面明确写出的易错点，也可以从页面中的提醒、比较、条件限制中归纳，但不能凭空添加。',
-        'summary 用学生能懂的短句；公式必须使用 $...$。',
-        'exerciseHints 只写题型方向，不写完整答案。'
-      ],
-      schema: {
-        pageTitle: 'string',
-        rawOutline: ['string'],
-        knowledgePoints: [{
-          title: 'string',
-          summary: 'string',
-          formulas: ['string with LaTeX'],
-          examples: ['short example or expression'],
-          prerequisite: 'string',
-          difficulty: 'basic|medium|challenge'
-        }],
-        easyMistakes: [{
-          title: 'string',
-          errorType: 'string',
-          description: 'string',
-          correction: 'string'
-        }],
-        exerciseHints: ['string']
-      }
-    }),
+    text: [
+      `任务：从这一页提分笔记图片中提取可用于后续汇总的 Markdown 笔记。`,
+      '',
+      `章节：${chapter.id} ${chapter.fullTitle}`,
+      `主线：${chapter.track}`,
+      `提取策略：考点和易错点优先；不要把例子拆成独立知识点；不要补充图片没有出现的内容。`,
+      chapterStructure ? `\n以下是本《${chapter.fullTitle}》的章节整体结构分析，供逐页提取时参考。请在提取本页时注意：\n- 本页在章节中的大致角色\n- 本页涉及的核心概念是否已在结构分析中列出\n- 本页知识点与前后页的关联关系\n\n${chapterStructure.slice(0, 3000)}` : '',
+      '',
+      '输出必须是 Markdown，且只包含以下标题：',
+      '# 页面知识提取',
+      '## 页面标题',
+      '## 原文结构',
+      '## 知识点',
+      '### <知识点标题>',
+      '- 摘要：<一句话>',
+      '- 公式：<可为空，多个用分号>',
+      '- 例子：<可为空，多个用分号>',
+      '- 前置：<可为空>',
+      '- 难度：basic|medium|challenge',
+      '## 易错点',
+      '### <易错点标题>',
+      '- 错因：<错误类型>',
+      '- 说明：<错误表现>',
+      '- 纠正：<正确做法>',
+      '## 出题方向',
+      '- <题型方向，只写方向，不写完整答案>'
+    ].join('\n'),
     imagePaths: [imagePath],
     onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
       const base = `第 ${pageIndex}/${pageCount || '?'} 页识别`;
@@ -210,9 +303,12 @@ export async function extractChapterPage({
       `第 ${pageIndex}/${pageCount || '?'} 页 ${path.basename(imagePath)} 识别失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
     );
   }
-  const extract = normalizePageExtract(chapter, imagePath, pageIndex, agent.data, 'agent');
-  await writeJson(outputPath, extract);
-  await writeJson(path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.json`), extract);
+  const markdown = String(agent.data || '').trim();
+  const extract = parsePageMarkdownExtract({ chapter, imagePath, pageIndex, pageCount, markdown });
+  await writeFile(outputPath, `${markdown}\n`, 'utf8');
+  const mirrorPath = path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.md`);
+  await mkdir(path.dirname(mirrorPath), { recursive: true });
+  await writeFile(mirrorPath, `${markdown}\n`, 'utf8');
   onProgress?.({
     step: 'knowledge_extract.page.done',
     message: `第 ${pageIndex}/${pageCount || '?'} 页识别完成，提取 ${extract.knowledgePoints.length} 个知识点。`,
@@ -226,7 +322,7 @@ export async function extractChapterPage({
 function dedupeByTitle(items) {
   const seen = new Map();
   for (const item of items) {
-    const key = String(item.title || item.errorType || '').replace(/\s+/g, '');
+    const key = normalizedKnowledgeKey(item.title || item.errorType || '');
     if (!key) continue;
     if (!seen.has(key)) {
       seen.set(key, { ...item });
@@ -242,6 +338,171 @@ function dedupeByTitle(items) {
   return [...seen.values()];
 }
+/**
+ * Builds the de-duplication key for a knowledge-point or mistake title.
+ *
+ * The key is the lower-cased title with quotes, parenthesised remarks,
+ * punctuation, whitespace, and generic words (概念/性质/定义/方法/判定/定理/公式)
+ * removed.
+ *
+ * Callers skip items whose key is empty. A title made only of generic words
+ * or only of a parenthesised remark (e.g. "定义", "（补充）") would therefore be
+ * dropped, so in that case the key falls back to the title with just
+ * punctuation and bracket characters removed.
+ *
+ * NOTE(review): titles that differ only by a generic word (e.g. "…的性质" vs
+ * "…的判定") still share one key and will be merged by callers — confirm that
+ * this is intended.
+ *
+ * @param {*} value - Title text; nullish or empty yields ''.
+ * @returns {string} Normalized key; '' only when the title has no usable characters.
+ */
+function normalizedKnowledgeKey(value) {
+  const base = String(value || '')
+    .toLowerCase()
+    .replace(/[“”"‘’'`]/g, '');
+  const stripped = base
+    .replace(/[（(].*?[）)]/g, '')
+    .replace(/[：:，,。；;、\s·\-—_]/g, '')
+    .replace(/的概念|概念|性质|定义|方法|判定|定理|公式/g, '')
+    .trim();
+  if (stripped) return stripped;
+  // Everything was stripped: keep a less aggressive key so the point is not lost.
+  return base
+    .replace(/[（(）)]/g, '')
+    .replace(/[：:，,。；;、\s·\-—_]/g, '')
+    .trim();
+}
+/**
+ * Splits `items` into consecutive chunks of at most `size` elements.
+ *
+ * `size` is coerced to a whole number. Any value that is not a finite number
+ * of at least 1 (0, negative, NaN, non-numeric) falls back to 1, so the loop
+ * always advances and no item is lost.
+ *
+ * @param {Array} items - Items to split; not mutated.
+ * @param {number} size - Maximum chunk length.
+ * @returns {Array[]} Chunks in original order; [] for an empty input.
+ */
+function chunkArray(items, size) {
+  const requested = Math.floor(Number(size));
+  const step = Number.isFinite(requested) && requested >= 1 ? requested : 1;
+  const chunks = [];
+  for (let index = 0; index < items.length; index += step) {
+    chunks.push(items.slice(index, index + step));
+  }
+  return chunks;
+}
+/**
+ * Counts the points across all sections of a knowledge document.
+ *
+ * @param {{sections?: {points?: Array}[]}} doc - Knowledge document.
+ * @returns {number} Total number of points; sections without `points` count as 0.
+ */
+function countDocPoints(doc) {
+  let total = 0;
+  for (const section of doc.sections || []) {
+    total += section.points?.length || 0;
+  }
+  return total;
+}
+/**
+ * Tells whether a point has at least one question template whose first two
+ * entries are both truthy.
+ *
+ * @param {{questionTemplates?: Array}} point - Knowledge point.
+ * @returns {boolean} True when such a template exists.
+ */
+function pointHasUsableTemplate(point) {
+  if (!Array.isArray(point.questionTemplates)) return false;
+  return point.questionTemplates.some((entry) => {
+    if (!Array.isArray(entry)) return false;
+    return Boolean(entry[0] && entry[1]);
+  });
+}
+/**
+ * Returns a copy of `point` with trimmed text fields and capped list fields.
+ *
+ * - `title` and `summary` are coerced to trimmed strings.
+ * - `formulas`, `pitfalls`, and `examples` keep at most 4 / 5 / 4 truthy entries.
+ * - An object `teachingTips` has its three lists capped at 3 / 4 / 2 entries;
+ *   a truthy non-object value is kept as is; a missing value becomes three
+ *   empty lists.
+ * - `questionTemplates` keeps at most 3 array entries. When none remain after
+ *   filtering (field missing, empty, or holding only non-array entries), one
+ *   generic template is built from the trimmed title and summary, so every
+ *   point always carries at least one template.
+ *
+ * @param {object} point - Raw knowledge point; not mutated.
+ * @returns {object} Normalized copy; all other properties of `point` are preserved.
+ */
+function normalizePointForBudget(point) {
+  const cappedList = (value, max) => (Array.isArray(value) ? value.filter(Boolean).slice(0, max) : []);
+  const title = String(point.title || '').trim();
+  const summary = String(point.summary || '').trim();
+  const tips = point.teachingTips;
+  // Filter first, then decide on the fallback: a list of only malformed
+  // entries must not leave the point without any template.
+  const templates = Array.isArray(point.questionTemplates)
+    ? point.questionTemplates.filter((template) => Array.isArray(template)).slice(0, 3)
+    : [];
+  return {
+    ...point,
+    title,
+    summary,
+    formulas: cappedList(point.formulas, 4),
+    pitfalls: cappedList(point.pitfalls, 5),
+    examples: cappedList(point.examples, 4),
+    teachingTips: tips && typeof tips === 'object'
+      ? {
+          commonMisconceptions: cappedList(tips.commonMisconceptions, 3),
+          scaffoldingOrder: cappedList(tips.scaffoldingOrder, 4),
+          checkUnderstandingQuestions: cappedList(tips.checkUnderstandingQuestions, 2)
+        }
+      : tips || {
+          commonMisconceptions: [],
+          scaffoldingOrder: [],
+          checkUnderstandingQuestions: []
+        },
+    questionTemplates: templates.length
+      ? templates
+      : [[
+          `围绕「${title || '本知识点'}」完成一道基础覆盖题，并写出关键结论。`,
+          summary || '答案需符合知识点定义、性质或方法。',
+          title || '知识点理解错误'
+        ]]
+  };
+}
+/**
+ * Merges points that share the same normalized title key.
+ *
+ * Every point is first passed through `normalizePointForBudget`. The first
+ * point seen for a key is kept; each later point with that key is folded into it:
+ * - `summary`: the longer text wins; on a tie the existing one is kept.
+ * - `formulas` / `pitfalls` / `examples`: de-duplicated union, capped at 4 / 5 / 4.
+ * - `questionTemplates`: concatenated, de-duplicated by first element, capped at 3.
+ * - `sources`: de-duplicated union, uncapped.
+ * - `teachingTips`: misconceptions and check questions are unioned and capped
+ *   at 4 and 3; `scaffoldingOrder` is replaced by the later point's list when
+ *   that list is non-empty.
+ *
+ * Points whose key is empty are skipped.
+ *
+ * @param {object[]} points - Raw knowledge points.
+ * @returns {object[]} Merged points, in order of first appearance of each key.
+ */
+function mergeDuplicatePoints(points) {
+  const byKey = new Map();
+  for (const rawPoint of points) {
+    const point = normalizePointForBudget(rawPoint);
+    const key = normalizedKnowledgeKey(point.title);
+    if (!key) continue;
+    if (!byKey.has(key)) {
+      byKey.set(key, point);
+      continue;
+    }
+    // A point with this key already exists: fold the new one into it in place.
+    const current = byKey.get(key);
+    current.summary = current.summary.length >= point.summary.length ? current.summary : point.summary;
+    current.formulas = [...new Set([...(current.formulas || []), ...(point.formulas || [])])].slice(0, 4);
+    current.pitfalls = [...new Set([...(current.pitfalls || []), ...(point.pitfalls || [])])].slice(0, 5);
+    current.examples = [...new Set([...(current.examples || []), ...(point.examples || [])])].slice(0, 4);
+    // Keep only the first template for each distinct first element.
+    current.questionTemplates = [...(current.questionTemplates || []), ...(point.questionTemplates || [])]
+      .filter((template, index, templates) => (
+        Array.isArray(template)
+          && templates.findIndex((candidate) => Array.isArray(candidate) && candidate[0] === template[0]) === index
+      ))
+      .slice(0, 3);
+    current.sources = [...new Set([...(current.sources || []), ...(point.sources || [])])];
+    if (point.teachingTips && typeof point.teachingTips === 'object') {
+      const currentTips = current.teachingTips || { commonMisconceptions: [], scaffoldingOrder: [], checkUnderstandingQuestions: [] };
+      const pointTips = point.teachingTips;
+      current.teachingTips = {
+        commonMisconceptions: [
+          ...new Set([
+            ...(Array.isArray(currentTips.commonMisconceptions) ? currentTips.commonMisconceptions : []),
+            ...(Array.isArray(pointTips.commonMisconceptions) ? pointTips.commonMisconceptions : [])
+          ])
+        ].slice(0, 4),
+        // Not a union: a non-empty list from the later point replaces the earlier one.
+        scaffoldingOrder: Array.isArray(pointTips.scaffoldingOrder) && pointTips.scaffoldingOrder.length
+          ? pointTips.scaffoldingOrder
+          : currentTips.scaffoldingOrder || [],
+        checkUnderstandingQuestions: [
+          ...new Set([
+            ...(Array.isArray(currentTips.checkUnderstandingQuestions) ? currentTips.checkUnderstandingQuestions : []),
+            ...(Array.isArray(pointTips.checkUnderstandingQuestions) ? pointTips.checkUnderstandingQuestions : [])
+          ])
+        ].slice(0, 3)
+      };
+    }
+  }
+  return [...byKey.values()];
+}
+/**
+ * Scores a point for ordering; a higher score sorts first.
+ *
+ * Weights: usable question template 4, summary 2, pitfalls 2, formulas 1,
+ * examples 1.
+ *
+ * @param {object} point - Knowledge point.
+ * @returns {number} Score between 0 and 10.
+ */
+function pointPriority(point) {
+  const signals = [
+    [pointHasUsableTemplate(point), 4],
+    [point.summary, 2],
+    [point.pitfalls?.length, 2],
+    [point.formulas?.length, 1],
+    [point.examples?.length, 1]
+  ];
+  return signals.reduce((score, [present, weight]) => (present ? score + weight : score), 0);
+}
+/**
+ * Rebuilds a knowledge document into at most two de-duplicated sections and
+ * records the point-count budget on `review.pointBudget`.
+ *
+ * Points from sections whose title matches 易错/错题/错误/mistake go to the
+ * mistake list; all others go to the core list. Each list is merged with
+ * `mergeDuplicatePoints`, sorted by `pointPriority` (highest first), and given
+ * fresh sequential ids (`<chapter.id>-kp-NN` / `<chapter.id>-mistake-NN`).
+ *
+ * Despite the name, no point is removed to meet the budget: the two maxima are
+ * only copied into `review.pointBudget` alongside the actual counts.
+ *
+ * @param {object} chapter - Chapter record; only `id` is read.
+ * @param {object} doc - Knowledge document with a `sections` array.
+ * @param {object} profile - Extract profile; `maxCorePointCount` and `maxMistakePointCount` are read.
+ * @returns {object} Copy of `doc` with rebuilt `sections` (empty ones omitted) and an updated `review`.
+ */
+function enforceKnowledgeBudget(chapter, doc, profile) {
+  // `||` means a core maximum of 0 falls back to the default; `??` lets a mistake maximum of 0 stand.
+  const maxCore = profile.maxCorePointCount || KNOWLEDGE_MAX_CORE_POINTS;
+  const maxMistakes = profile.maxMistakePointCount ?? KNOWLEDGE_MAX_MISTAKE_POINTS;
+  const corePoints = [];
+  const mistakePoints = [];
+  for (const section of doc.sections || []) {
+    const isMistakeSection = /易错|错题|错误|mistake/i.test(section.title || '');
+    for (const point of section.points || []) {
+      if (isMistakeSection) mistakePoints.push(point);
+      else corePoints.push(point);
+    }
+  }
+  // Knowledge points may be de-duplicated and merged, but must never be dropped
+  // just to fit a numeric budget — losing knowledge points corrupts the chapter's
+  // coverage/mastery loop. The budget values are only a target hint passed to the
+  // extract/summary agent prompts upstream; the post-processing here only de-dups.
+  const dedupedCore = mergeDuplicatePoints(corePoints)
+    .sort((a, b) => pointPriority(b) - pointPriority(a))
+    .map((point, index) => ({
+      ...point,
+      id: `${chapter.id}-kp-${String(index + 1).padStart(2, '0')}`
+    }));
+  const dedupedMistakes = mergeDuplicatePoints(mistakePoints)
+    .sort((a, b) => pointPriority(b) - pointPriority(a))
+    .map((point, index) => ({
+      ...point,
+      id: `${chapter.id}-mistake-${String(index + 1).padStart(2, '0')}`
+    }));
+  return {
+    ...doc,
+    // Original section titles are discarded; only the two fixed sections are emitted.
+    sections: [
+      { title: '知识点覆盖', points: dedupedCore },
+      { title: '易错题专项', points: dedupedMistakes }
+    ].filter((section) => section.points.length),
+    review: {
+      ...(doc.review || {}),
+      pointBudget: {
+        maxCorePointCount: maxCore,
+        maxMistakePointCount: maxMistakes,
+        corePointCount: dedupedCore.length,
+        mistakePointCount: dedupedMistakes.length
+      }
+    }
+  };
+}
 function localMergeChapter(chapter, pageExtracts) {
   const points = dedupeByTitle(pageExtracts.flatMap((page) =>
     page.knowledgePoints.map((point) => ({
@@ -276,6 +537,14 @@ function localMergeChapter(chapter, pageExtracts) {
               point.title
             ]
           ],
+          teachingTips: {
+            commonMisconceptions: (point.pitfalls || []).slice(0, 2)
+              .map((pitfall) => `常见的误解：${pitfall}`),
+            scaffoldingOrder: [`先理解「${point.title}」的基本定义`, `再通过例子巩固`, `最后独立完成变式题`],
+            checkUnderstandingQuestions: [
+              `用自己的话解释什么是「${point.title}」，并举一个例子。`
+            ]
+          },
           sources: point.sources || []
         }))
       },
@@ -295,6 +564,11 @@ function localMergeChapter(chapter, pageExtracts) {
               mistake.errorType || mistake.title
             ]
           ],
+          teachingTips: {
+            commonMisconceptions: [mistake.description || mistake.errorType || mistake.title || '易错点'].filter(Boolean).slice(0, 2),
+            scaffoldingOrder: ['先识别错误类型', '再用正确方法重新做一遍'],
+            checkUnderstandingQuestions: ['这个易错点最容易在什么情况下出现？如何避免？']
+          },
           sources: mistake.sources || []
         }))
       }
@@ -302,79 +576,35 @@ function localMergeChapter(chapter, pageExtracts) {
   };
 }
-export async function summarizeChapterExtraction({
+async function callKnowledgeSummaryAgent({
+  systemPrompt,
   chapter,
-  pageExtracts,
-  extractProfile = null,
-  resetLearningState = false,
-  onProgress = null
+  task,
+  context,
+  requirements,
+  schema,
+  onProgress,
+  progressPrefix
 }) {
-  const local = localMergeChapter(chapter, pageExtracts);
-  const normalizedProfile = normalizeExtractProfile(extractProfile || {});
-  const systemPrompt = await readPrompt('knowledge-summarize.system.md');
-  onProgress?.({
-    step: 'knowledge_extract.summary.start',
-    message: `正在合并 ${pageExtracts.length} 页提取结果，生成章节知识点。`,
-    pageCount: pageExtracts.length
-  });
-  const agent = await callChatAgent({
+  return callChatAgent({
     timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
     retries: KNOWLEDGE_SUMMARY_RETRIES,
     temperature: 0.1,
     system: systemPrompt,
-    user: promptPayload({
-      task: '把逐页提取结果合并成章节知识文档，并做覆盖检查。',
-      context: {
-        chapter,
-        pageExtracts,
-        localDraft: local,
-        extractProfile: normalizedProfile
-      },
-      requirements: [
-        '合并同义知识点，保留来源页。',
-        normalizedProfile.detailLevel === 'fine_grained'
-          ? '当前提取画像要求细粒度拆分：保留必要前置概念、步骤性方法和基础易错点。'
-          : '当前提取画像要求考点优先：优先保留考试常见考点、易错点、变式边界和必要前置关系，不把教材说明拆得过碎。',
-        'sections 至少包含“知识点覆盖”；如果有易错点，单独包含“易错题专项”。',
-        '每个知识点必须有 id、title、summary、formulas、pitfalls、questionTemplates。',
-        'questionTemplates 用于后续出题，题干只写题目，不写解题过程。',
-        'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
-      ],
-      schema: {
-        sections: [{
-          title: '知识点覆盖',
-          points: [{
-            id: `${chapter.id}-kp-01`,
-            title: 'string',
-            summary: 'string',
-            formulas: ['string with LaTeX'],
-            pitfalls: ['string'],
-            examples: ['string'],
-            questionTemplates: [['stem', 'answer', 'expectedErrorType']],
-            sources: ['image filename']
-          }]
-        }],
-        review: {
-          passed: true,
-          coverageSummary: 'string',
-          missingOrWeak: ['string'],
-          duplicateMerged: ['string']
-        }
-      }
-    }),
+    user: promptPayload({ task, context, requirements, schema }),
     onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
       if (phase === 'start') {
         onProgress?.({
-          step: 'knowledge_extract.summary.attempt',
-          message: `章节汇总：第 ${attempt}/${attempts} 次尝试。`,
+          step: `${progressPrefix}.attempt`,
+          message: `${chapter.fullTitle}知识点合并：第 ${attempt}/${attempts} 次尝试。`,
           attempt,
           attempts
         });
       }
       if (phase === 'retry') {
         onProgress?.({
-          step: 'knowledge_extract.summary.retry',
-          message: `章节汇总遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
+          step: `${progressPrefix}.retry`,
+          message: `知识点合并遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
           attempt,
           attempts,
           reason: result?.reason || null
@@ -382,13 +612,329 @@ export async function summarizeChapterExtraction({
       }
     }
   });
-  if (!agent.ok || !Array.isArray(agent.data?.sections)) {
+}
+/**
+ * Build the example-shaped schema that is passed as `schema` to the
+ * knowledge-summary agent call.
+ *
+ * Key order is kept stable because the object is embedded in the prompt payload.
+ *
+ * @param {{ id: string }} chapter - Chapter whose `id` seeds the sample point id.
+ * @returns {object} Schema with one sample section and a `review` block.
+ */
+function knowledgeSummarySchema(chapter) {
+  // Teaching hints attached to every knowledge point.
+  const teachingTips = {
+    commonMisconceptions: ['string'],
+    scaffoldingOrder: ['string'],
+    checkUnderstandingQuestions: ['string']
+  };
+  // One sample knowledge point; the id shows the expected `<chapterId>-kp-NN` pattern.
+  const samplePoint = {
+    id: `${chapter.id}-kp-01`,
+    title: 'string',
+    summary: 'string',
+    formulas: ['string with LaTeX'],
+    pitfalls: ['string'],
+    examples: ['string'],
+    questionTemplates: [['stem', 'answer', 'expectedErrorType']],
+    teachingTips,
+    sources: ['image filename']
+  };
+  const review = {
+    passed: true,
+    coverageSummary: 'string',
+    missingOrWeak: ['string'],
+    duplicateMerged: ['string']
+  };
+  const coverageSection = { title: '知识点覆盖', points: [samplePoint] };
+  return { sections: [coverageSection], review };
+}
+/**
+ * Merge the per-page Markdown extracts of one page group into a single chunk
+ * summary, reusing a cached summary file when one exists.
+ *
+ * @param {object} args
+ * @param {object} args.chapter - Chapter record; `id` and `fullTitle` are read here.
+ * @param {Array<{imageFile: string, markdown?: string}>} args.pages - Page extracts in this group.
+ * @param {number} args.chunkIndex - Zero-based group index (reported 1-based in progress events).
+ * @param {number} args.chunkCount - Total number of groups.
+ * @param {string} args.systemPrompt - System prompt passed to the text agent.
+ * @param {object} args.normalizedProfile - Extract profile; the max core/mistake point counts are read.
+ * @param {Function} [args.onProgress] - Optional progress callback.
+ * @returns {Promise<{chunkIndex: number, pageFiles: string[], markdown: string}>}
+ *   The 1-based chunk index, the page image files and the chunk Markdown.
+ * @throws The value built by `knowledgeExtractionError` when the agent call fails
+ *   or no attempt yields the required Markdown structure.
+ */
+async function summarizePageChunk({ chapter, pages, chunkIndex, chunkCount, systemPrompt, normalizedProfile, onProgress }) {
+  const pageLabels = pages.map((page) => page.imageFile).join('、');
+  const cachedMarkdown = await readFile(chunkSummaryPath(chapter.id, chunkIndex), 'utf8').catch(() => '');
+  if (cachedMarkdown) {
+    const knowledgePointCount = (cachedMarkdown.match(/^###\s+/gm) || []).length;
+    onProgress?.({
+      step: 'knowledge_extract.summary.chunk.cached',
+      message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 已有 Markdown 汇总缓存，直接复用。`,
+      chunkIndex: chunkIndex + 1,
+      chunkCount,
+      knowledgePointCount
+    });
+    return {
+      chunkIndex: chunkIndex + 1,
+      pageFiles: pages.map((page) => page.imageFile),
+      markdown: cachedMarkdown
+    };
+  }
+  onProgress?.({
+    step: 'knowledge_extract.summary.chunk.start',
+    message: `正在合并知识点分组 ${chunkIndex + 1}/${chunkCount}（${pageLabels}）。`,
+    chunkIndex: chunkIndex + 1,
+    chunkCount,
+    pageCount: pages.length
+  });
+  // The prompt requires a structured Markdown doc ("# 分组知识汇总", "## 知识点覆盖",
+  // "## 易错题专项"). Some models leak reasoning as a leading sentence and skip the
+  // requested structure, producing a chunk with zero knowledge points. callChatTextAgent
+  // returns responseFormat: 'text', so such leakage is reported as ok and is not retried
+  // at the agent level. Retry the chunk in place so one bad chunk does not force a whole-
+  // chapter redo (which would discard the other chunk's valid cached summary).
+  const requiredHeadings = /^#\s+分组知识汇总/m;
+  // Clamp the configured attempt count to a finite integer >= 1. A value of 0, a
+  // negative number or a non-numeric string would otherwise skip the loop entirely
+  // and fail the chunk without ever calling the agent.
+  const configuredAttempts = Number(process.env.KNOWLEDGE_SUMMARY_STRUCTURE_RETRIES || 3);
+  const structureAttempts = Number.isFinite(configuredAttempts)
+    ? Math.max(1, Math.floor(configuredAttempts))
+    : 3;
+  const userPayload = [
+    `任务：合并《${chapter.fullTitle}》第 ${chunkIndex + 1}/${chunkCount} 组逐页 Markdown 提取结果。`,
+    '',
+    `本组页面：${pageLabels}`,
+    `数量控制：核心知识点不超过 ${Math.ceil(normalizedProfile.maxCorePointCount / chunkCount) + 4} 个；易错点不超过 ${Math.ceil(normalizedProfile.maxMistakePointCount / chunkCount) + 2} 个。`,
+    '',
+    '要求：',
+    '- 只基于输入页面合并知识点，不新增页面没有依据的内容。',
+    '- 合并同义、过细、重复候选，保留来源页。',
+    '- 优先保留考试常见考点、易错边界、几何概念辨析和可出题的方法。',
+    '- 输出 Markdown，不输出 JSON。',
+    '- 必须包含标题：# 分组知识汇总、## 知识点覆盖、## 易错题专项、## 合并说明。',
+    '- 每个知识点用三级标题，包含：摘要、来源、公式、易错边界、出题模板。直接输出结果，不要输出思考过程或开场白。',
+    '',
+    '逐页 Markdown：',
+    ...pages.map((page) => [
+      `\n---\n`,
+      `来源页：${page.imageFile}`,
+      page.markdown || ''
+    ].join('\n'))
+  ].join('\n');
+  // Forwards agent-level start/retry notifications as progress events.
+  const handleAttempt = ({ phase, attempt, attempts, delayMs, result }) => {
+    if (phase === 'start') {
+      onProgress?.({
+        step: 'knowledge_extract.summary.chunk.attempt',
+        message: `知识点分组 ${chunkIndex + 1}/${chunkCount}：第 ${attempt}/${attempts} 次尝试。`,
+        attempt,
+        attempts
+      });
+    }
+    if (phase === 'retry') {
+      onProgress?.({
+        step: 'knowledge_extract.summary.chunk.retry',
+        message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 遇到${retryReasonText(result?.reason)}，${Math.round(delayMs / 1000)} 秒后自动重试。`,
+        attempt,
+        attempts,
+        reason: result?.reason || null
+      });
+    }
+  };
+  let markdown = '';
+  let pointHeadings = 0;
+  for (let structureAttempt = 1; structureAttempt <= structureAttempts; structureAttempt += 1) {
+    const agent = await callChatTextAgent({
+      system: systemPrompt,
+      timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
+      retries: KNOWLEDGE_SUMMARY_RETRIES,
+      temperature: 0.1,
+      user: userPayload,
+      onAttempt: handleAttempt
+    });
+    if (!agent.ok || !agent.data) {
+      throw knowledgeExtractionError(
+        agent.reason || 'empty_response',
+        `知识点分组 ${chunkIndex + 1}/${chunkCount} 合并失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
+      );
+    }
+    markdown = String(agent.data || '').trim();
+    // Each knowledge point is expected as a level-3 heading; count them.
+    pointHeadings = (markdown.match(/^###\s+/gm) || []).length;
+    if (requiredHeadings.test(markdown) && pointHeadings > 0) {
+      break;
+    }
+    onProgress?.({
+      step: 'knowledge_extract.summary.chunk.structure_retry',
+      message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 第 ${structureAttempt}/${structureAttempts} 次返回缺少结构或知识点，重新生成。`,
+      chunkIndex: chunkIndex + 1,
+      chunkCount,
+      structureAttempt,
+      structureAttempts
+    });
+    if (structureAttempt < structureAttempts) {
+      await new Promise((resolve) => setTimeout(resolve, 2000));
+    }
+  }
+  if (!requiredHeadings.test(markdown) || pointHeadings === 0) {
     throw knowledgeExtractionError(
-      agent.reason || 'invalid_agent_response',
-      `章节汇总失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
+      'invalid_chunk_structure',
+      `知识点分组 ${chunkIndex + 1}/${chunkCount} 输出缺少必需的 Markdown 结构或知识点，已重试 ${structureAttempts} 次。`
     );
   }
-  const merged = agent.data;
+  // Only structurally valid output reaches this point, so the cache never stores a bad chunk.
+  await mkdir(chunkSummaryDir(chapter.id), { recursive: true });
+  await writeFile(chunkSummaryPath(chapter.id, chunkIndex), `${markdown}\n`, 'utf8');
+  const knowledgePointCount = pointHeadings;
+  onProgress?.({
+    step: 'knowledge_extract.summary.chunk.done',
+    message: `知识点分组 ${chunkIndex + 1}/${chunkCount} 合并完成，得到约 ${knowledgePointCount} 个候选点。`,
+    chunkIndex: chunkIndex + 1,
+    chunkCount,
+    knowledgePointCount
+  });
+  return {
+    chunkIndex: chunkIndex + 1,
+    pageFiles: pages.map((page) => page.imageFile),
+    markdown
+  };
+}
+/**
+ * Reduce a Markdown heading line to its bare title text.
+ *
+ * Heading markers are removed before the list numbering so that a title such as
+ * "### 1. Foo" becomes "Foo" rather than "1. Foo". A heading marker that follows
+ * the numbering ("1. ## Foo") is removed as well.
+ *
+ * @param {*} title - Raw heading text; falsy values are treated as an empty string.
+ * @returns {string} Title without leading `#` markers, `N.` / `N、` numbering or
+ *   surrounding whitespace.
+ */
+function cleanMarkdownTitle(title) {
+  return String(title || '')
+    .replace(/^\s*#+\s*/, '')
+    .replace(/^\s*\d+[.、]\s*/, '')
+    .replace(/^#+\s*/, '')
+    .trim();
+}
+/**
+ * Split a delimited Markdown field value into trimmed entries.
+ *
+ * Recognised separators are the full-width and ASCII semicolon, the enumeration
+ * comma and the full-width and ASCII comma. Empty entries and the placeholder
+ * "无" are dropped.
+ *
+ * @param {*} value - Raw field text; falsy values yield an empty list.
+ * @returns {string[]} Non-empty entries in their original order.
+ */
+function splitMarkdownValues(value) {
+  const entries = [];
+  for (const piece of String(value || '').split(/[；;、,，]/)) {
+    const trimmed = piece.trim();
+    if (trimmed !== '' && trimmed !== '无') {
+      entries.push(trimmed);
+    }
+  }
+  return entries;
+}
+/**
+ * Convert the level-3 heading items found under one section of a chunk summary
+ * into knowledge-point objects.
+ *
+ * @param {string} markdown - Chunk summary Markdown.
+ * @param {string} sectionTitle - Title of the section to read.
+ * @param {string} [pointType='core'] - 'mistake' changes the wording of the fallback
+ *   question template; any other value uses the basic-coverage wording.
+ * @returns {object[]} Points with title, summary, formulas, pitfalls (deduplicated,
+ *   at most 5), examples (always empty), questionTemplates (at most 3) and sources.
+ *   Items whose cleaned title is empty are dropped.
+ */
+function chunkMarkdownPoints(markdown, sectionTitle, pointType = 'core') {
+  const headingItems = markdownHeadingItems(markdownSection(markdown, sectionTitle));
+  const fallbackKind = pointType === 'mistake' ? '易错辨析' : '基础覆盖';
+  const toPoint = (item) => {
+    // Reads one labelled field and splits it into individual values.
+    const listField = (label) => splitMarkdownValues(fieldFromMarkdownBody(item.body, label));
+    const summary = fieldFromMarkdownBody(item.body, '摘要');
+    const sources = listField('来源');
+    const formulas = listField('公式');
+    // Pitfalls are gathered from three differently labelled fields, in this order.
+    const pitfalls = ['易错边界', '错因', '说明'].flatMap((label) => listField(label));
+    const templateStems = markdownFieldList(item.body, '出题模板');
+    const title = cleanMarkdownTitle(item.title);
+    // With no explicit templates, fall back to one generic stem for this point.
+    const stems = templateStems.length > 0
+      ? templateStems
+      : [`围绕「${title}」设计一道${fallbackKind}题。`];
+    const answer = summary || `正确运用「${title}」相关概念、性质或方法。`;
+    const expectedErrorType = pitfalls[0] || title;
+    return {
+      title,
+      summary,
+      formulas,
+      pitfalls: Array.from(new Set(pitfalls)).slice(0, 5),
+      examples: [],
+      questionTemplates: stems.slice(0, 3).map((stem) => [stem, answer, expectedErrorType]),
+      sources
+    };
+  };
+  return headingItems.map((item) => toPoint(item)).filter((point) => point.title);
+}
+/**
+ * Combine the chunk summaries into a chapter-level knowledge document without
+ * calling an agent.
+ *
+ * @param {object} chapter - Chapter record (not read by this function).
+ * @param {Array<{markdown: string}>} chunkDocs - Chunk summaries.
+ * @returns {{sections: object[], review: object}} Non-empty sections plus a review
+ *   block; `review.passed` is true when at least one core point was found and
+ *   `duplicateMerged` carries at most 20 merge notes.
+ */
+function localMergeChunkSummaries(chapter, chunkDocs) {
+  const stripBullet = (line) => line.replace(/^\s*-\s*/, '').trim();
+  const gathered = { core: [], mistake: [], notes: [] };
+  for (const { markdown } of chunkDocs) {
+    gathered.core.push(...chunkMarkdownPoints(markdown, '知识点覆盖', 'core'));
+    gathered.mistake.push(...chunkMarkdownPoints(markdown, '易错题专项', 'mistake'));
+    const noteLines = markdownSection(markdown, '合并说明').split(/\r?\n/);
+    gathered.notes.push(...noteLines.map((line) => stripBullet(line)).filter(Boolean));
+  }
+  // Only sections that actually contain points are emitted.
+  const sections = [];
+  if (gathered.core.length) {
+    sections.push({ title: '知识点覆盖', points: gathered.core });
+  }
+  if (gathered.mistake.length) {
+    sections.push({ title: '易错题专项', points: gathered.mistake });
+  }
+  const review = {
+    passed: gathered.core.length > 0,
+    coverageSummary: `由 ${chunkDocs.length} 个分组 Markdown 汇总合并生成最终章节知识点，并在保存前执行去重和数量控制。`,
+    missingOrWeak: [],
+    duplicateMerged: gathered.notes.slice(0, 20)
+  };
+  return { sections, review };
+}
+export async function summarizeChapterExtraction({
+  chapter,
+  pageExtracts,
+  extractProfile = null,
+  resetLearningState = false,
+  onProgress = null
+}) {
+  const local = localMergeChapter(chapter, pageExtracts);
+  const normalizedProfile = normalizeExtractProfile(extractProfile || {});
+  const systemPrompt = await readPrompt('knowledge-summarize.system.md');
+  onProgress?.({
+    step: 'knowledge_extract.summary.start',
+    message: `正在合并 ${pageExtracts.length} 页提取结果，生成章节知识点。`,
+    pageCount: pageExtracts.length
+  });
+  const pageChunks = chunkArray(pageExtracts, normalizedProfile.summaryChunkSize);
+  const chunkDocs = [];
+  if (pageChunks.length > 1) {
+    for (let index = 0; index < pageChunks.length; index += 1) {
+      chunkDocs.push(await summarizePageChunk({
+        chapter,
+        pages: pageChunks[index],
+        chunkIndex: index,
+        chunkCount: pageChunks.length,
+        systemPrompt,
+        normalizedProfile,
+        onProgress
+      }));
+    }
+  }
+  const finalContext = pageChunks.length > 1
+    ? { chapter, chunkDocs, localDraft: local, extractProfile: normalizedProfile }
+    : { chapter, pageExtracts, localDraft: local, extractProfile: normalizedProfile };
+  onProgress?.({
+    step: 'knowledge_extract.summary.final.start',
+    message: pageChunks.length > 1
+      ? `正在把 ${chunkDocs.length} 个知识点分组合并为最终章节知识。`
+      : '正在生成最终章节知识点。',
+    chunkCount: chunkDocs.length
+  });
+  let finalDoc = null;
+  // Prefer the deterministic local merge so the final chapter doc is built
+  // from real per-page / per-chunk agent outputs (now structurally validated,
+  // with chunk-level retries on bad output) instead of an additional free-form
+  // LLM merge that can lose points or return invalid JSON. When the chapter
+  // fits in a single chunk the chunk summary is skipped, so fall back to
+  // localMergeChapter which parses per-page knowledge points directly.
+  const localDoc = chunkDocs.length > 0
+    ? localMergeChunkSummaries(chapter, chunkDocs)
+    : local;
+  const hasUsableSections = (localDoc.sections || []).some(
+    (section) => Array.isArray(section.points) && section.points.length > 0
+  );
+  if (hasUsableSections) {
+    finalDoc = localDoc;
+    onProgress?.({
+      step: 'knowledge_extract.summary.final.local',
+      message: `已从 ${chunkDocs.length} 个分组 Markdown 本地合并为最终章节知识，共约 ${
+        (localDoc.sections || []).reduce((sum, s) => sum + (s.points || []).length, 0)
+      } 个候选点。`,
+      chunkCount: chunkDocs.length
+    });
+  } else {
+    const agent = await callKnowledgeSummaryAgent({
+      systemPrompt,
+      chapter,
+      task: '把逐页提取结果合并成章节知识文档，并做覆盖检查。',
+      progressPrefix: 'knowledge_extract.summary.final',
+      onProgress,
+      context: finalContext,
+      requirements: [
+        '合并同义知识点，保留来源页。',
+        `最终“知识点覆盖”核心点数量控制在 ${normalizedProfile.maxCorePointCount} 个以内。`,
+        `最终“易错题专项”数量控制在 ${normalizedProfile.maxMistakePointCount} 个以内。`,
+        '优先保留考试常见考点、易错点、变式边界、几何概念辨析和必要前置关系，不把教材说明拆得过碎。',
+        'sections 至少包含“知识点覆盖”；如果有易错点，单独包含“易错题专项”。',
+        '每个知识点必须有 title、summary、formulas、pitfalls、questionTemplates。',
+        'questionTemplates 用于后续出题，题干只写题目，不写解题过程。',
+        'review.missingOrWeak 列出疑似遗漏或需要人工复核的点。'
+      ],
+      schema: knowledgeSummarySchema(chapter)
+    });
+    if (!agent.ok || !Array.isArray(agent.data?.sections)) {
+      throw knowledgeExtractionError(
+        agent.reason || 'invalid_agent_response',
+        `章节汇总失败，已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
+      );
+    }
+    finalDoc = agent.data;
+  }
+  const merged = enforceKnowledgeBudget(chapter, finalDoc, normalizedProfile);
   const extractedAt = new Date().toISOString();
   const normalized = await saveKnowledgeDoc(chapter, merged, 'agent', {
     extractProfile: normalizedProfile,
@@ -404,6 +950,7 @@ export async function summarizeChapterExtraction({
     updatedAt: extractedAt,
     extractProfile: normalizedProfile,
     extractorVersion: 1,
+    summaryStrategy: pageChunks.length > 1 ? 'chunked_markdown_local_merge' : 'single_merge',
     review: merged.review || null,
     knowledgePointCount: normalized.sections.reduce((sum, section) => sum + section.points.length, 0)
   };
@@ -440,6 +987,55 @@ export async function extractChapterKnowledge({
       pageCount: scopedImages.length
     });
     const pageExtracts = [];
+    // Phase 0: analyze chapter structure with sampled pages
+    const structureSampleSize = Math.min(
+      Number(process.env.KNOWLEDGE_STRUCTURE_SAMPLE_PAGES || 6),
+      scopedImages.length
+    );
+    let chapterStructure = '';
+    const structureCachePath = path.join(chapterDataPaths(chapter.id).context, 'chapter_structure.md');
+    const cachedStructure = force ? '' : await readFile(structureCachePath, 'utf8').catch(() => '');
+    if (cachedStructure) {
+      chapterStructure = cachedStructure;
+      onProgress?.({
+        step: 'knowledge_extract.structure.cached',
+        message: '章节结构分析已有缓存，直接复用。'
+      });
+    } else {
+      onProgress?.({
+        step: 'knowledge_extract.structure.start',
+        message: `正在分析《${chapter.fullTitle}》章节结构（抽样 ${structureSampleSize}/${scopedImages.length} 页）。`
+      });
+      const sampleImages = scopedImages.slice(0, structureSampleSize);
+      const structurePrompt = await readPrompt('knowledge-structure.system.md');
+      const structureAgent = await callVisionTextAgent({
+        timeoutMs: Math.max(120000, KNOWLEDGE_PAGE_TIMEOUT_MS),
+        retries: 1,
+        system: structurePrompt,
+        text: [
+          `任务：快速浏览《${chapter.fullTitle}》（${chapter.track}）的章节图片，输出整体结构框架。`,
+          '',
+          '要求：只做结构概览，不做详细提取。识别核心概念、主要公式/法则、常见易错类型、每页角色（概念引入/定义/推导/例题/总结）。'
+        ].join('\n'),
+        imagePaths: sampleImages,
+        onAttempt: null
+      });
+      if (structureAgent.ok && structureAgent.data) {
+        chapterStructure = String(structureAgent.data || '').trim();
+        await mkdir(path.dirname(structureCachePath), { recursive: true });
+        await writeFile(structureCachePath, `${chapterStructure}\n`, 'utf8');
+        onProgress?.({
+          step: 'knowledge_extract.structure.done',
+          message: `章节结构分析完成，已缓存。`
+        });
+      } else {
+        chapterStructure = `# 章节结构分析\n\n## 章节主题\n${chapter.fullTitle}\n\n## 核心概念\n（结构分析未能完成，逐页提取将独立进行）\n`;
+        onProgress?.({
+          step: 'knowledge_extract.structure.failed',
+          message: `章节结构分析失败：${structureAgent.reason || 'unknown'}，继续逐页提取。`
+        });
+      }
+    }
     for (let index = 0; index < scopedImages.length; index += 1) {
       pageExtracts.push(await extractChapterPage({
         chapter,
@@ -448,6 +1044,7 @@ export async function extractChapterKnowledge({
         pageCount: scopedImages.length,
         force,
         extractProfile: normalizedProfile,
+        chapterStructure,
         onProgress
       }));
     }