@zhouchangui/math-ati 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +1 -0
- package/bin/math-ati.js +168 -6
- package/dist/assets/{index-Bk2WFPoL.css → index-CGZslJ0a.css} +1 -1
- package/dist/assets/{index-BYFoutza.js → index-CGfjl7nO.js} +8 -8
- package/dist/index.html +2 -2
- package/package.json +3 -1
- package/server/agentClient.js +32 -12
- package/server/fileStore.js +77 -28
- package/server/index.js +5 -3
- package/server/knowledgeExtractor.js +135 -12
- package/server/pdfSubmissionGrader.js +1 -1
- package/server/practiceService.js +6 -4
- package/templates/workspace/data/knowledge_points.json +0 -1264
package/dist/index.html
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>周子烊数学提分 Agent</title>
|
|
7
|
-
<script type="module" crossorigin src="/assets/index-
|
|
8
|
-
<link rel="stylesheet" crossorigin href="/assets/index-
|
|
7
|
+
<script type="module" crossorigin src="/assets/index-CGfjl7nO.js"></script>
|
|
8
|
+
<link rel="stylesheet" crossorigin href="/assets/index-CGZslJ0a.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body>
|
|
11
11
|
<div id="root"></div>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zhouchangui/math-ati",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Local ATI math learning loop for printable practice, PDF grading, and mastery tracking.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -28,8 +28,10 @@
|
|
|
28
28
|
"dev:client": "vite --host 127.0.0.1",
|
|
29
29
|
"dev:server": "node --watch server/index.js",
|
|
30
30
|
"build": "vite build",
|
|
31
|
+
"build:curriculum": "node scripts/build-curriculum-package.js",
|
|
31
32
|
"prepack": "npm run build",
|
|
32
33
|
"pack:local": "npm pack --dry-run",
|
|
34
|
+
"pack:curriculum": "npm run build:curriculum && npm pack tmp/npm/math-ati-curriculum-rj-7a --pack-destination tmp/npm",
|
|
33
35
|
"release:npm": "node scripts/release-npm.js",
|
|
34
36
|
"preview": "vite preview --host 127.0.0.1",
|
|
35
37
|
"extract:knowledge": "node scripts/extract-knowledge.js",
|
package/server/agentClient.js
CHANGED
|
@@ -141,30 +141,41 @@ async function postChatCompletionOnce({ messages, temperature = 0.3, timeoutMs =
|
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
-
async function
|
|
144
|
+
async function postChatCompletionWithProgress({
|
|
145
|
+
messages,
|
|
146
|
+
temperature = 0.3,
|
|
147
|
+
timeoutMs = 12000,
|
|
148
|
+
retries = 1,
|
|
149
|
+
onAttempt = null
|
|
150
|
+
}) {
|
|
145
151
|
let lastResult = null;
|
|
146
152
|
const attempts = Math.max(1, Number(retries || 0) + 1);
|
|
147
153
|
for (let attempt = 1; attempt <= attempts; attempt += 1) {
|
|
154
|
+
await onAttempt?.({ phase: 'start', attempt, attempts, timeoutMs });
|
|
148
155
|
const result = await postChatCompletionOnce({ messages, temperature, timeoutMs });
|
|
149
|
-
if (result.ok
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
};
|
|
156
|
+
if (result.ok) {
|
|
157
|
+
await onAttempt?.({ phase: 'success', attempt, attempts, timeoutMs });
|
|
158
|
+
return { ...result, attempts: attempt, previousReason: lastResult?.reason || null };
|
|
159
|
+
}
|
|
160
|
+
if (!shouldRetry(result) || attempt === attempts) {
|
|
161
|
+
await onAttempt?.({ phase: 'failed', attempt, attempts, timeoutMs, result });
|
|
162
|
+
return { ...result, attempts: attempt, previousReason: lastResult?.reason || null };
|
|
155
163
|
}
|
|
156
164
|
lastResult = result;
|
|
157
|
-
|
|
165
|
+
const delayMs = Math.min(1000 * attempt, 3000);
|
|
166
|
+
await onAttempt?.({ phase: 'retry', attempt, attempts, timeoutMs, delayMs, result });
|
|
167
|
+
await sleep(delayMs);
|
|
158
168
|
}
|
|
159
169
|
return lastResult;
|
|
160
170
|
}
|
|
161
171
|
|
|
162
|
-
export async function callChatAgent({ system, user, temperature = 0.3, timeoutMs = 12000, retries = 1 }) {
|
|
172
|
+
export async function callChatAgent({ system, user, temperature = 0.3, timeoutMs = 12000, retries = 1, onAttempt = null }) {
|
|
163
173
|
if (fixtureMode() === 'knowledge-extract') return knowledgeSummarizeFixture();
|
|
164
|
-
return
|
|
174
|
+
return postChatCompletionWithProgress({
|
|
165
175
|
temperature,
|
|
166
176
|
timeoutMs,
|
|
167
177
|
retries,
|
|
178
|
+
onAttempt,
|
|
168
179
|
messages: [
|
|
169
180
|
{ role: 'system', content: system },
|
|
170
181
|
{ role: 'user', content: user }
|
|
@@ -172,7 +183,15 @@ export async function callChatAgent({ system, user, temperature = 0.3, timeoutMs
|
|
|
172
183
|
});
|
|
173
184
|
}
|
|
174
185
|
|
|
175
|
-
export async function callVisionAgent({
|
|
186
|
+
export async function callVisionAgent({
|
|
187
|
+
system,
|
|
188
|
+
text,
|
|
189
|
+
imagePaths,
|
|
190
|
+
temperature = 0.1,
|
|
191
|
+
timeoutMs = 45000,
|
|
192
|
+
retries = 1,
|
|
193
|
+
onAttempt = null
|
|
194
|
+
}) {
|
|
176
195
|
if (fixtureMode() === 'knowledge-extract') return knowledgeExtractPageFixture();
|
|
177
196
|
const imageContent = [];
|
|
178
197
|
for (const imagePath of imagePaths || []) {
|
|
@@ -184,10 +203,11 @@ export async function callVisionAgent({ system, text, imagePaths, temperature =
|
|
|
184
203
|
}
|
|
185
204
|
});
|
|
186
205
|
}
|
|
187
|
-
return
|
|
206
|
+
return postChatCompletionWithProgress({
|
|
188
207
|
temperature,
|
|
189
208
|
timeoutMs,
|
|
190
209
|
retries,
|
|
210
|
+
onAttempt,
|
|
191
211
|
messages: [
|
|
192
212
|
{ role: 'system', content: system },
|
|
193
213
|
{
|
package/server/fileStore.js
CHANGED
|
@@ -57,8 +57,32 @@ export async function writeJson(filePath, data) {
|
|
|
57
57
|
await writeFile(filePath, `${JSON.stringify(data, null, 2)}\n`, 'utf8');
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
async function exists(filePath) {
|
|
61
|
+
try {
|
|
62
|
+
await access(filePath);
|
|
63
|
+
return true;
|
|
64
|
+
} catch {
|
|
65
|
+
return false;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
60
69
|
export function relativeDataPath(filePath) {
|
|
61
|
-
|
|
70
|
+
const resolvedPath = path.resolve(filePath);
|
|
71
|
+
const dataRelative = path.relative(paths.dataDir, resolvedPath);
|
|
72
|
+
if (dataRelative && !dataRelative.startsWith('..') && !path.isAbsolute(dataRelative)) {
|
|
73
|
+
return path.join('data', dataRelative).split(path.sep).join('/');
|
|
74
|
+
}
|
|
75
|
+
return path.relative(rootDir, resolvedPath).split(path.sep).join('/');
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export function resolveStoredPath(storedPath) {
|
|
79
|
+
if (!storedPath) return '';
|
|
80
|
+
if (path.isAbsolute(storedPath)) return storedPath;
|
|
81
|
+
const normalized = storedPath.split('/').join(path.sep);
|
|
82
|
+
if (normalized === 'data' || normalized.startsWith(`data${path.sep}`)) {
|
|
83
|
+
return path.join(paths.dataDir, normalized.slice(`data${path.sep}`.length));
|
|
84
|
+
}
|
|
85
|
+
return path.join(rootDir, normalized);
|
|
62
86
|
}
|
|
63
87
|
|
|
64
88
|
export function chapterDataPaths(chapterId) {
|
|
@@ -100,34 +124,61 @@ export async function ensureChapterDataDirs(chapterId) {
|
|
|
100
124
|
return chapterPaths;
|
|
101
125
|
}
|
|
102
126
|
|
|
127
|
+
function isSourcePageImage(file) {
|
|
128
|
+
return /\.(png|jpe?g|webp)$/i.test(file);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
async function listSourcePageImages(sourceDir) {
|
|
132
|
+
try {
|
|
133
|
+
return (await readdir(sourceDir))
|
|
134
|
+
.filter(isSourcePageImage)
|
|
135
|
+
.sort();
|
|
136
|
+
} catch (error) {
|
|
137
|
+
if (error.code === 'ENOENT') return [];
|
|
138
|
+
throw error;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
function buildSourceManifest(chapter, chapterPaths, files, sourceDir) {
|
|
143
|
+
return {
|
|
144
|
+
chapterId: chapter.id,
|
|
145
|
+
chapterTitle: chapter.fullTitle,
|
|
146
|
+
imageFolder: chapter.imageFolder,
|
|
147
|
+
pageCount: files.length,
|
|
148
|
+
pages: files.map((file) => ({
|
|
149
|
+
file,
|
|
150
|
+
sourcePath: relativeDataPath(path.join(sourceDir, file)),
|
|
151
|
+
localPath: relativeDataPath(path.join(chapterPaths.sourcePages, file))
|
|
152
|
+
}))
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
103
156
|
export async function ensureChapterWorkspace(chapter) {
|
|
104
157
|
const chapterPaths = await ensureChapterDataDirs(chapter.id);
|
|
105
158
|
await writeJson(chapterPaths.chapter, chapter);
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
chapterId: chapter.id,
|
|
113
|
-
chapterTitle: chapter.fullTitle,
|
|
114
|
-
imageFolder: chapter.imageFolder,
|
|
115
|
-
pageCount: files.length,
|
|
116
|
-
pages: files.map((file) => ({
|
|
117
|
-
file,
|
|
118
|
-
sourcePath: relativeDataPath(path.join(folderPath, file)),
|
|
119
|
-
localPath: relativeDataPath(path.join(chapterPaths.sourcePages, file))
|
|
120
|
-
}))
|
|
121
|
-
});
|
|
122
|
-
} catch {
|
|
123
|
-
await writeJson(chapterPaths.sourceManifest, {
|
|
124
|
-
chapterId: chapter.id,
|
|
125
|
-
chapterTitle: chapter.fullTitle,
|
|
126
|
-
imageFolder: chapter.imageFolder,
|
|
127
|
-
pageCount: 0,
|
|
128
|
-
pages: []
|
|
129
|
-
});
|
|
159
|
+
const existingManifest = await readJson(chapterPaths.sourceManifest, null);
|
|
160
|
+
const existingFirstPage = existingManifest?.pages?.[0]?.file
|
|
161
|
+
? path.join(chapterPaths.sourcePages, existingManifest.pages[0].file)
|
|
162
|
+
: null;
|
|
163
|
+
if (existingManifest?.pages?.length && existingFirstPage && await exists(existingFirstPage)) {
|
|
164
|
+
return chapterPaths;
|
|
130
165
|
}
|
|
166
|
+
|
|
167
|
+
const localFiles = await listSourcePageImages(chapterPaths.sourcePages);
|
|
168
|
+
if (localFiles.length) {
|
|
169
|
+
await writeJson(
|
|
170
|
+
chapterPaths.sourceManifest,
|
|
171
|
+
buildSourceManifest(chapter, chapterPaths, localFiles, chapterPaths.sourcePages)
|
|
172
|
+
);
|
|
173
|
+
return chapterPaths;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
const folderPath = path.join(paths.imageRoot, chapter.imageFolder);
|
|
177
|
+
const sourceFiles = await listSourcePageImages(folderPath);
|
|
178
|
+
await writeJson(
|
|
179
|
+
chapterPaths.sourceManifest,
|
|
180
|
+
buildSourceManifest(chapter, chapterPaths, sourceFiles, folderPath)
|
|
181
|
+
);
|
|
131
182
|
return chapterPaths;
|
|
132
183
|
}
|
|
133
184
|
|
|
@@ -546,9 +597,7 @@ export async function seedKnowledgeAssets(chapters) {
|
|
|
546
597
|
for (const chapter of chapters) {
|
|
547
598
|
const existing = byChapter.get(chapter.id);
|
|
548
599
|
if (!existing) {
|
|
549
|
-
|
|
550
|
-
error.status = 422;
|
|
551
|
-
throw error;
|
|
600
|
+
continue;
|
|
552
601
|
}
|
|
553
602
|
const doc = existing;
|
|
554
603
|
const docPath = path.join(paths.knowledgeDocs, `${chapter.id}.md`);
|
package/server/index.js
CHANGED
|
@@ -12,7 +12,8 @@ import {
|
|
|
12
12
|
getKnowledgeBundle,
|
|
13
13
|
readChapterMistakes,
|
|
14
14
|
writeJson,
|
|
15
|
-
chapterDataPaths
|
|
15
|
+
chapterDataPaths,
|
|
16
|
+
resolveStoredPath
|
|
16
17
|
} from './fileStore.js';
|
|
17
18
|
import { gradeSubmission } from './grading.js';
|
|
18
19
|
import { extractChapterKnowledge } from './knowledgeExtractor.js';
|
|
@@ -223,7 +224,8 @@ app.post('/api/jobs/knowledge-extract', async (req, res, next) => {
|
|
|
223
224
|
limitPages: Math.max(0, Math.min(50, Number(req.body.limitPages || 0))),
|
|
224
225
|
force: Boolean(req.body.force),
|
|
225
226
|
extractProfile: req.body.extractProfile || req.body.profile || {},
|
|
226
|
-
resetLearningState: Boolean(req.body.resetLearningState)
|
|
227
|
+
resetLearningState: Boolean(req.body.resetLearningState),
|
|
228
|
+
onProgress: (event) => addJobEvent(job.id, event)
|
|
227
229
|
});
|
|
228
230
|
await refreshAbilityStateAfterLearningReset(chapterId, Boolean(req.body.resetLearningState));
|
|
229
231
|
addJobEvent(job.id, { step: 'knowledge_extract.done', message: '章节知识点提取完成。' });
|
|
@@ -565,7 +567,7 @@ app.get('/api/submissions/:id/artifacts/:name', async (req, res, next) => {
|
|
|
565
567
|
const submission = await readSubmission(req.params.id);
|
|
566
568
|
const filePath = submission.artifactPaths?.[req.params.name];
|
|
567
569
|
if (!filePath) return res.status(404).json({ error: 'artifact_not_found' });
|
|
568
|
-
res.sendFile(
|
|
570
|
+
res.sendFile(resolveStoredPath(filePath));
|
|
569
571
|
} catch (error) {
|
|
570
572
|
next(error);
|
|
571
573
|
}
|
|
@@ -13,6 +13,11 @@ import {
|
|
|
13
13
|
} from './fileStore.js';
|
|
14
14
|
import { promptPayload, readPrompt } from './promptStore.js';
|
|
15
15
|
|
|
16
|
+
const KNOWLEDGE_PAGE_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_TIMEOUT_MS || 180000);
|
|
17
|
+
const KNOWLEDGE_SUMMARY_TIMEOUT_MS = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_TIMEOUT_MS || 120000);
|
|
18
|
+
const KNOWLEDGE_PAGE_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_PAGE_RETRIES || 2);
|
|
19
|
+
const KNOWLEDGE_SUMMARY_RETRIES = Number(process.env.KNOWLEDGE_EXTRACT_SUMMARY_RETRIES || 2);
|
|
20
|
+
|
|
16
21
|
function extractionDir(chapterId) {
|
|
17
22
|
return chapterDataPaths(chapterId).pageExtracts;
|
|
18
23
|
}
|
|
@@ -26,6 +31,13 @@ function summaryPath(chapterId) {
|
|
|
26
31
|
}
|
|
27
32
|
|
|
28
33
|
async function chapterImages(chapter) {
|
|
34
|
+
const chapterPaths = await ensureChapterWorkspace(chapter);
|
|
35
|
+
const manifest = await readJson(chapterPaths.sourceManifest, null);
|
|
36
|
+
if (manifest?.pages?.length) {
|
|
37
|
+
return manifest.pages
|
|
38
|
+
.map((page) => path.join(chapterPaths.sourcePages, page.file))
|
|
39
|
+
.filter(Boolean);
|
|
40
|
+
}
|
|
29
41
|
const folderPath = path.join(paths.imageRoot, chapter.imageFolder);
|
|
30
42
|
const files = (await readdir(folderPath))
|
|
31
43
|
.filter((file) => /\.(png|jpe?g|webp)$/i.test(file))
|
|
@@ -86,16 +98,49 @@ function knowledgeExtractionError(reason, detail = '') {
|
|
|
86
98
|
return error;
|
|
87
99
|
}
|
|
88
100
|
|
|
89
|
-
|
|
101
|
+
function retryReasonText(reason) {
|
|
102
|
+
if (reason === 'timeout') return '模型响应超时';
|
|
103
|
+
if (reason === 'fetch_failed') return '模型服务连接失败';
|
|
104
|
+
if (reason === 'invalid_json') return '模型返回格式需要重试';
|
|
105
|
+
if (String(reason || '').startsWith('http_')) return `模型服务返回 ${reason.replace('http_', 'HTTP ')}`;
|
|
106
|
+
return reason || '模型调用失败';
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
export async function extractChapterPage({
|
|
110
|
+
chapter,
|
|
111
|
+
imagePath,
|
|
112
|
+
pageIndex,
|
|
113
|
+
pageCount = 0,
|
|
114
|
+
force = false,
|
|
115
|
+
extractProfile = null,
|
|
116
|
+
onProgress = null
|
|
117
|
+
}) {
|
|
90
118
|
await ensureChapterWorkspace(chapter);
|
|
91
119
|
const outputPath = pageExtractPath(chapter.id, imagePath);
|
|
92
120
|
if (!force) {
|
|
93
121
|
const existing = await readJson(outputPath, null);
|
|
94
|
-
if (existing)
|
|
122
|
+
if (existing) {
|
|
123
|
+
onProgress?.({
|
|
124
|
+
step: 'knowledge_extract.page.cached',
|
|
125
|
+
message: `第 ${pageIndex}/${pageCount || '?'} 页已有提取缓存,直接复用。`,
|
|
126
|
+
pageIndex,
|
|
127
|
+
pageCount,
|
|
128
|
+
imageFile: path.basename(imagePath)
|
|
129
|
+
});
|
|
130
|
+
return existing;
|
|
131
|
+
}
|
|
95
132
|
}
|
|
96
133
|
const systemPrompt = await readPrompt('knowledge-extract.system.md');
|
|
134
|
+
onProgress?.({
|
|
135
|
+
step: 'knowledge_extract.page.start',
|
|
136
|
+
message: `正在识别第 ${pageIndex}/${pageCount || '?'} 页原始笔记。`,
|
|
137
|
+
pageIndex,
|
|
138
|
+
pageCount,
|
|
139
|
+
imageFile: path.basename(imagePath)
|
|
140
|
+
});
|
|
97
141
|
const agent = await callVisionAgent({
|
|
98
|
-
timeoutMs:
|
|
142
|
+
timeoutMs: KNOWLEDGE_PAGE_TIMEOUT_MS,
|
|
143
|
+
retries: KNOWLEDGE_PAGE_RETRIES,
|
|
99
144
|
system: systemPrompt,
|
|
100
145
|
text: promptPayload({
|
|
101
146
|
task: '从这一页提分笔记图片中逐项提取知识点、公式、例题线索和易错点。',
|
|
@@ -133,14 +178,48 @@ export async function extractChapterPage({ chapter, imagePath, pageIndex, force
|
|
|
133
178
|
exerciseHints: ['string']
|
|
134
179
|
}
|
|
135
180
|
}),
|
|
136
|
-
imagePaths: [imagePath]
|
|
181
|
+
imagePaths: [imagePath],
|
|
182
|
+
onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
|
|
183
|
+
const base = `第 ${pageIndex}/${pageCount || '?'} 页识别`;
|
|
184
|
+
if (phase === 'start') {
|
|
185
|
+
onProgress?.({
|
|
186
|
+
step: 'knowledge_extract.page.attempt',
|
|
187
|
+
message: `${base}:第 ${attempt}/${attempts} 次尝试。`,
|
|
188
|
+
pageIndex,
|
|
189
|
+
pageCount,
|
|
190
|
+
attempt,
|
|
191
|
+
attempts
|
|
192
|
+
});
|
|
193
|
+
}
|
|
194
|
+
if (phase === 'retry') {
|
|
195
|
+
onProgress?.({
|
|
196
|
+
step: 'knowledge_extract.page.retry',
|
|
197
|
+
message: `${base}遇到${retryReasonText(result?.reason)},${Math.round(delayMs / 1000)} 秒后自动重试。`,
|
|
198
|
+
pageIndex,
|
|
199
|
+
pageCount,
|
|
200
|
+
attempt,
|
|
201
|
+
attempts,
|
|
202
|
+
reason: result?.reason || null
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
}
|
|
137
206
|
});
|
|
138
207
|
if (!agent.ok) {
|
|
139
|
-
throw knowledgeExtractionError(
|
|
208
|
+
throw knowledgeExtractionError(
|
|
209
|
+
agent.reason || 'agent_failed',
|
|
210
|
+
`第 ${pageIndex}/${pageCount || '?'} 页 ${path.basename(imagePath)} 识别失败,已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
|
|
211
|
+
);
|
|
140
212
|
}
|
|
141
213
|
const extract = normalizePageExtract(chapter, imagePath, pageIndex, agent.data, 'agent');
|
|
142
214
|
await writeJson(outputPath, extract);
|
|
143
215
|
await writeJson(path.join(paths.knowledgeExtracts, chapter.id, `${path.basename(imagePath, path.extname(imagePath))}.json`), extract);
|
|
216
|
+
onProgress?.({
|
|
217
|
+
step: 'knowledge_extract.page.done',
|
|
218
|
+
message: `第 ${pageIndex}/${pageCount || '?'} 页识别完成,提取 ${extract.knowledgePoints.length} 个知识点。`,
|
|
219
|
+
pageIndex,
|
|
220
|
+
pageCount,
|
|
221
|
+
knowledgePointCount: extract.knowledgePoints.length
|
|
222
|
+
});
|
|
144
223
|
return extract;
|
|
145
224
|
}
|
|
146
225
|
|
|
@@ -227,13 +306,20 @@ export async function summarizeChapterExtraction({
|
|
|
227
306
|
chapter,
|
|
228
307
|
pageExtracts,
|
|
229
308
|
extractProfile = null,
|
|
230
|
-
resetLearningState = false
|
|
309
|
+
resetLearningState = false,
|
|
310
|
+
onProgress = null
|
|
231
311
|
}) {
|
|
232
312
|
const local = localMergeChapter(chapter, pageExtracts);
|
|
233
313
|
const normalizedProfile = normalizeExtractProfile(extractProfile || {});
|
|
234
314
|
const systemPrompt = await readPrompt('knowledge-summarize.system.md');
|
|
315
|
+
onProgress?.({
|
|
316
|
+
step: 'knowledge_extract.summary.start',
|
|
317
|
+
message: `正在合并 ${pageExtracts.length} 页提取结果,生成章节知识点。`,
|
|
318
|
+
pageCount: pageExtracts.length
|
|
319
|
+
});
|
|
235
320
|
const agent = await callChatAgent({
|
|
236
|
-
timeoutMs:
|
|
321
|
+
timeoutMs: KNOWLEDGE_SUMMARY_TIMEOUT_MS,
|
|
322
|
+
retries: KNOWLEDGE_SUMMARY_RETRIES,
|
|
237
323
|
temperature: 0.1,
|
|
238
324
|
system: systemPrompt,
|
|
239
325
|
user: promptPayload({
|
|
@@ -275,10 +361,32 @@ export async function summarizeChapterExtraction({
|
|
|
275
361
|
duplicateMerged: ['string']
|
|
276
362
|
}
|
|
277
363
|
}
|
|
278
|
-
})
|
|
364
|
+
}),
|
|
365
|
+
onAttempt: ({ phase, attempt, attempts, delayMs, result }) => {
|
|
366
|
+
if (phase === 'start') {
|
|
367
|
+
onProgress?.({
|
|
368
|
+
step: 'knowledge_extract.summary.attempt',
|
|
369
|
+
message: `章节汇总:第 ${attempt}/${attempts} 次尝试。`,
|
|
370
|
+
attempt,
|
|
371
|
+
attempts
|
|
372
|
+
});
|
|
373
|
+
}
|
|
374
|
+
if (phase === 'retry') {
|
|
375
|
+
onProgress?.({
|
|
376
|
+
step: 'knowledge_extract.summary.retry',
|
|
377
|
+
message: `章节汇总遇到${retryReasonText(result?.reason)},${Math.round(delayMs / 1000)} 秒后自动重试。`,
|
|
378
|
+
attempt,
|
|
379
|
+
attempts,
|
|
380
|
+
reason: result?.reason || null
|
|
381
|
+
});
|
|
382
|
+
}
|
|
383
|
+
}
|
|
279
384
|
});
|
|
280
385
|
if (!agent.ok || !Array.isArray(agent.data?.sections)) {
|
|
281
|
-
throw knowledgeExtractionError(
|
|
386
|
+
throw knowledgeExtractionError(
|
|
387
|
+
agent.reason || 'invalid_agent_response',
|
|
388
|
+
`章节汇总失败,已尝试 ${agent.attempts || 1} 次。${agent.detail || ''}`.trim()
|
|
389
|
+
);
|
|
282
390
|
}
|
|
283
391
|
const merged = agent.data;
|
|
284
392
|
const extractedAt = new Date().toISOString();
|
|
@@ -302,6 +410,11 @@ export async function summarizeChapterExtraction({
|
|
|
302
410
|
await writeJson(summaryPath(chapter.id), summary);
|
|
303
411
|
await writeJson(path.join(paths.knowledgeExtracts, chapter.id, 'summary.json'), summary);
|
|
304
412
|
const reset = resetLearningState ? await resetChapterLearningLoop(chapter.id) : null;
|
|
413
|
+
onProgress?.({
|
|
414
|
+
step: 'knowledge_extract.summary.done',
|
|
415
|
+
message: `章节知识点已生成,共 ${summary.knowledgePointCount} 个。`,
|
|
416
|
+
knowledgePointCount: summary.knowledgePointCount
|
|
417
|
+
});
|
|
305
418
|
return { knowledge: normalized, summary, reset };
|
|
306
419
|
}
|
|
307
420
|
|
|
@@ -310,7 +423,8 @@ export async function extractChapterKnowledge({
|
|
|
310
423
|
limitPages = 0,
|
|
311
424
|
force = false,
|
|
312
425
|
extractProfile = null,
|
|
313
|
-
resetLearningState = false
|
|
426
|
+
resetLearningState = false,
|
|
427
|
+
onProgress = null
|
|
314
428
|
} = {}) {
|
|
315
429
|
const chapters = await readJson(paths.chapters, []);
|
|
316
430
|
const selected = chapterId ? chapters.filter((chapter) => chapter.id === chapterId) : chapters;
|
|
@@ -319,21 +433,30 @@ export async function extractChapterKnowledge({
|
|
|
319
433
|
for (const chapter of selected) {
|
|
320
434
|
const images = await chapterImages(chapter);
|
|
321
435
|
const scopedImages = limitPages > 0 ? images.slice(0, limitPages) : images;
|
|
436
|
+
onProgress?.({
|
|
437
|
+
step: 'knowledge_extract.chapter.start',
|
|
438
|
+
message: `开始处理《${chapter.fullTitle}》,共 ${scopedImages.length} 页。`,
|
|
439
|
+
chapterId: chapter.id,
|
|
440
|
+
pageCount: scopedImages.length
|
|
441
|
+
});
|
|
322
442
|
const pageExtracts = [];
|
|
323
443
|
for (let index = 0; index < scopedImages.length; index += 1) {
|
|
324
444
|
pageExtracts.push(await extractChapterPage({
|
|
325
445
|
chapter,
|
|
326
446
|
imagePath: scopedImages[index],
|
|
327
447
|
pageIndex: index + 1,
|
|
448
|
+
pageCount: scopedImages.length,
|
|
328
449
|
force,
|
|
329
|
-
extractProfile: normalizedProfile
|
|
450
|
+
extractProfile: normalizedProfile,
|
|
451
|
+
onProgress
|
|
330
452
|
}));
|
|
331
453
|
}
|
|
332
454
|
results.push(await summarizeChapterExtraction({
|
|
333
455
|
chapter,
|
|
334
456
|
pageExtracts,
|
|
335
457
|
extractProfile: normalizedProfile,
|
|
336
|
-
resetLearningState
|
|
458
|
+
resetLearningState,
|
|
459
|
+
onProgress
|
|
337
460
|
}));
|
|
338
461
|
}
|
|
339
462
|
return results;
|
|
@@ -207,7 +207,7 @@ function normalizePageExtract(data, page) {
|
|
|
207
207
|
}
|
|
208
208
|
|
|
209
209
|
function practiceMismatchWarnings(pageExtracts) {
|
|
210
|
-
const mismatchPattern =
|
|
210
|
+
const mismatchPattern = /^(practice_mismatch\b|上传\s*PDF\s*与当前试卷不匹配)|题干明显不一致|题干不一致|题面不一致|同编号题干不一致|题号对应冲突|does\s+not\s+match\s+selected\s+practice/i;
|
|
211
211
|
return pageExtracts
|
|
212
212
|
.map((extract) => {
|
|
213
213
|
const warnings = (extract.warnings || []).filter((warning) => mismatchPattern.test(String(warning || '')));
|
|
@@ -9,6 +9,8 @@ import {
|
|
|
9
9
|
getKnowledgeBundle,
|
|
10
10
|
paths,
|
|
11
11
|
readJson,
|
|
12
|
+
relativeDataPath,
|
|
13
|
+
resolveStoredPath,
|
|
12
14
|
writeJson
|
|
13
15
|
} from './fileStore.js';
|
|
14
16
|
import { generatePracticeContent } from './practiceGenerator.js';
|
|
@@ -208,8 +210,8 @@ export async function ensurePracticeHtml(practice) {
|
|
|
208
210
|
error.status = 422;
|
|
209
211
|
throw error;
|
|
210
212
|
}
|
|
211
|
-
const htmlPath =
|
|
212
|
-
const answersHtmlPath =
|
|
213
|
+
const htmlPath = resolveStoredPath(practice.htmlPath);
|
|
214
|
+
const answersHtmlPath = resolveStoredPath(practice.answersHtmlPath);
|
|
213
215
|
const [html, answersHtml] = await Promise.all([
|
|
214
216
|
readFile(htmlPath, 'utf8').catch(() => ''),
|
|
215
217
|
readFile(answersHtmlPath, 'utf8').catch(() => '')
|
|
@@ -316,8 +318,8 @@ export async function createPractice({
|
|
|
316
318
|
const chapterPracticePath = path.join(chapterPaths.practices, `${id}.json`);
|
|
317
319
|
const chapterHtmlPath = path.join(chapterPaths.practices, `${id}.html`);
|
|
318
320
|
const chapterAnswersHtmlPath = path.join(chapterPaths.practices, `${id}.answers.html`);
|
|
319
|
-
practice.htmlPath =
|
|
320
|
-
practice.answersHtmlPath =
|
|
321
|
+
practice.htmlPath = relativeDataPath(chapterHtmlPath);
|
|
322
|
+
practice.answersHtmlPath = relativeDataPath(chapterAnswersHtmlPath);
|
|
321
323
|
await writeJson(chapterPracticePath, practice);
|
|
322
324
|
await writeFile(chapterHtmlPath, practiceQuestionsToHtml(practice), 'utf8');
|
|
323
325
|
await writeFile(chapterAnswersHtmlPath, practiceAnswersToHtml(practice), 'utf8');
|