studylens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,673 @@
1
+ const http = require('http');
2
+ const https = require('https');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+
6
// User configuration file, located under ../wiki/config relative to this module's directory.
const LLM_CONFIG_PATH = path.join(
  __dirname,
  '..',
  'wiki',
  'config',
  'llm-config.json',
);

// Template configuration file, located under ../config relative to this module's directory.
const LLM_CONFIG_TEMPLATE = path.join(
  __dirname,
  '..',
  'config',
  'llm-config.template.json',
);
8
+
9
/**
 * Load the LLM configuration.
 *
 * The file at LLM_CONFIG_PATH is tried first, then the one at LLM_CONFIG_TEMPLATE.
 * A candidate is skipped when it cannot be read, is not valid JSON, or its
 * top-level JSON value is not a plain object.
 *
 * @returns {object} The parsed configuration, or `{}` when no candidate is usable.
 */
function loadLLMConfig() {
  for (const file of [LLM_CONFIG_PATH, LLM_CONFIG_TEMPLATE]) {
    try {
      const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
      // Callers read properties such as `providers` straight off the result, so a
      // top-level `null`, array or scalar must never be handed back to them.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed;
      }
    } catch {
      // Deliberate best effort: fall through to the next candidate.
    }
  }
  return {};
}
16
+
17
/**
 * Write the LLM configuration to LLM_CONFIG_PATH as 2-space indented JSON,
 * creating the parent directory first when needed.
 *
 * @param {object} config - Configuration to serialise.
 * @throws Propagates any error raised by the synchronous fs calls.
 */
function saveLLMConfig(config) {
  // With `recursive: true` mkdirSync succeeds when the directory already exists,
  // so no separate existence check (and the race it implies) is needed.
  fs.mkdirSync(path.dirname(LLM_CONFIG_PATH), { recursive: true });
  fs.writeFileSync(LLM_CONFIG_PATH, JSON.stringify(config, null, 2), 'utf-8');
}
22
+
23
/**
 * Build the adapter object for one LLM backend.
 *
 * Every adapter exposes the same members:
 *   - `name`                               provider identifier
 *   - `url()`                              endpoint the request is POSTed to
 *   - `model()`                            model name to request
 *   - `headers()`                          HTTP request headers
 *   - `buildBody(messages, model, maxTokens)`  serialised JSON request body
 *   - `parseResponse(body)`                reply text extracted from the raw response body,
 *                                          or '' when the expected field is absent
 *
 * @param {string} name - 'agent-maestro', 'openai-compatible' or 'ollama'.
 * @param {object} [cfg] - Provider section of the configuration. A missing section
 *   (undefined or null) is treated as "use defaults" instead of throwing.
 * @returns {object|null} The adapter, or null for an unknown provider name.
 */
function buildProvider(name, cfg) {
  // Callers look sections up with `config.providers?.[name]`, which yields undefined
  // for a provider that has no section in the config file.
  const settings = cfg ?? {};
  const stripTrailingSlash = (value) => value.replace(/\/$/, '');
  // A base URL that already names the endpoint is used unchanged.
  const withEndpoint = (base, endpoint) => (base.endsWith(endpoint) ? base : `${base}${endpoint}`);

  if (name === 'agent-maestro') {
    const baseUrl = stripTrailingSlash(
      settings.baseUrl
        || process.env.LLM_PROXY_URL
        || process.env.ANTHROPIC_BASE_URL
        || 'http://localhost:23333/api/anthropic',
    );
    return {
      name: 'agent-maestro',
      url: () => withEndpoint(baseUrl, '/v1/messages'),
      model: () => settings.model || process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6',
      headers: () => ({ 'Content-Type': 'application/json' }),
      buildBody: (messages, model, maxTokens) => JSON.stringify({ model, max_tokens: maxTokens, messages }),
      parseResponse: (body) => JSON.parse(body).content?.[0]?.text || '',
    };
  }

  if (name === 'openai-compatible') {
    const baseUrl = stripTrailingSlash(settings.baseUrl || 'https://api.openai.com/v1');
    const apiKey = settings.apiKey || process.env.OPENAI_API_KEY || '';
    return {
      name: 'openai-compatible',
      url: () => withEndpoint(baseUrl, '/chat/completions'),
      model: () => settings.model || 'gpt-4o',
      headers: () => {
        const headers = { 'Content-Type': 'application/json' };
        // The Authorization header is only sent when a key is available.
        if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
        return headers;
      },
      buildBody: (messages, model, maxTokens) => JSON.stringify({ model, max_tokens: maxTokens, messages }),
      parseResponse: (body) => JSON.parse(body).choices?.[0]?.message?.content || '',
    };
  }

  if (name === 'ollama') {
    const baseUrl = stripTrailingSlash(settings.baseUrl || process.env.OLLAMA_URL || 'http://localhost:11434');
    return {
      name: 'ollama',
      url: () => `${baseUrl}/api/chat`,
      model: () => settings.model || process.env.OLLAMA_MODEL || 'llama3.2',
      headers: () => ({ 'Content-Type': 'application/json' }),
      // The max-token argument passed by callLLM is not forwarded for this backend.
      buildBody: (messages, model) => JSON.stringify({ model, messages, stream: false }),
      parseResponse: (body) => JSON.parse(body).message?.content || '',
    };
  }

  return null;
}
64
+
65
// Cached outcome of the most recent agent-maestro reachability probe:
// the result, when it was taken, and which base URL it was taken for.
let _maestroAvailable = null;
let _maestroCheckedAt = 0;
let _maestroCheckedUrl = null;

/**
 * Check whether the agent-maestro endpoint answers HTTP requests.
 *
 * A GET is sent to the provider's base URL with a 3 second timeout; receiving any
 * response counts as reachable, any error or timeout as unreachable. The outcome is
 * cached for 60 seconds per base URL.
 *
 * The base URL is resolved the same way buildProvider() resolves it for the
 * 'agent-maestro' provider (config, then LLM_PROXY_URL, then ANTHROPIC_BASE_URL,
 * then the localhost default) so the probe tests the endpoint that is actually used.
 *
 * @param {object} [config] - Configuration as returned by loadLLMConfig().
 * @returns {Promise<boolean>} true when the endpoint responded.
 */
async function probeAgentMaestro(config) {
  const now = Date.now();
  const cfg = config?.providers?.['agent-maestro'] || {};
  const baseUrl = (
    cfg.baseUrl
    || process.env.LLM_PROXY_URL
    || process.env.ANTHROPIC_BASE_URL
    || 'http://localhost:23333/api/anthropic'
  ).replace(/\/$/, '');

  const cacheIsFresh = _maestroAvailable !== null
    && _maestroCheckedUrl === baseUrl
    && now - _maestroCheckedAt < 60000;
  if (cacheIsFresh) return _maestroAvailable;

  let reachable;
  try {
    await new Promise((resolve, reject) => {
      // An unparsable URL throws here, which rejects the promise.
      const url = new URL(baseUrl);
      const mod = url.protocol === 'https:' ? https : http;
      const req = mod.request(url, { method: 'GET', timeout: 3000 }, (res) => {
        // The body is irrelevant; drain it so the socket is released.
        res.resume();
        resolve(true);
      });
      req.on('error', reject);
      req.on('timeout', () => {
        const err = new Error('probe timed out');
        reject(err);
        req.destroy(err);
      });
      req.end();
    });
    reachable = true;
  } catch {
    reachable = false;
  }

  _maestroAvailable = reachable;
  _maestroCheckedAt = now;
  _maestroCheckedUrl = baseUrl;
  return reachable;
}
92
+
93
/**
 * Resolve the ordered list of provider adapters to try for a task.
 *
 * Resolution order:
 *   1. `config.taskRouting[task]`, when it names a provider other than 'default' that
 *      is not disabled and that buildProvider() recognises: that provider alone.
 *   2. `config.defaultProvider` (default 'auto'):
 *      - 'auto': agent-maestro when enabled and reachable, then openai-compatible when
 *        enabled and an `apiKey` is configured, then ollama when enabled.
 *      - any other value: that provider first (its `enabled` flag is not consulted),
 *        followed by every other configured provider that is not disabled.
 *   3. When nothing was selected: agent-maestro and ollama built from defaults.
 *
 * A provider without a section in `config.providers` is built from defaults rather
 * than crashing on the missing section.
 *
 * @param {string} [task='default'] - Task name looked up in `config.taskRouting`.
 * @returns {Promise<object[]>} Adapters in the order they should be tried.
 */
async function getProvidersForTask(task = 'default') {
  const config = loadLLMConfig();
  const configured = config.providers || {};
  // Missing or null sections become {} so `.enabled` and buildProvider() are safe.
  const sectionFor = (name) => configured[name] || {};

  const routed = (config.taskRouting || {})[task] || 'default';
  if (routed !== 'default') {
    const section = sectionFor(routed);
    if (section.enabled !== false) {
      const provider = buildProvider(routed, section);
      if (provider) return [provider];
    }
  }

  const mode = config.defaultProvider || 'auto';
  const providers = [];

  if (mode === 'auto') {
    const maestro = sectionFor('agent-maestro');
    if (maestro.enabled !== false && await probeAgentMaestro(config)) {
      providers.push(buildProvider('agent-maestro', maestro));
    }
    const openai = sectionFor('openai-compatible');
    // NOTE(review): only a key in the config file enables this provider here, although
    // buildProvider() would also accept OPENAI_API_KEY; left unchanged on purpose.
    if (openai.enabled !== false && openai.apiKey) {
      providers.push(buildProvider('openai-compatible', openai));
    }
    const ollama = sectionFor('ollama');
    if (ollama.enabled !== false) {
      providers.push(buildProvider('ollama', ollama));
    }
  } else {
    providers.push(buildProvider(mode, sectionFor(mode)));
    // Remaining configured providers act as fallbacks.
    for (const name of Object.keys(configured)) {
      if (name === mode) continue;
      const section = sectionFor(name);
      if (section.enabled !== false) providers.push(buildProvider(name, section));
    }
  }

  // buildProvider() returns null for unknown names; drop those.
  const usable = providers.filter(Boolean);
  if (usable.length > 0) return usable;

  return [buildProvider('agent-maestro', {}), buildProvider('ollama', {})].filter(Boolean);
}
143
+
144
// Legacy compatibility: adapters for agent-maestro and ollama built with an empty
// configuration section, in that order.
const DEFAULT_PROVIDERS = ['agent-maestro', 'ollama']
  .map((providerName) => buildProvider(providerName, {}))
  .filter(Boolean);
149
+
150
/**
 * Look up the prompt overrides for a subject in ../config/prompts.json.
 *
 * The lookup key is the part of `subject` before the first '-'.
 *
 * @param {string} [subject] - Subject label; a falsy value is treated as ''.
 * @returns {object} The matching entry of `subjects` in the prompts file, or `{}` when
 *   there is none or when the file cannot be read or parsed.
 */
function loadSubjectPrompts(subject) {
  try {
    const file = path.join(__dirname, '..', 'config', 'prompts.json');
    const raw = fs.readFileSync(file, 'utf-8');
    const allPrompts = JSON.parse(raw);
    const [discipline] = (subject || '').split('-');
    const overrides = allPrompts.subjects?.[discipline];
    return overrides || {};
  } catch {
    // Any failure above means "no overrides".
    return {};
  }
}
158
+
159
/**
 * Escape double quotes that appear inside JSON string values without a backslash.
 *
 * Heuristic: while inside a string, a `"` closes the string only when the next
 * non-whitespace character is one of `:` `,` `}` `]`; any other `"` is rewritten
 * as `\"`. Existing backslash escapes are copied through unchanged.
 */
function escapeStrayQuotes(raw) {
  let out = '';
  let inString = false;
  let escaped = false;
  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (escaped) { out += ch; escaped = false; continue; }
    if (ch === '\\' && inString) { out += ch; escaped = true; continue; }
    if (ch !== '"') { out += ch; continue; }
    if (!inString) { inString = true; out += ch; continue; }
    // Look ahead to the next non-whitespace character without copying the tail.
    let j = i + 1;
    while (j < raw.length && /\s/.test(raw[j])) j++;
    const next = raw[j];
    if (next === ':' || next === ',' || next === '}' || next === ']') {
      inString = false;
      out += ch;
    } else {
      out += '\\"';
    }
  }
  return out;
}

/**
 * Last-resort recovery: scan `raw` for runs of `"key": "value"` pairs that list
 * `keys` in order (comma separated) and build one flat object per run.
 * Straight and typographic double quotes are both accepted as delimiters.
 */
function recoverItemsByKeys(raw, keys) {
  const quote = '["“”„‟]';
  const notQuote = '[^"“”„‟]';
  // Key names are matched literally, whatever characters they contain.
  const literal = (key) => String(key).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const source = keys
    .map((key) => `${quote}${literal(key)}${quote}\\s*:\\s*${quote}(${notQuote}+)${quote}`)
    .join('\\s*,\\s*');
  const items = [];
  for (const m of raw.matchAll(new RegExp(source, 'g'))) {
    items.push(Object.fromEntries(keys.map((key, i) => [key, m[i + 1]])));
  }
  return items;
}

/**
 * Extract a JSON value from free-form LLM output.
 *
 * Markdown code fences are removed, then the widest `[...]` (when `isArray`) or
 * `{...}` span is parsed. When strict parsing fails, the span is repaired (trailing
 * commas dropped, raw newlines after an opening string quote escaped, stray inner
 * quotes escaped) and parsed again. When that fails too and `repairKeys` is given,
 * flat objects are rebuilt from `"key": "value"` runs.
 *
 * @param {string} text - Raw model output.
 * @param {object} [options]
 * @param {boolean} [options.isArray=false] - Look for an array instead of an object.
 * @param {string[]} [options.repairKeys] - Keys, in order, for the last-resort recovery.
 *   Recovered values are always strings and the result is always an array.
 * @returns {*} The parsed value, the recovered array, or null when nothing usable was
 *   found (including when `text` is not a string).
 */
function extractJSON(text, { isArray = false, repairKeys } = {}) {
  if (typeof text !== 'string') return null;

  const cleaned = text.replace(/```json\s*/g, '').replace(/```\s*/g, '').trim();
  const match = cleaned.match(isArray ? /\[[\s\S]*\]/ : /\{[\s\S]*\}/);
  if (!match) return null;

  try {
    return JSON.parse(match[0]);
  } catch {
    // Fall through to the repair passes.
  }

  const repaired = escapeStrayQuotes(
    match[0]
      .replace(/,\s*([\]}])/g, '$1')
      .replace(/(?<=:\s*"[^"]*)\n/g, '\\n'),
  );
  try {
    return JSON.parse(repaired);
  } catch {
    // Fall through to key-based recovery.
  }

  // An empty key list would build an empty pattern, so it is treated as "no keys".
  if (Array.isArray(repairKeys) && repairKeys.length > 0) {
    const items = recoverItemsByKeys(repaired, repairKeys);
    if (items.length > 0) return items;
  }
  return null;
}
206
+
207
/**
 * POST `body` to `urlStr` and resolve with the response body as text.
 *
 * @param {string} urlStr - Absolute http: or https: URL.
 * @param {object} [options] - Only `options.headers` is read.
 * @param {string|Buffer} body - Request payload.
 * @param {number} [timeout=120000] - Socket timeout in milliseconds.
 * @returns {Promise<string>} The response body.
 *   Rejects with `HTTP <status>: <first 200 chars of body>` for status >= 400, with
 *   `timeout` when the socket timeout fires, and with the underlying error on
 *   connection failure, an invalid URL, or a response cut off before completion.
 */
function httpRequest(urlStr, options, body, timeout = 120000) {
  return new Promise((resolve, reject) => {
    // An unparsable URL throws here, which rejects the promise.
    const url = new URL(urlStr);
    const transport = url.protocol === 'https:' ? https : http;
    const req = transport.request(url, { method: 'POST', headers: options?.headers, timeout }, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('error', reject);
      // A connection dropped mid-body never emits 'end'; without this the promise
      // would stay pending forever. After a normal 'end' this reject is a no-op.
      res.on('close', () => {
        if (!res.complete) reject(new Error('response aborted before completion'));
      });
      res.on('end', () => {
        const text = Buffer.concat(chunks).toString();
        if (res.statusCode >= 400) {
          reject(new Error(`HTTP ${res.statusCode}: ${text.slice(0, 200)}`));
          return;
        }
        resolve(text);
      });
    });
    req.on('error', reject);
    req.on('timeout', () => {
      // The 'timeout' event only signals inactivity; the request must be torn down here.
      const err = new Error('timeout');
      reject(err);
      req.destroy(err);
    });
    req.write(body);
    req.end();
  });
}
226
+
227
+ async function callLLM(messages, { maxTokens = 4096, providers, task, timeout } = {}) {
228
+ const providerList = providers || await getProvidersForTask(task || 'default');
229
+ const errors = [];
230
+ for (const p of providerList) {
231
+ try {
232
+ const model = typeof p.model === 'function' ? p.model() : p.model;
233
+ const url = typeof p.url === 'function' ? p.url() : p.url;
234
+ const body = p.buildBody(messages, model, maxTokens);
235
+ const raw = await httpRequest(url, { headers: p.headers() }, body, timeout);
236
+ return p.parseResponse(raw);
237
+ } catch (err) {
238
+ errors.push(`${p.name}: ${err.message}`);
239
+ }
240
+ }
241
+ throw new Error(`All LLM providers failed:\n${errors.join('\n')}`);
242
+ }
243
+
244
/**
 * Ask the LLM to turn study notes into structured knowledge entries.
 *
 * The instruction text comes from the subject's `analyzePrompt` override when one is
 * configured, otherwise from the built-in English prompt below.
 *
 * @param {string} text - The notes to analyse.
 * @param {string} [subject=''] - Optional subject hint, also used to look up overrides.
 * @param {number} [maxPoints] - When truthy, adds an instruction capping the number of points.
 * @returns {Promise<Array>} The array parsed from the model's reply.
 * @throws {Error} 'LLM did not return valid JSON array' when no array could be parsed
 *   (diagnostics are written to console.error first).
 */
async function analyze(text, subject = '', maxPoints) {
  const { analyzePrompt } = loadSubjectPrompts(subject);

  const instructions = analyzePrompt || `You are a knowledge extraction assistant for a student. Analyze the following study notes and extract structured knowledge entries.

For each distinct knowledge point, return a JSON array of objects with:
- "title": concise title (under 20 chars)
- "content": the knowledge point explained clearly
- "subject": precise subject classification (see rules below)
- "tags": array of relevant tags — include ALL of the following dimensions:
1. Core concepts: key terms, names, formulas (e.g. "科举制", "赵匡胤", "勾股定理")
2. Category dimensions: assign multi-dimensional category tags based on the subject area:
- For history: add tags from these dimensions where applicable:
"政治制度", "军事战争", "经济发展", "民族关系", "对外交流", "科技发明", "文化艺术", "社会生活", "人物"
- For math: "代数", "几何", "概率", "函数", "公式", "定理", "证明"
- For physics: "力学", "电磁", "热学", "光学", "实验", "公式"
- For other subjects: infer appropriate dimensional tags
3. Connections: tags that link to related knowledge across different categories (e.g. a trade route entry could tag both "经济发展" and "对外交流")

Subject classification rules:
- For history: use specific dynasty like "历史-隋朝", "历史-唐朝", "历史-北宋", "历史-南宋", "历史-辽", "历史-西夏", "历史-金", "历史-元朝" etc. Do NOT use combined periods like "历史-隋唐".
- For other subjects: use patterns like "数学-代数", "物理-力学", "化学-有机" etc.
- Each knowledge point must belong to exactly ONE specific category.`;

  let limitNote = '';
  if (maxPoints) {
    limitNote = `\nIMPORTANT: Extract at most ${maxPoints} knowledge points. Prioritize high-level concepts over fine-grained details. If the material contains more than ${maxPoints} points, merge related ones.\n`;
  }

  let subjectHint = '';
  if (subject) {
    subjectHint = `User suggested subject: "${subject}" — use this as a hint but still classify precisely.`;
  }

  const prompt = `${instructions}
${limitNote}
${subjectHint}

Input notes:
${text}

Return ONLY valid JSON array, no other text.`;

  const reply = await callLLM([{ role: 'user', content: prompt }], { task: 'analyze' });
  const entries = extractJSON(reply, { isArray: true });
  if (entries) return entries;

  // Parsing failed: log enough of the reply to diagnose why, then give up.
  console.error('[analyze] LLM raw response (first 500):', reply.slice(0, 500));
  const stripped = reply.replace(/```json\s*/g, '').replace(/```\s*/g, '').trim();
  const candidate = stripped.match(/\[[\s\S]*\]/);
  if (!candidate) {
    console.error('[analyze] no array match in cleaned text');
  } else {
    try {
      JSON.parse(candidate[0]);
    } catch (e) {
      console.error('[analyze] JSON parse error:', e.message, 'near:', candidate[0].slice(0, 200));
    }
  }
  throw new Error('LLM did not return valid JSON array');
}
291
+
292
/**
 * Ask the LLM which existing entries are related to a new entry.
 *
 * Each existing entry is summarised as `[id] title: <first 80 chars of content>`.
 *
 * @param {object} newEntry - Entry with `title` and `content`.
 * @param {object[]} existingEntries - Entries with `id`, `title` and optionally `content`.
 * @returns {Promise<Array>} Array parsed from the reply (the prompt asks for
 *   `{id, relation}` objects); `[]` when there are no existing entries or the reply
 *   could not be parsed.
 */
async function findConnections(newEntry, existingEntries) {
  // Nothing to compare against (including a missing list): skip the LLM call.
  if (!Array.isArray(existingEntries) || existingEntries.length === 0) return [];

  const existing = existingEntries
    // An entry without content is listed by title only rather than crashing the call.
    .map((e) => `[${e.id}] ${e.title}: ${(e.content || '').slice(0, 80)}`)
    .join('\n');

  const prompt = `Given a new knowledge entry and existing entries, find related ones.

New entry: "${newEntry.title}" — ${newEntry.content}

Existing entries:
${existing}

Return a JSON array of objects: [{"id": "existing_entry_id", "relation": "brief description of how they relate"}]
Only include genuinely related entries. Return empty array [] if none are related.
Return ONLY valid JSON, no other text.`;

  const result = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 1024, task: 'analyze' });
  return extractJSON(result, { isArray: true }) || [];
}
309
+
310
/**
 * Answer a student's question, optionally grounded in their notes and earlier turns.
 *
 * The instructions are sent as the first user message, followed by a canned assistant
 * acknowledgement, the replayed history and finally the new question.
 *
 * @param {string} question - The new question.
 * @param {object[]} [contextEntries=[]] - Notes with `title`, `subject`, `content`.
 * @param {object[]} [history=[]] - Earlier turns with `question` and optionally
 *   `answer` / `suggestedCards`.
 * @returns {Promise<object>} The object parsed from the reply, with `suggestedCards`
 *   guaranteed to be an array. When the reply is not parseable JSON the result is
 *   `{ answer, suggestedCards: [] }`, where `answer` is the text of the "answer"
 *   field if it can be located, otherwise the raw reply.
 */
async function askQuestion(question, contextEntries = [], history = []) {
  // Defaults only cover `undefined`; treat an explicit null the same way.
  const notes = contextEntries ?? [];
  const turns = history ?? [];

  const context = notes.length > 0
    ? notes.map((e) => `【${e.title}】(${e.subject}) ${e.content}`).join('\n\n')
    : '';

  const systemPrompt = `You are an expert study assistant with deep knowledge across all subjects. A student is studying and asks you questions.

IMPORTANT: Use your OWN comprehensive knowledge to answer thoroughly and accurately. The student's notes are supplementary context, not the boundary of your answer.

${context ? `The student's existing study notes for reference:\n${context}\n` : ''}

Instructions:
1. Answer using your full knowledge — be thorough, accurate, and educational
2. If the student has relevant notes, reference them to build connections
3. Use comparisons, analysis, and specific facts/data where appropriate
4. Write in Chinese, suitable for a middle/high school student
5. Suggest knowledge cards that capture KEY points — NEW knowledge beyond existing notes
6. In follow-up turns, refine/expand based on the student's feedback. Accumulate ALL worthy knowledge cards from the entire conversation, not just the latest turn.

Return a JSON object:
{
"answer": "Your comprehensive answer in Chinese...",
"suggestedCards": [
{
"title": "card title (under 20 chars)",
"content": "knowledge point explained clearly",
"subject": "precise subject like 历史-唐朝",
"tags": ["relevant", "tags"]
}
]
}

CRITICAL: The answer field must be PLAIN TEXT only — no markdown formatting (no **, no ##, no -).
Return ONLY valid JSON, no other text. Do not wrap in code fences.`;

  const messages = [
    { role: 'user', content: systemPrompt },
    { role: 'assistant', content: '{"answer": "好的,我准备好了,请提问。", "suggestedCards": []}' },
  ];
  for (const h of turns) {
    messages.push({ role: 'user', content: h.question });
    // Earlier answers are replayed in the same JSON shape the model is asked to produce.
    if (h.answer) {
      messages.push({
        role: 'assistant',
        content: JSON.stringify({ answer: h.answer, suggestedCards: h.suggestedCards || [] }),
      });
    }
  }
  messages.push({ role: 'user', content: question });

  const result = await callLLM(messages, { maxTokens: 4096, task: 'qa' });
  const parsed = extractJSON(result);
  if (parsed) {
    // The model may omit the cards or send null; hand callers an array either way.
    return { ...parsed, suggestedCards: Array.isArray(parsed.suggestedCards) ? parsed.suggestedCards : [] };
  }

  const ansMatch = result.match(/"answer"\s*:\s*"([\s\S]*?)"\s*,\s*"suggestedCards"/);
  if (!ansMatch) return { answer: result, suggestedCards: [] };

  let answer;
  try {
    // Decode every JSON escape (\", \\, \t, \uXXXX, ...), not just \n. Raw newlines
    // are escaped first because they are not allowed inside a JSON string.
    answer = JSON.parse(`"${ansMatch[1].replace(/\r?\n/g, '\\n')}"`);
  } catch {
    // The captured text is not a valid JSON string body; decode newlines only.
    answer = ansMatch[1].replace(/\\n/g, '\n');
  }
  return { answer, suggestedCards: [] };
}
361
+
362
/**
 * Ask the LLM for the changes needed to reorganise entries per a student's instruction.
 *
 * Each entry is summarised as one line with its id, title, subject, tags and the
 * first 60 characters of its content.
 *
 * @param {string} instruction - The student's instruction, quoted into the prompt.
 * @param {object[]} entries - Entries with `id`, `title`, `subject`, `tags`, `content`.
 * @returns {Promise<Array>} Array parsed from the reply (the prompt asks for "update"
 *   and "merge" change objects); `[]` when there are no entries or the reply could
 *   not be parsed.
 */
async function restructure(instruction, entries) {
  // Same convention as findConnections/checkDuplicates: no entries, no LLM call.
  if (!Array.isArray(entries) || entries.length === 0) return [];

  const entrySummary = entries
    .map((e) => {
      // An entry without content is summarised with an empty excerpt.
      const excerpt = (e.content || '').slice(0, 60);
      return `[${e.id}] title="${e.title}" subject="${e.subject}" tags=${JSON.stringify(e.tags)} content="${excerpt}"`;
    })
    .join('\n');

  const prompt = `You are a knowledge graph organizer. A student has the following knowledge entries and wants to restructure them.

Student's instruction: "${instruction}"

Current entries:
${entrySummary}

Based on the instruction, return a JSON array of changes to make. Each change is an object:
- "action": "update" (modify subject/tags/title of existing entry) or "merge" (combine entries) or "split" (split one entry into multiple)
- For "update": { "action": "update", "id": "entry_id", "subject": "new_subject", "tags": ["new","tags"], "title": "new_title_if_changed" }
- For "merge": { "action": "merge", "ids": ["id1","id2",...], "merged_title": "combined title", "merged_content": "combined content", "subject": "subject", "tags": ["tags"] }
- For "split": not commonly needed, skip for now

Only include entries that actually need changes. Return ONLY valid JSON array, no other text.
If no changes are needed, return [].`;

  const result = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 4096, task: 'analyze' });
  return extractJSON(result, { isArray: true }) || [];
}
386
+
387
/**
 * Ask the LLM for a visualisation spec (comparison table, timeline or tree) of a Q&A exchange.
 *
 * @param {string} question - The student's question.
 * @param {string} answer - The answer that was given.
 * @param {object[]} [cards] - Suggested cards; `title`, `content` and `subject` of each
 *   are included in the prompt. A missing or non-array value is treated as no cards.
 * @param {*} [relatedEntries] - Accepted for interface compatibility; not used.
 * @returns {Promise<object>} The object parsed from the reply, or the empty tree
 *   `{ type: 'tree', title: '', branches: [] }` when the reply holds no parseable JSON.
 */
async function buildQAMindMap(question, answer, cards, relatedEntries) {
  const cardList = Array.isArray(cards) ? cards : [];
  const cardsLine = cardList.length > 0
    ? `Cards: ${JSON.stringify(cardList.map((c) => ({ title: c.title, content: c.content, subject: c.subject })))}`
    : '';
  const context = [`Question: ${question}`, `Answer: ${answer}`, cardsLine]
    .filter(Boolean)
    .join('\n');

  const prompt = `Based on this Q&A exchange, create a structured visualization to help a student understand the answer.

${context}

Detect the type of question and generate the appropriate visualization:

1. If it's a COMPARISON question (对比, 区别, 异同, vs, 不同, 对应, 差别):
Return:
{
"type": "comparison",
"title": "对比标题",
"columns": [
{ "header": "左侧标题", "color": "#4285f4", "items": [
{ "category": "维度1", "content": "内容" },
{ "category": "维度2", "content": "内容" }
]},
{ "header": "右侧标题", "color": "#ea4335", "items": [
{ "category": "维度1", "content": "内容" },
{ "category": "维度2", "content": "内容" }
]}
],
"summary": "一句话总结核心区别"
}

2. If it's a TIMELINE/PROCESS question (过程, 经过, 发展, 变化, 顺序):
Return:
{
"type": "timeline",
"title": "标题",
"steps": [
{ "label": "阶段名", "content": "描述", "date": "时间(可选)" }
]
}

3. For other questions (concepts, explanations, analysis):
Return:
{
"type": "tree",
"title": "中心主题",
"branches": [
{ "label": "分支1", "children": [
{ "label": "要点", "detail": "说明" }
]}
]
}

Rules:
- All text in Chinese, concise and clear
- For comparison: items MUST align by category across columns for easy side-by-side reading
- For tree: max 4 branches, max 4 children each
- Return ONLY valid JSON, no other text.`;

  const result = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 4096, task: 'qa' });
  const parsed = extractJSON(result);
  if (!parsed) {
    console.warn('[buildQAMindMap] no JSON found');
    return { type: 'tree', title: '', branches: [] };
  }
  return parsed;
}
451
+
452
/**
 * Ask the LLM for study questions about one knowledge entry.
 *
 * The question guidance comes from the subject's `questionsPrompt` override when one
 * is configured, otherwise from the built-in category list. Questions already present
 * in `existingQaHistory` are listed in the prompt so the model avoids repeating them.
 *
 * @param {object} entry - Entry with `title`, `subject`, `content` and optionally `tags`.
 * @param {object[]} [existingQaHistory=[]] - Earlier items; only `question` is read.
 * @returns {Promise<Array>} Array parsed or recovered from the reply, or `[]` when
 *   nothing usable was found (the start of the reply is logged in that case).
 */
async function generateSmartQuestions(entry, existingQaHistory = []) {
  const { questionsPrompt } = loadSubjectPrompts(entry.subject);

  let alreadyAsked = '';
  if (existingQaHistory.length > 0) {
    const bullets = existingQaHistory.map((item) => `- ${item.question}`).join('\n');
    alreadyAsked = `\n以下问题已经被回答过,请不要生成与这些问题重复或高度相似的问题:\n${bullets}\n`;
  }

  const builtInGuidance = `生成的问题应该覆盖:
1. 基本概念(是什么)
2. 原因分析(为什么)
3. 影响/意义(有什么影响)
4. 比较对比(与其他知识的关联)
5. 深入思考(评价/启示)

返回JSON数组,每个元素: {"question": "问题内容", "category": "概念/原因/影响/对比/思考"}`;

  const guidance = questionsPrompt || builtInGuidance;
  const tagList = (entry.tags || []).join(', ');

  const prompt = `你是一个学习辅导助手。根据以下知识点,生成5个有深度的学习问题,帮助学生深入理解这个知识点。

知识点标题: ${entry.title}
学科分类: ${entry.subject}
内容: ${entry.content}
标签: ${tagList}
${alreadyAsked}
${guidance}
重要:问题文本中如果要引用术语,请用「」而不是引号,避免JSON解析错误。
只返回JSON,不要其他文字。`;

  const reply = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 1024, task: 'questions' });
  const questions = extractJSON(reply, { isArray: true, repairKeys: ['question', 'category'] });
  if (questions) return questions;

  console.error('[smartQ] no match in LLM response:', reply.slice(0, 200));
  return [];
}
485
+
486
// Directory receiving one JSON file per topic-page generation run; created at load time.
const TOPIC_LOG_DIR = path.join(__dirname, '..', 'logs', 'topic-gen');
try {
  fs.mkdirSync(TOPIC_LOG_DIR, { recursive: true });
} catch (err) {
  // Diagnostics are optional: an unwritable location must not prevent the module from loading.
  console.warn(`[topic-gen] cannot create log directory ${TOPIC_LOG_DIR}: ${err.message}`);
}

/**
 * Record one topic-generation run: write `data` as pretty-printed JSON to
 * `<TOPIC_LOG_DIR>/<ISO timestamp>_<label>.json` and print a one-line size summary.
 *
 * A failed file write is reported with console.warn and otherwise ignored, so that
 * logging can never discard the work being logged.
 *
 * @param {string} label - Used in the file name and the console line.
 * @param {object} data - Run statistics; `promptLength`, `qaContextLength`,
 *   `existingTextLength` and `resultLength` are echoed to the console.
 */
function logTopicGen(label, data) {
  // ':' and '.' are replaced so the timestamp can be used in a file name.
  const ts = new Date().toISOString().replace(/[:.]/g, '-');
  const logPath = path.join(TOPIC_LOG_DIR, `${ts}_${label}.json`);
  try {
    fs.writeFileSync(logPath, JSON.stringify(data, null, 2), 'utf-8');
  } catch (err) {
    console.warn(`[topic-gen] cannot write ${logPath}: ${err.message}`);
  }
  console.log(`[topic-gen] ${label}: prompt=${data.promptLength} chars, qaContext=${data.qaContextLength} chars, existingText=${data.existingTextLength} chars, result=${data.resultLength} chars`);
}
495
+
496
/** Strip Markdown code fences from an LLM reply and make sure the result is a full HTML document. */
function normalizeTopicHTML(raw) {
  const html = raw.replace(/```html\s*/g, '').replace(/```\s*/g, '').trim();
  // Case-insensitive, so a reply that starts with a lower-case doctype is not wrapped again.
  if (/<html|<!doctype/i.test(html)) return html;
  return `<!DOCTYPE html><html><head><meta charset="utf-8"></head><body style="background:#0f1117;color:#e0e0e0;font-family:system-ui;padding:24px">${html}</body></html>`;
}

/** Number of characters that remain once every tag is removed and the text is trimmed. */
function visibleTextLength(html) {
  return html.replace(/<[^>]*>/g, '').trim().length;
}

/**
 * Generate (or revise) the HTML topic page for a knowledge entry.
 *
 * `mode` selects how the prompt is built:
 *   - 'theme-light' / 'theme-dark': recolour the existing HTML only; no QA, no related entries
 *   - 'annotation': existing HTML plus the annotation in `requirements`; no QA
 *   - 'merge':      text of the existing HTML plus merge instructions in `requirements`; no QA
 *   - 'regenerate': ignore the existing HTML, fresh generation with QA
 *   - anything else (including 'incremental' and ''): QA plus, when present, the text
 *     of the existing HTML
 *
 * When the first reply contains fewer than 50 characters of visible text the request
 * is sent once more and the reply with more visible text is kept. Run statistics are
 * handed to logTopicGen(); a logging failure is reported and does not affect the result.
 *
 * @param {object} entry - Entry with `title`, `subject`, `content`.
 * @param {object[]} [relatedEntries=[]] - Related entries (`title`, `content`).
 * @param {object[]} [qaHistory=[]] - QA items (`question`, `answer`, optional `category`);
 *   only items with an answer are used.
 * @param {string} [existingHTML=''] - Current page, if any.
 * @param {string} [requirements=''] - Extra user instructions / annotation / merge notes.
 * @param {string} [mode=''] - See above.
 * @returns {Promise<string>} A complete HTML document.
 */
async function generateTopicHTML(entry, relatedEntries = [], qaHistory = [], existingHTML = '', requirements = '', mode = '') {
  // Defaults only cover `undefined`; treat an explicit null the same way.
  const relatedList = relatedEntries ?? [];
  const qaList = qaHistory ?? [];
  const currentHTML = existingHTML ?? '';
  const extra = requirements ?? '';

  const related = relatedList
    .map((e) => `【${e.title}】${(e.content || '').slice(0, 100)}`)
    .join('\n');

  const isThemeMode = mode === 'theme-light' || mode === 'theme-dark';
  const skipQA = mode === 'annotation' || mode === 'merge' || isThemeMode;
  const skipExisting = mode === 'regenerate';
  const answered = qaList.filter((h) => h.answer);

  let qaContext = '';
  if (!skipQA && answered.length > 0) {
    // A Map keeps arbitrary category names (e.g. "constructor") safe as keys.
    const byCategory = new Map();
    for (const item of answered) {
      const category = item.category || '其他';
      if (!byCategory.has(category)) byCategory.set(category, []);
      byCategory.get(category).push(item);
    }
    qaContext = [...byCategory]
      .map(([category, items]) => {
        const block = items
          .map((h) => `Q: ${h.question}\nA: ${(h.answer || '').slice(0, 800)}`)
          .join('\n\n');
        return `【${category}类问题】\n${block}`;
      })
      .join('\n\n---\n\n');
  }

  const isUpdate = !skipExisting && !!currentHTML;
  // Tag-stripped, whitespace-collapsed text of the current page, capped at 5000 chars.
  const existingText = isUpdate
    ? currentHTML.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').slice(0, 5000)
    : '';

  let taskDescription;
  let contentSection;
  let requirementsList;

  if (isThemeMode) {
    const themeHTML = currentHTML.slice(0, 15000);
    const isLight = mode === 'theme-light';
    taskDescription = `将现有专题页面的配色方案转换为${isLight ? '浅色' : '深色'}主题`;
    contentSection = `当前专题页完整HTML:\n${themeHTML}\n`;
    requirementsList = `要求:
1. 【最重要】只修改颜色相关的CSS属性(background, color, border-color, box-shadow等),严格保留所有文字内容、HTML结构、布局不变
2. ${isLight ? '浅色主题:背景使用白色系(#ffffff, #f8f9fa, #f3f4f6),文字使用深色(#1a1a1a, #333, #111),链接蓝色(#1a56db),代码块浅灰背景(#f3f4f6)' : '深色主题:背景使用深色(#0f1117, #161822, #1c1f2e),文字使用浅色(#e0e0e0, #fff, #bbb),链接蓝色(#4285f4)'}
3. 保持渐变效果但调整为${isLight ? '浅色系' : '深色系'}渐变
4. 表格、卡片、引用块等组件的边框和背景也要相应调整
5. 直接输出转换后的完整HTML页面`;
  } else if (mode === 'annotation') {
    const annotationHTML = currentHTML.slice(0, 15000);
    taskDescription = '根据用户批注对现有专题页面做局部修改';
    contentSection = `当前专题页完整HTML(必须在此基础上做最小化修改):\n${annotationHTML}\n`;
    requirementsList = `要求:
1. 【最重要】仅根据用户批注做最小化局部修改,严格保留原有的所有内容、章节结构、CSS样式、颜色、强调效果
2. 直接输出修改后的完整HTML页面,保持原有的内联CSS和所有样式不变
3. 不要删除任何批注未提及的内容
4. 不要改变任何未涉及部分的格式、颜色、渐变、阴影等视觉效果
5. 只修改批注明确要求修改的部分
6. 【用户批注】${extra}`;
  } else if (mode === 'merge') {
    taskDescription = '合并多个版本的专题页内容';
    contentSection = `基础版本内容:\n${existingText}\n`;
    requirementsList = `要求:
1. 合并两个版本的内容,保留所有有价值的信息
2. 生成完整的HTML页面(含内联CSS),适合iframe嵌入
3. 深色主题(背景 #0f1117,文字 #e0e0e0)
4. 分章节展示,结构清晰
5. 中文内容,适合中学生阅读
6. 页面宽度100%,配色美观,使用渐变和阴影效果
7. 【合并要求】${extra}`;
  } else {
    taskDescription = `基于以下知识点和相关资料,${isUpdate ? '更新并扩充' : '生成'}一个美观的HTML专题页面`;
    contentSection = '';
    if (qaContext) contentSection += `=== 核心问答内容(必须全部融入专题页) ===\n${qaContext}\n`;
    if (isUpdate) contentSection += `当前专题页内容(需要在此基础上扩充和完善):\n${existingText}\n请在现有内容的基础上扩充,保留原有结构,将新的问答内容融入对应章节。\n`;
    requirementsList = `要求:
1. 生成完整的HTML页面(含内联CSS),适合iframe嵌入
2. 深色主题(背景 #0f1117,文字 #e0e0e0)
3. 分章节展示:导语→背景→核心内容→影响/意义→总结
4. 使用清晰的排版:标题、卡片、分隔线、高亮重点
5. 中文内容,适合中学生阅读
6. 使用你自己的知识补充完整内容,不要局限于提供的材料
7. 页面宽度100%,不要设置max-width限制,内容撑满整个页面宽度,body使用padding: 16px 24px
8. 配色美观,使用渐变和阴影效果
${qaContext ? '9. 【重要】上面的问答内容是学生深入探索的结果,必须将每个问答的核心答案完整融入专题页对应章节中,不可遗漏任何一个问答\n' : ''}${extra ? `10. 【用户特别要求】${extra}` : ''}`;
  }

  const prompt = `你是一个教育内容设计师。${taskDescription}。

主题知识点:
标题: ${entry.title}
学科: ${entry.subject}
内容: ${entry.content}

${related && !skipQA ? `相关知识点:\n${related}\n` : ''}
${contentSection}
${requirementsList}

只返回HTML代码,不要包裹在代码块中。`;

  const logData = {
    entryTitle: entry.title,
    entrySubject: entry.subject,
    mode: mode || 'default',
    isUpdate,
    requirements: (extra || '').slice(0, 200),
    relatedLength: related.length,
    qaContextLength: qaContext.length,
    existingTextLength: existingText.length,
    promptLength: prompt.length,
    qaCount: answered.length,
    existingHTMLLength: currentHTML.length,
  };
  console.log(`[topic-gen] START: "${entry.title}" mode=${mode || 'default'} isUpdate=${isUpdate} prompt=${prompt.length} chars`);

  const requestPage = () => callLLM(
    [{ role: 'user', content: prompt }],
    { maxTokens: 16384, task: 'topicPage', timeout: 300000 },
  );

  let html = normalizeTopicHTML(await requestPage());
  const firstTextLength = visibleTextLength(html);
  if (firstTextLength < 50) {
    console.log(`[topic-gen] RETRY: first result too short (${firstTextLength} chars text)`);
    logData.firstResultLength = html.length;
    logData.firstResultTextLength = firstTextLength;
    logData.firstResultPreview = html.slice(0, 500);

    const retryHtml = normalizeTopicHTML(await requestPage());
    const retryTextLength = visibleTextLength(retryHtml);
    // Keep whichever attempt carries more visible text.
    if (retryTextLength > firstTextLength) html = retryHtml;
    logData.retried = true;
    logData.retryResultTextLength = retryTextLength;
  }

  logData.resultLength = html.length;
  logData.resultTextLength = visibleTextLength(html);
  try {
    logTopicGen(isUpdate ? 'update' : 'generate', logData);
  } catch (err) {
    // The page has already been generated; a logging failure must not discard it.
    console.warn(`[topic-gen] logging failed: ${err?.message ?? err}`);
  }
  return html;
}
626
+
627
/**
 * Ask the LLM to break one knowledge entry down into 5-8 sub-points.
 *
 * Questions the student has already had answered are included in the prompt (answers
 * cut to 200 characters) so the breakdown covers those directions.
 *
 * @param {object} entry - Entry with `title`, `subject`, `content` and optionally `tags`.
 * @param {object[]} [qaHistory=[]] - QA items; only those with an `answer` are used.
 * @returns {Promise<Array>} Array parsed or recovered from the reply, or `[]`.
 */
async function expandEntry(entry, qaHistory = []) {
  const answered = qaHistory.filter((item) => item.answer);

  let qaContext = '';
  if (answered.length > 0) {
    const explored = answered
      .map((item) => `Q: ${item.question}\nA: ${item.answer.slice(0, 200)}`)
      .join('\n\n');
    qaContext = `\n\n学生已探索过的问题:\n${explored}`;
  }

  const tagList = (entry.tags || []).join(', ');
  const coverageHint = qaContext
    ? '参考学生已探索过的问题,确保子知识点覆盖这些方向,并补充学生尚未涉及的重要方面。'
    : '例如「王安石变法」可以拆解为:背景、青苗法、免役法、市易法、保甲法、影响与评价等。';

  const prompt = `你是一个学习辅导助手。请将以下知识点拆解为多个子知识点,用于深入分析。

知识点标题: ${entry.title}
学科分类: ${entry.subject}
内容: ${entry.content}
标签: ${tagList}
${qaContext}

请拆解为5-8个子知识点,每个子知识点应该是该主题下的一个具体方面。
${coverageHint}

返回JSON数组,每个元素: {"title": "子知识点标题", "content": "简要描述(50-100字)", "category": "背景/内容/影响/对比/评价"}
重要:文本中引用术语请用「」而不是引号,避免JSON解析错误。
只返回JSON,不要其他文字。`;

  const reply = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 2048, task: 'expand' });
  const subPoints = extractJSON(reply, { isArray: true, repairKeys: ['title', 'content', 'category'] });
  if (!subPoints) return [];
  return subPoints;
}
650
+
651
/**
 * Ask the LLM which new entries duplicate entries that already exist.
 *
 * Existing entries are indexed as `[id] title | subject | tags`; new entries as
 * `[NEW-i] title | <first 100 chars of content>`, where `i` is the position in `newEntries`.
 *
 * @param {object[]} newEntries - Candidate entries (`title`, optional `content`).
 * @param {object[]} existingEntries - Stored entries (`id`, `title`, `subject`, optional `tags`).
 * @returns {Promise<Array>} Array parsed from the reply (the prompt asks for
 *   `{newIndex, existingId, existingTitle, reason}` objects); `[]` when either list is
 *   empty or missing, or the reply could not be parsed.
 */
async function checkDuplicates(newEntries, existingEntries) {
  // Nothing to compare (including a missing list): skip the LLM call.
  if (!Array.isArray(existingEntries) || existingEntries.length === 0) return [];
  if (!Array.isArray(newEntries) || newEntries.length === 0) return [];

  const existingIndex = existingEntries
    .map((e) => `[${e.id}] ${e.title} | ${e.subject} | ${(e.tags || []).join(', ')}`)
    .join('\n');
  const newList = newEntries
    // A new entry without content is listed by title only rather than crashing the call.
    .map((e, i) => `[NEW-${i}] ${e.title} | ${(e.content || '').slice(0, 100)}`)
    .join('\n');

  const prompt = `You are a knowledge deduplication assistant. Compare new knowledge entries against an existing index and identify duplicates or near-duplicates.

Existing entries index:
${existingIndex}

New entries to check:
${newList}

For each new entry that is a duplicate or covers the same knowledge point as an existing entry, return a match. Only flag genuine duplicates — same concept, same knowledge point. Different aspects of a broad topic are NOT duplicates.

Return a JSON array: [{"newIndex": 0, "existingId": "id", "existingTitle": "title", "reason": "brief reason in Chinese"}]
Return empty array [] if no duplicates found.
Return ONLY valid JSON, no other text.`;

  const result = await callLLM([{ role: 'user', content: prompt }], { maxTokens: 1024, task: 'analyze' });
  return extractJSON(result, { isArray: true }) || [];
}
672
+
673
+ module.exports = { callLLM, analyze, findConnections, askQuestion, restructure, buildQAMindMap, generateSmartQuestions, generateTopicHTML, expandEntry, extractJSON, loadLLMConfig, saveLLMConfig, probeAgentMaestro, checkDuplicates };