studylens 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/bin/studylens.js +5 -0
- package/config/llm-config.template.json +28 -0
- package/config/prompts.json +14 -0
- package/core/extractor.js +70 -0
- package/core/llm-provider.js +673 -0
- package/core/llm-provider.test.js +92 -0
- package/core/wiki-storage.js +414 -0
- package/package.json +55 -0
- package/portal/dist/assets/index-C94Qe946.js +183 -0
- package/portal/dist/index.html +12 -0
- package/portal/package.json +28 -0
- package/server/index.js +555 -0
package/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# StudyLens
|
|
2
|
+
|
|
3
|
+
AI-powered deep study assistant. Paste notes, upload files, or provide URLs — StudyLens extracts knowledge points, organizes them for browsing, and generates topic pages with AI-driven Q&A.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Multi-source ingestion** — text, PDF, DOCX, XLSX, and web URLs
|
|
8
|
+
- **LLM-powered extraction** — automatically identifies knowledge points, tags, and relationships
|
|
9
|
+
- **Knowledge graph** — visual force-directed graph of connected concepts
|
|
10
|
+
- **Topic pages** — AI-generated study pages with version history
|
|
11
|
+
- **Deep analysis** — drill down into any concept with AI-powered sub-topic expansion
|
|
12
|
+
- **Smart Q&A** — ask questions about your knowledge base with context-aware answers
|
|
13
|
+
- **Timeline & category views** — browse knowledge by time or subject
|
|
14
|
+
- **Export** — single-page HTML export with print-optimized CSS
|
|
15
|
+
- **Granularity control** — limit max knowledge points per ingestion for high-level summaries
|
|
16
|
+
- **Multi-provider LLM** — supports OpenAI-compatible APIs, Ollama, and custom endpoints
|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm run setup # Install dependencies (server + portal)
|
|
22
|
+
npm run dev # Start server (port 3000) + dev portal (port 3001)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Open `http://localhost:3001` for development (hot-reload, recommended).
|
|
26
|
+
|
|
27
|
+
Port 3000 serves the production build. Run `npm run build` first to generate `portal/dist/`; otherwise the server on port 3000 serves only the API.
|
|
28
|
+
|
|
29
|
+
## LLM Configuration
|
|
30
|
+
|
|
31
|
+
StudyLens requires an LLM backend. On first launch the Settings panel opens automatically to guide you through setup. Three options:
|
|
32
|
+
|
|
33
|
+
### Option A: Agent Maestro (recommended for GitHub Copilot users)
|
|
34
|
+
|
|
35
|
+
No API key needed — uses your existing Copilot subscription via VS Code.
|
|
36
|
+
|
|
37
|
+
1. Install the [Agent Maestro](https://marketplace.visualstudio.com/items?itemName=Joouis.agent-maestro) VS Code extension
|
|
38
|
+
2. It starts a local proxy at `http://localhost:23333`
|
|
39
|
+
3. In StudyLens settings, enable `agent-maestro` and test the connection
|
|
40
|
+
|
|
41
|
+
### Option B: OpenAI-compatible API
|
|
42
|
+
|
|
43
|
+
Works with OpenAI, Azure OpenAI, DeepSeek, or any compatible endpoint.
|
|
44
|
+
|
|
45
|
+
1. In StudyLens settings, enable `openai-compatible`
|
|
46
|
+
2. Set `baseUrl` (default: `https://api.openai.com/v1`), `apiKey`, and `model`
|
|
47
|
+
|
|
48
|
+
### Option C: Ollama (fully local, free)
|
|
49
|
+
|
|
50
|
+
Run models locally with no API key or internet required.
|
|
51
|
+
|
|
52
|
+
1. Install [Ollama](https://ollama.com) and pull a model: `ollama pull llama3.2`
|
|
53
|
+
2. In StudyLens settings, enable `ollama` (default URL: `http://localhost:11434`)
|
|
54
|
+
|
|
55
|
+
Configuration is stored in `wiki/config/llm-config.json`. A template is at `config/llm-config.template.json`.
|
|
56
|
+
|
|
57
|
+
## Project Structure
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
StudyLens/
|
|
61
|
+
├── server/ # Express API server
|
|
62
|
+
│ └── index.js
|
|
63
|
+
├── core/ # Business logic
|
|
64
|
+
│ ├── extractor.js # Knowledge extraction prompts
|
|
65
|
+
│ ├── llm-provider.js # Multi-provider LLM client
|
|
66
|
+
│ └── wiki-storage.js # Markdown-based file storage
|
|
67
|
+
├── portal/ # React frontend (Vite)
|
|
68
|
+
│ └── src/
|
|
69
|
+
│ ├── components/ # UI components
|
|
70
|
+
│ └── lib/ # Shared utilities
|
|
71
|
+
├── config/ # Configuration templates
|
|
72
|
+
├── e2e/ # Playwright E2E tests
|
|
73
|
+
├── tests/ # API integration tests
|
|
74
|
+
├── scripts/ # Utility scripts
|
|
75
|
+
└── docs/ # User & developer guides
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Data Storage
|
|
79
|
+
|
|
80
|
+
All data is stored as Markdown files in the `wiki/` directory (gitignored by default):
|
|
81
|
+
|
|
82
|
+
- `wiki/entries/` — knowledge point Markdown files with YAML frontmatter
|
|
83
|
+
- `wiki/topic-pages/` — generated topic page HTML
|
|
84
|
+
- `wiki/index/` — JSON indexes for fast lookup
|
|
85
|
+
- `wiki/config/` — runtime configuration
|
|
86
|
+
|
|
87
|
+
## Testing
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
npm test # Unit tests (API + portal)
|
|
91
|
+
npm run test:e2e # Playwright E2E tests
|
|
92
|
+
npm run test:api # API tests only
|
|
93
|
+
npm run test:portal # Portal component tests only
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Scripts
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
npm run server # Start API server only (port 3000)
|
|
100
|
+
npm run portal # Start Vite dev server only (port 3001)
|
|
101
|
+
npm run dev # Start both concurrently
|
|
102
|
+
npm run setup # Install all dependencies
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
MIT
|
package/bin/studylens.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"defaultProvider": "auto",
|
|
3
|
+
"providers": {
|
|
4
|
+
"agent-maestro": {
|
|
5
|
+
"enabled": false,
|
|
6
|
+
"baseUrl": "http://localhost:23333/api/anthropic",
|
|
7
|
+
"model": "claude-sonnet-4-6"
|
|
8
|
+
},
|
|
9
|
+
"openai-compatible": {
|
|
10
|
+
"enabled": false,
|
|
11
|
+
"baseUrl": "https://api.openai.com/v1",
|
|
12
|
+
"apiKey": "",
|
|
13
|
+
"model": "gpt-4o"
|
|
14
|
+
},
|
|
15
|
+
"ollama": {
|
|
16
|
+
"enabled": false,
|
|
17
|
+
"baseUrl": "http://localhost:11434",
|
|
18
|
+
"model": "llama3.2"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"taskRouting": {
|
|
22
|
+
"analyze": "default",
|
|
23
|
+
"questions": "default",
|
|
24
|
+
"topicPage": "default",
|
|
25
|
+
"qa": "default",
|
|
26
|
+
"expand": "default"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"subjects": {
|
|
3
|
+
"英语": {
|
|
4
|
+
"analyzePrompt": "You are a knowledge extraction assistant for a middle school English student. Analyze the following study notes and extract structured knowledge entries.\n\nFor each distinct knowledge point, return a JSON array of objects with:\n- \"title\": concise title in Chinese (under 20 chars), e.g. \"Unit5 比较级与最高级\"\n- \"content\": the knowledge point explained clearly in Chinese, include English examples with Chinese translations in parentheses\n- \"subject\": precise classification like \"英语-词汇与话题\", \"英语-语法\", \"英语-形容词比较级\", \"英语-写作技巧\", \"英语-动名词\" etc.\n- \"tags\": array of relevant tags including:\n 1. Grammar points: \"比较级\", \"最高级\", \"一般过去时\", \"动名词\", \"可数名词\", \"不可数名词\" etc.\n 2. Topic/Unit tags: \"自然景观\", \"运动\", \"饮食健康\" etc.\n 3. Key phrases as tags: \"be famous for\", \"imagine doing\" etc.\n 4. Skill dimensions: \"短语搭配\", \"语法规则\", \"易错点\", \"写作\" etc.\n\nImportant:\n- Each entry should focus on ONE grammar point or vocabulary cluster\n- Always include English examples with Chinese translations\n- Tag grammar-related entries with specific grammar terms",
|
|
5
|
+
"questionsPrompt": "生成的问题应该覆盖以下维度,帮助学生深入掌握英语知识点:\n1. 语法规则深挖(规则的例外情况、易错点、为什么是这样)\n2. 造句练习(要求使用特定短语或语法结构造复合句)\n3. 辨析对比(易混淆词、近义词区别、相似语法的区分)\n4. 知识迁移(把语法规则应用到新语境,如翻译句子)\n5. 综合运用(同时使用多个短语或语法点完成一个写作任务)\n\n问题要求:\n- 问题用中文提出,但涉及的英语内容保持英文\n- 鼓励学生写出完整英文句子而不是只选择答案\n- 涉及语法规则时,要求学生解释原因而不只是记忆\n- 可以引用课文中的重点短语来设计造句题\n\n返回JSON数组,每个元素: {\"question\": \"问题内容\", \"category\": \"语法/造句/辨析/迁移/综合\"}"
|
|
6
|
+
}
|
|
7
|
+
},
|
|
8
|
+
"defaultPrompts": {
|
|
9
|
+
"analyzePrompt": "You are a knowledge extraction assistant for a student. Analyze the following study notes and extract structured knowledge entries.\n\nFor each distinct knowledge point, return a JSON array of objects with:\n- \"title\": concise title (under 20 chars)\n- \"content\": the knowledge point explained clearly\n- \"subject\": precise subject classification (see rules below)\n- \"tags\": array of relevant tags — include ALL of the following dimensions:\n 1. Core concepts: key terms, names, formulas (e.g. \"科举制\", \"赵匡胤\", \"勾股定理\")\n 2. Category dimensions: assign multi-dimensional category tags based on the subject area:\n - For history: add tags from these dimensions where applicable:\n \"政治制度\", \"军事战争\", \"经济发展\", \"民族关系\", \"对外交流\", \"科技发明\", \"文化艺术\", \"社会生活\", \"人物\"\n - For math: \"代数\", \"几何\", \"概率\", \"函数\", \"公式\", \"定理\", \"证明\"\n - For physics: \"力学\", \"电磁\", \"热学\", \"光学\", \"实验\", \"公式\"\n - For other subjects: infer appropriate dimensional tags\n 3. Connections: tags that link to related knowledge across different categories\n\nSubject classification rules:\n- For history: use specific dynasty like \"历史-隋朝\", \"历史-唐朝\", \"历史-北宋\" etc.\n- For other subjects: use patterns like \"数学-代数\", \"物理-力学\", \"化学-有机\" etc.\n- Each knowledge point must belong to exactly ONE specific category.\n\nReturn ONLY valid JSON array, no other text.",
|
|
10
|
+
"topicPrompt": "你是一个教育内容设计师。基于以下知识点和相关资料,生成一个美观的HTML专题页面。\n\n要求:\n1. 生成完整的HTML页面(含内联CSS),适合iframe嵌入\n2. 深色主题(背景 #0f1117,文字 #e0e0e0)\n3. 分章节展示:导语→背景→核心内容→影响/意义→总结\n4. 使用清晰的排版:标题、卡片、分隔线、高亮重点\n5. 中文内容,适合中学生阅读\n6. 使用你自己的知识补充完整内容,不要局限于提供的材料\n7. 页面宽度100%,无需滚动条样式\n8. 配色美观,使用渐变和阴影效果",
|
|
11
|
+
"qaPrompt": "You are an expert study assistant with deep knowledge across all subjects. A student is studying and asks you questions.\n\nIMPORTANT: Use your OWN comprehensive knowledge to answer thoroughly and accurately. The student's notes are supplementary context, not the boundary of your answer.\n\nInstructions:\n1. Answer using your full knowledge — be thorough, accurate, and educational\n2. If the student has relevant notes, reference them to build connections\n3. Use comparisons, analysis, and specific facts/data where appropriate\n4. Write in Chinese, suitable for a middle/high school student\n5. Suggest knowledge cards that capture KEY points — NEW knowledge beyond existing notes\n\nReturn a JSON object:\n{\n \"answer\": \"Your comprehensive answer in Chinese...\",\n \"suggestedCards\": [\n {\n \"title\": \"card title (under 20 chars)\",\n \"content\": \"knowledge point explained clearly\",\n \"subject\": \"precise subject like 历史-唐朝\",\n \"tags\": [\"relevant\", \"tags\"]\n }\n ]\n}\n\nCRITICAL: The answer field must be PLAIN TEXT only — no markdown formatting.\nReturn ONLY valid JSON, no other text.",
|
|
12
|
+
"questionsPrompt": "生成的问题应该覆盖:\n1. 基本概念(是什么)\n2. 原因分析(为什么)\n3. 影响/意义(有什么影响)\n4. 比较对比(与其他知识的关联)\n5. 深入思考(评价/启示)\n\n返回JSON数组,每个元素: {\"question\": \"问题内容\", \"category\": \"概念/原因/影响/对比/思考\"}"
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const http = require('http');
|
|
4
|
+
const https = require('https');
|
|
5
|
+
|
|
6
|
+
/**
 * Extract plain text from a local document.
 *
 * Supported extensions (matched case-insensitively): .pdf, .docx, .xlsx,
 * .xls, .txt and .md. The extension is checked before the file is touched, so
 * an unsupported upload is rejected without reading it into memory, and the
 * read itself is asynchronous so a large file does not block the event loop.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The extracted text. Spreadsheets yield one CSV
 *   section per sheet, each introduced by a `[sheetName]` line.
 * @throws {Error} `Unsupported file type: <ext>` for any other extension.
 *   File-system and parser failures propagate as rejections.
 */
async function extractFromFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();

  const supported = ['.pdf', '.docx', '.xlsx', '.xls', '.txt', '.md'];
  if (!supported.includes(ext)) {
    throw new Error(`Unsupported file type: ${ext}`);
  }

  const buf = await fs.promises.readFile(filePath);

  // Parser libraries are loaded lazily so that only the one actually needed
  // for this file type is required.
  if (ext === '.pdf') {
    const pdfParse = require('pdf-parse');
    const data = await pdfParse(buf);
    return data.text;
  }

  if (ext === '.docx') {
    const mammoth = require('mammoth');
    const result = await mammoth.extractRawText({ buffer: buf });
    return result.value;
  }

  if (ext === '.xlsx' || ext === '.xls') {
    const XLSX = require('xlsx');
    const wb = XLSX.read(buf);
    const texts = wb.SheetNames.map(
      (name) => `[${name}]\n${XLSX.utils.sheet_to_csv(wb.Sheets[name])}`,
    );
    return texts.join('\n\n');
  }

  // Only .txt and .md remain after the guard above.
  return buf.toString('utf-8');
}
|
|
39
|
+
|
|
40
|
+
/**
 * Download a URL over HTTP(S) and return the response body as a UTF-8 string.
 *
 * Redirects (3xx with a `Location` header) are followed. The target is
 * resolved against the current URL so relative `Location` values work, and
 * the number of hops is capped so a redirect loop cannot recurse forever.
 *
 * NOTE(review): the URL is caller-supplied and fetched from the server side;
 * if it can come from untrusted users, consider restricting private/internal
 * hosts before calling this.
 *
 * @param {string} urlStr - Absolute URL to fetch.
 * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 * @throws {Error} Rejects with `HTTP <status>` for status >= 400,
 *   `Too many redirects` when the cap is exceeded, a timeout error after
 *   30 s of socket inactivity, or the underlying network/URL error.
 */
function fetchUrl(urlStr, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const url = new URL(urlStr);
    const mod = url.protocol === 'https:' ? https : http;
    const options = {
      timeout: 30000,
      headers: { 'User-Agent': 'Mozilla/5.0 StudyLens/1.0' },
    };

    const req = mod.get(url, options, (res) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error('Too many redirects'));
          return;
        }
        let next;
        try {
          // Location may be relative; resolve it against the current URL.
          next = new URL(headers.location, url).href;
        } catch (err) {
          reject(err);
          return;
        }
        fetchUrl(next, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode >= 400) {
        res.resume();
        reject(new Error(`HTTP ${statusCode}`));
        return;
      }

      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      res.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      res.on('error', reject);
    });

    // The `timeout` option only emits an event; the request must be torn down
    // explicitly, which in turn fires 'error' and rejects the promise.
    req.on('timeout', () => {
      req.destroy(new Error('Request timed out after 30000ms'));
    });
    req.on('error', reject);
  });
}
|
|
55
|
+
|
|
56
|
+
/**
 * Fetch a web page and reduce it to its readable text.
 *
 * Non-content elements (scripts, styles, navigation, header/footer, iframes,
 * noscript) are stripped. The text is then taken from the first available of
 * `<article>`, `<main>` or `<body>`, with whitespace collapsed to single
 * spaces.
 *
 * @param {string} urlStr - URL of the page to fetch.
 * @returns {Promise<string>} At most the first 15000 characters of page text.
 * @throws {Error} When fewer than 50 characters of text can be extracted;
 *   fetch failures propagate as rejections.
 */
async function extractFromUrl(urlStr) {
  const markup = await fetchUrl(urlStr);
  const cheerio = require('cheerio');
  const dom = cheerio.load(markup);

  dom('script, style, nav, footer, header, iframe, noscript').remove();

  // Prefer the most specific content container that exists on the page.
  let container = dom('body');
  for (const selector of ['article', 'main']) {
    if (dom(selector).length) {
      container = dom(selector);
      break;
    }
  }

  const collapsed = container.text().replace(/\s+/g, ' ').trim();
  if (collapsed.length < 50) {
    throw new Error('Could not extract meaningful text from URL');
  }

  return collapsed.slice(0, 15000);
}
|
|
69
|
+
|
|
70
|
+
// Public API of this module: only the two extractors are exported.
module.exports = { extractFromFile, extractFromUrl };
|