abseeker 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abseeker-0.1.0/.gitignore +143 -0
- abseeker-0.1.0/LICENSE +21 -0
- abseeker-0.1.0/PKG-INFO +262 -0
- abseeker-0.1.0/README.md +228 -0
- abseeker-0.1.0/abseeker/__init__.py +47 -0
- abseeker-0.1.0/abseeker/__main__.py +6 -0
- abseeker-0.1.0/abseeker/cli/__init__.py +5 -0
- abseeker-0.1.0/abseeker/cli/main.py +589 -0
- abseeker-0.1.0/abseeker/cli/progress.py +342 -0
- abseeker-0.1.0/abseeker/config.py +244 -0
- abseeker-0.1.0/abseeker/llm/__init__.py +13 -0
- abseeker-0.1.0/abseeker/llm/base.py +211 -0
- abseeker-0.1.0/abseeker/llm/deepseek_client.py +293 -0
- abseeker-0.1.0/abseeker/llm/openai_client.py +296 -0
- abseeker-0.1.0/abseeker/parser/__init__.py +9 -0
- abseeker-0.1.0/abseeker/parser/filters.py +136 -0
- abseeker-0.1.0/abseeker/parser/record.py +160 -0
- abseeker-0.1.0/abseeker/parser/wos_parser.py +310 -0
- abseeker-0.1.0/abseeker/processor/__init__.py +14 -0
- abseeker-0.1.0/abseeker/processor/aggregator.py +408 -0
- abseeker-0.1.0/abseeker/processor/batch.py +565 -0
- abseeker-0.1.0/abseeker/processor/checkpoint.py +172 -0
- abseeker-0.1.0/abseeker/prompt/__init__.py +10 -0
- abseeker-0.1.0/abseeker/prompt/builder.py +185 -0
- abseeker-0.1.0/abseeker/prompt/templates.py +121 -0
- abseeker-0.1.0/docs/cli.md +468 -0
- abseeker-0.1.0/pyproject.toml +73 -0
- abseeker-0.1.0/tests/__init__.py +1 -0
- abseeker-0.1.0/tests/test_llm.py +310 -0
- abseeker-0.1.0/tests/test_parser.py +346 -0
- abseeker-0.1.0/tests/test_prompt.py +180 -0
- abseeker-0.1.0/tests/verify_parser.py +98 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.installed.cfg
|
|
22
|
+
*.egg
|
|
23
|
+
MANIFEST
|
|
24
|
+
|
|
25
|
+
# PyInstaller
|
|
26
|
+
*.manifest
|
|
27
|
+
*.spec
|
|
28
|
+
|
|
29
|
+
# Installer logs
|
|
30
|
+
pip-log.txt
|
|
31
|
+
pip-delete-this-directory.txt
|
|
32
|
+
|
|
33
|
+
# Unit test / coverage reports
|
|
34
|
+
htmlcov/
|
|
35
|
+
.tox/
|
|
36
|
+
.nox/
|
|
37
|
+
.coverage
|
|
38
|
+
.coverage.*
|
|
39
|
+
.cache
|
|
40
|
+
nosetests.xml
|
|
41
|
+
coverage.xml
|
|
42
|
+
*.cover
|
|
43
|
+
*.py,cover
|
|
44
|
+
.hypothesis/
|
|
45
|
+
.pytest_cache/
|
|
46
|
+
cover/
|
|
47
|
+
|
|
48
|
+
# Translations
|
|
49
|
+
*.mo
|
|
50
|
+
*.pot
|
|
51
|
+
|
|
52
|
+
# Django stuff:
|
|
53
|
+
*.log
|
|
54
|
+
local_settings.py
|
|
55
|
+
db.sqlite3
|
|
56
|
+
db.sqlite3-journal
|
|
57
|
+
|
|
58
|
+
# Flask stuff:
|
|
59
|
+
instance/
|
|
60
|
+
.webassets-cache
|
|
61
|
+
|
|
62
|
+
# Scrapy stuff:
|
|
63
|
+
.scrapy
|
|
64
|
+
|
|
65
|
+
# Sphinx documentation
|
|
66
|
+
docs/_build/
|
|
67
|
+
|
|
68
|
+
# PyBuilder
|
|
69
|
+
.pybuilder/
|
|
70
|
+
target/
|
|
71
|
+
|
|
72
|
+
# Jupyter Notebook
|
|
73
|
+
.ipynb_checkpoints
|
|
74
|
+
|
|
75
|
+
# IPython
|
|
76
|
+
profile_default/
|
|
77
|
+
ipython_config.py
|
|
78
|
+
|
|
79
|
+
# pyenv
|
|
80
|
+
.python-version
|
|
81
|
+
|
|
82
|
+
# pipenv
|
|
83
|
+
Pipfile.lock
|
|
84
|
+
|
|
85
|
+
# poetry
|
|
86
|
+
poetry.lock
|
|
87
|
+
|
|
88
|
+
# pdm
|
|
89
|
+
.pdm.toml
|
|
90
|
+
|
|
91
|
+
# PEP 582
|
|
92
|
+
__pypackages__/
|
|
93
|
+
|
|
94
|
+
# Celery stuff
|
|
95
|
+
celerybeat-schedule
|
|
96
|
+
celerybeat.pid
|
|
97
|
+
|
|
98
|
+
# SageMath parsed files
|
|
99
|
+
*.sage.py
|
|
100
|
+
|
|
101
|
+
# Environments
|
|
102
|
+
.env
|
|
103
|
+
.venv
|
|
104
|
+
env/
|
|
105
|
+
venv/
|
|
106
|
+
ENV/
|
|
107
|
+
env.bak/
|
|
108
|
+
venv.bak/
|
|
109
|
+
|
|
110
|
+
# Spyder project settings
|
|
111
|
+
.spyderproject
|
|
112
|
+
.spyproject
|
|
113
|
+
|
|
114
|
+
# Rope project settings
|
|
115
|
+
.ropeproject
|
|
116
|
+
|
|
117
|
+
# mkdocs documentation
|
|
118
|
+
/site
|
|
119
|
+
|
|
120
|
+
# mypy
|
|
121
|
+
.mypy_cache/
|
|
122
|
+
.dmypy.json
|
|
123
|
+
dmypy.json
|
|
124
|
+
|
|
125
|
+
# Pyre type checker
|
|
126
|
+
.pyre/
|
|
127
|
+
|
|
128
|
+
# pytype static type analyzer
|
|
129
|
+
.pytype/
|
|
130
|
+
|
|
131
|
+
# Cython debug symbols
|
|
132
|
+
cython_debug/
|
|
133
|
+
|
|
134
|
+
# PyCharm
|
|
135
|
+
.idea/
|
|
136
|
+
|
|
137
|
+
# VS Code
|
|
138
|
+
.vscode/
|
|
139
|
+
.trae
|
|
140
|
+
|
|
141
|
+
# Project specific
|
|
142
|
+
.abseeker/
|
|
143
|
+
dev
|
abseeker-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 BHM-Bob_G (github: BHM-Bob)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
abseeker-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: abseeker
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 基于LLM的WOS学术文献智能筛选工具
|
|
5
|
+
Project-URL: Homepage, https://github.com/BHM-Bob/abseeker
|
|
6
|
+
Project-URL: Repository, https://github.com/BHM-Bob/abseeker
|
|
7
|
+
Author-email: BHM-Bob_G <bhmfly@foxmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Requires-Dist: click>=8.0.0
|
|
20
|
+
Requires-Dist: httpx>=0.25.0
|
|
21
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
22
|
+
Requires-Dist: pandas>=2.0.0
|
|
23
|
+
Requires-Dist: pydantic>=2.0.0
|
|
24
|
+
Requires-Dist: rich>=13.0.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: twine>=4.0.0; extra == 'dev'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# ABSeeker
|
|
36
|
+
|
|
37
|
+
基于LLM的WOS学术文献智能筛选工具
|
|
38
|
+
|
|
39
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
40
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
41
|
+
|
|
42
|
+
## 简介
|
|
43
|
+
|
|
44
|
+
ABSeeker 是一个利用大语言模型(LLM)智能筛选 Web of Science (WOS) 学术文献的 Python 工具。它能够:
|
|
45
|
+
|
|
46
|
+
- 解析 WOS 导出的纯文本文献记录
|
|
47
|
+
- 使用 LLM 智能判断文献是否符合检索意图
|
|
48
|
+
- 提供置信度评分和判断理由
|
|
49
|
+
- 支持批量处理和断点续传
|
|
50
|
+
- 实时显示处理进度和统计信息
|
|
51
|
+
|
|
52
|
+
## 安装
|
|
53
|
+
|
|
54
|
+
### 从源码安装
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
git clone https://github.com/BHM-Bob/abseeker.git
|
|
58
|
+
cd abseeker
|
|
59
|
+
pip install -e .
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### 依赖
|
|
63
|
+
|
|
64
|
+
- Python >= 3.9
|
|
65
|
+
- pydantic >= 2.0.0
|
|
66
|
+
- httpx >= 0.25.0
|
|
67
|
+
- click >= 8.0.0
|
|
68
|
+
- rich >= 13.0.0
|
|
69
|
+
- pandas >= 2.0.0
- openpyxl >= 3.1.0
|
|
70
|
+
|
|
71
|
+
## 快速开始
|
|
72
|
+
|
|
73
|
+
### 1. 配置 LLM 后端
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# 配置 OpenAI 兼容的 API
|
|
77
|
+
abseeker config set-llm --provider openai
|
|
78
|
+
|
|
79
|
+
# 或配置 DeepSeek
|
|
80
|
+
abseeker config set-llm --provider deepseek
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 2. 测试连接
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
abseeker config test
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 3. 分析文献
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
abseeker analyze savedrecs.txt --intent "研究肽类药物递送" -o results.csv
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## 使用指南
|
|
96
|
+
|
|
97
|
+
### 配置管理
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# 查看当前配置
|
|
101
|
+
abseeker config show
|
|
102
|
+
|
|
103
|
+
# 设置 LLM 后端
|
|
104
|
+
abseeker config set-llm --provider openai --base-url https://api.example.com/v1
|
|
105
|
+
|
|
106
|
+
# 查看可用模型
|
|
107
|
+
abseeker config list-models
|
|
108
|
+
|
|
109
|
+
# 设置具体模型
|
|
110
|
+
abseeker config set-model gpt-4
|
|
111
|
+
|
|
112
|
+
# 设置请求速率限制
|
|
113
|
+
abseeker config set-rate-limit --interval 1.0 --rpm 60
|
|
114
|
+
|
|
115
|
+
# 重置配置
|
|
116
|
+
abseeker config reset
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 文献分析
|
|
120
|
+
|
|
121
|
+
#### 基本用法
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
abseeker analyze savedrecs.txt --intent "研究深度学习在医疗领域的应用"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
#### 范围过滤
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# 按索引范围
|
|
131
|
+
abseeker analyze savedrecs.txt -i "纳米技术" --start-index 0 --end-index 99
|
|
132
|
+
|
|
133
|
+
# 按年份范围
|
|
134
|
+
abseeker analyze savedrecs.txt -i "AI医疗" --start-year 2020 --end-year 2023
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### 输出格式
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# CSV 格式(默认)
|
|
141
|
+
abseeker analyze savedrecs.txt -i "研究意图" -o results.csv
|
|
142
|
+
|
|
143
|
+
# JSON 格式
|
|
144
|
+
abseeker analyze savedrecs.txt -i "研究意图" -o results.json --format json
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
#### 速率限制
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# 设置请求间隔(秒)
|
|
151
|
+
abseeker analyze savedrecs.txt -i "研究意图" --interval 2.0
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
#### 断点续传
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# 每隔 10 篇文献自动保存
|
|
158
|
+
abseeker analyze savedrecs.txt -i "研究意图" --save-interval 10
|
|
159
|
+
|
|
160
|
+
# 从保存文件恢复(中断后)
|
|
161
|
+
abseeker analyze savedrecs.txt -i "研究意图" --from-saved results.autosave.json
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## 工作原理
|
|
165
|
+
|
|
166
|
+
1. **解析**: 读取 WOS 导出的纯文本文件,提取文献元数据(标题、作者、摘要、关键词等)
|
|
167
|
+
2. **过滤**: 根据用户指定的索引或年份范围筛选文献
|
|
168
|
+
3. **分析**: 将每篇文献的摘要和元数据发送给 LLM,判断是否符合检索意图
|
|
169
|
+
4. **输出**: 生成包含相关性判断、置信度、理由和分类的结果文件
|
|
170
|
+
|
|
171
|
+
## 输出格式
|
|
172
|
+
|
|
173
|
+
### CSV 输出
|
|
174
|
+
|
|
175
|
+
| 字段 | 说明 |
|
|
176
|
+
|------|------|
|
|
177
|
+
| title | 文献标题 |
|
|
178
|
+
| authors | 作者列表 |
|
|
179
|
+
| journal | 期刊名称 |
|
|
180
|
+
| year | 发表年份 |
|
|
181
|
+
| doi | DOI |
|
|
182
|
+
| relevant | 是否相关 (True/False) |
|
|
183
|
+
| confidence | 置信度 (0.0-1.0) |
|
|
184
|
+
| reason | 判断理由 |
|
|
185
|
+
| categories | 分类标签 |
|
|
186
|
+
|
|
187
|
+
### JSON 输出
|
|
188
|
+
|
|
189
|
+
```json
|
|
190
|
+
{
|
|
191
|
+
"results": [
|
|
192
|
+
{
|
|
193
|
+
"record": {
|
|
194
|
+
"title": "文献标题",
|
|
195
|
+
"authors": ["作者1", "作者2"],
|
|
196
|
+
"journal": "期刊名",
|
|
197
|
+
"year": 2023,
|
|
198
|
+
"doi": "10.xxxx/xxxxx"
|
|
199
|
+
},
|
|
200
|
+
"relevant": true,
|
|
201
|
+
"confidence": 0.95,
|
|
202
|
+
"reason": "该文献研究了...",
|
|
203
|
+
"categories": ["深度学习", "医疗AI"]
|
|
204
|
+
}
|
|
205
|
+
],
|
|
206
|
+
"stats": {
|
|
207
|
+
"total": 100,
|
|
208
|
+
"relevant": 25,
|
|
209
|
+
"avg_confidence": 0.85
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## 配置存储
|
|
215
|
+
|
|
216
|
+
配置文件存储在用户家目录的 `.abseeker/` 文件夹中:
|
|
217
|
+
|
|
218
|
+
- **Linux/macOS**: `~/.abseeker/config.json`
|
|
219
|
+
- **Windows**: `%USERPROFILE%\.abseeker\config.json`
|
|
220
|
+
|
|
221
|
+
## 开发
|
|
222
|
+
|
|
223
|
+
### 项目结构
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
abseeker/
|
|
227
|
+
├── abseeker/ # 主代码
|
|
228
|
+
│ ├── cli/ # 命令行接口
|
|
229
|
+
│ ├── llm/ # LLM 客户端
|
|
230
|
+
│ ├── parser/ # WOS 文件解析
|
|
231
|
+
│ ├── processor/ # 批量处理器
|
|
232
|
+
│ └── prompt/ # 提示词模板
|
|
233
|
+
├── tests/ # 测试代码
|
|
234
|
+
├── dev/ # 开发数据和文档
|
|
235
|
+
└── docs/ # 文档
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### 运行测试
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
pytest
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### 代码格式化
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
black abseeker/
|
|
248
|
+
ruff check abseeker/
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## 许可证
|
|
252
|
+
|
|
253
|
+
MIT License - 详见 [LICENSE](LICENSE) 文件
|
|
254
|
+
|
|
255
|
+
## 贡献
|
|
256
|
+
|
|
257
|
+
欢迎提交 Issue 和 Pull Request!
|
|
258
|
+
|
|
259
|
+
## 致谢
|
|
260
|
+
|
|
261
|
+
- 感谢 OpenAI/DeepSeek 等 LLM 提供商的 API
|
|
262
|
+
- 感谢 Web of Science 提供的学术文献数据
|
abseeker-0.1.0/README.md
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# ABSeeker
|
|
2
|
+
|
|
3
|
+
基于LLM的WOS学术文献智能筛选工具
|
|
4
|
+
|
|
5
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
## 简介
|
|
9
|
+
|
|
10
|
+
ABSeeker 是一个利用大语言模型(LLM)智能筛选 Web of Science (WOS) 学术文献的 Python 工具。它能够:
|
|
11
|
+
|
|
12
|
+
- 解析 WOS 导出的纯文本文献记录
|
|
13
|
+
- 使用 LLM 智能判断文献是否符合检索意图
|
|
14
|
+
- 提供置信度评分和判断理由
|
|
15
|
+
- 支持批量处理和断点续传
|
|
16
|
+
- 实时显示处理进度和统计信息
|
|
17
|
+
|
|
18
|
+
## 安装
|
|
19
|
+
|
|
20
|
+
### 从源码安装
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
git clone https://github.com/BHM-Bob/abseeker.git
|
|
24
|
+
cd abseeker
|
|
25
|
+
pip install -e .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 依赖
|
|
29
|
+
|
|
30
|
+
- Python >= 3.9
|
|
31
|
+
- pydantic >= 2.0.0
|
|
32
|
+
- httpx >= 0.25.0
|
|
33
|
+
- click >= 8.0.0
|
|
34
|
+
- rich >= 13.0.0
|
|
35
|
+
- pandas >= 2.0.0
- openpyxl >= 3.1.0
|
|
36
|
+
|
|
37
|
+
## 快速开始
|
|
38
|
+
|
|
39
|
+
### 1. 配置 LLM 后端
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# 配置 OpenAI 兼容的 API
|
|
43
|
+
abseeker config set-llm --provider openai
|
|
44
|
+
|
|
45
|
+
# 或配置 DeepSeek
|
|
46
|
+
abseeker config set-llm --provider deepseek
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 2. 测试连接
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
abseeker config test
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 3. 分析文献
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
abseeker analyze savedrecs.txt --intent "研究肽类药物递送" -o results.csv
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 使用指南
|
|
62
|
+
|
|
63
|
+
### 配置管理
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# 查看当前配置
|
|
67
|
+
abseeker config show
|
|
68
|
+
|
|
69
|
+
# 设置 LLM 后端
|
|
70
|
+
abseeker config set-llm --provider openai --base-url https://api.example.com/v1
|
|
71
|
+
|
|
72
|
+
# 查看可用模型
|
|
73
|
+
abseeker config list-models
|
|
74
|
+
|
|
75
|
+
# 设置具体模型
|
|
76
|
+
abseeker config set-model gpt-4
|
|
77
|
+
|
|
78
|
+
# 设置请求速率限制
|
|
79
|
+
abseeker config set-rate-limit --interval 1.0 --rpm 60
|
|
80
|
+
|
|
81
|
+
# 重置配置
|
|
82
|
+
abseeker config reset
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### 文献分析
|
|
86
|
+
|
|
87
|
+
#### 基本用法
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
abseeker analyze savedrecs.txt --intent "研究深度学习在医疗领域的应用"
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
#### 范围过滤
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# 按索引范围
|
|
97
|
+
abseeker analyze savedrecs.txt -i "纳米技术" --start-index 0 --end-index 99
|
|
98
|
+
|
|
99
|
+
# 按年份范围
|
|
100
|
+
abseeker analyze savedrecs.txt -i "AI医疗" --start-year 2020 --end-year 2023
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
#### 输出格式
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# CSV 格式(默认)
|
|
107
|
+
abseeker analyze savedrecs.txt -i "研究意图" -o results.csv
|
|
108
|
+
|
|
109
|
+
# JSON 格式
|
|
110
|
+
abseeker analyze savedrecs.txt -i "研究意图" -o results.json --format json
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### 速率限制
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# 设置请求间隔(秒)
|
|
117
|
+
abseeker analyze savedrecs.txt -i "研究意图" --interval 2.0
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
#### 断点续传
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# 每隔 10 篇文献自动保存
|
|
124
|
+
abseeker analyze savedrecs.txt -i "研究意图" --save-interval 10
|
|
125
|
+
|
|
126
|
+
# 从保存文件恢复(中断后)
|
|
127
|
+
abseeker analyze savedrecs.txt -i "研究意图" --from-saved results.autosave.json
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## 工作原理
|
|
131
|
+
|
|
132
|
+
1. **解析**: 读取 WOS 导出的纯文本文件,提取文献元数据(标题、作者、摘要、关键词等)
|
|
133
|
+
2. **过滤**: 根据用户指定的索引或年份范围筛选文献
|
|
134
|
+
3. **分析**: 将每篇文献的摘要和元数据发送给 LLM,判断是否符合检索意图
|
|
135
|
+
4. **输出**: 生成包含相关性判断、置信度、理由和分类的结果文件
|
|
136
|
+
|
|
137
|
+
## 输出格式
|
|
138
|
+
|
|
139
|
+
### CSV 输出
|
|
140
|
+
|
|
141
|
+
| 字段 | 说明 |
|
|
142
|
+
|------|------|
|
|
143
|
+
| title | 文献标题 |
|
|
144
|
+
| authors | 作者列表 |
|
|
145
|
+
| journal | 期刊名称 |
|
|
146
|
+
| year | 发表年份 |
|
|
147
|
+
| doi | DOI |
|
|
148
|
+
| relevant | 是否相关 (True/False) |
|
|
149
|
+
| confidence | 置信度 (0.0-1.0) |
|
|
150
|
+
| reason | 判断理由 |
|
|
151
|
+
| categories | 分类标签 |
|
|
152
|
+
|
|
153
|
+
### JSON 输出
|
|
154
|
+
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"results": [
|
|
158
|
+
{
|
|
159
|
+
"record": {
|
|
160
|
+
"title": "文献标题",
|
|
161
|
+
"authors": ["作者1", "作者2"],
|
|
162
|
+
"journal": "期刊名",
|
|
163
|
+
"year": 2023,
|
|
164
|
+
"doi": "10.xxxx/xxxxx"
|
|
165
|
+
},
|
|
166
|
+
"relevant": true,
|
|
167
|
+
"confidence": 0.95,
|
|
168
|
+
"reason": "该文献研究了...",
|
|
169
|
+
"categories": ["深度学习", "医疗AI"]
|
|
170
|
+
}
|
|
171
|
+
],
|
|
172
|
+
"stats": {
|
|
173
|
+
"total": 100,
|
|
174
|
+
"relevant": 25,
|
|
175
|
+
"avg_confidence": 0.85
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## 配置存储
|
|
181
|
+
|
|
182
|
+
配置文件存储在用户家目录的 `.abseeker/` 文件夹中:
|
|
183
|
+
|
|
184
|
+
- **Linux/macOS**: `~/.abseeker/config.json`
|
|
185
|
+
- **Windows**: `%USERPROFILE%\.abseeker\config.json`
|
|
186
|
+
|
|
187
|
+
## 开发
|
|
188
|
+
|
|
189
|
+
### 项目结构
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
abseeker/
|
|
193
|
+
├── abseeker/ # 主代码
|
|
194
|
+
│ ├── cli/ # 命令行接口
|
|
195
|
+
│ ├── llm/ # LLM 客户端
|
|
196
|
+
│ ├── parser/ # WOS 文件解析
|
|
197
|
+
│ ├── processor/ # 批量处理器
|
|
198
|
+
│ └── prompt/ # 提示词模板
|
|
199
|
+
├── tests/ # 测试代码
|
|
200
|
+
├── dev/ # 开发数据和文档
|
|
201
|
+
└── docs/ # 文档
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 运行测试
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
pytest
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### 代码格式化
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
black abseeker/
|
|
214
|
+
ruff check abseeker/
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## 许可证
|
|
218
|
+
|
|
219
|
+
MIT License - 详见 [LICENSE](LICENSE) 文件
|
|
220
|
+
|
|
221
|
+
## 贡献
|
|
222
|
+
|
|
223
|
+
欢迎提交 Issue 和 Pull Request!
|
|
224
|
+
|
|
225
|
+
## 致谢
|
|
226
|
+
|
|
227
|
+
- 感谢 OpenAI/DeepSeek 等 LLM 提供商的 API
|
|
228
|
+
- 感谢 Web of Science 提供的学术文献数据
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ABSeeker - 基于LLM的WOS学术文献智能筛选工具
|
|
3
|
+
|
|
4
|
+
ABSeeker = Abstract Based Seeker
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
>>> from abseeker import WOSParser, DeepSeekClient
|
|
8
|
+
>>>
|
|
9
|
+
>>> # 解析文献
|
|
10
|
+
>>> parser = WOSParser()
|
|
11
|
+
>>> records = parser.parse_file("savedrecs.txt")
|
|
12
|
+
>>>
|
|
13
|
+
>>> # 使用LLM分析
|
|
14
|
+
>>> llm = DeepSeekClient(api_key="your-key")
|
|
15
|
+
>>> for record in records[:5]:
|
|
16
|
+
... result = llm.analyze(record, "研究肽类药物递送")
|
|
17
|
+
... print(f"{result.record.title}: {result.relevant}")
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
|
21
|
+
__author__ = "Developer"
|
|
22
|
+
|
|
23
|
+
# Parser模块
|
|
24
|
+
from abseeker.parser.record import Record
|
|
25
|
+
from abseeker.parser.wos_parser import WOSParser
|
|
26
|
+
|
|
27
|
+
# LLM模块
|
|
28
|
+
from abseeker.llm.base import LLMClient, AnalysisResult, LLMError
|
|
29
|
+
from abseeker.llm.deepseek_client import DeepSeekClient
|
|
30
|
+
from abseeker.llm.openai_client import OpenAIClient
|
|
31
|
+
|
|
32
|
+
# Prompt模块
|
|
33
|
+
from abseeker.prompt.builder import PromptBuilder
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
# Parser
|
|
37
|
+
"Record",
|
|
38
|
+
"WOSParser",
|
|
39
|
+
# LLM
|
|
40
|
+
"LLMClient",
|
|
41
|
+
"AnalysisResult",
|
|
42
|
+
"LLMError",
|
|
43
|
+
"DeepSeekClient",
|
|
44
|
+
"OpenAIClient",
|
|
45
|
+
# Prompt
|
|
46
|
+
"PromptBuilder",
|
|
47
|
+
]
|