abseeker 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+
25
+ # PyInstaller
26
+ *.manifest
27
+ *.spec
28
+
29
+ # Installer logs
30
+ pip-log.txt
31
+ pip-delete-this-directory.txt
32
+
33
+ # Unit test / coverage reports
34
+ htmlcov/
35
+ .tox/
36
+ .nox/
37
+ .coverage
38
+ .coverage.*
39
+ .cache
40
+ nosetests.xml
41
+ coverage.xml
42
+ *.cover
43
+ *.py,cover
44
+ .hypothesis/
45
+ .pytest_cache/
46
+ cover/
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+ local_settings.py
55
+ db.sqlite3
56
+ db.sqlite3-journal
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ .pybuilder/
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # IPython
76
+ profile_default/
77
+ ipython_config.py
78
+
79
+ # pyenv
80
+ .python-version
81
+
82
+ # pipenv
83
+ Pipfile.lock
84
+
85
+ # poetry
86
+ poetry.lock
87
+
88
+ # pdm
89
+ .pdm.toml
90
+
91
+ # PEP 582
92
+ __pypackages__/
93
+
94
+ # Celery stuff
95
+ celerybeat-schedule
96
+ celerybeat.pid
97
+
98
+ # SageMath parsed files
99
+ *.sage.py
100
+
101
+ # Environments
102
+ .env
103
+ .venv
104
+ env/
105
+ venv/
106
+ ENV/
107
+ env.bak/
108
+ venv.bak/
109
+
110
+ # Spyder project settings
111
+ .spyderproject
112
+ .spyproject
113
+
114
+ # Rope project settings
115
+ .ropeproject
116
+
117
+ # mkdocs documentation
118
+ /site
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # pytype static type analyzer
129
+ .pytype/
130
+
131
+ # Cython debug symbols
132
+ cython_debug/
133
+
134
+ # PyCharm
135
+ .idea/
136
+
137
+ # VS Code / Trae IDE
138
+ .vscode/
139
+ .trae
140
+
141
+ # Project specific
142
+ .abseeker/
143
+ dev
abseeker-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 BHM-Bob_G (github: BHM-Bob)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,262 @@
1
+ Metadata-Version: 2.4
2
+ Name: abseeker
3
+ Version: 0.1.0
4
+ Summary: 基于LLM的WOS学术文献智能筛选工具
5
+ Project-URL: Homepage, https://github.com/BHM-Bob/abseeker
6
+ Project-URL: Repository, https://github.com/BHM-Bob/abseeker
7
+ Author-email: BHM-Bob_G <bhmfly@foxmail.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: click>=8.0.0
20
+ Requires-Dist: httpx>=0.25.0
21
+ Requires-Dist: openpyxl>=3.1.0
22
+ Requires-Dist: pandas>=2.0.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Requires-Dist: rich>=13.0.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: black>=23.0.0; extra == 'dev'
27
+ Requires-Dist: build>=1.0.0; extra == 'dev'
28
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
30
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
31
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
32
+ Requires-Dist: twine>=4.0.0; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # ABSeeker
36
+
37
+ 基于LLM的WOS学术文献智能筛选工具
38
+
39
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
40
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
41
+
42
+ ## 简介
43
+
44
+ ABSeeker 是一个利用大语言模型(LLM)智能筛选 Web of Science (WOS) 学术文献的 Python 工具。它能够:
45
+
46
+ - 解析 WOS 导出的纯文本文献记录
47
+ - 使用 LLM 智能判断文献是否符合检索意图
48
+ - 提供置信度评分和判断理由
49
+ - 支持批量处理和断点续传
50
+ - 实时显示处理进度和统计信息
51
+
52
+ ## 安装
53
+
54
+ ### 从源码安装
55
+
56
+ ```bash
57
+ git clone https://github.com/BHM-Bob/abseeker.git
58
+ cd abseeker
59
+ pip install -e .
60
+ ```
61
+
62
+ ### 依赖
63
+
64
+ - Python >= 3.9
65
+ - pydantic >= 2.0.0
66
+ - httpx >= 0.25.0
67
+ - click >= 8.0.0
68
+ - rich >= 13.0.0
69
+ - pandas >= 2.0.0
+ - openpyxl >= 3.1.0
70
+
71
+ ## 快速开始
72
+
73
+ ### 1. 配置 LLM 后端
74
+
75
+ ```bash
76
+ # 配置 OpenAI 兼容的 API
77
+ abseeker config set-llm --provider openai
78
+
79
+ # 或配置 DeepSeek
80
+ abseeker config set-llm --provider deepseek
81
+ ```
82
+
83
+ ### 2. 测试连接
84
+
85
+ ```bash
86
+ abseeker config test
87
+ ```
88
+
89
+ ### 3. 分析文献
90
+
91
+ ```bash
92
+ abseeker analyze savedrecs.txt --intent "研究肽类药物递送" -o results.csv
93
+ ```
94
+
95
+ ## 使用指南
96
+
97
+ ### 配置管理
98
+
99
+ ```bash
100
+ # 查看当前配置
101
+ abseeker config show
102
+
103
+ # 设置 LLM 后端
104
+ abseeker config set-llm --provider openai --base-url https://api.example.com/v1
105
+
106
+ # 查看可用模型
107
+ abseeker config list-models
108
+
109
+ # 设置具体模型
110
+ abseeker config set-model gpt-4
111
+
112
+ # 设置请求速率限制
113
+ abseeker config set-rate-limit --interval 1.0 --rpm 60
114
+
115
+ # 重置配置
116
+ abseeker config reset
117
+ ```
118
+
119
+ ### 文献分析
120
+
121
+ #### 基本用法
122
+
123
+ ```bash
124
+ abseeker analyze savedrecs.txt --intent "研究深度学习在医疗领域的应用"
125
+ ```
126
+
127
+ #### 范围过滤
128
+
129
+ ```bash
130
+ # 按索引范围
131
+ abseeker analyze savedrecs.txt -i "纳米技术" --start-index 0 --end-index 99
132
+
133
+ # 按年份范围
134
+ abseeker analyze savedrecs.txt -i "AI医疗" --start-year 2020 --end-year 2023
135
+ ```
136
+
137
+ #### 输出格式
138
+
139
+ ```bash
140
+ # CSV 格式(默认)
141
+ abseeker analyze savedrecs.txt -i "研究意图" -o results.csv
142
+
143
+ # JSON 格式
144
+ abseeker analyze savedrecs.txt -i "研究意图" -o results.json --format json
145
+ ```
146
+
147
+ #### 速率限制
148
+
149
+ ```bash
150
+ # 设置请求间隔(秒)
151
+ abseeker analyze savedrecs.txt -i "研究意图" --interval 2.0
152
+ ```
153
+
154
+ #### 断点续传
155
+
156
+ ```bash
157
+ # 每隔 10 篇文献自动保存
158
+ abseeker analyze savedrecs.txt -i "研究意图" --save-interval 10
159
+
160
+ # 从保存文件恢复(中断后)
161
+ abseeker analyze savedrecs.txt -i "研究意图" --from-saved results.autosave.json
162
+ ```
163
+
164
+ ## 工作原理
165
+
166
+ 1. **解析**: 读取 WOS 导出的纯文本文件,提取文献元数据(标题、作者、摘要、关键词等)
167
+ 2. **过滤**: 根据用户指定的索引或年份范围筛选文献
168
+ 3. **分析**: 将每篇文献的摘要和元数据发送给 LLM,判断是否符合检索意图
169
+ 4. **输出**: 生成包含相关性判断、置信度、理由和分类的结果文件
170
+
171
+ ## 输出格式
172
+
173
+ ### CSV 输出
174
+
175
+ | 字段 | 说明 |
176
+ |------|------|
177
+ | title | 文献标题 |
178
+ | authors | 作者列表 |
179
+ | journal | 期刊名称 |
180
+ | year | 发表年份 |
181
+ | doi | DOI |
182
+ | relevant | 是否相关 (True/False) |
183
+ | confidence | 置信度 (0.0-1.0) |
184
+ | reason | 判断理由 |
185
+ | categories | 分类标签 |
186
+
187
+ ### JSON 输出
188
+
189
+ ```json
190
+ {
191
+ "results": [
192
+ {
193
+ "record": {
194
+ "title": "文献标题",
195
+ "authors": ["作者1", "作者2"],
196
+ "journal": "期刊名",
197
+ "year": 2023,
198
+ "doi": "10.xxxx/xxxxx"
199
+ },
200
+ "relevant": true,
201
+ "confidence": 0.95,
202
+ "reason": "该文献研究了...",
203
+ "categories": ["深度学习", "医疗AI"]
204
+ }
205
+ ],
206
+ "stats": {
207
+ "total": 100,
208
+ "relevant": 25,
209
+ "avg_confidence": 0.85
210
+ }
211
+ }
212
+ ```
213
+
214
+ ## 配置存储
215
+
216
+ 配置文件存储在用户家目录的 `.abseeker/` 文件夹中:
217
+
218
+ - **Linux/macOS**: `~/.abseeker/config.json`
219
+ - **Windows**: `%USERPROFILE%\.abseeker\config.json`
220
+
221
+ ## 开发
222
+
223
+ ### 项目结构
224
+
225
+ ```
226
+ abseeker/
227
+ ├── abseeker/ # 主代码
228
+ │ ├── cli/ # 命令行接口
229
+ │ ├── llm/ # LLM 客户端
230
+ │ ├── parser/ # WOS 文件解析
231
+ │ ├── processor/ # 批量处理器
232
+ │ └── prompt/ # 提示词模板
233
+ ├── tests/ # 测试代码
234
+ ├── dev/ # 开发数据和文档
235
+ └── docs/ # 文档
236
+ ```
237
+
238
+ ### 运行测试
239
+
240
+ ```bash
241
+ pytest
242
+ ```
243
+
244
+ ### 代码格式化
245
+
246
+ ```bash
247
+ black abseeker/
248
+ ruff check abseeker/
249
+ ```
250
+
251
+ ## 许可证
252
+
253
+ MIT License - 详见 [LICENSE](LICENSE) 文件
254
+
255
+ ## 贡献
256
+
257
+ 欢迎提交 Issue 和 Pull Request!
258
+
259
+ ## 致谢
260
+
261
+ - 感谢 OpenAI/DeepSeek 等 LLM 提供商的 API
262
+ - 感谢 Web of Science 提供的学术文献数据
@@ -0,0 +1,228 @@
1
+ # ABSeeker
2
+
3
+ 基于LLM的WOS学术文献智能筛选工具
4
+
5
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+
8
+ ## 简介
9
+
10
+ ABSeeker 是一个利用大语言模型(LLM)智能筛选 Web of Science (WOS) 学术文献的 Python 工具。它能够:
11
+
12
+ - 解析 WOS 导出的纯文本文献记录
13
+ - 使用 LLM 智能判断文献是否符合检索意图
14
+ - 提供置信度评分和判断理由
15
+ - 支持批量处理和断点续传
16
+ - 实时显示处理进度和统计信息
17
+
18
+ ## 安装
19
+
20
+ ### 从源码安装
21
+
22
+ ```bash
23
+ git clone https://github.com/BHM-Bob/abseeker.git
24
+ cd abseeker
25
+ pip install -e .
26
+ ```
27
+
28
+ ### 依赖
29
+
30
+ - Python >= 3.9
31
+ - pydantic >= 2.0.0
32
+ - httpx >= 0.25.0
33
+ - click >= 8.0.0
34
+ - rich >= 13.0.0
35
+ - pandas >= 2.0.0
+ - openpyxl >= 3.1.0
36
+
37
+ ## 快速开始
38
+
39
+ ### 1. 配置 LLM 后端
40
+
41
+ ```bash
42
+ # 配置 OpenAI 兼容的 API
43
+ abseeker config set-llm --provider openai
44
+
45
+ # 或配置 DeepSeek
46
+ abseeker config set-llm --provider deepseek
47
+ ```
48
+
49
+ ### 2. 测试连接
50
+
51
+ ```bash
52
+ abseeker config test
53
+ ```
54
+
55
+ ### 3. 分析文献
56
+
57
+ ```bash
58
+ abseeker analyze savedrecs.txt --intent "研究肽类药物递送" -o results.csv
59
+ ```
60
+
61
+ ## 使用指南
62
+
63
+ ### 配置管理
64
+
65
+ ```bash
66
+ # 查看当前配置
67
+ abseeker config show
68
+
69
+ # 设置 LLM 后端
70
+ abseeker config set-llm --provider openai --base-url https://api.example.com/v1
71
+
72
+ # 查看可用模型
73
+ abseeker config list-models
74
+
75
+ # 设置具体模型
76
+ abseeker config set-model gpt-4
77
+
78
+ # 设置请求速率限制
79
+ abseeker config set-rate-limit --interval 1.0 --rpm 60
80
+
81
+ # 重置配置
82
+ abseeker config reset
83
+ ```
84
+
85
+ ### 文献分析
86
+
87
+ #### 基本用法
88
+
89
+ ```bash
90
+ abseeker analyze savedrecs.txt --intent "研究深度学习在医疗领域的应用"
91
+ ```
92
+
93
+ #### 范围过滤
94
+
95
+ ```bash
96
+ # 按索引范围
97
+ abseeker analyze savedrecs.txt -i "纳米技术" --start-index 0 --end-index 99
98
+
99
+ # 按年份范围
100
+ abseeker analyze savedrecs.txt -i "AI医疗" --start-year 2020 --end-year 2023
101
+ ```
102
+
103
+ #### 输出格式
104
+
105
+ ```bash
106
+ # CSV 格式(默认)
107
+ abseeker analyze savedrecs.txt -i "研究意图" -o results.csv
108
+
109
+ # JSON 格式
110
+ abseeker analyze savedrecs.txt -i "研究意图" -o results.json --format json
111
+ ```
112
+
113
+ #### 速率限制
114
+
115
+ ```bash
116
+ # 设置请求间隔(秒)
117
+ abseeker analyze savedrecs.txt -i "研究意图" --interval 2.0
118
+ ```
119
+
120
+ #### 断点续传
121
+
122
+ ```bash
123
+ # 每隔 10 篇文献自动保存
124
+ abseeker analyze savedrecs.txt -i "研究意图" --save-interval 10
125
+
126
+ # 从保存文件恢复(中断后)
127
+ abseeker analyze savedrecs.txt -i "研究意图" --from-saved results.autosave.json
128
+ ```
129
+
130
+ ## 工作原理
131
+
132
+ 1. **解析**: 读取 WOS 导出的纯文本文件,提取文献元数据(标题、作者、摘要、关键词等)
133
+ 2. **过滤**: 根据用户指定的索引或年份范围筛选文献
134
+ 3. **分析**: 将每篇文献的摘要和元数据发送给 LLM,判断是否符合检索意图
135
+ 4. **输出**: 生成包含相关性判断、置信度、理由和分类的结果文件
136
+
137
+ ## 输出格式
138
+
139
+ ### CSV 输出
140
+
141
+ | 字段 | 说明 |
142
+ |------|------|
143
+ | title | 文献标题 |
144
+ | authors | 作者列表 |
145
+ | journal | 期刊名称 |
146
+ | year | 发表年份 |
147
+ | doi | DOI |
148
+ | relevant | 是否相关 (True/False) |
149
+ | confidence | 置信度 (0.0-1.0) |
150
+ | reason | 判断理由 |
151
+ | categories | 分类标签 |
152
+
153
+ ### JSON 输出
154
+
155
+ ```json
156
+ {
157
+ "results": [
158
+ {
159
+ "record": {
160
+ "title": "文献标题",
161
+ "authors": ["作者1", "作者2"],
162
+ "journal": "期刊名",
163
+ "year": 2023,
164
+ "doi": "10.xxxx/xxxxx"
165
+ },
166
+ "relevant": true,
167
+ "confidence": 0.95,
168
+ "reason": "该文献研究了...",
169
+ "categories": ["深度学习", "医疗AI"]
170
+ }
171
+ ],
172
+ "stats": {
173
+ "total": 100,
174
+ "relevant": 25,
175
+ "avg_confidence": 0.85
176
+ }
177
+ }
178
+ ```
179
+
180
+ ## 配置存储
181
+
182
+ 配置文件存储在用户家目录的 `.abseeker/` 文件夹中:
183
+
184
+ - **Linux/macOS**: `~/.abseeker/config.json`
185
+ - **Windows**: `%USERPROFILE%\.abseeker\config.json`
186
+
187
+ ## 开发
188
+
189
+ ### 项目结构
190
+
191
+ ```
192
+ abseeker/
193
+ ├── abseeker/ # 主代码
194
+ │ ├── cli/ # 命令行接口
195
+ │ ├── llm/ # LLM 客户端
196
+ │ ├── parser/ # WOS 文件解析
197
+ │ ├── processor/ # 批量处理器
198
+ │ └── prompt/ # 提示词模板
199
+ ├── tests/ # 测试代码
200
+ ├── dev/ # 开发数据和文档
201
+ └── docs/ # 文档
202
+ ```
203
+
204
+ ### 运行测试
205
+
206
+ ```bash
207
+ pytest
208
+ ```
209
+
210
+ ### 代码格式化
211
+
212
+ ```bash
213
+ black abseeker/
214
+ ruff check abseeker/
215
+ ```
216
+
217
+ ## 许可证
218
+
219
+ MIT License - 详见 [LICENSE](LICENSE) 文件
220
+
221
+ ## 贡献
222
+
223
+ 欢迎提交 Issue 和 Pull Request!
224
+
225
+ ## 致谢
226
+
227
+ - 感谢 OpenAI/DeepSeek 等 LLM 提供商的 API
228
+ - 感谢 Web of Science 提供的学术文献数据
@@ -0,0 +1,47 @@
1
+ """
2
+ ABSeeker - 基于LLM的WOS学术文献智能筛选工具
3
+
4
+ ABSeeker = Abstract Based Seeker
5
+
6
+ Example:
7
+ >>> from abseeker import WOSParser, DeepSeekClient
8
+ >>>
9
+ >>> # 解析文献
10
+ >>> parser = WOSParser()
11
+ >>> records = parser.parse_file("savedrecs.txt")
12
+ >>>
13
+ >>> # 使用LLM分析
14
+ >>> llm = DeepSeekClient(api_key="your-key")
15
+ >>> for record in records[:5]:
16
+ ... result = llm.analyze(record, "研究肽类药物递送")
17
+ ... print(f"{result.record.title}: {result.relevant}")
18
+ """
19
+
20
+ __version__ = "0.1.0"
21
+ __author__ = "Developer"
22
+
23
+ # Parser模块
24
+ from abseeker.parser.record import Record
25
+ from abseeker.parser.wos_parser import WOSParser
26
+
27
+ # LLM模块
28
+ from abseeker.llm.base import LLMClient, AnalysisResult, LLMError
29
+ from abseeker.llm.deepseek_client import DeepSeekClient
30
+ from abseeker.llm.openai_client import OpenAIClient
31
+
32
+ # Prompt模块
33
+ from abseeker.prompt.builder import PromptBuilder
34
+
35
+ __all__ = [
36
+ # Parser
37
+ "Record",
38
+ "WOSParser",
39
+ # LLM
40
+ "LLMClient",
41
+ "AnalysisResult",
42
+ "LLMError",
43
+ "DeepSeekClient",
44
+ "OpenAIClient",
45
+ # Prompt
46
+ "PromptBuilder",
47
+ ]
@@ -0,0 +1,6 @@
1
+ """ABSeeker CLI入口"""
2
+
3
+ from abseeker.cli.main import cli
4
+
5
+ if __name__ == "__main__":
6
+ cli()
@@ -0,0 +1,5 @@
1
+ """CLI模块"""
2
+
3
+ from abseeker.cli.main import cli
4
+
5
+ __all__ = ["cli"]