paperseek 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. paperseek-0.1.0/LICENSE +181 -0
  2. paperseek-0.1.0/PKG-INFO +505 -0
  3. paperseek-0.1.0/README.md +465 -0
  4. paperseek-0.1.0/paperseek/__init__.py +6 -0
  5. paperseek-0.1.0/paperseek/abstract_fetcher.py +1 -0
  6. paperseek-0.1.0/paperseek/cli.py +530 -0
  7. paperseek-0.1.0/paperseek/client/__init__.py +1 -0
  8. paperseek-0.1.0/paperseek/config.py +166 -0
  9. paperseek-0.1.0/paperseek/config_store.py +155 -0
  10. paperseek-0.1.0/paperseek/diagnostics.py +355 -0
  11. paperseek-0.1.0/paperseek/disciplines.py +1 -0
  12. paperseek-0.1.0/paperseek/env_loader.py +54 -0
  13. paperseek-0.1.0/paperseek/formatter.py +166 -0
  14. paperseek-0.1.0/paperseek/history.py +587 -0
  15. paperseek-0.1.0/paperseek/llm_client.py +1 -0
  16. paperseek-0.1.0/paperseek/prompts.py +136 -0
  17. paperseek-0.1.0/paperseek/providers.py +2 -0
  18. paperseek-0.1.0/paperseek/results.py +1 -0
  19. paperseek-0.1.0/paperseek/search_agent.py +3 -0
  20. paperseek-0.1.0/paperseek/source_metadata.py +1 -0
  21. paperseek-0.1.0/paperseek/static/app.js +2387 -0
  22. paperseek-0.1.0/paperseek/static/index.html +239 -0
  23. paperseek-0.1.0/paperseek/static/styles.css +1694 -0
  24. paperseek-0.1.0/paperseek/web_app.py +495 -0
  25. paperseek-0.1.0/paperseek.egg-info/PKG-INFO +505 -0
  26. paperseek-0.1.0/paperseek.egg-info/SOURCES.txt +64 -0
  27. paperseek-0.1.0/paperseek.egg-info/dependency_links.txt +1 -0
  28. paperseek-0.1.0/paperseek.egg-info/entry_points.txt +3 -0
  29. paperseek-0.1.0/paperseek.egg-info/requires.txt +10 -0
  30. paperseek-0.1.0/paperseek.egg-info/top_level.txt +2 -0
  31. paperseek-0.1.0/paperseek_core/__init__.py +33 -0
  32. paperseek-0.1.0/paperseek_core/abstracts.py +40 -0
  33. paperseek-0.1.0/paperseek_core/agent.py +984 -0
  34. paperseek-0.1.0/paperseek_core/client/__init__.py +9 -0
  35. paperseek-0.1.0/paperseek_core/client/api.py +691 -0
  36. paperseek-0.1.0/paperseek_core/client/client.py +720 -0
  37. paperseek-0.1.0/paperseek_core/client/config.py +452 -0
  38. paperseek-0.1.0/paperseek_core/client/errors.py +189 -0
  39. paperseek-0.1.0/paperseek_core/client/models.py +916 -0
  40. paperseek-0.1.0/paperseek_core/client/response.py +21 -0
  41. paperseek-0.1.0/paperseek_core/client/rest.py +256 -0
  42. paperseek-0.1.0/paperseek_core/config.py +96 -0
  43. paperseek-0.1.0/paperseek_core/disciplines.py +199 -0
  44. paperseek-0.1.0/paperseek_core/llm.py +278 -0
  45. paperseek-0.1.0/paperseek_core/prompts.py +138 -0
  46. paperseek-0.1.0/paperseek_core/providers.py +2 -0
  47. paperseek-0.1.0/paperseek_core/results.py +146 -0
  48. paperseek-0.1.0/paperseek_core/source_metadata.py +1 -0
  49. paperseek-0.1.0/paperseek_core/sources/__init__.py +2 -0
  50. paperseek-0.1.0/paperseek_core/sources/metadata.py +126 -0
  51. paperseek-0.1.0/paperseek_core/sources/providers.py +708 -0
  52. paperseek-0.1.0/pyproject.toml +64 -0
  53. paperseek-0.1.0/setup.cfg +4 -0
  54. paperseek-0.1.0/tests/test_agent_api.py +17 -0
  55. paperseek-0.1.0/tests/test_cli_management.py +104 -0
  56. paperseek-0.1.0/tests/test_config_store.py +47 -0
  57. paperseek-0.1.0/tests/test_deployment.py +70 -0
  58. paperseek-0.1.0/tests/test_diagnostics.py +39 -0
  59. paperseek-0.1.0/tests/test_disciplines.py +270 -0
  60. paperseek-0.1.0/tests/test_history.py +172 -0
  61. paperseek-0.1.0/tests/test_llm_providers.py +78 -0
  62. paperseek-0.1.0/tests/test_packaging.py +114 -0
  63. paperseek-0.1.0/tests/test_results.py +49 -0
  64. paperseek-0.1.0/tests/test_skill_launcher.py +170 -0
  65. paperseek-0.1.0/tests/test_source_metadata.py +17 -0
  66. paperseek-0.1.0/tests/test_web_app.py +162 -0
@@ -0,0 +1,181 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction, and
10
+ distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright
13
+ owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all other entities
16
+ that control, are controlled by, or are under common control with that entity.
17
+ For the purposes of this definition, "control" means (i) the power, direct or
18
+ indirect, to cause the direction or management of such entity, whether by
19
+ contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
20
+ outstanding shares, or (iii) beneficial ownership of such entity.
21
+
22
+ "You" (or "Your") shall mean an individual or Legal Entity exercising
23
+ permissions granted by this License.
24
+
25
+ "Source" form shall mean the preferred form for making modifications, including
26
+ but not limited to software source code, documentation source, and configuration
27
+ files.
28
+
29
+ "Object" form shall mean any form resulting from mechanical transformation or
30
+ translation of a Source form, including but not limited to compiled object code,
31
+ generated documentation, and conversions to other media types.
32
+
33
+ "Work" shall mean the work of authorship, whether in Source or Object form, made
34
+ available under the License, as indicated by a copyright notice that is included
35
+ in or attached to the work (an example is provided in the Appendix below).
36
+
37
+ "Derivative Works" shall mean any work, whether in Source or Object form, that
38
+ is based on (or derived from) the Work and for which the editorial revisions,
39
+ annotations, elaborations, or other modifications represent, as a whole, an
40
+ original work of authorship. For the purposes of this License, Derivative Works
41
+ shall not include works that remain separable from, or merely link (or bind by
42
+ name) to the interfaces of, the Work and Derivative Works thereof.
43
+
44
+ "Contribution" shall mean any work of authorship, including the original version
45
+ of the Work and any modifications or additions to that Work or Derivative Works
46
+ thereof, that is intentionally submitted to Licensor for inclusion in the Work by
47
+ the copyright owner or by an individual or Legal Entity authorized to submit on
48
+ behalf of the copyright owner. For the purposes of this definition, "submitted"
49
+ means any form of electronic, verbal, or written communication sent to the
50
+ Licensor or its representatives, including but not limited to communication on
51
+ electronic mailing lists, source code control systems, and issue tracking
52
+ systems that are managed by, or on behalf of, the Licensor for the purpose of
53
+ discussing and improving the Work, but excluding communication that is
54
+ conspicuously marked or otherwise designated in writing by the copyright owner as
55
+ "Not a Contribution."
56
+
57
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
58
+ of whom a Contribution has been received by Licensor and subsequently
59
+ incorporated within the Work.
60
+
61
+ 2. Grant of Copyright License. Subject to the terms and conditions of this
62
+ License, each Contributor hereby grants to You a perpetual, worldwide,
63
+ non-exclusive, no-charge, royalty-free, irrevocable copyright license to
64
+ reproduce, prepare Derivative Works of, publicly display, publicly perform,
65
+ sublicense, and distribute the Work and such Derivative Works in Source or
66
+ Object form.
67
+
68
+ 3. Grant of Patent License. Subject to the terms and conditions of this License,
69
+ each Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
70
+ no-charge, royalty-free, irrevocable (except as stated in this section) patent
71
+ license to make, have made, use, offer to sell, sell, import, and otherwise
72
+ transfer the Work, where such license applies only to those patent claims
73
+ licensable by such Contributor that are necessarily infringed by their
74
+ Contribution(s) alone or by combination of their Contribution(s) with the Work to
75
+ which such Contribution(s) was submitted. If You institute patent litigation
76
+ against any entity (including a cross-claim or counterclaim in a lawsuit)
77
+ alleging that the Work or a Contribution incorporated within the Work
78
+ constitutes direct or contributory patent infringement, then any patent licenses
79
+ granted to You under this License for that Work shall terminate as of the date
80
+ such litigation is filed.
81
+
82
+ 4. Redistribution. You may reproduce and distribute copies of the Work or
83
+ Derivative Works thereof in any medium, with or without modifications, and in
84
+ Source or Object form, provided that You meet the following conditions:
85
+
86
+ (a) You must give any other recipients of the Work or Derivative Works a copy of
87
+ this License; and
88
+
89
+ (b) You must cause any modified files to carry prominent notices stating that You
90
+ changed the files; and
91
+
92
+ (c) You must retain, in the Source form of any Derivative Works that You
93
+ distribute, all copyright, patent, trademark, and attribution notices from the
94
+ Source form of the Work, excluding those notices that do not pertain to any part
95
+ of the Derivative Works; and
96
+
97
+ (d) If the Work includes a "NOTICE" text file as part of its distribution, then
98
+ any Derivative Works that You distribute must include a readable copy of the
99
+ attribution notices contained within such NOTICE file, excluding those notices
100
+ that do not pertain to any part of the Derivative Works, in at least one of the
101
+ following places: within a NOTICE text file distributed as part of the Derivative
102
+ Works; within the Source form or documentation, if provided along with the
103
+ Derivative Works; or within a display generated by the Derivative Works, if and
104
+ wherever such third-party notices normally appear. The contents of the NOTICE
105
+ file are for informational purposes only and do not modify the License. You may
106
+ add Your own attribution notices within Derivative Works that You distribute,
107
+ alongside or as an addendum to the NOTICE text from the Work, provided that such
108
+ additional attribution notices cannot be construed as modifying the License.
109
+
110
+ You may add Your own copyright statement to Your modifications and may provide
111
+ additional or different license terms and conditions for use, reproduction, or
112
+ distribution of Your modifications, or for any such Derivative Works as a whole,
113
+ provided Your use, reproduction, and distribution of the Work otherwise complies
114
+ with the conditions stated in this License.
115
+
116
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any
117
+ Contribution intentionally submitted for inclusion in the Work by You to the
118
+ Licensor shall be under the terms and conditions of this License, without any
119
+ additional terms or conditions. Notwithstanding the above, nothing herein shall
120
+ supersede or modify the terms of any separate license agreement you may have
121
+ executed with Licensor regarding such Contributions.
122
+
123
+ 6. Trademarks. This License does not grant permission to use the trade names,
124
+ trademarks, service marks, or product names of the Licensor, except as required
125
+ for reasonable and customary use in describing the origin of the Work and
126
+ reproducing the content of the NOTICE file.
127
+
128
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
129
+ writing, Licensor provides the Work (and each Contributor provides its
130
+ Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
131
+ either express or implied, including, without limitation, any warranties or
132
+ conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
133
+ PARTICULAR PURPOSE. You are solely responsible for determining the
134
+ appropriateness of using or redistributing the Work and assume any risks
135
+ associated with Your exercise of permissions under this License.
136
+
137
+ 8. Limitation of Liability. In no event and under no legal theory, whether in
138
+ tort (including negligence), contract, or otherwise, unless required by
139
+ applicable law (such as deliberate and grossly negligent acts) or agreed to in
140
+ writing, shall any Contributor be liable to You for damages, including any
141
+ direct, indirect, special, incidental, or consequential damages of any character
142
+ arising as a result of this License or out of the use or inability to use the
143
+ Work (including but not limited to damages for loss of goodwill, work stoppage,
144
+ computer failure or malfunction, or any and all other commercial damages or
145
+ losses), even if such Contributor has been advised of the possibility of such
146
+ damages.
147
+
148
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or
149
+ Derivative Works thereof, You may choose to offer, and charge a fee for,
150
+ acceptance of support, warranty, indemnity, or other liability obligations
151
+ and/or rights consistent with this License. However, in accepting such
152
+ obligations, You may act only on Your own behalf and on Your sole
153
+ responsibility, not on behalf of any other Contributor, and only if You agree to
154
+ indemnify, defend, and hold each Contributor harmless for any liability incurred
155
+ by, or claims asserted against, such Contributor by reason of your accepting any
156
+ such warranty or additional liability.
157
+
158
+ END OF TERMS AND CONDITIONS
159
+
160
+ APPENDIX: How to apply the Apache License to your work.
161
+
162
+ To apply the Apache License to your work, attach the following boilerplate
163
+ notice, with the fields enclosed by brackets "[]" replaced with your own
164
+ identifying information. (Don't include the brackets!) The text should be
165
+ enclosed in the appropriate comment syntax for the file format. We also
166
+ recommend that a file or class name and description of purpose be included on the
167
+ same "printed page" as the copyright notice for easier identification within
168
+ third-party archives.
169
+
170
+ Copyright 2026 PaperSeek contributors
171
+
172
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use
173
+ this file except in compliance with the License. You may obtain a copy of the
174
+ License at
175
+
176
+ http://www.apache.org/licenses/LICENSE-2.0
177
+
178
+ Unless required by applicable law or agreed to in writing, software distributed
179
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
180
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
181
+ specific language governing permissions and limitations under the License.
@@ -0,0 +1,505 @@
1
+ Metadata-Version: 2.4
2
+ Name: paperseek
3
+ Version: 0.1.0
4
+ Summary: LLM-based Literature Search Agent
5
+ Author-email: MingfengHong <hongmingfeng24@mails.ucas.ac.cn>
6
+ Maintainer-email: MingfengHong <hongmingfeng24@mails.ucas.ac.cn>
7
+ License: Apache-2.0
8
+ Project-URL: Homepage, https://www.paperseek.xyz/
9
+ Project-URL: Repository, https://github.com/MingfengHong/paperseek
10
+ Project-URL: Issues, https://github.com/MingfengHong/paperseek/issues
11
+ Project-URL: Documentation, https://docs.paperseek.xyz/
12
+ Project-URL: Online-Demo, https://www.paperseek.xyz/
13
+ Keywords: literature-search,openalex,crossref,web-of-science,llm
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Environment :: Console
16
+ Classifier: Framework :: FastAPI
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3 :: Only
21
+ Classifier: Programming Language :: Python :: 3.8
22
+ Classifier: Programming Language :: Python :: 3.9
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
27
+ Requires-Python: >=3.8
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: requests>=2.25
31
+ Requires-Dist: pydantic>=2
32
+ Requires-Dist: python-dateutil>=2.5
33
+ Requires-Dist: urllib3<3,>=1.25.3
34
+ Requires-Dist: typing-extensions>=4.7
35
+ Requires-Dist: fastapi>=0.100
36
+ Requires-Dist: uvicorn>=0.23
37
+ Provides-Extra: dev
38
+ Requires-Dist: httpx>=0.24; extra == "dev"
39
+ Dynamic: license-file
40
+
41
+ # PaperSeek
42
+
43
+ [![Python](https://img.shields.io/badge/Python-3.8%2B-3776AB?logo=python&logoColor=white)](https://www.python.org/)
44
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
45
+ [![CI](https://github.com/MingfengHong/paperseek/actions/workflows/ci.yml/badge.svg)](https://github.com/MingfengHong/paperseek/actions/workflows/ci.yml)
46
+ [![Status](https://img.shields.io/badge/status-alpha-orange)](#项目状态)
47
+ [![ModelScope visits](https://img.shields.io/badge/ModelScope%20visits-2.6k-624AFF?logo=modelscope&logoColor=white)](https://modelscope.cn/studios/HongMingfeng/PaperSeek)
48
+
49
+ **文档站:[PaperSeek Docs](https://docs.paperseek.xyz/) | 语言:简体中文 | [English](README.en.md)**
50
+
51
+ **LLM-based Literature Search Agent.** PaperSeek 是一个面向研究者的文献发现工具,支持用自然语言发起检索、自动迭代查询、扩展候选论文、排序结果并导出可复核的文献列表。
52
+
53
+ 在线体验:[点击体验](https://www.paperseek.xyz/)。在线版提供 `Quick Start`、`ModelScope Service` 和 `Use your own API` 三种模式:登录用户可使用站点提供的每日免费 Quick Start 额度;ModelScope 模式使用登录用户自己的 API-Inference 授权;自带 API 模式可由用户填写自己的模型和数据源 Key。详情见 [在线体验版使用说明](docs/online-demo.md)。
54
+
55
+ ![PaperSeek web interface](https://raw.githubusercontent.com/MingfengHong/paperseek/main/docs/assets/paperseek-web.png)
56
+
57
+ 完整使用说明见 [PaperSeek 用户手册](docs/user-manual.md);部署说明见 [Docker、Vercel 与 ModelScope 部署指南](docs/deployment.md)。
58
+
59
+ ## 概览
60
+
61
+ PaperSeek 将研究问题组织成一条可观察的检索工作流:
62
+
63
+ - 输入中文或英文研究问题。
64
+ - 由 LLM 生成适合数据源的检索查询。
65
+ - 根据命中数量自动放宽或收窄查询。
66
+ - 统一整理题名、作者、期刊、年份、DOI、摘要、引用数和链接等元数据。
67
+ - 对候选文献进行 LLM 相关性评分。
68
+ - 可选地通过 OpenAlex 引用关系扩展高匹配论文的前向引用和后向参考文献。
69
+ - 可选地用 Discipline Fields 把检索限制在一个或多个学科领域。
70
+ - 在网页中查看工作流、结果表和引用图,并导出 CSV。
71
+
72
+ PaperSeek 专注于第一轮候选文献发现和元数据整理。系统综述流程、全文获取、版权合规和最终研究判断仍需由研究者完成。
73
+
74
+ ## 功能概览
75
+
76
+ | 功能 | 说明 |
77
+ | --- | --- |
78
+ | 自然语言检索 | 输入研究问题,自动生成数据源查询。 |
79
+ | 多轮迭代 | 根据目标结果数量自动调整查询,默认最多 5 轮。 |
80
+ | 相关性排序 | LLM 对候选论文打分并给出简短理由。 |
81
+ | Discipline Fields | 在 Web UI 或 CLI 中选择 OpenAlex Field 学科分类;OpenAlex 使用原生 field filter,WoS 映射为 WC 类目,Crossref 则将所选学科作为检索上下文。 |
82
+ | OpenAlex 引用扩展 | 从高匹配论文扩展参考文献和被引论文。 |
83
+ | 结果导出 | 在 Results 页面勾选论文并导出 CSV。 |
84
+ | 引用图 | 使用箭头显示引用方向,支持拖拽、缩放和平移。 |
85
+ | CLI 与 Web UI | 既可命令行运行,也可打开本地网页交互使用。 |
86
+ | 中英文界面 | Web UI 可在 `EN` 和 `中文` 之间切换,选择会保存在当前浏览器。 |
87
+ | 本地历史记录 | 开源自托管版默认把搜索记录、日志事件和结果保存到本地 SQLite,便于回看和复用。 |
88
+ | Docker / Vercel / ModelScope 部署 | 支持 Docker 完整部署、Vercel 一键体验部署和 ModelScope Docker 创空间部署。 |
89
+ | 配置诊断 | `doctor`、`smoke`、`sources` 帮助排查 API Key、数据源和协议问题。 |
90
+ | 可选 Agent Skill | `skills/paperseek/` 可单独复制到支持 Skill 的 agent 平台,不随 Python 包自动安装。 |
91
+
92
+ ## 快速开始
93
+
94
+ ```bash
95
+ git clone https://github.com/MingfengHong/paperseek.git
96
+ cd paperseek
97
+ python -m venv .venv
98
+ source .venv/bin/activate
99
+ python -m pip install --upgrade pip
100
+ python -m pip install -e .
101
+ ```
102
+
103
+ Windows PowerShell:
104
+
105
+ ```powershell
106
+ git clone https://github.com/MingfengHong/paperseek.git
107
+ cd paperseek
108
+ python -m venv .venv
109
+ .\.venv\Scripts\Activate.ps1
110
+ python -m pip install --upgrade pip
111
+ python -m pip install -e .
112
+ ```
113
+
114
+ 启动网页界面:
115
+
116
+ ```bash
117
+ paperseek-web
118
+ ```
119
+
120
+ 打开:
121
+
122
+ ```text
123
+ http://127.0.0.1:8765/
124
+ ```
125
+
126
+ 也可以直接使用命令行:
127
+
128
+ ```bash
129
+ paperseek "open innovation and digital platforms" --source openalex
130
+ ```
131
+
132
+ ## 部署
133
+
134
+ Docker 是完整 Web UI 的推荐部署方式:
135
+
136
+ ```bash
137
+ docker compose up --build
138
+ ```
139
+
140
+ 打开:
141
+
142
+ ```text
143
+ http://127.0.0.1:8765/
144
+ ```
145
+
146
+ Vercel 可用于快速体验和轻量 Web UI 部署:
147
+
148
+ [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2FMingfengHong%2Fpaperseek)
149
+
150
+ ModelScope 创空间也可通过 Docker 方式部署。点击下方按钮查看 PaperSeek 的创空间部署步骤:
151
+
152
+ <a href="docs/deployment.md#modelscope-studio"><img src="docs/assets/deploy-modelscope.svg" alt="Deploy to ModelScope" height="32"></a>
153
+
154
+ 长时间搜索、引用扩展和大量请求建议使用 Docker 或 VPS。完整说明见 [部署指南](docs/deployment.md)。
155
+
156
+ ## 最小配置
157
+
158
+ PaperSeek 至少需要一个 LLM。默认数据源是 OpenAlex;OpenAlex 可匿名测试,但建议申请免费 API Key 以获得更稳定的访问体验。
159
+
160
+ 以 DeepSeek 为例:
161
+
162
+ ```bash
163
+ export LLM_PROVIDER=deepseek
164
+ export LLM_API_TYPE=openai_chat
165
+ export LLM_MODEL=deepseek-v4-flash
166
+ export LLM_BASE_URL=https://api.deepseek.com
167
+ export LLM_API_KEY=your-llm-api-key
168
+ paperseek-web
169
+ ```
170
+
171
+ 以中国科技云为例:
172
+
173
+ ```bash
174
+ export LLM_PROVIDER=cstcloud
175
+ export LLM_API_TYPE=openai_chat
176
+ export LLM_MODEL=DeepSeek-V4-Flash
177
+ export LLM_BASE_URL=https://uni-api.cstcloud.cn/v1
178
+ export LLM_API_KEY=your-cstcloud-api-key
179
+ paperseek-web
180
+ ```
181
+
182
+ 以 ModelScope API-Inference 为例:
183
+
184
+ ```bash
185
+ export LLM_PROVIDER=modelscope
186
+ export LLM_API_TYPE=openai_chat
187
+ export LLM_MODEL=Qwen/Qwen3-235B-A22B-Instruct-2507
188
+ export LLM_BASE_URL=https://api-inference.modelscope.cn/v1
189
+ export LLM_API_KEY=your-modelscope-token
190
+ paperseek-web
191
+ ```
192
+
193
+ Windows PowerShell:
194
+
195
+ ```powershell
196
+ $env:LLM_PROVIDER = "deepseek"
197
+ $env:LLM_API_TYPE = "openai_chat"
198
+ $env:LLM_MODEL = "deepseek-v4-flash"
199
+ $env:LLM_BASE_URL = "https://api.deepseek.com"
200
+ $env:LLM_API_KEY = "your-llm-api-key"
201
+ paperseek-web
202
+ ```
203
+
204
+ 本地 Ollama 不需要 LLM API Key:
205
+
206
+ ```bash
207
+ export LLM_PROVIDER=ollama
208
+ export LLM_API_TYPE=openai_chat
209
+ export LLM_MODEL=qwen3:8b
210
+ export LLM_BASE_URL=http://127.0.0.1:11434/v1
211
+ paperseek-web
212
+ ```
213
+
214
+ 项目提供 `.env.example`。你可以复制为 `.env` 作为本地配置参考,但不要提交真实 API Key。CLI 和 Web 后端会自动读取当前目录或项目根目录下的 `.env`;已经存在的系统环境变量优先于 `.env`。
215
+
216
+ ## Web UI
217
+
218
+ PaperSeek 的网页界面分为四个工作区:
219
+
220
+ | 页面 | 用途 |
221
+ | --- | --- |
222
+ | Search | 输入研究问题,选择 Discipline Fields,配置数据源、LLM、迭代次数和目标结果数;实时查看工作流与系统日志。 |
223
+ | Results | 查看最终排序结果,搜索、过滤、排序、勾选,并导出论文 CSV。 |
224
+ | Citation Map | 查看 OpenAlex 引用扩展形成的关系图,按箭头方向探索论文之间的引用关系。 |
225
+ | History | 查看本地保存的搜索运行、最终检索式、结果列表和运行事件。 |
226
+
227
+ 如果后端已经通过系统环境变量或 `.env` 配置了 API Key,Web UI 会显示环境中已配置的状态,不会把密钥内容发送到浏览器。Web UI 中填写的 API Key、Base URL 和参数只用于当前浏览器会话,不写入本地配置文件。历史记录会保存运行参数摘要、检索式、日志事件和结果,但不会保存任何 API Key。
228
+
229
+ Results 导出的 CSV 文件名使用研究问题主题和本地日期,便于区分多次运行。
230
+
231
+ ## CLI 用法
232
+
233
+ 基本检索:
234
+
235
+ ```bash
236
+ paperseek "responsible AI governance in public sector" --source openalex
237
+ ```
238
+
239
+ 显式子命令:
240
+
241
+ ```bash
242
+ paperseek search "digital platforms and open innovation" --source openalex
243
+ ```
244
+
245
+ JSON 输出:
246
+
247
+ ```bash
248
+ paperseek search "open innovation" --source openalex --output json
249
+ ```
250
+
251
+ 常用参数:
252
+
253
+ ```bash
254
+ paperseek search "your research question" \
255
+ --source openalex \
256
+ --field management \
257
+ --discipline "Computer Science" \
258
+ --min 5 \
259
+ --max 50 \
260
+ --iterations 5 \
261
+ --llm-provider deepseek \
262
+ --llm-api-type openai_chat \
263
+ --llm-model deepseek-v4-flash \
264
+ --llm-base-url https://api.deepseek.com \
265
+ --llm-key your-llm-api-key
266
+ ```
267
+
268
+ 诊断配置:
269
+
270
+ ```bash
271
+ paperseek doctor
272
+ paperseek doctor --source openalex --json
273
+ ```
274
+
275
+ 测试数据源最小真实请求:
276
+
277
+ ```bash
278
+ paperseek smoke --source openalex --query "machine learning"
279
+ paperseek smoke --source crossref --query "open innovation" --json
280
+ ```
281
+
282
+ 查看数据源能力:
283
+
284
+ ```bash
285
+ paperseek sources
286
+ paperseek sources --json
287
+ ```
288
+
289
+ 查看本地历史记录:
290
+
291
+ ```bash
292
+ paperseek history list
293
+ paperseek history show <RUN_ID> --json
294
+ paperseek history path
295
+ ```
296
+
297
+ 保存 CLI 用户级配置:
298
+
299
+ ```bash
300
+ paperseek config path
301
+ paperseek config set LLM_API_KEY your-llm-api-key
302
+ paperseek config list
303
+ paperseek config unset LLM_API_KEY
304
+ ```
305
+
306
+ 环境变量优先于用户级配置。`paperseek config list` 会遮蔽密钥。
307
+
308
+ Discipline Fields 支持使用 OpenAlex Field ID、字段标签或 `https://openalex.org/fields/<id>` URL。多个字段可以重复传入 `--discipline` / `--discipline-field`,也可以在环境变量中用分号分隔,例如:
309
+
310
+ ```bash
311
+ export DISCIPLINE_FIELDS="17;14"
312
+ paperseek search "open innovation and digital platforms" --source openalex
313
+ ```
314
+
315
+ `--field` / `SEARCH_FIELD` 是自由文本领域提示,主要影响 LLM 生成的检索式;`--discipline` / `DISCIPLINE_FIELDS` 是结构化学科限制。OpenAlex 会应用 `primary_topic.field.id` 过滤,WoS Starter 会映射到 `WC=` 类目;Crossref 没有共享的学科 taxonomy,因此会把所选学科作为查询上下文。
316
+
317
+ ## 数据源
318
+
319
+ | 数据源 | 默认状态 | API Key | 适合场景 | 说明 |
320
+ | --- | --- | --- | --- | --- |
321
+ | OpenAlex | 默认 | 推荐 | 精确检索、摘要、引用数、引用扩展、引用图 | 开放学术元数据库,适合通用文献发现与引用关系探索。 |
322
+ | Crossref | 支持 | 通常不需要 | DOI、出版元数据、期刊和出版社信息校验 | DOI 与出版元数据注册库,适合题录校验和 DOI 补全。 |
323
+ | Web of Science Starter | 适配中 | 必需 | 已有 Clarivate API 权限的机构用户 | 商业数据库 API,返回字段和可用性取决于订阅计划与机构授权。 |
324
+
325
+ ## LLM 服务商
326
+
327
+ PaperSeek 支持两类主流接口协议:OpenAI 风格接口和 Anthropic Messages API。Provider 表示模型服务商,API Type 表示请求协议。
328
+
329
+ | Provider | 默认 API Type | 默认模型 |
330
+ | --- | --- | --- |
331
+ | OpenAI | `openai_responses` | `gpt-5.4-mini` |
332
+ | Anthropic | `anthropic_messages` | `claude-sonnet-4-6` |
333
+ | Google Gemini | `openai_chat` | `gemini-3.5-flash` |
334
+ | DeepSeek | `openai_chat` | `deepseek-v4-flash` |
335
+ | 中国科技云 CSTCloud | `openai_chat` | `DeepSeek-V4-Flash` |
336
+ | 阿里云百炼 DashScope | `openai_chat` | `qwen3.6-plus` |
337
+ | Kimi Moonshot | `openai_chat` | `kimi-k2.6` |
338
+ | 智谱 AI GLM | `openai_chat` | `glm-5.1` |
339
+ | 硅基流动 SiliconFlow | `openai_chat` | `deepseek-ai/DeepSeek-V4-Flash` |
340
+ | OpenRouter | `openai_chat` | `openai/gpt-5.4-mini` |
341
+ | 火山方舟 | `openai_chat` | `doubao-seed-2-0-mini-260428` |
342
+ | 腾讯混元 | `openai_chat` | `hunyuan-turbos-latest` |
343
+ | 百度千帆 | `openai_chat` | `ernie-5.0` |
344
+ | ModelScope 魔搭 | `openai_chat` | `Qwen/Qwen3-235B-A22B-Instruct-2507` |
345
+ | Ollama | `openai_chat` | `qwen3:8b` |
346
+ | Custom | `openai_chat` | 空,用户自行填写 |
347
+
348
+ 默认模型用于初始化表单和命令参数示例。实际可用模型以各服务商控制台、账号权限和兼容层为准。
349
+
350
+ 中国科技云 CSTCloud 提供 OpenAI API Compatible 大模型接口,Base URL 为 `https://uni-api.cstcloud.cn/v1`。PaperSeek 内置的 provider 为 `cstcloud`,默认模型为 `DeepSeek-V4-Flash`。获取 Key 可打开 [中国科技云 API Keys](https://uni-api.cstcloud.cn/api_keys),登录中国科技云统一认证后按页面要求提交申请信息;中国科学院院内用户可使用中国科技云通行证登录,通行证通常为院邮箱账号及密码。接口说明见 [中国科技云大模型 API 接口使用手册](https://uni-api.cstcloud.cn/doc/llm/)。
351
+
352
+ ## 工作流
353
+
354
+ 一次搜索通常包含四步:
355
+
356
+ 1. **Query Generation**:LLM 根据研究问题、可选 Field Hint 和 Discipline Fields 生成初始查询。
357
+ 2. **Source Search**:请求 OpenAlex、Crossref 或 WoS Starter,并记录 HTTP 状态和命中数量。
358
+ 3. **Query Refinement**:若命中数过少或过多,LLM 调整查询并继续下一轮。
359
+ 4. **Ranking & Results**:将候选池交给 LLM 统一评分,输出前若干条结果。
360
+
361
+ 如果启用 OpenAlex 引用扩展,PaperSeek 会从高匹配论文中选择 seed paper,加入 seed 的参考文献和被引论文,再对完整候选池统一评分。默认最大输出 50 条。
362
+
363
+ 候选池较大时,LLM 排序会自动分批并发执行。默认批大小为 `8`、并发为 `4`;超过 32 篇候选论文时会自适应放大批大小。单个排序批次失败时只回退该批次,不会使整次检索失败。
364
+
365
+ ## 引用图
366
+
367
+ Citation Map 使用箭头表示引用方向:
368
+
369
+ ```text
370
+ A -> B 表示 A 引用了 B
371
+ ```
372
+
373
+ 图中节点来自最终结果和 OpenAlex 引用扩展记录。你可以拖动节点、缩放和平移画布,并查看节点详情。引用图适合发现关键词检索遗漏的经典文献、相邻主题和近期延伸研究。
374
+
375
+ ## 环境变量
376
+
377
+ | 变量 | 说明 |
378
+ | --- | --- |
379
+ | `DATA_SOURCE` | `openalex`、`crossref` 或 `wos`,默认 `openalex`。 |
380
+ | `LLM_PROVIDER` | LLM 服务商,例如 `openai`、`deepseek`、`cstcloud`、`anthropic`、`ollama`。 |
381
+ | `LLM_API_TYPE` | `openai_responses`、`openai_chat` 或 `anthropic_messages`。 |
382
+ | `LLM_MODEL` | 模型名称。 |
383
+ | `LLM_BASE_URL` | API Base URL。 |
384
+ | `LLM_API_KEY` | LLM API Key;Ollama 可不填。 |
385
+ | `OPENALEX_API_KEY` | OpenAlex API Key,推荐填写。 |
386
+ | `OPENALEX_EMAIL` | OpenAlex 联系邮箱。 |
387
+ | `CROSSREF_EMAIL` | Crossref polite pool 邮箱。 |
388
+ | `WOS_API_KEY` | Clarivate Web of Science Starter API Key。 |
389
+ | `WOS_DB` | WoS 数据库代码,默认 `WOS`。 |
390
+ | `SEARCH_FIELD` | 自由文本学科或领域提示。 |
391
+ | `DISCIPLINE_FIELDS` | OpenAlex Field ID、标签或 URL;多个值建议用分号分隔,并会映射到 OpenAlex / WoS 学科限定。 |
392
+ | `TARGET_MIN` / `TARGET_MAX` | 目标结果数量范围。 |
393
+ | `MAX_ITERATIONS` | 最大查询调整轮数。 |
394
+ | `EXPAND_CITATIONS` | 是否启用 OpenAlex 引用扩展,默认 `true`。 |
395
+ | `FETCH_ABSTRACTS` | 是否尝试从外部 DOI 元数据补摘要,默认 `false`。 |
396
+ | `CITATION_SEED_COUNT` | 引用扩展 seed 论文数量。 |
397
+ | `CITATION_PER_SEED` | 每个 seed 抓取的引用邻居数量。 |
398
+ | `CITATION_MAX_RECORDS` | 引用扩展候选上限。 |
399
+ | `RANKING_BATCH_SIZE` | LLM 排序批大小,默认 `8`;候选数较多时会自适应放大以减少批次数。 |
400
+ | `RANKING_CONCURRENCY` | LLM 排序并发数,默认 `4`;单个批次失败不会使整个检索失败。 |
401
+ | `LLM_TIMEOUT_SECONDS` | 单次 LLM 请求超时秒数,默认 `180`,最小 `30`。 |
402
+ | `PAPERSEEK_HISTORY_ENABLED` | 是否启用本地历史记录,默认 `true`。 |
403
+ | `PAPERSEEK_TIMEZONE` | 本地历史记录时间戳时区,默认 `Asia/Shanghai`;Web UI 会优先使用浏览器检测到的时区。 |
404
+ | `PAPERSEEK_DATA_DIR` | 本地数据目录,默认 `~/.paperseek`。 |
405
+ | `PAPERSEEK_HISTORY_DB` | 本地历史 SQLite 数据库路径,默认 `~/.paperseek/paperseek.db`。 |
406
+
407
+ ## API 获取方式
408
+
409
+ ### OpenAlex
410
+
411
+ OpenAlex 可匿名访问,但推荐配置免费 API Key:
412
+
413
+ 1. 打开 [OpenAlex](https://openalex.org/) 并注册账号。
414
+ 2. 进入 [OpenAlex API settings](https://openalex.org/settings/api)。
415
+ 3. 复制 API Key。
416
+ 4. 在 Web UI 填写 `OpenAlex API Key`,或设置 `OPENALEX_API_KEY`。
417
+
418
+ ### Crossref
419
+
420
+ Crossref REST API 通常不需要 API Key。建议设置邮箱进入 polite pool:
421
+
422
+ ```bash
423
+ export CROSSREF_EMAIL=you@example.org
424
+ ```
425
+
426
+ 如果需要更高限额、优先支持或生产级 SLA,可以了解 Crossref Metadata Plus。PaperSeek 使用 Crossref 公共/Polite REST API 路径。
427
+
428
+ ### Web of Science Starter API
429
+
430
+ WoS Starter 需要在 Clarivate Developer Portal 申请,通常适合已有机构订阅权限的用户:
431
+
432
+ 1. 打开 [Clarivate Developer Portal 注册页](https://developer.clarivate.com/signup),点击 `Register`。
433
+ 2. 建议使用机构邮箱注册,并尽量与 Web of Science 数据库账号一致。
434
+ 3. 登录后进入 [Applications](https://developer.clarivate.com/applications),点击 `Register Application`。
435
+ 4. 填写应用信息:
436
+ - `Application ID` 使用数字、小写字母、`-` 或 `_`。
437
+ - `Application Name` 可写为机构或项目名称。
438
+ - `Application Description` 可说明用于 Web of Science API 检索。
439
+ - `Client Type` 保持默认 `Public: Single Page Application`。
440
+ - 不勾选 OAuth2.0 Flows。
441
+ 5. 打开 [Web of Science Starter API](https://developer.clarivate.com/apis/wos-starter) 页面。
442
+ 6. 找到刚注册的 Application,点击 `Subscribe`。
443
+ 7. 按身份和机构权限选择计划。机构成员通常选择 Institutional Member 相关计划。
444
+ 8. 看到 `Subscription approval is pending` 后等待审批。机构申请通常需要数个工作日。
445
+ 9. 获得 API Key 后,在 Web UI 填写 `WoS API Key`,或设置 `WOS_API_KEY`。
446
+
447
+ WoS Starter 的权限、每日请求量和返回字段取决于订阅计划与机构授权。若遇到 HTTP 401,先检查是否使用 `https://` 和正确的 API Key;若遇到 Clarivate 返回的非标准 HTTP 512,应优先检查 Clarivate 服务状态、订阅审批和检索式兼容性。
448
+
449
+ ## Python API 与 core
450
+
451
+ 社区版安装包已经内置可复用核心模块 `paperseek_core`,不需要额外安装单独的 `paperseek-core` 仓库依赖。常规用户和下游代码建议从 `paperseek` 导入稳定入口:
452
+
453
+ ```python
454
+ from paperseek import PaperSeekAgent
455
+ ```
456
+
457
+ `LiteratureSearchAgent` 和 `WosSearchAgent` 仍保留为兼容旧代码的别名,新代码请使用 `PaperSeekAgent`。
458
+
459
+ ## Agent Skill
460
+
461
+ 仓库包含一个可选的 PaperSeek Skill:
462
+
463
+ ```text
464
+ skills/paperseek/
465
+ ```
466
+
467
+ 它用于指导支持 Skill 的 AI agent 正确调用 PaperSeek,包括数据源选择、配置诊断、JSON 结果解析和引用图边界。Skill 使用 progressive disclosure:`SKILL.md` 保持简短,详细命令契约放在 `references/`。
468
+
469
+ 这个 Skill **不会随 Python 包自动安装**。如果需要在 agent 平台中使用,可以手动复制或链接 `skills/paperseek/` 到对应平台的 Skill 目录。
470
+
471
+ Skill 中的 launcher 与自包含 runtime:
472
+
473
+ ```text
474
+ skills/paperseek/scripts/paperseek.py
475
+ skills/paperseek/scripts/paperseek_skill_runtime.py
476
+ ```
477
+
478
+ 单独发布 Skill 时,复制 `skills/paperseek/` 即可。`paperseek.py` 会优先调用完整 PaperSeek 包;如果未安装包,会回退到 `paperseek_skill_runtime.py`,使用 Python 标准库直接完成 OpenAlex、Crossref 和已配置 API Key 的 WoS Starter 核心文献检索。Web UI、引用图和完整历史管理仍需要安装完整包。
479
+
480
+ ## 项目状态
481
+
482
+ PaperSeek 当前处于 alpha 阶段。CLI、Web UI、OpenAlex、Crossref、引用扩展、CSV 导出和 Skill 基本可用,但仍建议对正式研究结论进行人工复核。
483
+
484
+ 欢迎贡献:
485
+
486
+ - 新数据源适配器。
487
+ - 更稳健的查询生成与排序提示词。
488
+ - 更好的引用图交互。
489
+ - Web API、CLI、provider parsing 和导出行为测试。
490
+ - 文档、示例和错误诊断改进。
491
+
492
+ 贡献前可阅读 [CONTRIBUTING.md](CONTRIBUTING.md)。安全问题请参考 [SECURITY.md](SECURITY.md)。
493
+
494
+ ## 致谢
495
+
496
+ PaperSeek 的设计和实现参考了以下开源项目:
497
+
498
+ - [dr-dumpling/paper-search-cli](https://github.com/dr-dumpling/paper-search-cli/):参考 CLI 使用方式与文献检索工作流设计。
499
+ - [666ghj/MiroFish](https://github.com/666ghj/MiroFish):参考 Web 前端界面的左右分栏和工作流展示风格。
500
+ - [clarivate/wosstarter_python_client](https://github.com/clarivate/wosstarter_python_client):参考 Web of Science Starter API 的客户端调用方式。
501
+ - [Lloyd-Jahn/openclaw-paper-search](https://github.com/Lloyd-Jahn/openclaw-paper-search):参考文献搜索工具的组织方式。
502
+
503
+ ## 开源协议
504
+
505
+ PaperSeek 使用 [Apache License 2.0](LICENSE) 开源。