academic-refchecker 1.2.54__tar.gz → 1.2.55__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {academic_refchecker-1.2.54/src/academic_refchecker.egg-info → academic_refchecker-1.2.55}/PKG-INFO +23 -23
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/README.md +22 -22
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/pyproject.toml +5 -7
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/scripts/download_db.py +1 -1
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55/src/academic_refchecker.egg-info}/PKG-INFO +23 -23
- academic_refchecker-1.2.55/src/academic_refchecker.egg-info/SOURCES.txt +57 -0
- academic_refchecker-1.2.55/src/academic_refchecker.egg-info/entry_points.txt +2 -0
- academic_refchecker-1.2.55/src/academic_refchecker.egg-info/top_level.txt +1 -0
- academic_refchecker-1.2.55/src/refchecker/__main__.py +11 -0
- academic_refchecker-1.2.55/src/refchecker/__version__.py +5 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/crossref.py +5 -5
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/enhanced_hybrid_checker.py +1 -1
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/github_checker.py +4 -4
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/local_semantic_scholar.py +7 -7
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/openalex.py +6 -6
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/openreview_checker.py +8 -8
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/pdf_paper_checker.py +1 -1
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/semantic_scholar.py +10 -10
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/webpage_checker.py +3 -3
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/core/parallel_processor.py +6 -6
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/core/refchecker.py +63 -63
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/arxiv_utils.py +3 -3
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/biblatex_parser.py +4 -4
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/bibliography_utils.py +5 -5
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/bibtex_parser.py +5 -5
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/error_utils.py +1 -1
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/text_utils.py +10 -10
- academic_refchecker-1.2.54/src/__version__.py +0 -3
- academic_refchecker-1.2.54/src/academic_refchecker.egg-info/SOURCES.txt +0 -56
- academic_refchecker-1.2.54/src/academic_refchecker.egg-info/entry_points.txt +0 -2
- academic_refchecker-1.2.54/src/academic_refchecker.egg-info/top_level.txt +0 -9
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/LICENSE +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/MANIFEST.in +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/requirements.txt +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/scripts/run_tests.py +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/setup.cfg +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/src/academic_refchecker.egg-info/dependency_links.txt +0 -0
- {academic_refchecker-1.2.54 → academic_refchecker-1.2.55}/src/academic_refchecker.egg-info/requires.txt +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/config/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/config/logging.conf +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/config/settings.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/core/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/core/db_connection_pool.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/database/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/database/download_semantic_scholar_db.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/llm/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/llm/base.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/llm/providers.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/scripts/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/scripts/start_vllm_server.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/services/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/services/pdf_processor.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/__init__.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/author_utils.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/config_validator.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/db_utils.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/doi_utils.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/mock_objects.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/unicode_utils.py +0 -0
- {academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/utils/url_utils.py +0 -0
{academic_refchecker-1.2.54/src/academic_refchecker.egg-info → academic_refchecker-1.2.55}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.54
|
|
3
|
+
Version: 1.2.55
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -156,17 +156,17 @@ Learn about RefChecker's design philosophy and development process in this detai
|
|
|
156
156
|
|
|
157
157
|
1. **Check a famous paper:**
|
|
158
158
|
```bash
|
|
159
|
-
python
|
|
159
|
+
python run_refchecker.py --paper 1706.03762
|
|
160
160
|
```
|
|
161
161
|
|
|
162
162
|
2. **Check your own PDF:**
|
|
163
163
|
```bash
|
|
164
|
-
python
|
|
164
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
165
165
|
```
|
|
166
166
|
|
|
167
167
|
3. **For faster processing with local database** (see [Local Database Setup](#local-database-setup)):
|
|
168
168
|
```bash
|
|
169
|
-
python
|
|
169
|
+
python run_refchecker.py --paper 1706.03762 --db-path semantic_scholar_db/semantic_scholar.db
|
|
170
170
|
```
|
|
171
171
|
|
|
172
172
|
> **⚡ Performance Tip**: Reference verification takes 5-10 seconds per reference without a Semantic Scholar API key due to rate limiting. With an API key, verification speeds up to 1-2 seconds per reference. Set `SEMANTIC_SCHOLAR_API_KEY` environment variable or use `--semantic-scholar-api-key` for faster processing.
|
|
@@ -192,13 +192,13 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
192
192
|
export REFCHECKER_LLM_PROVIDER=anthropic
|
|
193
193
|
export ANTHROPIC_API_KEY=your_api_key_here
|
|
194
194
|
|
|
195
|
-
python
|
|
195
|
+
python run_refchecker.py --paper 1706.03762
|
|
196
196
|
```
|
|
197
197
|
|
|
198
198
|
2. **Using Command Line Arguments**:
|
|
199
199
|
```bash
|
|
200
200
|
# Enable LLM with specific provider and model
|
|
201
|
-
python
|
|
201
|
+
python run_refchecker.py --paper 1706.03762 \
|
|
202
202
|
--llm-provider anthropic \
|
|
203
203
|
--llm-model claude-sonnet-4-20250514 \
|
|
204
204
|
```
|
|
@@ -211,7 +211,7 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
211
211
|
With `OPENAI_API_KEY` environment variable:
|
|
212
212
|
|
|
213
213
|
```bash
|
|
214
|
-
python refchecker.py --paper /path/to/paper.pdf \
|
|
214
|
+
python run_refchecker.py --paper /path/to/paper.pdf \
|
|
215
215
|
--llm-provider openai \
|
|
216
216
|
--llm-model gpt-4o \
|
|
217
217
|
```
|
|
@@ -221,7 +221,7 @@ python refchecker.py --paper /path/to/paper.pdf \
|
|
|
221
221
|
With `ANTHROPIC_API_KEY` environment variable:
|
|
222
222
|
|
|
223
223
|
```bash
|
|
224
|
-
python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
224
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
225
225
|
--llm-provider anthropic \
|
|
226
226
|
--llm-model claude-sonnet-4-20250514 \
|
|
227
227
|
```
|
|
@@ -229,7 +229,7 @@ python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
|
229
229
|
#### Google Gemini
|
|
230
230
|
|
|
231
231
|
```bash
|
|
232
|
-
python refchecker.py --paper paper.tex \
|
|
232
|
+
python run_refchecker.py --paper paper.tex \
|
|
233
233
|
--llm-provider google \
|
|
234
234
|
--llm-model gemini-2.5-flash
|
|
235
235
|
```
|
|
@@ -237,7 +237,7 @@ python refchecker.py --paper paper.tex \
|
|
|
237
237
|
#### Azure OpenAI
|
|
238
238
|
|
|
239
239
|
```bash
|
|
240
|
-
python
|
|
240
|
+
python run_refchecker.py --paper paper.txt \
|
|
241
241
|
--llm-provider azure \
|
|
242
242
|
--llm-model gpt-4 \
|
|
243
243
|
--llm-endpoint https://your-resource.openai.azure.com/
|
|
@@ -249,7 +249,7 @@ For running models locally:
|
|
|
249
249
|
|
|
250
250
|
```bash
|
|
251
251
|
# automatic Huggingface model download with VLLM server launch
|
|
252
|
-
python
|
|
252
|
+
python run_refchecker.py --paper paper.pdf \
|
|
253
253
|
--llm-provider vllm \
|
|
254
254
|
--llm-model meta-llama/Llama-3.1-8B-Instruct
|
|
255
255
|
```
|
|
@@ -319,43 +319,43 @@ Check papers in various formats and online locations:
|
|
|
319
319
|
|
|
320
320
|
```bash
|
|
321
321
|
# Check a specific ArXiv paper by ID
|
|
322
|
-
python
|
|
322
|
+
python run_refchecker.py --paper 1706.03762
|
|
323
323
|
|
|
324
324
|
# Check by ArXiv URL
|
|
325
|
-
python
|
|
325
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762
|
|
326
326
|
|
|
327
327
|
# Check by ArXiv PDF URL
|
|
328
|
-
python
|
|
328
|
+
python run_refchecker.py --paper https://arxiv.org/pdf/1706.03762.pdf
|
|
329
329
|
```
|
|
330
330
|
|
|
331
331
|
#### Local PDF Files
|
|
332
332
|
|
|
333
333
|
```bash
|
|
334
334
|
# Check a local PDF file
|
|
335
|
-
python
|
|
335
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
336
336
|
|
|
337
337
|
# Check with offline database for faster processing
|
|
338
|
-
python
|
|
338
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
|
|
339
339
|
```
|
|
340
340
|
|
|
341
341
|
#### LaTeX Files
|
|
342
342
|
|
|
343
343
|
```bash
|
|
344
344
|
# Check a LaTeX document
|
|
345
|
-
python
|
|
345
|
+
python run_refchecker.py --paper /path/to/your/paper.tex
|
|
346
346
|
|
|
347
347
|
# Check with debug mode for detailed processing info
|
|
348
|
-
python
|
|
348
|
+
python run_refchecker.py --paper /path/to/your/paper.tex --debug
|
|
349
349
|
```
|
|
350
350
|
|
|
351
351
|
#### Text Files
|
|
352
352
|
|
|
353
353
|
```bash
|
|
354
354
|
# Check a plain text file containing paper content
|
|
355
|
-
python
|
|
355
|
+
python run_refchecker.py --paper /path/to/your/paper.txt
|
|
356
356
|
|
|
357
357
|
# Combine with local database for offline verification
|
|
358
|
-
python
|
|
358
|
+
python run_refchecker.py --paper /path/to/your/paper.txt --db-path semantic_scholar_db/semantic_scholar.db
|
|
359
359
|
```
|
|
360
360
|
|
|
361
361
|
|
|
@@ -367,10 +367,10 @@ By default, no files are generated. To save detailed results, use the `--output-
|
|
|
367
367
|
|
|
368
368
|
```bash
|
|
369
369
|
# Save to default filename (reference_errors.txt)
|
|
370
|
-
python
|
|
370
|
+
python run_refchecker.py --paper 1706.03762 --output-file
|
|
371
371
|
|
|
372
372
|
# Save to custom filename
|
|
373
|
-
python
|
|
373
|
+
python run_refchecker.py --paper 1706.03762 --output-file my_errors.txt
|
|
374
374
|
```
|
|
375
375
|
|
|
376
376
|
The output file contains a detailed report of references with errors and warnings, including corrected references.
|
|
@@ -574,7 +574,7 @@ python download_semantic_scholar_db.py \
|
|
|
574
574
|
|
|
575
575
|
## 🧪 Testing
|
|
576
576
|
|
|
577
|
-
RefChecker includes a comprehensive test suite with
|
|
577
|
+
RefChecker includes a comprehensive test suite with **490+ tests** covering unit, integration, and end-to-end scenarios. The tests ensure reliability across all components and provide examples of how to use the system.
|
|
578
578
|
|
|
579
579
|
### Quick Test Run
|
|
580
580
|
|
|
@@ -95,17 +95,17 @@ Learn about RefChecker's design philosophy and development process in this detai
|
|
|
95
95
|
|
|
96
96
|
1. **Check a famous paper:**
|
|
97
97
|
```bash
|
|
98
|
-
python
|
|
98
|
+
python run_refchecker.py --paper 1706.03762
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
2. **Check your own PDF:**
|
|
102
102
|
```bash
|
|
103
|
-
python
|
|
103
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
104
104
|
```
|
|
105
105
|
|
|
106
106
|
3. **For faster processing with local database** (see [Local Database Setup](#local-database-setup)):
|
|
107
107
|
```bash
|
|
108
|
-
python
|
|
108
|
+
python run_refchecker.py --paper 1706.03762 --db-path semantic_scholar_db/semantic_scholar.db
|
|
109
109
|
```
|
|
110
110
|
|
|
111
111
|
> **⚡ Performance Tip**: Reference verification takes 5-10 seconds per reference without a Semantic Scholar API key due to rate limiting. With an API key, verification speeds up to 1-2 seconds per reference. Set `SEMANTIC_SCHOLAR_API_KEY` environment variable or use `--semantic-scholar-api-key` for faster processing.
|
|
@@ -131,13 +131,13 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
131
131
|
export REFCHECKER_LLM_PROVIDER=anthropic
|
|
132
132
|
export ANTHROPIC_API_KEY=your_api_key_here
|
|
133
133
|
|
|
134
|
-
python
|
|
134
|
+
python run_refchecker.py --paper 1706.03762
|
|
135
135
|
```
|
|
136
136
|
|
|
137
137
|
2. **Using Command Line Arguments**:
|
|
138
138
|
```bash
|
|
139
139
|
# Enable LLM with specific provider and model
|
|
140
|
-
python
|
|
140
|
+
python run_refchecker.py --paper 1706.03762 \
|
|
141
141
|
--llm-provider anthropic \
|
|
142
142
|
--llm-model claude-sonnet-4-20250514 \
|
|
143
143
|
```
|
|
@@ -150,7 +150,7 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
150
150
|
With `OPENAI_API_KEY` environment variable:
|
|
151
151
|
|
|
152
152
|
```bash
|
|
153
|
-
python refchecker.py --paper /path/to/paper.pdf \
|
|
153
|
+
python run_refchecker.py --paper /path/to/paper.pdf \
|
|
154
154
|
--llm-provider openai \
|
|
155
155
|
--llm-model gpt-4o \
|
|
156
156
|
```
|
|
@@ -160,7 +160,7 @@ python refchecker.py --paper /path/to/paper.pdf \
|
|
|
160
160
|
With `ANTHROPIC_API_KEY` environment variable:
|
|
161
161
|
|
|
162
162
|
```bash
|
|
163
|
-
python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
163
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
164
164
|
--llm-provider anthropic \
|
|
165
165
|
--llm-model claude-sonnet-4-20250514 \
|
|
166
166
|
```
|
|
@@ -168,7 +168,7 @@ python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
|
168
168
|
#### Google Gemini
|
|
169
169
|
|
|
170
170
|
```bash
|
|
171
|
-
python refchecker.py --paper paper.tex \
|
|
171
|
+
python run_refchecker.py --paper paper.tex \
|
|
172
172
|
--llm-provider google \
|
|
173
173
|
--llm-model gemini-2.5-flash
|
|
174
174
|
```
|
|
@@ -176,7 +176,7 @@ python refchecker.py --paper paper.tex \
|
|
|
176
176
|
#### Azure OpenAI
|
|
177
177
|
|
|
178
178
|
```bash
|
|
179
|
-
python
|
|
179
|
+
python run_refchecker.py --paper paper.txt \
|
|
180
180
|
--llm-provider azure \
|
|
181
181
|
--llm-model gpt-4 \
|
|
182
182
|
--llm-endpoint https://your-resource.openai.azure.com/
|
|
@@ -188,7 +188,7 @@ For running models locally:
|
|
|
188
188
|
|
|
189
189
|
```bash
|
|
190
190
|
# automatic Huggingface model download with VLLM server launch
|
|
191
|
-
python
|
|
191
|
+
python run_refchecker.py --paper paper.pdf \
|
|
192
192
|
--llm-provider vllm \
|
|
193
193
|
--llm-model meta-llama/Llama-3.1-8B-Instruct
|
|
194
194
|
```
|
|
@@ -258,43 +258,43 @@ Check papers in various formats and online locations:
|
|
|
258
258
|
|
|
259
259
|
```bash
|
|
260
260
|
# Check a specific ArXiv paper by ID
|
|
261
|
-
python
|
|
261
|
+
python run_refchecker.py --paper 1706.03762
|
|
262
262
|
|
|
263
263
|
# Check by ArXiv URL
|
|
264
|
-
python
|
|
264
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762
|
|
265
265
|
|
|
266
266
|
# Check by ArXiv PDF URL
|
|
267
|
-
python
|
|
267
|
+
python run_refchecker.py --paper https://arxiv.org/pdf/1706.03762.pdf
|
|
268
268
|
```
|
|
269
269
|
|
|
270
270
|
#### Local PDF Files
|
|
271
271
|
|
|
272
272
|
```bash
|
|
273
273
|
# Check a local PDF file
|
|
274
|
-
python
|
|
274
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
275
275
|
|
|
276
276
|
# Check with offline database for faster processing
|
|
277
|
-
python
|
|
277
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
|
|
278
278
|
```
|
|
279
279
|
|
|
280
280
|
#### LaTeX Files
|
|
281
281
|
|
|
282
282
|
```bash
|
|
283
283
|
# Check a LaTeX document
|
|
284
|
-
python
|
|
284
|
+
python run_refchecker.py --paper /path/to/your/paper.tex
|
|
285
285
|
|
|
286
286
|
# Check with debug mode for detailed processing info
|
|
287
|
-
python
|
|
287
|
+
python run_refchecker.py --paper /path/to/your/paper.tex --debug
|
|
288
288
|
```
|
|
289
289
|
|
|
290
290
|
#### Text Files
|
|
291
291
|
|
|
292
292
|
```bash
|
|
293
293
|
# Check a plain text file containing paper content
|
|
294
|
-
python
|
|
294
|
+
python run_refchecker.py --paper /path/to/your/paper.txt
|
|
295
295
|
|
|
296
296
|
# Combine with local database for offline verification
|
|
297
|
-
python
|
|
297
|
+
python run_refchecker.py --paper /path/to/your/paper.txt --db-path semantic_scholar_db/semantic_scholar.db
|
|
298
298
|
```
|
|
299
299
|
|
|
300
300
|
|
|
@@ -306,10 +306,10 @@ By default, no files are generated. To save detailed results, use the `--output-
|
|
|
306
306
|
|
|
307
307
|
```bash
|
|
308
308
|
# Save to default filename (reference_errors.txt)
|
|
309
|
-
python
|
|
309
|
+
python run_refchecker.py --paper 1706.03762 --output-file
|
|
310
310
|
|
|
311
311
|
# Save to custom filename
|
|
312
|
-
python
|
|
312
|
+
python run_refchecker.py --paper 1706.03762 --output-file my_errors.txt
|
|
313
313
|
```
|
|
314
314
|
|
|
315
315
|
The output file contains a detailed report of references with errors and warnings, including corrected references.
|
|
@@ -513,7 +513,7 @@ python download_semantic_scholar_db.py \
|
|
|
513
513
|
|
|
514
514
|
## 🧪 Testing
|
|
515
515
|
|
|
516
|
-
RefChecker includes a comprehensive test suite with
|
|
516
|
+
RefChecker includes a comprehensive test suite with **490+ tests** covering unit, integration, and end-to-end scenarios. The tests ensure reliability across all components and provide examples of how to use the system.
|
|
517
517
|
|
|
518
518
|
### Quick Test Run
|
|
519
519
|
|
|
@@ -77,17 +77,15 @@ Repository = "https://github.com/markrussinovich/refchecker"
|
|
|
77
77
|
"Bug Tracker" = "https://github.com/markrussinovich/refchecker/issues"
|
|
78
78
|
|
|
79
79
|
[project.scripts]
|
|
80
|
-
academic-refchecker = "core.refchecker:main"
|
|
80
|
+
academic-refchecker = "refchecker.core.refchecker:main"
|
|
81
81
|
|
|
82
82
|
[tool.setuptools.packages.find]
|
|
83
83
|
where = ["src"]
|
|
84
|
-
|
|
85
|
-
[tool.setuptools]
|
|
86
|
-
py-modules = ["__version__"]
|
|
84
|
+
include = ["refchecker*"]
|
|
87
85
|
|
|
88
86
|
[tool.setuptools.dynamic]
|
|
89
|
-
version = {attr = "__version__.__version__"}
|
|
87
|
+
version = {attr = "refchecker.__version__.__version__"}
|
|
90
88
|
|
|
91
89
|
[tool.setuptools.package-data]
|
|
92
|
-
"
|
|
93
|
-
"
|
|
90
|
+
"refchecker" = ["*.txt", "*.md", "*.conf"]
|
|
91
|
+
"refchecker.config" = ["*.conf"]
|
|
@@ -9,7 +9,7 @@ import os
|
|
|
9
9
|
# Add the src directory to Python path
|
|
10
10
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
11
11
|
|
|
12
|
-
from database.download_semantic_scholar_db import main
|
|
12
|
+
from refchecker.database.download_semantic_scholar_db import main
|
|
13
13
|
|
|
14
14
|
if __name__ == "__main__":
|
|
15
15
|
main()
|
{academic_refchecker-1.2.54 → academic_refchecker-1.2.55/src/academic_refchecker.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: academic-refchecker
|
|
3
|
-
Version: 1.2.54
|
|
3
|
+
Version: 1.2.55
|
|
4
4
|
Summary: A comprehensive tool for validating reference accuracy in academic papers
|
|
5
5
|
Author-email: Mark Russinovich <markrussinovich@hotmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -156,17 +156,17 @@ Learn about RefChecker's design philosophy and development process in this detai
|
|
|
156
156
|
|
|
157
157
|
1. **Check a famous paper:**
|
|
158
158
|
```bash
|
|
159
|
-
python
|
|
159
|
+
python run_refchecker.py --paper 1706.03762
|
|
160
160
|
```
|
|
161
161
|
|
|
162
162
|
2. **Check your own PDF:**
|
|
163
163
|
```bash
|
|
164
|
-
python
|
|
164
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
165
165
|
```
|
|
166
166
|
|
|
167
167
|
3. **For faster processing with local database** (see [Local Database Setup](#local-database-setup)):
|
|
168
168
|
```bash
|
|
169
|
-
python
|
|
169
|
+
python run_refchecker.py --paper 1706.03762 --db-path semantic_scholar_db/semantic_scholar.db
|
|
170
170
|
```
|
|
171
171
|
|
|
172
172
|
> **⚡ Performance Tip**: Reference verification takes 5-10 seconds per reference without a Semantic Scholar API key due to rate limiting. With an API key, verification speeds up to 1-2 seconds per reference. Set `SEMANTIC_SCHOLAR_API_KEY` environment variable or use `--semantic-scholar-api-key` for faster processing.
|
|
@@ -192,13 +192,13 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
192
192
|
export REFCHECKER_LLM_PROVIDER=anthropic
|
|
193
193
|
export ANTHROPIC_API_KEY=your_api_key_here
|
|
194
194
|
|
|
195
|
-
python
|
|
195
|
+
python run_refchecker.py --paper 1706.03762
|
|
196
196
|
```
|
|
197
197
|
|
|
198
198
|
2. **Using Command Line Arguments**:
|
|
199
199
|
```bash
|
|
200
200
|
# Enable LLM with specific provider and model
|
|
201
|
-
python
|
|
201
|
+
python run_refchecker.py --paper 1706.03762 \
|
|
202
202
|
--llm-provider anthropic \
|
|
203
203
|
--llm-model claude-sonnet-4-20250514 \
|
|
204
204
|
```
|
|
@@ -211,7 +211,7 @@ RefChecker supports AI-powered bibliography parsing using Large Language Models
|
|
|
211
211
|
With `OPENAI_API_KEY` environment variable:
|
|
212
212
|
|
|
213
213
|
```bash
|
|
214
|
-
python refchecker.py --paper /path/to/paper.pdf \
|
|
214
|
+
python run_refchecker.py --paper /path/to/paper.pdf \
|
|
215
215
|
--llm-provider openai \
|
|
216
216
|
--llm-model gpt-4o \
|
|
217
217
|
```
|
|
@@ -221,7 +221,7 @@ python refchecker.py --paper /path/to/paper.pdf \
|
|
|
221
221
|
With `ANTHROPIC_API_KEY` environment variable:
|
|
222
222
|
|
|
223
223
|
```bash
|
|
224
|
-
python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
224
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
225
225
|
--llm-provider anthropic \
|
|
226
226
|
--llm-model claude-sonnet-4-20250514 \
|
|
227
227
|
```
|
|
@@ -229,7 +229,7 @@ python refchecker.py --paper https://arxiv.org/abs/1706.03762 \
|
|
|
229
229
|
#### Google Gemini
|
|
230
230
|
|
|
231
231
|
```bash
|
|
232
|
-
python refchecker.py --paper paper.tex \
|
|
232
|
+
python run_refchecker.py --paper paper.tex \
|
|
233
233
|
--llm-provider google \
|
|
234
234
|
--llm-model gemini-2.5-flash
|
|
235
235
|
```
|
|
@@ -237,7 +237,7 @@ python refchecker.py --paper paper.tex \
|
|
|
237
237
|
#### Azure OpenAI
|
|
238
238
|
|
|
239
239
|
```bash
|
|
240
|
-
python
|
|
240
|
+
python run_refchecker.py --paper paper.txt \
|
|
241
241
|
--llm-provider azure \
|
|
242
242
|
--llm-model gpt-4 \
|
|
243
243
|
--llm-endpoint https://your-resource.openai.azure.com/
|
|
@@ -249,7 +249,7 @@ For running models locally:
|
|
|
249
249
|
|
|
250
250
|
```bash
|
|
251
251
|
# automatic Huggingface model download with VLLM server launch
|
|
252
|
-
python
|
|
252
|
+
python run_refchecker.py --paper paper.pdf \
|
|
253
253
|
--llm-provider vllm \
|
|
254
254
|
--llm-model meta-llama/Llama-3.1-8B-Instruct
|
|
255
255
|
```
|
|
@@ -319,43 +319,43 @@ Check papers in various formats and online locations:
|
|
|
319
319
|
|
|
320
320
|
```bash
|
|
321
321
|
# Check a specific ArXiv paper by ID
|
|
322
|
-
python
|
|
322
|
+
python run_refchecker.py --paper 1706.03762
|
|
323
323
|
|
|
324
324
|
# Check by ArXiv URL
|
|
325
|
-
python
|
|
325
|
+
python run_refchecker.py --paper https://arxiv.org/abs/1706.03762
|
|
326
326
|
|
|
327
327
|
# Check by ArXiv PDF URL
|
|
328
|
-
python
|
|
328
|
+
python run_refchecker.py --paper https://arxiv.org/pdf/1706.03762.pdf
|
|
329
329
|
```
|
|
330
330
|
|
|
331
331
|
#### Local PDF Files
|
|
332
332
|
|
|
333
333
|
```bash
|
|
334
334
|
# Check a local PDF file
|
|
335
|
-
python
|
|
335
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf
|
|
336
336
|
|
|
337
337
|
# Check with offline database for faster processing
|
|
338
|
-
python
|
|
338
|
+
python run_refchecker.py --paper /path/to/your/paper.pdf --db-path semantic_scholar_db/semantic_scholar.db
|
|
339
339
|
```
|
|
340
340
|
|
|
341
341
|
#### LaTeX Files
|
|
342
342
|
|
|
343
343
|
```bash
|
|
344
344
|
# Check a LaTeX document
|
|
345
|
-
python
|
|
345
|
+
python run_refchecker.py --paper /path/to/your/paper.tex
|
|
346
346
|
|
|
347
347
|
# Check with debug mode for detailed processing info
|
|
348
|
-
python
|
|
348
|
+
python run_refchecker.py --paper /path/to/your/paper.tex --debug
|
|
349
349
|
```
|
|
350
350
|
|
|
351
351
|
#### Text Files
|
|
352
352
|
|
|
353
353
|
```bash
|
|
354
354
|
# Check a plain text file containing paper content
|
|
355
|
-
python
|
|
355
|
+
python run_refchecker.py --paper /path/to/your/paper.txt
|
|
356
356
|
|
|
357
357
|
# Combine with local database for offline verification
|
|
358
|
-
python
|
|
358
|
+
python run_refchecker.py --paper /path/to/your/paper.txt --db-path semantic_scholar_db/semantic_scholar.db
|
|
359
359
|
```
|
|
360
360
|
|
|
361
361
|
|
|
@@ -367,10 +367,10 @@ By default, no files are generated. To save detailed results, use the `--output-
|
|
|
367
367
|
|
|
368
368
|
```bash
|
|
369
369
|
# Save to default filename (reference_errors.txt)
|
|
370
|
-
python
|
|
370
|
+
python run_refchecker.py --paper 1706.03762 --output-file
|
|
371
371
|
|
|
372
372
|
# Save to custom filename
|
|
373
|
-
python
|
|
373
|
+
python run_refchecker.py --paper 1706.03762 --output-file my_errors.txt
|
|
374
374
|
```
|
|
375
375
|
|
|
376
376
|
The output file contains a detailed report of references with errors and warnings, including corrected references.
|
|
@@ -574,7 +574,7 @@ python download_semantic_scholar_db.py \
|
|
|
574
574
|
|
|
575
575
|
## 🧪 Testing
|
|
576
576
|
|
|
577
|
-
RefChecker includes a comprehensive test suite with
|
|
577
|
+
RefChecker includes a comprehensive test suite with **490+ tests** covering unit, integration, and end-to-end scenarios. The tests ensure reliability across all components and provide examples of how to use the system.
|
|
578
578
|
|
|
579
579
|
### Quick Test Run
|
|
580
580
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
requirements.txt
|
|
6
|
+
scripts/download_db.py
|
|
7
|
+
scripts/run_tests.py
|
|
8
|
+
scripts/start_vllm_server.py
|
|
9
|
+
src/academic_refchecker.egg-info/PKG-INFO
|
|
10
|
+
src/academic_refchecker.egg-info/SOURCES.txt
|
|
11
|
+
src/academic_refchecker.egg-info/dependency_links.txt
|
|
12
|
+
src/academic_refchecker.egg-info/entry_points.txt
|
|
13
|
+
src/academic_refchecker.egg-info/requires.txt
|
|
14
|
+
src/academic_refchecker.egg-info/top_level.txt
|
|
15
|
+
src/refchecker/__init__.py
|
|
16
|
+
src/refchecker/__main__.py
|
|
17
|
+
src/refchecker/__version__.py
|
|
18
|
+
src/refchecker/checkers/__init__.py
|
|
19
|
+
src/refchecker/checkers/crossref.py
|
|
20
|
+
src/refchecker/checkers/enhanced_hybrid_checker.py
|
|
21
|
+
src/refchecker/checkers/github_checker.py
|
|
22
|
+
src/refchecker/checkers/local_semantic_scholar.py
|
|
23
|
+
src/refchecker/checkers/openalex.py
|
|
24
|
+
src/refchecker/checkers/openreview_checker.py
|
|
25
|
+
src/refchecker/checkers/pdf_paper_checker.py
|
|
26
|
+
src/refchecker/checkers/semantic_scholar.py
|
|
27
|
+
src/refchecker/checkers/webpage_checker.py
|
|
28
|
+
src/refchecker/config/__init__.py
|
|
29
|
+
src/refchecker/config/logging.conf
|
|
30
|
+
src/refchecker/config/settings.py
|
|
31
|
+
src/refchecker/core/__init__.py
|
|
32
|
+
src/refchecker/core/db_connection_pool.py
|
|
33
|
+
src/refchecker/core/parallel_processor.py
|
|
34
|
+
src/refchecker/core/refchecker.py
|
|
35
|
+
src/refchecker/database/__init__.py
|
|
36
|
+
src/refchecker/database/download_semantic_scholar_db.py
|
|
37
|
+
src/refchecker/llm/__init__.py
|
|
38
|
+
src/refchecker/llm/base.py
|
|
39
|
+
src/refchecker/llm/providers.py
|
|
40
|
+
src/refchecker/scripts/__init__.py
|
|
41
|
+
src/refchecker/scripts/start_vllm_server.py
|
|
42
|
+
src/refchecker/services/__init__.py
|
|
43
|
+
src/refchecker/services/pdf_processor.py
|
|
44
|
+
src/refchecker/utils/__init__.py
|
|
45
|
+
src/refchecker/utils/arxiv_utils.py
|
|
46
|
+
src/refchecker/utils/author_utils.py
|
|
47
|
+
src/refchecker/utils/biblatex_parser.py
|
|
48
|
+
src/refchecker/utils/bibliography_utils.py
|
|
49
|
+
src/refchecker/utils/bibtex_parser.py
|
|
50
|
+
src/refchecker/utils/config_validator.py
|
|
51
|
+
src/refchecker/utils/db_utils.py
|
|
52
|
+
src/refchecker/utils/doi_utils.py
|
|
53
|
+
src/refchecker/utils/error_utils.py
|
|
54
|
+
src/refchecker/utils/mock_objects.py
|
|
55
|
+
src/refchecker/utils/text_utils.py
|
|
56
|
+
src/refchecker/utils/unicode_utils.py
|
|
57
|
+
src/refchecker/utils/url_utils.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
refchecker
|
{academic_refchecker-1.2.54/src → academic_refchecker-1.2.55/src/refchecker}/checkers/crossref.py
RENAMED
|
@@ -30,9 +30,9 @@ import logging
|
|
|
30
30
|
import re
|
|
31
31
|
from typing import Dict, List, Tuple, Optional, Any, Union
|
|
32
32
|
from urllib.parse import quote_plus
|
|
33
|
-
from utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
34
|
-
from utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
35
|
-
from config.settings import get_config
|
|
33
|
+
from refchecker.utils.text_utils import normalize_text, clean_title_basic, find_best_match, is_name_match, compare_authors, clean_title_for_search
|
|
34
|
+
from refchecker.utils.error_utils import format_year_mismatch, format_doi_mismatch
|
|
35
|
+
from refchecker.config.settings import get_config
|
|
36
36
|
|
|
37
37
|
# Set up logging
|
|
38
38
|
logger = logging.getLogger(__name__)
|
|
@@ -358,7 +358,7 @@ class CrossRefReferenceChecker:
|
|
|
358
358
|
# Check for DOI
|
|
359
359
|
doi = work_data.get('DOI')
|
|
360
360
|
if doi:
|
|
361
|
-
from utils.doi_utils import construct_doi_url
|
|
361
|
+
from refchecker.utils.doi_utils import construct_doi_url
|
|
362
362
|
doi_url = construct_doi_url(doi)
|
|
363
363
|
logger.debug(f"Generated DOI URL: {doi_url}")
|
|
364
364
|
return doi_url
|
|
@@ -487,7 +487,7 @@ class CrossRefReferenceChecker:
|
|
|
487
487
|
work_doi = work_data.get('DOI')
|
|
488
488
|
if doi and work_doi:
|
|
489
489
|
# Compare DOIs using the proper comparison function
|
|
490
|
-
from utils.doi_utils import compare_dois
|
|
490
|
+
from refchecker.utils.doi_utils import compare_dois
|
|
491
491
|
if not compare_dois(doi, work_doi):
|
|
492
492
|
errors.append({
|
|
493
493
|
'error_type': 'doi',
|
|
@@ -542,7 +542,7 @@ class EnhancedHybridReferenceChecker:
|
|
|
542
542
|
"""
|
|
543
543
|
Compare author lists (delegates to shared utility)
|
|
544
544
|
"""
|
|
545
|
-
from utils.text_utils import compare_authors
|
|
545
|
+
from refchecker.utils.text_utils import compare_authors
|
|
546
546
|
return compare_authors(cited_authors, correct_authors)
|
|
547
547
|
|
|
548
548
|
# Backward compatibility alias
|