bibtex-updater 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bibtex_updater-0.4.0/.gitignore +40 -0
- bibtex_updater-0.4.0/LICENSE +21 -0
- bibtex_updater-0.4.0/PKG-INFO +317 -0
- bibtex_updater-0.4.0/README.md +251 -0
- bibtex_updater-0.4.0/docs/BIBTEX_UPDATER.md +254 -0
- bibtex_updater-0.4.0/docs/FILTER_BIBLIOGRAPHY.md +151 -0
- bibtex_updater-0.4.0/docs/REFERENCE_FACT_CHECKER.md +215 -0
- bibtex_updater-0.4.0/docs/ZOTERO_UPDATER.md +317 -0
- bibtex_updater-0.4.0/examples/README.md +42 -0
- bibtex_updater-0.4.0/examples/latexmkrc +88 -0
- bibtex_updater-0.4.0/examples/obsidian-zotero-sync/README.md +189 -0
- bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-bulk-sync.md +140 -0
- bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-enrich-keywords.md +142 -0
- bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-paper-template.md +169 -0
- bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-sync.md +92 -0
- bibtex_updater-0.4.0/examples/workflows/filter-bibliography.yml +63 -0
- bibtex_updater-0.4.0/examples/workflows/update-and-filter-bibliography.yml +75 -0
- bibtex_updater-0.4.0/examples/workflows/update-bibliography.yml +28 -0
- bibtex_updater-0.4.0/examples/workflows/validate-references.yml +100 -0
- bibtex_updater-0.4.0/pyproject.toml +177 -0
- bibtex_updater-0.4.0/src/bibtex_updater/__init__.py +147 -0
- bibtex_updater-0.4.0/src/bibtex_updater/_version.py +34 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/__init__.py +1 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/fact_checker_cli.py +18 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/filter_cli.py +18 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/obsidian_keywords_cli.py +173 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/updater_cli.py +18 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/zotero_cli.py +18 -0
- bibtex_updater-0.4.0/src/bibtex_updater/cli/zotero_organizer_cli.py +288 -0
- bibtex_updater-0.4.0/src/bibtex_updater/fact_checker.py +1718 -0
- bibtex_updater-0.4.0/src/bibtex_updater/filter.py +610 -0
- bibtex_updater-0.4.0/src/bibtex_updater/obsidian_keywords.py +388 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/__init__.py +13 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/__init__.py +19 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/base.py +218 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/claude_backend.py +287 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/embedding_backend.py +281 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/openai_backend.py +293 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/cache.py +168 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/classifier.py +147 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/collection_manager.py +347 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/config.py +101 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/main.py +448 -0
- bibtex_updater-0.4.0/src/bibtex_updater/organizer/taxonomy.py +357 -0
- bibtex_updater-0.4.0/src/bibtex_updater/updater.py +4256 -0
- bibtex_updater-0.4.0/src/bibtex_updater/utils.py +1152 -0
- bibtex_updater-0.4.0/src/bibtex_updater/zotero.py +586 -0
- bibtex_updater-0.4.0/src/bibtex_updater/zotero_sync.py +468 -0
- bibtex_updater-0.4.0/tests/__init__.py +1 -0
- bibtex_updater-0.4.0/tests/conftest.py +334 -0
- bibtex_updater-0.4.0/tests/fixtures/main.tex +42 -0
- bibtex_updater-0.4.0/tests/fixtures/sample.bib +141 -0
- bibtex_updater-0.4.0/tests/fixtures/subdir/appendix.tex +22 -0
- bibtex_updater-0.4.0/tests/test_bib_utils.py +353 -0
- bibtex_updater-0.4.0/tests/test_detector.py +205 -0
- bibtex_updater-0.4.0/tests/test_fact_checker.py +794 -0
- bibtex_updater-0.4.0/tests/test_field_checker.py +421 -0
- bibtex_updater-0.4.0/tests/test_filter_bibliography.py +646 -0
- bibtex_updater-0.4.0/tests/test_integration.py +355 -0
- bibtex_updater-0.4.0/tests/test_obsidian_keywords.py +527 -0
- bibtex_updater-0.4.0/tests/test_organizer/__init__.py +1 -0
- bibtex_updater-0.4.0/tests/test_organizer/test_backends.py +206 -0
- bibtex_updater-0.4.0/tests/test_organizer/test_classifier_base.py +131 -0
- bibtex_updater-0.4.0/tests/test_organizer/test_collection_manager.py +215 -0
- bibtex_updater-0.4.0/tests/test_organizer/test_integration.py +267 -0
- bibtex_updater-0.4.0/tests/test_resolver_stages.py +279 -0
- bibtex_updater-0.4.0/tests/test_scholarly.py +673 -0
- bibtex_updater-0.4.0/tests/test_updater.py +376 -0
- bibtex_updater-0.4.0/tests/test_utils.py +317 -0
- bibtex_updater-0.4.0/tests/test_zotero_sync.py +642 -0
- bibtex_updater-0.4.0/tests/test_zotero_updater.py +799 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Virtual environments
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
|
|
11
|
+
# IDE
|
|
12
|
+
.idea/
|
|
13
|
+
.vscode/
|
|
14
|
+
*.swp
|
|
15
|
+
*.swo
|
|
16
|
+
|
|
17
|
+
# Testing
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.coverage
|
|
20
|
+
htmlcov/
|
|
21
|
+
.tox/
|
|
22
|
+
|
|
23
|
+
# Cache files
|
|
24
|
+
.cache.replace_preprints.json
|
|
25
|
+
*.cache
|
|
26
|
+
|
|
27
|
+
# Build
|
|
28
|
+
dist/
|
|
29
|
+
build/
|
|
30
|
+
*.egg-info/
|
|
31
|
+
src/*.egg-info/
|
|
32
|
+
|
|
33
|
+
# Version file (auto-generated by hatch-vcs)
|
|
34
|
+
src/bibtex_updater/_version.py
|
|
35
|
+
|
|
36
|
+
# OS
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
39
|
+
CLAUDE.md
|
|
40
|
+
.claude/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Patrik Reizinger
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bibtex-updater
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Replace preprint BibTeX entries with published versions and validate bibliography references
|
|
5
|
+
Project-URL: Homepage, https://github.com/rpatrik96/bibtexupdater
|
|
6
|
+
Project-URL: Documentation, https://github.com/rpatrik96/bibtexupdater#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/rpatrik96/bibtexupdater.git
|
|
8
|
+
Project-URL: Issues, https://github.com/rpatrik96/bibtexupdater/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/rpatrik96/bibtexupdater/blob/main/CHANGELOG.md
|
|
10
|
+
Author: Patrik Reizinger
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: academic,arxiv,bibliography,bibtex,citation,crossref,latex,preprint,research
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Environment :: Console
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering
|
|
25
|
+
Classifier: Topic :: Text Processing :: Markup :: LaTeX
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.10
|
|
28
|
+
Requires-Dist: bibtexparser>=1.4.0
|
|
29
|
+
Requires-Dist: crossref-commons>=0.0.7
|
|
30
|
+
Requires-Dist: httpx>=0.24.0
|
|
31
|
+
Requires-Dist: rapidfuzz>=3.0.0
|
|
32
|
+
Requires-Dist: requests>=2.28.0
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: pyyaml>=6.0; extra == 'all'
|
|
35
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'all'
|
|
36
|
+
Requires-Dist: scholarly>=1.7.0; extra == 'all'
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: black>=24.0.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
41
|
+
Requires-Dist: mypy>=1.13.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: pre-commit>=3.6.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
45
|
+
Requires-Dist: ruff>=0.7.0; extra == 'dev'
|
|
46
|
+
Requires-Dist: twine>=5.0.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: types-requests>=2.31.0; extra == 'dev'
|
|
48
|
+
Provides-Extra: organizer
|
|
49
|
+
Requires-Dist: pyyaml>=6.0; extra == 'organizer'
|
|
50
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'organizer'
|
|
51
|
+
Provides-Extra: organizer-claude
|
|
52
|
+
Requires-Dist: pyyaml>=6.0; extra == 'organizer-claude'
|
|
53
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-claude'
|
|
54
|
+
Provides-Extra: organizer-embedding
|
|
55
|
+
Requires-Dist: pyyaml>=6.0; extra == 'organizer-embedding'
|
|
56
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-embedding'
|
|
57
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == 'organizer-embedding'
|
|
58
|
+
Provides-Extra: organizer-openai
|
|
59
|
+
Requires-Dist: pyyaml>=6.0; extra == 'organizer-openai'
|
|
60
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-openai'
|
|
61
|
+
Provides-Extra: scholarly
|
|
62
|
+
Requires-Dist: scholarly>=1.7.0; extra == 'scholarly'
|
|
63
|
+
Provides-Extra: zotero
|
|
64
|
+
Requires-Dist: pyzotero>=1.5.0; extra == 'zotero'
|
|
65
|
+
Description-Content-Type: text/markdown
|
|
66
|
+
|
|
67
|
+
# BibTeX Updater
|
|
68
|
+
|
|
69
|
+
Tools for managing BibTeX bibliographies: automatically update preprints to published versions, validate references against external databases, and filter to only cited references.
|
|
70
|
+
|
|
71
|
+
## Installation
|
|
72
|
+
|
|
73
|
+
### From PyPI (Recommended)
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install bibtex-updater
|
|
77
|
+
|
|
78
|
+
# With Google Scholar support
|
|
79
|
+
pip install bibtex-updater[scholarly]
|
|
80
|
+
|
|
81
|
+
# With Zotero support
|
|
82
|
+
pip install bibtex-updater[zotero]
|
|
83
|
+
|
|
84
|
+
# All optional dependencies
|
|
85
|
+
pip install bibtex-updater[all]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### From Source
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
git clone https://github.com/rpatrik96/bibtexupdater.git
|
|
92
|
+
cd bibtexupdater
|
|
93
|
+
pip install -e ".[dev]"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Using uv (No Installation)
|
|
97
|
+
|
|
98
|
+
Run directly without managing virtual environments using [uv](https://docs.astral.sh/uv/):
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Run any command directly
|
|
102
|
+
uv run --with "bibtex-updater[all]" bibtex-update references.bib -o updated.bib
|
|
103
|
+
|
|
104
|
+
# Or use the provided wrapper script
|
|
105
|
+
./scripts/bibtex-x update references.bib -o updated.bib
|
|
106
|
+
./scripts/bibtex-x check references.bib
|
|
107
|
+
./scripts/bibtex-x filter paper.tex -b references.bib -o filtered.bib
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## CLI Commands
|
|
111
|
+
|
|
112
|
+
| Command | Description |
|
|
113
|
+
|---------|-------------|
|
|
114
|
+
| `bibtex-update` | Replace preprints with published versions |
|
|
115
|
+
| `bibtex-check` | Validate references exist with correct metadata |
|
|
116
|
+
| `bibtex-filter` | Filter to only cited entries |
|
|
117
|
+
| `bibtex-zotero` | Update preprints in Zotero library |
|
|
118
|
+
|
|
119
|
+
## Quick Start
|
|
120
|
+
|
|
121
|
+
### Update Preprints
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Update preprints to published versions
|
|
125
|
+
bibtex-update references.bib -o updated.bib
|
|
126
|
+
|
|
127
|
+
# Preview changes (dry run)
|
|
128
|
+
bibtex-update references.bib --dry-run --verbose
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Validate References (Fact-Check)
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Check if references exist and have correct metadata
|
|
135
|
+
bibtex-check references.bib --report report.json
|
|
136
|
+
|
|
137
|
+
# Strict mode: exit with an error if any hallucinated or not-found entries are detected
|
|
138
|
+
bibtex-check references.bib --strict
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Filter Bibliography
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
# Filter to only cited entries
|
|
145
|
+
bibtex-filter paper.tex -b references.bib -o filtered.bib
|
|
146
|
+
|
|
147
|
+
# Multiple tex files
|
|
148
|
+
bibtex-filter *.tex -b references.bib -o filtered.bib
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Update Zotero Library
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Set credentials (get from zotero.org/settings/keys)
|
|
155
|
+
export ZOTERO_LIBRARY_ID="your_user_id"
|
|
156
|
+
export ZOTERO_API_KEY="your_api_key"
|
|
157
|
+
|
|
158
|
+
# Preview changes
|
|
159
|
+
bibtex-zotero --dry-run
|
|
160
|
+
|
|
161
|
+
# Apply updates
|
|
162
|
+
bibtex-zotero
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Sync BibTeX Updates to Zotero
|
|
166
|
+
|
|
167
|
+
When updating a `.bib` file, you can simultaneously update matching entries in your Zotero library:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# Set Zotero credentials
|
|
171
|
+
export ZOTERO_LIBRARY_ID="your_user_id"
|
|
172
|
+
export ZOTERO_API_KEY="your_api_key"
|
|
173
|
+
|
|
174
|
+
# Update bib file AND sync to Zotero
|
|
175
|
+
bibtex-update references.bib -o updated.bib --zotero
|
|
176
|
+
|
|
177
|
+
# Preview Zotero changes only (bib changes still apply)
|
|
178
|
+
bibtex-update references.bib -o updated.bib --zotero --zotero-dry-run
|
|
179
|
+
|
|
180
|
+
# Limit to a specific Zotero collection
|
|
181
|
+
bibtex-update references.bib -o updated.bib --zotero --zotero-collection ABCD1234
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The sync matches bib entries to Zotero items by:
|
|
185
|
+
1. **arXiv ID** - Most reliable for preprints
|
|
186
|
+
2. **DOI** - For preprints with DOIs (e.g., bioRxiv)
|
|
187
|
+
3. **Title + Author** - Fuzzy matching as fallback
|
|
188
|
+
|
|
189
|
+
## Standalone Scripts
|
|
190
|
+
|
|
191
|
+
For environments without pip (e.g., Overleaf), `filter_bibliography.py` can be used directly, as it has no third-party dependencies (it uses only the Python standard library):
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Copy the script and run directly
|
|
195
|
+
python filter_bibliography.py paper.tex -b references.bib -o filtered.bib
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Documentation
|
|
199
|
+
|
|
200
|
+
| Document | Description |
|
|
201
|
+
|----------|-------------|
|
|
202
|
+
| [docs/BIBTEX_UPDATER.md](docs/BIBTEX_UPDATER.md) | Full BibTeX updater documentation |
|
|
203
|
+
| [docs/REFERENCE_FACT_CHECKER.md](docs/REFERENCE_FACT_CHECKER.md) | Full reference fact-checker documentation |
|
|
204
|
+
| [docs/ZOTERO_UPDATER.md](docs/ZOTERO_UPDATER.md) | Full Zotero updater documentation |
|
|
205
|
+
| [docs/FILTER_BIBLIOGRAPHY.md](docs/FILTER_BIBLIOGRAPHY.md) | Full filter documentation |
|
|
206
|
+
| [examples/](examples/) | Example workflows and configuration files |
|
|
207
|
+
|
|
208
|
+
## Overleaf Integration
|
|
209
|
+
|
|
210
|
+
These tools integrate with Overleaf via GitHub Actions or, for `filter_bibliography.py` only, via latexmkrc.
|
|
211
|
+
|
|
212
|
+
### GitHub Actions (Recommended)
|
|
213
|
+
|
|
214
|
+
1. Enable GitHub sync in Overleaf (Menu -> Sync -> GitHub)
|
|
215
|
+
2. Copy a workflow from [examples/workflows/](examples/workflows/) to `.github/workflows/`
|
|
216
|
+
3. Changes synced from Overleaf automatically trigger updates
|
|
217
|
+
|
|
218
|
+
### latexmkrc (Direct Overleaf)
|
|
219
|
+
|
|
220
|
+
For `filter_bibliography.py` only (no dependencies required):
|
|
221
|
+
|
|
222
|
+
1. Upload `filter_bibliography.py` to your Overleaf project
|
|
223
|
+
2. Create `.latexmkrc` based on [examples/latexmkrc](examples/latexmkrc)
|
|
224
|
+
3. Recompile - filtered bibliography appears in your file list
|
|
225
|
+
|
|
226
|
+
## Features
|
|
227
|
+
|
|
228
|
+
### BibTeX Updater (`bibtex-update`)
|
|
229
|
+
|
|
230
|
+
- **Multi-source resolution**: arXiv, Crossref, DBLP, Semantic Scholar, Google Scholar
|
|
231
|
+
- **High accuracy**: Title and author fuzzy matching with confidence thresholds
|
|
232
|
+
- **Batch processing**: Multiple files with concurrent workers (default: 8)
|
|
233
|
+
- **Deduplication**: Merge duplicates by DOI or normalized title+authors
|
|
234
|
+
- **Smart caching**: On-disk cache + semantic resolution cache with TTL
|
|
235
|
+
- **Per-service rate limiting**: Optimized rate limits per API (Crossref, S2, DBLP, arXiv)
|
|
236
|
+
- **Batch API support**: Faster bulk lookups via arXiv/S2/Crossref batch endpoints
|
|
237
|
+
|
|
238
|
+
### Zotero Updater (`bibtex-zotero`)
|
|
239
|
+
|
|
240
|
+
- **Direct Zotero integration**: Fetches and updates items via Zotero API
|
|
241
|
+
- **Same resolution pipeline**: Uses the same multi-source resolution
|
|
242
|
+
- **Preserves metadata**: Keeps notes, tags, and attachments intact
|
|
243
|
+
- **Idempotent**: Already-published papers are automatically skipped
|
|
244
|
+
- **Dry-run mode**: Preview changes before applying
|
|
245
|
+
|
|
246
|
+
### Reference Fact-Checker (`bibtex-check`)
|
|
247
|
+
|
|
248
|
+
- **Multi-source validation**: Crossref, DBLP, Semantic Scholar
|
|
249
|
+
- **Detailed mismatch detection**: Title, author, year, venue comparisons
|
|
250
|
+
- **Hallucination detection**: Identifies likely fabricated references
|
|
251
|
+
- **Structured reports**: JSON and JSONL output formats
|
|
252
|
+
- **CI/CD integration**: Strict mode with exit codes for automation
|
|
253
|
+
|
|
254
|
+
### Filter Bibliography (`bibtex-filter`)
|
|
255
|
+
|
|
256
|
+
- **Zero dependencies**: Uses only Python standard library
|
|
257
|
+
- **Works on Overleaf**: No pip install needed
|
|
258
|
+
- **Multiple bib files**: Merge and filter from multiple sources
|
|
259
|
+
- **Citation detection**: Supports natbib, biblatex, and standard LaTeX citations
|
|
260
|
+
|
|
261
|
+
## Python API
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
from bibtex_updater import Detector, Resolver, Updater, HttpClient, RateLimiter, DiskCache
|
|
265
|
+
|
|
266
|
+
# Create HTTP client with rate limiting and caching
|
|
267
|
+
rate_limiter = RateLimiter(req_per_min=30)
|
|
268
|
+
cache = DiskCache(".cache.json")
|
|
269
|
+
http_client = HttpClient(
|
|
270
|
+
timeout=30.0,
|
|
271
|
+
user_agent="bibtex-updater/0.4.0",
|
|
272
|
+
rate_limiter=rate_limiter,
|
|
273
|
+
cache=cache
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# Detect preprints
|
|
277
|
+
detector = Detector()
|
|
278
|
+
detection = detector.detect(entry)
|
|
279
|
+
|
|
280
|
+
if detection.is_preprint:
|
|
281
|
+
    # Resolve to published version
|
|
282
|
+
    resolver = Resolver(http_client)
|
|
283
|
+
    candidate = resolver.resolve(detection)
|
|
284
|
+
|
|
285
|
+
    if candidate and candidate.confidence >= 0.9:
|
|
286
|
+
        # Update the entry
|
|
287
|
+
        updater = Updater()
|
|
288
|
+
        updated_entry = updater.update_entry(entry, candidate.record, detection)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Development
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
# Clone and install in development mode
|
|
295
|
+
git clone https://github.com/rpatrik96/bibtexupdater.git
|
|
296
|
+
cd bibtexupdater
|
|
297
|
+
pip install -e ".[dev,all]"
|
|
298
|
+
|
|
299
|
+
# Run tests
|
|
300
|
+
pytest tests/ -v
|
|
301
|
+
|
|
302
|
+
# Run tests with coverage
|
|
303
|
+
pytest tests/ -v --cov=bibtex_updater --cov-report=term-missing
|
|
304
|
+
|
|
305
|
+
# Code quality
|
|
306
|
+
pre-commit run --all-files
|
|
307
|
+
|
|
308
|
+
# Build package
|
|
309
|
+
python -m build
|
|
310
|
+
|
|
311
|
+
# Check package
|
|
312
|
+
twine check dist/*
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## License
|
|
316
|
+
|
|
317
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# BibTeX Updater
|
|
2
|
+
|
|
3
|
+
Tools for managing BibTeX bibliographies: automatically update preprints to published versions, validate references against external databases, and filter to only cited references.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
### From PyPI (Recommended)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install bibtex-updater
|
|
11
|
+
|
|
12
|
+
# With Google Scholar support
|
|
13
|
+
pip install bibtex-updater[scholarly]
|
|
14
|
+
|
|
15
|
+
# With Zotero support
|
|
16
|
+
pip install bibtex-updater[zotero]
|
|
17
|
+
|
|
18
|
+
# All optional dependencies
|
|
19
|
+
pip install bibtex-updater[all]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### From Source
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
git clone https://github.com/rpatrik96/bibtexupdater.git
|
|
26
|
+
cd bibtexupdater
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Using uv (No Installation)
|
|
31
|
+
|
|
32
|
+
Run directly without managing virtual environments using [uv](https://docs.astral.sh/uv/):
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Run any command directly
|
|
36
|
+
uv run --with "bibtex-updater[all]" bibtex-update references.bib -o updated.bib
|
|
37
|
+
|
|
38
|
+
# Or use the provided wrapper script
|
|
39
|
+
./scripts/bibtex-x update references.bib -o updated.bib
|
|
40
|
+
./scripts/bibtex-x check references.bib
|
|
41
|
+
./scripts/bibtex-x filter paper.tex -b references.bib -o filtered.bib
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## CLI Commands
|
|
45
|
+
|
|
46
|
+
| Command | Description |
|
|
47
|
+
|---------|-------------|
|
|
48
|
+
| `bibtex-update` | Replace preprints with published versions |
|
|
49
|
+
| `bibtex-check` | Validate references exist with correct metadata |
|
|
50
|
+
| `bibtex-filter` | Filter to only cited entries |
|
|
51
|
+
| `bibtex-zotero` | Update preprints in Zotero library |
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
### Update Preprints
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Update preprints to published versions
|
|
59
|
+
bibtex-update references.bib -o updated.bib
|
|
60
|
+
|
|
61
|
+
# Preview changes (dry run)
|
|
62
|
+
bibtex-update references.bib --dry-run --verbose
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Validate References (Fact-Check)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Check if references exist and have correct metadata
|
|
69
|
+
bibtex-check references.bib --report report.json
|
|
70
|
+
|
|
71
|
+
# Strict mode: exit with an error if any hallucinated or not-found entries are detected
|
|
72
|
+
bibtex-check references.bib --strict
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Filter Bibliography
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Filter to only cited entries
|
|
79
|
+
bibtex-filter paper.tex -b references.bib -o filtered.bib
|
|
80
|
+
|
|
81
|
+
# Multiple tex files
|
|
82
|
+
bibtex-filter *.tex -b references.bib -o filtered.bib
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Update Zotero Library
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# Set credentials (get from zotero.org/settings/keys)
|
|
89
|
+
export ZOTERO_LIBRARY_ID="your_user_id"
|
|
90
|
+
export ZOTERO_API_KEY="your_api_key"
|
|
91
|
+
|
|
92
|
+
# Preview changes
|
|
93
|
+
bibtex-zotero --dry-run
|
|
94
|
+
|
|
95
|
+
# Apply updates
|
|
96
|
+
bibtex-zotero
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Sync BibTeX Updates to Zotero
|
|
100
|
+
|
|
101
|
+
When updating a `.bib` file, you can simultaneously update matching entries in your Zotero library:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Set Zotero credentials
|
|
105
|
+
export ZOTERO_LIBRARY_ID="your_user_id"
|
|
106
|
+
export ZOTERO_API_KEY="your_api_key"
|
|
107
|
+
|
|
108
|
+
# Update bib file AND sync to Zotero
|
|
109
|
+
bibtex-update references.bib -o updated.bib --zotero
|
|
110
|
+
|
|
111
|
+
# Preview Zotero changes only (bib changes still apply)
|
|
112
|
+
bibtex-update references.bib -o updated.bib --zotero --zotero-dry-run
|
|
113
|
+
|
|
114
|
+
# Limit to a specific Zotero collection
|
|
115
|
+
bibtex-update references.bib -o updated.bib --zotero --zotero-collection ABCD1234
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
The sync matches bib entries to Zotero items by:
|
|
119
|
+
1. **arXiv ID** - Most reliable for preprints
|
|
120
|
+
2. **DOI** - For preprints with DOIs (e.g., bioRxiv)
|
|
121
|
+
3. **Title + Author** - Fuzzy matching as fallback
|
|
122
|
+
|
|
123
|
+
## Standalone Scripts
|
|
124
|
+
|
|
125
|
+
For environments without pip (e.g., Overleaf), `filter_bibliography.py` can be used directly, as it has no third-party dependencies (it uses only the Python standard library):
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# Copy the script and run directly
|
|
129
|
+
python filter_bibliography.py paper.tex -b references.bib -o filtered.bib
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Documentation
|
|
133
|
+
|
|
134
|
+
| Document | Description |
|
|
135
|
+
|----------|-------------|
|
|
136
|
+
| [docs/BIBTEX_UPDATER.md](docs/BIBTEX_UPDATER.md) | Full BibTeX updater documentation |
|
|
137
|
+
| [docs/REFERENCE_FACT_CHECKER.md](docs/REFERENCE_FACT_CHECKER.md) | Full reference fact-checker documentation |
|
|
138
|
+
| [docs/ZOTERO_UPDATER.md](docs/ZOTERO_UPDATER.md) | Full Zotero updater documentation |
|
|
139
|
+
| [docs/FILTER_BIBLIOGRAPHY.md](docs/FILTER_BIBLIOGRAPHY.md) | Full filter documentation |
|
|
140
|
+
| [examples/](examples/) | Example workflows and configuration files |
|
|
141
|
+
|
|
142
|
+
## Overleaf Integration
|
|
143
|
+
|
|
144
|
+
These tools integrate with Overleaf via GitHub Actions or, for `filter_bibliography.py` only, via latexmkrc.
|
|
145
|
+
|
|
146
|
+
### GitHub Actions (Recommended)
|
|
147
|
+
|
|
148
|
+
1. Enable GitHub sync in Overleaf (Menu -> Sync -> GitHub)
|
|
149
|
+
2. Copy a workflow from [examples/workflows/](examples/workflows/) to `.github/workflows/`
|
|
150
|
+
3. Changes synced from Overleaf automatically trigger updates
|
|
151
|
+
|
|
152
|
+
### latexmkrc (Direct Overleaf)
|
|
153
|
+
|
|
154
|
+
For `filter_bibliography.py` only (no dependencies required):
|
|
155
|
+
|
|
156
|
+
1. Upload `filter_bibliography.py` to your Overleaf project
|
|
157
|
+
2. Create `.latexmkrc` based on [examples/latexmkrc](examples/latexmkrc)
|
|
158
|
+
3. Recompile - filtered bibliography appears in your file list
|
|
159
|
+
|
|
160
|
+
## Features
|
|
161
|
+
|
|
162
|
+
### BibTeX Updater (`bibtex-update`)
|
|
163
|
+
|
|
164
|
+
- **Multi-source resolution**: arXiv, Crossref, DBLP, Semantic Scholar, Google Scholar
|
|
165
|
+
- **High accuracy**: Title and author fuzzy matching with confidence thresholds
|
|
166
|
+
- **Batch processing**: Multiple files with concurrent workers (default: 8)
|
|
167
|
+
- **Deduplication**: Merge duplicates by DOI or normalized title+authors
|
|
168
|
+
- **Smart caching**: On-disk cache + semantic resolution cache with TTL
|
|
169
|
+
- **Per-service rate limiting**: Optimized rate limits per API (Crossref, S2, DBLP, arXiv)
|
|
170
|
+
- **Batch API support**: Faster bulk lookups via arXiv/S2/Crossref batch endpoints
|
|
171
|
+
|
|
172
|
+
### Zotero Updater (`bibtex-zotero`)
|
|
173
|
+
|
|
174
|
+
- **Direct Zotero integration**: Fetches and updates items via Zotero API
|
|
175
|
+
- **Same resolution pipeline**: Uses the same multi-source resolution
|
|
176
|
+
- **Preserves metadata**: Keeps notes, tags, and attachments intact
|
|
177
|
+
- **Idempotent**: Already-published papers are automatically skipped
|
|
178
|
+
- **Dry-run mode**: Preview changes before applying
|
|
179
|
+
|
|
180
|
+
### Reference Fact-Checker (`bibtex-check`)
|
|
181
|
+
|
|
182
|
+
- **Multi-source validation**: Crossref, DBLP, Semantic Scholar
|
|
183
|
+
- **Detailed mismatch detection**: Title, author, year, venue comparisons
|
|
184
|
+
- **Hallucination detection**: Identifies likely fabricated references
|
|
185
|
+
- **Structured reports**: JSON and JSONL output formats
|
|
186
|
+
- **CI/CD integration**: Strict mode with exit codes for automation
|
|
187
|
+
|
|
188
|
+
### Filter Bibliography (`bibtex-filter`)
|
|
189
|
+
|
|
190
|
+
- **Zero dependencies**: Uses only Python standard library
|
|
191
|
+
- **Works on Overleaf**: No pip install needed
|
|
192
|
+
- **Multiple bib files**: Merge and filter from multiple sources
|
|
193
|
+
- **Citation detection**: Supports natbib, biblatex, and standard LaTeX citations
|
|
194
|
+
|
|
195
|
+
## Python API
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from bibtex_updater import Detector, Resolver, Updater, HttpClient, RateLimiter, DiskCache
|
|
199
|
+
|
|
200
|
+
# Create HTTP client with rate limiting and caching
|
|
201
|
+
rate_limiter = RateLimiter(req_per_min=30)
|
|
202
|
+
cache = DiskCache(".cache.json")
|
|
203
|
+
http_client = HttpClient(
|
|
204
|
+
timeout=30.0,
|
|
205
|
+
user_agent="bibtex-updater/0.4.0",
|
|
206
|
+
rate_limiter=rate_limiter,
|
|
207
|
+
cache=cache
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Detect preprints
|
|
211
|
+
detector = Detector()
|
|
212
|
+
detection = detector.detect(entry)
|
|
213
|
+
|
|
214
|
+
if detection.is_preprint:
|
|
215
|
+
    # Resolve to published version
|
|
216
|
+
    resolver = Resolver(http_client)
|
|
217
|
+
    candidate = resolver.resolve(detection)
|
|
218
|
+
|
|
219
|
+
    if candidate and candidate.confidence >= 0.9:
|
|
220
|
+
        # Update the entry
|
|
221
|
+
        updater = Updater()
|
|
222
|
+
        updated_entry = updater.update_entry(entry, candidate.record, detection)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## Development
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Clone and install in development mode
|
|
229
|
+
git clone https://github.com/rpatrik96/bibtexupdater.git
|
|
230
|
+
cd bibtexupdater
|
|
231
|
+
pip install -e ".[dev,all]"
|
|
232
|
+
|
|
233
|
+
# Run tests
|
|
234
|
+
pytest tests/ -v
|
|
235
|
+
|
|
236
|
+
# Run tests with coverage
|
|
237
|
+
pytest tests/ -v --cov=bibtex_updater --cov-report=term-missing
|
|
238
|
+
|
|
239
|
+
# Code quality
|
|
240
|
+
pre-commit run --all-files
|
|
241
|
+
|
|
242
|
+
# Build package
|
|
243
|
+
python -m build
|
|
244
|
+
|
|
245
|
+
# Check package
|
|
246
|
+
twine check dist/*
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
MIT License - see [LICENSE](LICENSE) for details.
|