bibtex-updater 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. bibtex_updater-0.4.0/.gitignore +40 -0
  2. bibtex_updater-0.4.0/LICENSE +21 -0
  3. bibtex_updater-0.4.0/PKG-INFO +317 -0
  4. bibtex_updater-0.4.0/README.md +251 -0
  5. bibtex_updater-0.4.0/docs/BIBTEX_UPDATER.md +254 -0
  6. bibtex_updater-0.4.0/docs/FILTER_BIBLIOGRAPHY.md +151 -0
  7. bibtex_updater-0.4.0/docs/REFERENCE_FACT_CHECKER.md +215 -0
  8. bibtex_updater-0.4.0/docs/ZOTERO_UPDATER.md +317 -0
  9. bibtex_updater-0.4.0/examples/README.md +42 -0
  10. bibtex_updater-0.4.0/examples/latexmkrc +88 -0
  11. bibtex_updater-0.4.0/examples/obsidian-zotero-sync/README.md +189 -0
  12. bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-bulk-sync.md +140 -0
  13. bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-enrich-keywords.md +142 -0
  14. bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-paper-template.md +169 -0
  15. bibtex_updater-0.4.0/examples/obsidian-zotero-sync/zotero-sync.md +92 -0
  16. bibtex_updater-0.4.0/examples/workflows/filter-bibliography.yml +63 -0
  17. bibtex_updater-0.4.0/examples/workflows/update-and-filter-bibliography.yml +75 -0
  18. bibtex_updater-0.4.0/examples/workflows/update-bibliography.yml +28 -0
  19. bibtex_updater-0.4.0/examples/workflows/validate-references.yml +100 -0
  20. bibtex_updater-0.4.0/pyproject.toml +177 -0
  21. bibtex_updater-0.4.0/src/bibtex_updater/__init__.py +147 -0
  22. bibtex_updater-0.4.0/src/bibtex_updater/_version.py +34 -0
  23. bibtex_updater-0.4.0/src/bibtex_updater/cli/__init__.py +1 -0
  24. bibtex_updater-0.4.0/src/bibtex_updater/cli/fact_checker_cli.py +18 -0
  25. bibtex_updater-0.4.0/src/bibtex_updater/cli/filter_cli.py +18 -0
  26. bibtex_updater-0.4.0/src/bibtex_updater/cli/obsidian_keywords_cli.py +173 -0
  27. bibtex_updater-0.4.0/src/bibtex_updater/cli/updater_cli.py +18 -0
  28. bibtex_updater-0.4.0/src/bibtex_updater/cli/zotero_cli.py +18 -0
  29. bibtex_updater-0.4.0/src/bibtex_updater/cli/zotero_organizer_cli.py +288 -0
  30. bibtex_updater-0.4.0/src/bibtex_updater/fact_checker.py +1718 -0
  31. bibtex_updater-0.4.0/src/bibtex_updater/filter.py +610 -0
  32. bibtex_updater-0.4.0/src/bibtex_updater/obsidian_keywords.py +388 -0
  33. bibtex_updater-0.4.0/src/bibtex_updater/organizer/__init__.py +13 -0
  34. bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/__init__.py +19 -0
  35. bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/base.py +218 -0
  36. bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/claude_backend.py +287 -0
  37. bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/embedding_backend.py +281 -0
  38. bibtex_updater-0.4.0/src/bibtex_updater/organizer/backends/openai_backend.py +293 -0
  39. bibtex_updater-0.4.0/src/bibtex_updater/organizer/cache.py +168 -0
  40. bibtex_updater-0.4.0/src/bibtex_updater/organizer/classifier.py +147 -0
  41. bibtex_updater-0.4.0/src/bibtex_updater/organizer/collection_manager.py +347 -0
  42. bibtex_updater-0.4.0/src/bibtex_updater/organizer/config.py +101 -0
  43. bibtex_updater-0.4.0/src/bibtex_updater/organizer/main.py +448 -0
  44. bibtex_updater-0.4.0/src/bibtex_updater/organizer/taxonomy.py +357 -0
  45. bibtex_updater-0.4.0/src/bibtex_updater/updater.py +4256 -0
  46. bibtex_updater-0.4.0/src/bibtex_updater/utils.py +1152 -0
  47. bibtex_updater-0.4.0/src/bibtex_updater/zotero.py +586 -0
  48. bibtex_updater-0.4.0/src/bibtex_updater/zotero_sync.py +468 -0
  49. bibtex_updater-0.4.0/tests/__init__.py +1 -0
  50. bibtex_updater-0.4.0/tests/conftest.py +334 -0
  51. bibtex_updater-0.4.0/tests/fixtures/main.tex +42 -0
  52. bibtex_updater-0.4.0/tests/fixtures/sample.bib +141 -0
  53. bibtex_updater-0.4.0/tests/fixtures/subdir/appendix.tex +22 -0
  54. bibtex_updater-0.4.0/tests/test_bib_utils.py +353 -0
  55. bibtex_updater-0.4.0/tests/test_detector.py +205 -0
  56. bibtex_updater-0.4.0/tests/test_fact_checker.py +794 -0
  57. bibtex_updater-0.4.0/tests/test_field_checker.py +421 -0
  58. bibtex_updater-0.4.0/tests/test_filter_bibliography.py +646 -0
  59. bibtex_updater-0.4.0/tests/test_integration.py +355 -0
  60. bibtex_updater-0.4.0/tests/test_obsidian_keywords.py +527 -0
  61. bibtex_updater-0.4.0/tests/test_organizer/__init__.py +1 -0
  62. bibtex_updater-0.4.0/tests/test_organizer/test_backends.py +206 -0
  63. bibtex_updater-0.4.0/tests/test_organizer/test_classifier_base.py +131 -0
  64. bibtex_updater-0.4.0/tests/test_organizer/test_collection_manager.py +215 -0
  65. bibtex_updater-0.4.0/tests/test_organizer/test_integration.py +267 -0
  66. bibtex_updater-0.4.0/tests/test_resolver_stages.py +279 -0
  67. bibtex_updater-0.4.0/tests/test_scholarly.py +673 -0
  68. bibtex_updater-0.4.0/tests/test_updater.py +376 -0
  69. bibtex_updater-0.4.0/tests/test_utils.py +317 -0
  70. bibtex_updater-0.4.0/tests/test_zotero_sync.py +642 -0
  71. bibtex_updater-0.4.0/tests/test_zotero_updater.py +799 -0
@@ -0,0 +1,40 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environments
7
+ .venv/
8
+ venv/
9
+ ENV/
10
+
11
+ # IDE
12
+ .idea/
13
+ .vscode/
14
+ *.swp
15
+ *.swo
16
+
17
+ # Testing
18
+ .pytest_cache/
19
+ .coverage
20
+ htmlcov/
21
+ .tox/
22
+
23
+ # Cache files
24
+ .cache.replace_preprints.json
25
+ *.cache
26
+
27
+ # Build
28
+ dist/
29
+ build/
30
+ *.egg-info/
31
+ src/*.egg-info/
32
+
33
+ # Version file (auto-generated by hatch-vcs)
34
+ src/bibtex_updater/_version.py
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+ CLAUDE.md
40
+ .claude/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Patrik Reizinger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,317 @@
1
+ Metadata-Version: 2.4
2
+ Name: bibtex-updater
3
+ Version: 0.4.0
4
+ Summary: Replace preprint BibTeX entries with published versions and validate bibliography references
5
+ Project-URL: Homepage, https://github.com/rpatrik96/bibtexupdater
6
+ Project-URL: Documentation, https://github.com/rpatrik96/bibtexupdater#readme
7
+ Project-URL: Repository, https://github.com/rpatrik96/bibtexupdater.git
8
+ Project-URL: Issues, https://github.com/rpatrik96/bibtexupdater/issues
9
+ Project-URL: Changelog, https://github.com/rpatrik96/bibtexupdater/blob/main/CHANGELOG.md
10
+ Author: Patrik Reizinger
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: academic,arxiv,bibliography,bibtex,citation,crossref,latex,preprint,research
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Environment :: Console
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Topic :: Scientific/Engineering
25
+ Classifier: Topic :: Text Processing :: Markup :: LaTeX
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.10
28
+ Requires-Dist: bibtexparser>=1.4.0
29
+ Requires-Dist: crossref-commons>=0.0.7
30
+ Requires-Dist: httpx>=0.24.0
31
+ Requires-Dist: rapidfuzz>=3.0.0
32
+ Requires-Dist: requests>=2.28.0
33
+ Provides-Extra: all
34
+ Requires-Dist: pyyaml>=6.0; extra == 'all'
35
+ Requires-Dist: pyzotero>=1.5.0; extra == 'all'
36
+ Requires-Dist: scholarly>=1.7.0; extra == 'all'
37
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
38
+ Provides-Extra: dev
39
+ Requires-Dist: black>=24.0.0; extra == 'dev'
40
+ Requires-Dist: build>=1.0.0; extra == 'dev'
41
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
42
+ Requires-Dist: pre-commit>=3.6.0; extra == 'dev'
43
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
44
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
45
+ Requires-Dist: ruff>=0.7.0; extra == 'dev'
46
+ Requires-Dist: twine>=5.0.0; extra == 'dev'
47
+ Requires-Dist: types-requests>=2.31.0; extra == 'dev'
48
+ Provides-Extra: organizer
49
+ Requires-Dist: pyyaml>=6.0; extra == 'organizer'
50
+ Requires-Dist: pyzotero>=1.5.0; extra == 'organizer'
51
+ Provides-Extra: organizer-claude
52
+ Requires-Dist: pyyaml>=6.0; extra == 'organizer-claude'
53
+ Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-claude'
54
+ Provides-Extra: organizer-embedding
55
+ Requires-Dist: pyyaml>=6.0; extra == 'organizer-embedding'
56
+ Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-embedding'
57
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'organizer-embedding'
58
+ Provides-Extra: organizer-openai
59
+ Requires-Dist: pyyaml>=6.0; extra == 'organizer-openai'
60
+ Requires-Dist: pyzotero>=1.5.0; extra == 'organizer-openai'
61
+ Provides-Extra: scholarly
62
+ Requires-Dist: scholarly>=1.7.0; extra == 'scholarly'
63
+ Provides-Extra: zotero
64
+ Requires-Dist: pyzotero>=1.5.0; extra == 'zotero'
65
+ Description-Content-Type: text/markdown
66
+
67
+ # BibTeX Updater
68
+
69
+ Tools for managing BibTeX bibliographies: automatically update preprints to published versions, validate references against external databases, and filter to only cited references.
70
+
71
+ ## Installation
72
+
73
+ ### From PyPI (Recommended)
74
+
75
+ ```bash
76
+ pip install bibtex-updater
77
+
78
+ # With Google Scholar support
79
+ pip install bibtex-updater[scholarly]
80
+
81
+ # With Zotero support
82
+ pip install bibtex-updater[zotero]
83
+
84
+ # All optional dependencies
85
+ pip install bibtex-updater[all]
86
+ ```
87
+
88
+ ### From Source
89
+
90
+ ```bash
91
+ git clone https://github.com/rpatrik96/bibtexupdater.git
92
+ cd bibtexupdater
93
+ pip install -e ".[dev]"
94
+ ```
95
+
96
+ ### Using uv (No Installation)
97
+
98
+ Run directly without managing virtual environments using [uv](https://docs.astral.sh/uv/):
99
+
100
+ ```bash
101
+ # Run any command directly
102
+ uv run --with "bibtex-updater[all]" bibtex-update references.bib -o updated.bib
103
+
104
+ # Or use the provided wrapper script
105
+ ./scripts/bibtex-x update references.bib -o updated.bib
106
+ ./scripts/bibtex-x check references.bib
107
+ ./scripts/bibtex-x filter paper.tex -b references.bib -o filtered.bib
108
+ ```
109
+
110
+ ## CLI Commands
111
+
112
+ | Command | Description |
113
+ |---------|-------------|
114
+ | `bibtex-update` | Replace preprints with published versions |
115
+ | `bibtex-check` | Validate references exist with correct metadata |
116
+ | `bibtex-filter` | Filter to only cited entries |
117
+ | `bibtex-zotero` | Update preprints in Zotero library |
118
+
119
+ ## Quick Start
120
+
121
+ ### Update Preprints
122
+
123
+ ```bash
124
+ # Update preprints to published versions
125
+ bibtex-update references.bib -o updated.bib
126
+
127
+ # Preview changes (dry run)
128
+ bibtex-update references.bib --dry-run --verbose
129
+ ```
130
+
131
+ ### Validate References (Fact-Check)
132
+
133
+ ```bash
134
+ # Check if references exist and have correct metadata
135
+ bibtex-check references.bib --report report.json
136
+
137
+ # Strict mode: exit with an error if hallucinated or not-found entries are detected
138
+ bibtex-check references.bib --strict
139
+ ```
140
+
141
+ ### Filter Bibliography
142
+
143
+ ```bash
144
+ # Filter to only cited entries
145
+ bibtex-filter paper.tex -b references.bib -o filtered.bib
146
+
147
+ # Multiple tex files
148
+ bibtex-filter *.tex -b references.bib -o filtered.bib
149
+ ```
150
+
151
+ ### Update Zotero Library
152
+
153
+ ```bash
154
+ # Set credentials (get from zotero.org/settings/keys)
155
+ export ZOTERO_LIBRARY_ID="your_user_id"
156
+ export ZOTERO_API_KEY="your_api_key"
157
+
158
+ # Preview changes
159
+ bibtex-zotero --dry-run
160
+
161
+ # Apply updates
162
+ bibtex-zotero
163
+ ```
164
+
165
+ ### Sync BibTeX Updates to Zotero
166
+
167
+ When updating a `.bib` file, you can simultaneously update matching entries in your Zotero library:
168
+
169
+ ```bash
170
+ # Set Zotero credentials
171
+ export ZOTERO_LIBRARY_ID="your_user_id"
172
+ export ZOTERO_API_KEY="your_api_key"
173
+
174
+ # Update bib file AND sync to Zotero
175
+ bibtex-update references.bib -o updated.bib --zotero
176
+
177
+ # Preview Zotero changes only (bib changes still apply)
178
+ bibtex-update references.bib -o updated.bib --zotero --zotero-dry-run
179
+
180
+ # Limit to a specific Zotero collection
181
+ bibtex-update references.bib -o updated.bib --zotero --zotero-collection ABCD1234
182
+ ```
183
+
184
+ The sync matches bib entries to Zotero items by:
185
+ 1. **arXiv ID** - Most reliable for preprints
186
+ 2. **DOI** - For preprints with DOIs (e.g., bioRxiv)
187
+ 3. **Title + Author** - Fuzzy matching as fallback
188
+
189
+ ## Standalone Scripts
190
+
191
+ For environments without pip (e.g., Overleaf), `filter_bibliography.py` can be used directly as it has no dependencies:
192
+
193
+ ```bash
194
+ # Copy the script and run directly
195
+ python filter_bibliography.py paper.tex -b references.bib -o filtered.bib
196
+ ```
197
+
198
+ ## Documentation
199
+
200
+ | Document | Description |
201
+ |----------|-------------|
202
+ | [docs/BIBTEX_UPDATER.md](docs/BIBTEX_UPDATER.md) | Full BibTeX updater documentation |
203
+ | [docs/REFERENCE_FACT_CHECKER.md](docs/REFERENCE_FACT_CHECKER.md) | Full reference fact-checker documentation |
204
+ | [docs/ZOTERO_UPDATER.md](docs/ZOTERO_UPDATER.md) | Full Zotero updater documentation |
205
+ | [docs/FILTER_BIBLIOGRAPHY.md](docs/FILTER_BIBLIOGRAPHY.md) | Full filter documentation |
206
+ | [examples/](examples/) | Example workflows and configuration files |
207
+
208
+ ## Overleaf Integration
209
+
210
+ The updater, fact-checker, and filter can run against an Overleaf project through GitHub Actions; the filter can also run directly on Overleaf through latexmkrc.
211
+
212
+ ### GitHub Actions (Recommended)
213
+
214
+ 1. Enable GitHub sync in Overleaf (Menu -> Sync -> GitHub)
215
+ 2. Copy a workflow from [examples/workflows/](examples/workflows/) to `.github/workflows/`
216
+ 3. Changes synced from Overleaf automatically trigger updates
217
+
218
+ ### latexmkrc (Direct Overleaf)
219
+
220
+ For `filter_bibliography.py` only (no dependencies required):
221
+
222
+ 1. Upload `filter_bibliography.py` to your Overleaf project
223
+ 2. Create `.latexmkrc` based on [examples/latexmkrc](examples/latexmkrc)
224
+ 3. Recompile - filtered bibliography appears in your file list
225
+
226
+ ## Features
227
+
228
+ ### BibTeX Updater (`bibtex-update`)
229
+
230
+ - **Multi-source resolution**: arXiv, Crossref, DBLP, Semantic Scholar, Google Scholar
231
+ - **High accuracy**: Title and author fuzzy matching with confidence thresholds
232
+ - **Batch processing**: Multiple files with concurrent workers (default: 8)
233
+ - **Deduplication**: Merge duplicates by DOI or normalized title+authors
234
+ - **Smart caching**: On-disk cache + semantic resolution cache with TTL
235
+ - **Per-service rate limiting**: Optimized rate limits per API (Crossref, S2, DBLP, arXiv)
236
+ - **Batch API support**: Faster bulk lookups via arXiv/S2/Crossref batch endpoints
237
+
238
+ ### Zotero Updater (`bibtex-zotero`)
239
+
240
+ - **Direct Zotero integration**: Fetches and updates items via Zotero API
241
+ - **Same resolution pipeline**: Uses the same multi-source resolution
242
+ - **Preserves metadata**: Keeps notes, tags, and attachments intact
243
+ - **Idempotent**: Already-published papers are automatically skipped
244
+ - **Dry-run mode**: Preview changes before applying
245
+
246
+ ### Reference Fact-Checker (`bibtex-check`)
247
+
248
+ - **Multi-source validation**: Crossref, DBLP, Semantic Scholar
249
+ - **Detailed mismatch detection**: Title, author, year, venue comparisons
250
+ - **Hallucination detection**: Identifies likely fabricated references
251
+ - **Structured reports**: JSON and JSONL output formats
252
+ - **CI/CD integration**: Strict mode with exit codes for automation
253
+
254
+ ### Filter Bibliography (`bibtex-filter`)
255
+
256
+ - **Zero dependencies**: Uses only the Python standard library
257
+ - **Works on Overleaf**: No pip install needed
258
+ - **Multiple bib files**: Merge and filter from multiple sources
259
+ - **Citation detection**: Supports natbib, biblatex, and standard LaTeX citations
260
+
261
+ ## Python API
262
+
263
+ ```python
264
+ from bibtex_updater import Detector, Resolver, Updater, HttpClient, RateLimiter, DiskCache
265
+
266
+ # Create HTTP client with rate limiting and caching
267
+ rate_limiter = RateLimiter(req_per_min=30)
268
+ cache = DiskCache(".cache.json")
269
+ http_client = HttpClient(
270
+     timeout=30.0,
271
+     user_agent="bibtex-updater/0.1.0",
272
+     rate_limiter=rate_limiter,
273
+     cache=cache
274
+ )
275
+
276
+ # Detect preprints
277
+ detector = Detector()
278
+ detection = detector.detect(entry)
279
+
280
+ if detection.is_preprint:
281
+     # Resolve to published version
282
+     resolver = Resolver(http_client)
283
+     candidate = resolver.resolve(detection)
284
+
285
+     if candidate and candidate.confidence >= 0.9:
286
+         # Update the entry
287
+         updater = Updater()
288
+         updated_entry = updater.update_entry(entry, candidate.record, detection)
289
+ ```
290
+
291
+ ## Development
292
+
293
+ ```bash
294
+ # Clone and install in development mode
295
+ git clone https://github.com/rpatrik96/bibtexupdater.git
296
+ cd bibtexupdater
297
+ pip install -e ".[dev,all]"
298
+
299
+ # Run tests
300
+ pytest tests/ -v
301
+
302
+ # Run tests with coverage
303
+ pytest tests/ -v --cov=bibtex_updater --cov-report=term-missing
304
+
305
+ # Code quality
306
+ pre-commit run --all-files
307
+
308
+ # Build package
309
+ python -m build
310
+
311
+ # Check package
312
+ twine check dist/*
313
+ ```
314
+
315
+ ## License
316
+
317
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,251 @@
1
+ # BibTeX Updater
2
+
3
+ Tools for managing BibTeX bibliographies: automatically update preprints to published versions, validate references against external databases, and filter to only cited references.
4
+
5
+ ## Installation
6
+
7
+ ### From PyPI (Recommended)
8
+
9
+ ```bash
10
+ pip install bibtex-updater
11
+
12
+ # With Google Scholar support
13
+ pip install bibtex-updater[scholarly]
14
+
15
+ # With Zotero support
16
+ pip install bibtex-updater[zotero]
17
+
18
+ # All optional dependencies
19
+ pip install bibtex-updater[all]
20
+ ```
21
+
22
+ ### From Source
23
+
24
+ ```bash
25
+ git clone https://github.com/rpatrik96/bibtexupdater.git
26
+ cd bibtexupdater
27
+ pip install -e ".[dev]"
28
+ ```
29
+
30
+ ### Using uv (No Installation)
31
+
32
+ Run directly without managing virtual environments using [uv](https://docs.astral.sh/uv/):
33
+
34
+ ```bash
35
+ # Run any command directly
36
+ uv run --with "bibtex-updater[all]" bibtex-update references.bib -o updated.bib
37
+
38
+ # Or use the provided wrapper script
39
+ ./scripts/bibtex-x update references.bib -o updated.bib
40
+ ./scripts/bibtex-x check references.bib
41
+ ./scripts/bibtex-x filter paper.tex -b references.bib -o filtered.bib
42
+ ```
43
+
44
+ ## CLI Commands
45
+
46
+ | Command | Description |
47
+ |---------|-------------|
48
+ | `bibtex-update` | Replace preprints with published versions |
49
+ | `bibtex-check` | Validate references exist with correct metadata |
50
+ | `bibtex-filter` | Filter to only cited entries |
51
+ | `bibtex-zotero` | Update preprints in Zotero library |
52
+
53
+ ## Quick Start
54
+
55
+ ### Update Preprints
56
+
57
+ ```bash
58
+ # Update preprints to published versions
59
+ bibtex-update references.bib -o updated.bib
60
+
61
+ # Preview changes (dry run)
62
+ bibtex-update references.bib --dry-run --verbose
63
+ ```
64
+
65
+ ### Validate References (Fact-Check)
66
+
67
+ ```bash
68
+ # Check if references exist and have correct metadata
69
+ bibtex-check references.bib --report report.json
70
+
71
+ # Strict mode: exit with an error if hallucinated or not-found entries are detected
72
+ bibtex-check references.bib --strict
73
+ ```
74
+
75
+ ### Filter Bibliography
76
+
77
+ ```bash
78
+ # Filter to only cited entries
79
+ bibtex-filter paper.tex -b references.bib -o filtered.bib
80
+
81
+ # Multiple tex files
82
+ bibtex-filter *.tex -b references.bib -o filtered.bib
83
+ ```
84
+
85
+ ### Update Zotero Library
86
+
87
+ ```bash
88
+ # Set credentials (get from zotero.org/settings/keys)
89
+ export ZOTERO_LIBRARY_ID="your_user_id"
90
+ export ZOTERO_API_KEY="your_api_key"
91
+
92
+ # Preview changes
93
+ bibtex-zotero --dry-run
94
+
95
+ # Apply updates
96
+ bibtex-zotero
97
+ ```
98
+
99
+ ### Sync BibTeX Updates to Zotero
100
+
101
+ When updating a `.bib` file, you can simultaneously update matching entries in your Zotero library:
102
+
103
+ ```bash
104
+ # Set Zotero credentials
105
+ export ZOTERO_LIBRARY_ID="your_user_id"
106
+ export ZOTERO_API_KEY="your_api_key"
107
+
108
+ # Update bib file AND sync to Zotero
109
+ bibtex-update references.bib -o updated.bib --zotero
110
+
111
+ # Preview Zotero changes only (bib changes still apply)
112
+ bibtex-update references.bib -o updated.bib --zotero --zotero-dry-run
113
+
114
+ # Limit to a specific Zotero collection
115
+ bibtex-update references.bib -o updated.bib --zotero --zotero-collection ABCD1234
116
+ ```
117
+
118
+ The sync matches bib entries to Zotero items by:
119
+ 1. **arXiv ID** - Most reliable for preprints
120
+ 2. **DOI** - For preprints with DOIs (e.g., bioRxiv)
121
+ 3. **Title + Author** - Fuzzy matching as fallback
122
+
123
+ ## Standalone Scripts
124
+
125
+ For environments without pip (e.g., Overleaf), `filter_bibliography.py` can be used directly as it has no dependencies:
126
+
127
+ ```bash
128
+ # Copy the script and run directly
129
+ python filter_bibliography.py paper.tex -b references.bib -o filtered.bib
130
+ ```
131
+
132
+ ## Documentation
133
+
134
+ | Document | Description |
135
+ |----------|-------------|
136
+ | [docs/BIBTEX_UPDATER.md](docs/BIBTEX_UPDATER.md) | Full BibTeX updater documentation |
137
+ | [docs/REFERENCE_FACT_CHECKER.md](docs/REFERENCE_FACT_CHECKER.md) | Full reference fact-checker documentation |
138
+ | [docs/ZOTERO_UPDATER.md](docs/ZOTERO_UPDATER.md) | Full Zotero updater documentation |
139
+ | [docs/FILTER_BIBLIOGRAPHY.md](docs/FILTER_BIBLIOGRAPHY.md) | Full filter documentation |
140
+ | [examples/](examples/) | Example workflows and configuration files |
141
+
142
+ ## Overleaf Integration
143
+
144
+ The updater, fact-checker, and filter can run against an Overleaf project through GitHub Actions; the filter can also run directly on Overleaf through latexmkrc.
145
+
146
+ ### GitHub Actions (Recommended)
147
+
148
+ 1. Enable GitHub sync in Overleaf (Menu -> Sync -> GitHub)
149
+ 2. Copy a workflow from [examples/workflows/](examples/workflows/) to `.github/workflows/`
150
+ 3. Changes synced from Overleaf automatically trigger updates
151
+
152
+ ### latexmkrc (Direct Overleaf)
153
+
154
+ For `filter_bibliography.py` only (no dependencies required):
155
+
156
+ 1. Upload `filter_bibliography.py` to your Overleaf project
157
+ 2. Create `.latexmkrc` based on [examples/latexmkrc](examples/latexmkrc)
158
+ 3. Recompile - filtered bibliography appears in your file list
159
+
160
+ ## Features
161
+
162
+ ### BibTeX Updater (`bibtex-update`)
163
+
164
+ - **Multi-source resolution**: arXiv, Crossref, DBLP, Semantic Scholar, Google Scholar
165
+ - **High accuracy**: Title and author fuzzy matching with confidence thresholds
166
+ - **Batch processing**: Multiple files with concurrent workers (default: 8)
167
+ - **Deduplication**: Merge duplicates by DOI or normalized title+authors
168
+ - **Smart caching**: On-disk cache + semantic resolution cache with TTL
169
+ - **Per-service rate limiting**: Optimized rate limits per API (Crossref, S2, DBLP, arXiv)
170
+ - **Batch API support**: Faster bulk lookups via arXiv/S2/Crossref batch endpoints
171
+
172
+ ### Zotero Updater (`bibtex-zotero`)
173
+
174
+ - **Direct Zotero integration**: Fetches and updates items via Zotero API
175
+ - **Same resolution pipeline**: Uses the same multi-source resolution
176
+ - **Preserves metadata**: Keeps notes, tags, and attachments intact
177
+ - **Idempotent**: Already-published papers are automatically skipped
178
+ - **Dry-run mode**: Preview changes before applying
179
+
180
+ ### Reference Fact-Checker (`bibtex-check`)
181
+
182
+ - **Multi-source validation**: Crossref, DBLP, Semantic Scholar
183
+ - **Detailed mismatch detection**: Title, author, year, venue comparisons
184
+ - **Hallucination detection**: Identifies likely fabricated references
185
+ - **Structured reports**: JSON and JSONL output formats
186
+ - **CI/CD integration**: Strict mode with exit codes for automation
187
+
188
+ ### Filter Bibliography (`bibtex-filter`)
189
+
190
+ - **Zero dependencies**: Uses only the Python standard library
191
+ - **Works on Overleaf**: No pip install needed
192
+ - **Multiple bib files**: Merge and filter from multiple sources
193
+ - **Citation detection**: Supports natbib, biblatex, and standard LaTeX citations
194
+
195
+ ## Python API
196
+
197
+ ```python
198
+ from bibtex_updater import Detector, Resolver, Updater, HttpClient, RateLimiter, DiskCache
199
+
200
+ # Create HTTP client with rate limiting and caching
201
+ rate_limiter = RateLimiter(req_per_min=30)
202
+ cache = DiskCache(".cache.json")
203
+ http_client = HttpClient(
204
+     timeout=30.0,
205
+     user_agent="bibtex-updater/0.1.0",
206
+     rate_limiter=rate_limiter,
207
+     cache=cache
208
+ )
209
+
210
+ # Detect preprints
211
+ detector = Detector()
212
+ detection = detector.detect(entry)
213
+
214
+ if detection.is_preprint:
215
+     # Resolve to published version
216
+     resolver = Resolver(http_client)
217
+     candidate = resolver.resolve(detection)
218
+
219
+     if candidate and candidate.confidence >= 0.9:
220
+         # Update the entry
221
+         updater = Updater()
222
+         updated_entry = updater.update_entry(entry, candidate.record, detection)
223
+ ```
224
+
225
+ ## Development
226
+
227
+ ```bash
228
+ # Clone and install in development mode
229
+ git clone https://github.com/rpatrik96/bibtexupdater.git
230
+ cd bibtexupdater
231
+ pip install -e ".[dev,all]"
232
+
233
+ # Run tests
234
+ pytest tests/ -v
235
+
236
+ # Run tests with coverage
237
+ pytest tests/ -v --cov=bibtex_updater --cov-report=term-missing
238
+
239
+ # Code quality
240
+ pre-commit run --all-files
241
+
242
+ # Build package
243
+ python -m build
244
+
245
+ # Check package
246
+ twine check dist/*
247
+ ```
248
+
249
+ ## License
250
+
251
+ MIT License - see [LICENSE](LICENSE) for details.