bibcite-cli 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/PKG-INFO +5 -2
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/Readme.md +4 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/pyproject.toml +1 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/__init__.py +1 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/cli.py +64 -15
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/sources.py +21 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/uv.lock +1 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/.gitignore +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/LICENSE +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/bibfile.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/data/strings.bib +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/normalize.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/resolve.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/src/bibcite/venues.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/tests/test_bibfile.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/tests/test_entry_types.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/tests/test_normalize.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/tests/test_strings_override.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.2.0}/tests/test_venues.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bibcite-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -57,10 +57,13 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
57
57
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
58
58
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
59
59
|
|
|
60
|
+
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
61
|
+
bibcite fix refs.bib
|
|
62
|
+
|
|
60
63
|
# Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
|
|
61
64
|
bibcite upgrade refs.bib --dry-run
|
|
62
65
|
|
|
63
|
-
# Just format, or just lint
|
|
66
|
+
# Just format, or just lint (check is read-only)
|
|
64
67
|
bibcite tidy refs.bib
|
|
65
68
|
bibcite check refs.bib
|
|
66
69
|
```
|
|
@@ -45,10 +45,13 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
45
45
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
46
46
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
47
47
|
|
|
48
|
+
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
49
|
+
bibcite fix refs.bib
|
|
50
|
+
|
|
48
51
|
# Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
|
|
49
52
|
bibcite upgrade refs.bib --dry-run
|
|
50
53
|
|
|
51
|
-
# Just format, or just lint
|
|
54
|
+
# Just format, or just lint (check is read-only)
|
|
52
55
|
bibcite tidy refs.bib
|
|
53
56
|
bibcite check refs.bib
|
|
54
57
|
```
|
|
@@ -8,6 +8,7 @@ citation key it prints.
|
|
|
8
8
|
import argparse
|
|
9
9
|
import json
|
|
10
10
|
import sys
|
|
11
|
+
import time
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
|
|
13
14
|
from . import bibfile
|
|
@@ -123,21 +124,27 @@ def cmd_add(args) -> int:
|
|
|
123
124
|
# upgrade: batch-match arXiv entries in an existing file (bibMatcher, CLI-style)
|
|
124
125
|
# ---------------------------------------------------------------------------
|
|
125
126
|
|
|
126
|
-
def cmd_upgrade(args) -> int:
|
|
127
|
-
path = Path(args.file)
|
|
127
|
+
def _upgrade_entries(path: Path, dry_run: bool) -> dict:
|
|
128
|
+
"""Match every preprint entry in ``path`` to its published version and
|
|
129
|
+
rewrite it in place (unless dry_run). Returns the report; does NOT tidy —
|
|
130
|
+
callers decide."""
|
|
128
131
|
db = bibfile.load_bib_file(path)
|
|
129
132
|
if db is None or not db.entries:
|
|
130
133
|
_log(f"[bibcite] nothing to do in {path}")
|
|
131
|
-
return 0
|
|
134
|
+
return {"upgraded": 0, "matched": 0, "entries": []}
|
|
132
135
|
|
|
133
136
|
report = []
|
|
134
137
|
changed = 0
|
|
138
|
+
processed = 0
|
|
135
139
|
for entry in db.entries:
|
|
136
140
|
if not bibfile.is_preprint(entry):
|
|
137
141
|
continue
|
|
138
142
|
title = entry.get("title", "").replace("{", "").replace("}", "")
|
|
139
143
|
if not title:
|
|
140
144
|
continue
|
|
145
|
+
if processed:
|
|
146
|
+
time.sleep(1) # be polite to the APIs on batch runs
|
|
147
|
+
processed += 1
|
|
141
148
|
_log(f"[upgrade] matching: {title[:80]}")
|
|
142
149
|
aid = bibfile.entry_arxiv_id(entry)
|
|
143
150
|
hint = (
|
|
@@ -149,7 +156,7 @@ def cmd_upgrade(args) -> int:
|
|
|
149
156
|
continue
|
|
150
157
|
canonical = canonicalize(match.venue, match.year or entry.get("year"))
|
|
151
158
|
venue_name = canonical.name if canonical else match.venue
|
|
152
|
-
if not args.dry_run:
|
|
159
|
+
if not dry_run:
|
|
153
160
|
entry.pop("journal", None)
|
|
154
161
|
entry.pop("booktitle", None)
|
|
155
162
|
entry.pop("howpublished", None)
|
|
@@ -179,17 +186,23 @@ def cmd_upgrade(args) -> int:
|
|
|
179
186
|
}
|
|
180
187
|
)
|
|
181
188
|
|
|
182
|
-
if changed and not args.dry_run:
|
|
189
|
+
if changed and not dry_run:
|
|
183
190
|
bibfile._write_db(path, db)
|
|
184
|
-
if not args.no_tidy:
|
|
185
|
-
bibfile.run_tidy(path)
|
|
186
191
|
|
|
187
192
|
matched = sum(1 for r in report if r["matched"])
|
|
188
193
|
for r in report:
|
|
189
194
|
mark = "✓" if r["matched"] else "✗"
|
|
190
195
|
_log(f"{mark} {r['key']}: {r.get('venue', 'no match')}")
|
|
191
|
-
_log(f"[bibcite] {matched} matched, {changed} upgraded{' (dry-run)' if args.dry_run else ''}")
|
|
192
|
-
|
|
196
|
+
_log(f"[bibcite] {matched} matched, {changed} upgraded{' (dry-run)' if dry_run else ''}")
|
|
197
|
+
return {"upgraded": changed, "matched": matched, "entries": report}
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def cmd_upgrade(args) -> int:
|
|
201
|
+
path = Path(args.file)
|
|
202
|
+
result = _upgrade_entries(path, args.dry_run)
|
|
203
|
+
if result["upgraded"] and not args.no_tidy:
|
|
204
|
+
bibfile.run_tidy(path)
|
|
205
|
+
_emit({**result, "dry_run": args.dry_run})
|
|
193
206
|
return 0
|
|
194
207
|
|
|
195
208
|
|
|
@@ -201,12 +214,11 @@ def cmd_tidy(args) -> int:
|
|
|
201
214
|
return 0 if bibfile.run_tidy(Path(args.file)) else 1
|
|
202
215
|
|
|
203
216
|
|
|
204
|
-
def cmd_check(args) -> int:
|
|
205
|
-
|
|
217
|
+
def _check_problems(path: Path) -> tuple[int, list] | None:
|
|
218
|
+
"""(entry count, problem list) for a .bib file, or None if unparseable."""
|
|
206
219
|
db = bibfile.load_bib_file(path)
|
|
207
220
|
if db is None:
|
|
208
|
-
|
|
209
|
-
return 1
|
|
221
|
+
return None
|
|
210
222
|
problems = []
|
|
211
223
|
seen_titles: dict[str, str] = {}
|
|
212
224
|
for entry in db.entries:
|
|
@@ -223,7 +235,37 @@ def cmd_check(args) -> int:
|
|
|
223
235
|
for p in problems:
|
|
224
236
|
_log(f"{p['key']}: {p['issue']}")
|
|
225
237
|
_log(f"[bibcite] {len(db.entries)} entries, {len(problems)} issues")
|
|
226
|
-
|
|
238
|
+
return len(db.entries), problems
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def cmd_check(args) -> int:
|
|
242
|
+
checked = _check_problems(Path(args.file))
|
|
243
|
+
if checked is None:
|
|
244
|
+
_log(f"[bibcite] {args.file} could not be parsed")
|
|
245
|
+
return 1
|
|
246
|
+
entries, problems = checked
|
|
247
|
+
_emit({"entries": entries, "problems": problems})
|
|
248
|
+
return 0
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def cmd_fix(args) -> int:
|
|
252
|
+
"""One-shot cleanup: upgrade preprints, always tidy, then re-lint."""
|
|
253
|
+
path = Path(args.file)
|
|
254
|
+
if not path.exists():
|
|
255
|
+
_log(f"[bibcite] {path} does not exist")
|
|
256
|
+
return 1
|
|
257
|
+
result = _upgrade_entries(path, dry_run=False)
|
|
258
|
+
tidied = bibfile.run_tidy(path)
|
|
259
|
+
checked = _check_problems(path)
|
|
260
|
+
entries, problems = checked if checked else (0, [])
|
|
261
|
+
_emit(
|
|
262
|
+
{
|
|
263
|
+
**result,
|
|
264
|
+
"tidied": tidied,
|
|
265
|
+
"entries_total": entries,
|
|
266
|
+
"remaining_problems": problems,
|
|
267
|
+
}
|
|
268
|
+
)
|
|
227
269
|
return 0
|
|
228
270
|
|
|
229
271
|
|
|
@@ -260,10 +302,17 @@ def main(argv=None) -> int:
|
|
|
260
302
|
t.add_argument("file")
|
|
261
303
|
t.set_defaults(fn=cmd_tidy)
|
|
262
304
|
|
|
263
|
-
c = sub.add_parser("check", help="offline
|
|
305
|
+
c = sub.add_parser("check", help="offline read-only lint of a .bib file (prints JSON)")
|
|
264
306
|
c.add_argument("file")
|
|
265
307
|
c.set_defaults(fn=cmd_check)
|
|
266
308
|
|
|
309
|
+
f = sub.add_parser(
|
|
310
|
+
"fix",
|
|
311
|
+
help="one-shot cleanup: upgrade preprints to published versions, tidy, then lint (prints JSON)",
|
|
312
|
+
)
|
|
313
|
+
f.add_argument("file")
|
|
314
|
+
f.set_defaults(fn=cmd_fix)
|
|
315
|
+
|
|
267
316
|
args = p.parse_args(argv)
|
|
268
317
|
return args.fn(args)
|
|
269
318
|
|
|
@@ -7,6 +7,7 @@ preprint venues (arXiv / CoRR / bioRxiv / ...).
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import html
|
|
10
|
+
import os
|
|
10
11
|
import re
|
|
11
12
|
import sys
|
|
12
13
|
import time
|
|
@@ -44,6 +45,14 @@ def _client(browser: bool = False) -> httpx.Client:
|
|
|
44
45
|
)
|
|
45
46
|
|
|
46
47
|
|
|
48
|
+
def _s2_headers() -> dict:
|
|
49
|
+
"""Semantic Scholar's unauthenticated pool is shared globally and 429s
|
|
50
|
+
often; a free API key (https://api.semanticscholar.org) gets a private
|
|
51
|
+
quota. Set S2_API_KEY (or SEMANTIC_SCHOLAR_API_KEY)."""
|
|
52
|
+
key = os.environ.get("S2_API_KEY") or os.environ.get("SEMANTIC_SCHOLAR_API_KEY")
|
|
53
|
+
return {"x-api-key": key} if key else {}
|
|
54
|
+
|
|
55
|
+
|
|
47
56
|
@dataclass
|
|
48
57
|
class Match:
|
|
49
58
|
source: str
|
|
@@ -260,6 +269,7 @@ def s2_arxiv_metadata(arxiv_id: str) -> ArxivMeta | None:
|
|
|
260
269
|
r = c.get(
|
|
261
270
|
f"https://api.semanticscholar.org/graph/v1/paper/arXiv:{arxiv_id}",
|
|
262
271
|
params={"fields": "title,year,authors"},
|
|
272
|
+
headers=_s2_headers(),
|
|
263
273
|
)
|
|
264
274
|
if r.status_code != 200:
|
|
265
275
|
return None
|
|
@@ -284,6 +294,7 @@ def try_semantic_scholar(
|
|
|
284
294
|
r = c.get(
|
|
285
295
|
f"https://api.semanticscholar.org/graph/v1/paper/arXiv:{arxiv_id}",
|
|
286
296
|
params={"fields": S2_FIELDS},
|
|
297
|
+
headers=_s2_headers(),
|
|
287
298
|
)
|
|
288
299
|
if r.status_code == 429:
|
|
289
300
|
raise SourceUnavailable("Semantic Scholar rate-limited (429)")
|
|
@@ -294,6 +305,7 @@ def try_semantic_scholar(
|
|
|
294
305
|
r = c.get(
|
|
295
306
|
"https://api.semanticscholar.org/graph/v1/paper/search",
|
|
296
307
|
params={"query": title, "fields": S2_FIELDS, "limit": 5},
|
|
308
|
+
headers=_s2_headers(),
|
|
297
309
|
)
|
|
298
310
|
if r.status_code == 429:
|
|
299
311
|
raise SourceUnavailable("Semantic Scholar rate-limited (429)")
|
|
@@ -575,19 +587,27 @@ CASCADE = (
|
|
|
575
587
|
("openalex", lambda t, y, a, au: try_openalex(t)),
|
|
576
588
|
)
|
|
577
589
|
|
|
590
|
+
# Sources that rate-limited/blocked us this process: skip them for the rest of
|
|
591
|
+
# the run instead of hammering them once per entry during batch `upgrade`
|
|
592
|
+
# (PaperMemory's DISABLE_MATCH, ported).
|
|
593
|
+
_DISABLED: dict[str, str] = {}
|
|
594
|
+
|
|
578
595
|
|
|
579
596
|
def find_published(
|
|
580
597
|
title: str, year: str = "", arxiv_id: str = "", author_hint: str = ""
|
|
581
598
|
) -> Match | None:
|
|
582
599
|
"""Try each source in order; first verified hit wins."""
|
|
583
600
|
for name, fn in CASCADE:
|
|
601
|
+
if name in _DISABLED:
|
|
602
|
+
continue
|
|
584
603
|
try:
|
|
585
604
|
m = fn(title, year, arxiv_id, author_hint)
|
|
586
605
|
if m:
|
|
587
606
|
return m
|
|
588
607
|
_log(f"[{name}] no publication found")
|
|
589
608
|
except SourceUnavailable as e:
|
|
590
|
-
|
|
609
|
+
_DISABLED[name] = str(e)
|
|
610
|
+
_log(f"[{name}] disabled for the rest of this run: {e}")
|
|
591
611
|
except Exception as e: # network hiccup on one source must not kill the run
|
|
592
612
|
_log(f"[{name}] error: {type(e).__name__}: {e}")
|
|
593
613
|
return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|