bibcite-cli 0.2.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/PKG-INFO +14 -3
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/Readme.md +12 -2
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/pyproject.toml +4 -1
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/__init__.py +1 -1
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/bibfile.py +80 -14
- bibcite_cli-0.4.0/src/bibcite/cache.py +48 -0
- bibcite_cli-0.4.0/src/bibcite/cli.py +513 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/normalize.py +38 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/resolve.py +38 -11
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/sources.py +104 -5
- bibcite_cli-0.4.0/tests/test_bugfixes.py +91 -0
- bibcite_cli-0.4.0/tests/test_round2.py +70 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/uv.lock +1 -1
- bibcite_cli-0.2.0/src/bibcite/cli.py +0 -321
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/.gitignore +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/LICENSE +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/data/strings.bib +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/src/bibcite/venues.py +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/tests/test_bibfile.py +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/tests/test_entry_types.py +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/tests/test_normalize.py +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/tests/test_strings_override.py +0 -0
- {bibcite_cli-0.2.0 → bibcite_cli-0.4.0}/tests/test_venues.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bibcite-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans
|
|
5
|
+
Project-URL: Repository, https://github.com/leo1oel/bibcite
|
|
5
6
|
License-Expression: MIT
|
|
6
7
|
License-File: LICENSE
|
|
7
8
|
Keywords: arxiv,bibliography,bibtex,citations,dblp
|
|
@@ -32,7 +33,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
|
|
|
32
33
|
uv tool install --editable .
|
|
33
34
|
|
|
34
35
|
# from git, no checkout needed
|
|
35
|
-
uv tool install git+https://github.com
|
|
36
|
+
uv tool install git+https://github.com/leo1oel/bibcite
|
|
36
37
|
|
|
37
38
|
# once published to PyPI (package name bibcite-cli, command name bibcite)
|
|
38
39
|
uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
|
|
@@ -57,6 +58,13 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
57
58
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
58
59
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
59
60
|
|
|
61
|
+
# Batch add (one query per line; shares rate-limit state, tidies once)
|
|
62
|
+
bibcite add refs.bib --from ids.txt
|
|
63
|
+
|
|
64
|
+
# Overwrite a bad existing entry (keeps its key), or delete one
|
|
65
|
+
bibcite add refs.bib <query> --replace
|
|
66
|
+
bibcite remove refs.bib <key>
|
|
67
|
+
|
|
60
68
|
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
61
69
|
bibcite fix refs.bib
|
|
62
70
|
|
|
@@ -68,8 +76,11 @@ bibcite tidy refs.bib
|
|
|
68
76
|
bibcite check refs.bib
|
|
69
77
|
```
|
|
70
78
|
|
|
71
|
-
|
|
79
|
+
`add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
|
|
72
80
|
`add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
|
|
81
|
+
Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
|
|
82
|
+
Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
|
|
83
|
+
Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
|
|
73
84
|
|
|
74
85
|
## For agents
|
|
75
86
|
|
|
@@ -20,7 +20,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
|
|
|
20
20
|
uv tool install --editable .
|
|
21
21
|
|
|
22
22
|
# from git, no checkout needed
|
|
23
|
-
uv tool install git+https://github.com
|
|
23
|
+
uv tool install git+https://github.com/leo1oel/bibcite
|
|
24
24
|
|
|
25
25
|
# once published to PyPI (package name bibcite-cli, command name bibcite)
|
|
26
26
|
uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
|
|
@@ -45,6 +45,13 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
45
45
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
46
46
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
47
47
|
|
|
48
|
+
# Batch add (one query per line; shares rate-limit state, tidies once)
|
|
49
|
+
bibcite add refs.bib --from ids.txt
|
|
50
|
+
|
|
51
|
+
# Overwrite a bad existing entry (keeps its key), or delete one
|
|
52
|
+
bibcite add refs.bib <query> --replace
|
|
53
|
+
bibcite remove refs.bib <key>
|
|
54
|
+
|
|
48
55
|
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
49
56
|
bibcite fix refs.bib
|
|
50
57
|
|
|
@@ -56,8 +63,11 @@ bibcite tidy refs.bib
|
|
|
56
63
|
bibcite check refs.bib
|
|
57
64
|
```
|
|
58
65
|
|
|
59
|
-
|
|
66
|
+
`add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
|
|
60
67
|
`add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
|
|
68
|
+
Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
|
|
69
|
+
Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
|
|
70
|
+
Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
|
|
61
71
|
|
|
62
72
|
## For agents
|
|
63
73
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "bibcite-cli"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
description = "Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans"
|
|
5
5
|
readme = "Readme.md"
|
|
6
6
|
license = "MIT"
|
|
@@ -11,6 +11,9 @@ dependencies = [
|
|
|
11
11
|
]
|
|
12
12
|
keywords = ["bibtex", "arxiv", "citations", "dblp", "bibliography"]
|
|
13
13
|
|
|
14
|
+
[project.urls]
|
|
15
|
+
Repository = "https://github.com/leo1oel/bibcite"
|
|
16
|
+
|
|
14
17
|
[project.scripts]
|
|
15
18
|
bibcite = "bibcite.cli:main"
|
|
16
19
|
|
|
@@ -14,23 +14,44 @@ from bibtexparser.bwriter import BibTexWriter
|
|
|
14
14
|
from .normalize import norm_title
|
|
15
15
|
|
|
16
16
|
# The exact bibtex-tidy invocation requested by the user; keep in sync with
|
|
17
|
-
# their LaTeX workflow.
|
|
17
|
+
# their LaTeX workflow. NOTE: no --generate-keys — bibcite owns key
|
|
18
|
+
# generation (make_key ASCII-folds names, so Hyvärinen -> hyvarinen2000...,
|
|
19
|
+
# where tidy would emit hyv_arinen2000...), and stable keys keep existing
|
|
20
|
+
# \cite{} commands valid.
|
|
18
21
|
TIDY_ARGS = [
|
|
19
22
|
"--modify",
|
|
20
|
-
|
|
23
|
+
# volume/number/pages/doi are kept (bibliographic substance the user
|
|
24
|
+
# asked to retain); the omit list drops only true noise.
|
|
25
|
+
"--omit=publisher,timestamp,biburl,bibsource,abstract,month,series,editor,note,date,address",
|
|
21
26
|
"--curly",
|
|
22
27
|
"--blank-lines",
|
|
23
28
|
"--trailing-commas",
|
|
24
29
|
"--sort=-year",
|
|
25
30
|
"--duplicates=citation",
|
|
26
31
|
"--merge=first",
|
|
27
|
-
"--sort-fields=author,title,booktitle,journal,year,url,pdf",
|
|
32
|
+
"--sort-fields=author,title,booktitle,journal,volume,number,pages,year,doi,url,pdf",
|
|
28
33
|
"--strip-enclosing-braces",
|
|
29
34
|
"--tidy-comments",
|
|
30
|
-
"--generate-keys",
|
|
31
35
|
]
|
|
32
36
|
|
|
33
|
-
NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref")
|
|
37
|
+
NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref", "month")
|
|
38
|
+
|
|
39
|
+
# BibTeX month macros. bibtexparser's common_strings only defines jan..dec;
|
|
40
|
+
# CrossRef's transform endpoint emits bare full names (month=June), which
|
|
41
|
+
# otherwise KeyError during string interpolation.
|
|
42
|
+
MONTH_STRINGS = {
|
|
43
|
+
m[:3]: m.capitalize()
|
|
44
|
+
for m in (
|
|
45
|
+
"january february march april may june july august september "
|
|
46
|
+
"october november december"
|
|
47
|
+
).split()
|
|
48
|
+
} | {
|
|
49
|
+
m: m.capitalize()
|
|
50
|
+
for m in (
|
|
51
|
+
"january february march april may june july august september "
|
|
52
|
+
"october november december"
|
|
53
|
+
).split()
|
|
54
|
+
}
|
|
34
55
|
|
|
35
56
|
ARXIV_ID_RE = re.compile(r"(\d{4}\.\d{4,5})(v\d+)?")
|
|
36
57
|
|
|
@@ -42,11 +63,18 @@ def _log(msg: str):
|
|
|
42
63
|
def _parser() -> BibTexParser:
|
|
43
64
|
p = BibTexParser(common_strings=True)
|
|
44
65
|
p.ignore_nonstandard_types = False
|
|
66
|
+
p.bib_database.strings.update(MONTH_STRINGS)
|
|
45
67
|
return p
|
|
46
68
|
|
|
47
69
|
|
|
48
70
|
def parse_bib(text: str) -> BibDatabase:
|
|
49
|
-
|
|
71
|
+
try:
|
|
72
|
+
return bibtexparser.loads(text, parser=_parser())
|
|
73
|
+
except Exception as e:
|
|
74
|
+
# Undefined @string macros raise bare KeyError('macro'); rewrap so
|
|
75
|
+
# callers see a real message and KeyError never masquerades as a
|
|
76
|
+
# LookupError "not found" upstream.
|
|
77
|
+
raise ValueError(f"BibTeX parse failed: {type(e).__name__}: {e}") from e
|
|
50
78
|
|
|
51
79
|
|
|
52
80
|
def parse_bibtex_entry(text: str) -> dict:
|
|
@@ -117,36 +145,74 @@ def find_existing(db: BibDatabase, title: str, arxiv_id: str = "", doi: str = ""
|
|
|
117
145
|
return None
|
|
118
146
|
|
|
119
147
|
|
|
120
|
-
def upsert_entry(
|
|
148
|
+
def upsert_entry(
|
|
149
|
+
path: Path, entry: dict, replace: bool = False, replace_key: str = ""
|
|
150
|
+
) -> tuple[str, str]:
|
|
121
151
|
"""Insert or upgrade ``entry`` in ``path``.
|
|
122
152
|
|
|
123
|
-
Returns (action, key)
|
|
153
|
+
Returns (action, key), action in "added" | "upgraded" | "exists" |
|
|
154
|
+
"replaced" | "no_match_to_replace". With ``replace``, an existing
|
|
155
|
+
matching entry is overwritten; ``replace_key`` targets a specific entry
|
|
156
|
+
by citation key (for when title drift defeats the automatic match). The
|
|
157
|
+
existing key is always kept so \\cite{} commands stay valid. A replace
|
|
158
|
+
that matches nothing is an ERROR, not a silent add — that is how
|
|
159
|
+
duplicate entries sneak into a file.
|
|
124
160
|
"""
|
|
125
161
|
db = load_bib_file(path)
|
|
126
162
|
if db is None: # unparseable file: append blindly
|
|
163
|
+
if replace or replace_key:
|
|
164
|
+
return "no_match_to_replace", replace_key or entry["ID"]
|
|
127
165
|
with path.open("a") as f:
|
|
128
166
|
f.write("\n" + entry_to_bibtex(entry))
|
|
129
167
|
return "added", entry["ID"]
|
|
130
168
|
|
|
131
|
-
|
|
132
|
-
db
|
|
133
|
-
|
|
169
|
+
if replace_key:
|
|
170
|
+
existing = next((e for e in db.entries if e.get("ID") == replace_key), None)
|
|
171
|
+
else:
|
|
172
|
+
existing = find_existing(
|
|
173
|
+
db, entry.get("title", ""), entry_arxiv_id(entry), entry.get("doi", "")
|
|
174
|
+
)
|
|
175
|
+
|
|
134
176
|
if existing is not None:
|
|
135
|
-
|
|
177
|
+
upgrade = is_preprint(existing) and not is_preprint(entry)
|
|
178
|
+
if replace or replace_key or upgrade:
|
|
136
179
|
key = existing["ID"]
|
|
137
180
|
existing.clear()
|
|
138
|
-
existing.update(entry)
|
|
181
|
+
existing.update({k: str(v) for k, v in entry.items() if v})
|
|
139
182
|
existing["ID"] = key # keep the key the user may already \cite
|
|
140
183
|
_write_db(path, db)
|
|
141
|
-
return "upgraded", key
|
|
184
|
+
return ("replaced" if (replace or replace_key) else "upgraded"), key
|
|
142
185
|
return "exists", existing["ID"]
|
|
143
186
|
|
|
187
|
+
if replace or replace_key:
|
|
188
|
+
return "no_match_to_replace", replace_key or entry["ID"]
|
|
189
|
+
|
|
144
190
|
db.entries.append({k: str(v) for k, v in entry.items() if v})
|
|
145
191
|
_write_db(path, db)
|
|
146
192
|
return "added", entry["ID"]
|
|
147
193
|
|
|
148
194
|
|
|
195
|
+
def remove_entry(path: Path, key: str) -> bool:
|
|
196
|
+
"""Delete the entry with citation key ``key``. True if something was
|
|
197
|
+
removed."""
|
|
198
|
+
db = load_bib_file(path)
|
|
199
|
+
if db is None:
|
|
200
|
+
return False
|
|
201
|
+
before = len(db.entries)
|
|
202
|
+
db.entries = [e for e in db.entries if e.get("ID") != key]
|
|
203
|
+
if len(db.entries) == before:
|
|
204
|
+
return False
|
|
205
|
+
_write_db(path, db)
|
|
206
|
+
return True
|
|
207
|
+
|
|
208
|
+
|
|
149
209
|
def _write_db(path: Path, db: BibDatabase):
|
|
210
|
+
# Never write our injected month macros back out as @string blocks (they
|
|
211
|
+
# exist only so parsing month=June doesn't crash); this also scrubs any
|
|
212
|
+
# that leaked into a file before this guard existed. User-defined
|
|
213
|
+
# @strings are untouched.
|
|
214
|
+
for k in MONTH_STRINGS:
|
|
215
|
+
db.strings.pop(k, None)
|
|
150
216
|
writer = BibTexWriter()
|
|
151
217
|
writer.indent = " "
|
|
152
218
|
writer.order_entries_by = None # preserve file order; tidy re-sorts anyway
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Local cache of successful publication matches.
|
|
2
|
+
|
|
3
|
+
Keyed by normalized title. Only *published* matches are stored — a paper that
|
|
4
|
+
is published stays published, while a preprint may get published tomorrow, so
|
|
5
|
+
negative/preprint results are never cached. Re-running `fix`/`upgrade` or
|
|
6
|
+
re-adding known papers therefore costs zero API calls.
|
|
7
|
+
|
|
8
|
+
Disable with --no-cache or BIBCITE_NO_CACHE=1. Lives at
|
|
9
|
+
$XDG_CACHE_HOME/bibcite/published.json (~/.cache/bibcite/published.json).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
DISABLED = os.environ.get("BIBCITE_NO_CACHE", "") == "1"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _path() -> Path:
|
|
21
|
+
root = os.environ.get("XDG_CACHE_HOME") or "~/.cache"
|
|
22
|
+
return Path(root).expanduser() / "bibcite" / "published.json"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load() -> dict:
|
|
26
|
+
try:
|
|
27
|
+
return json.loads(_path().read_text())
|
|
28
|
+
except Exception:
|
|
29
|
+
return {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get(key: str) -> dict | None:
|
|
33
|
+
if DISABLED or not key:
|
|
34
|
+
return None
|
|
35
|
+
return _load().get(key)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def put(key: str, value: dict):
|
|
39
|
+
if DISABLED or not key:
|
|
40
|
+
return
|
|
41
|
+
try:
|
|
42
|
+
data = _load()
|
|
43
|
+
data[key] = value
|
|
44
|
+
p = _path()
|
|
45
|
+
p.parent.mkdir(parents=True, exist_ok=True)
|
|
46
|
+
p.write_text(json.dumps(data, ensure_ascii=False))
|
|
47
|
+
except Exception as e: # cache must never break resolution
|
|
48
|
+
print(f"[cache] write failed: {e}", file=sys.stderr)
|