bibcite-cli 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/PKG-INFO +18 -4
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/Readme.md +16 -3
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/pyproject.toml +4 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/__init__.py +1 -1
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/bibfile.py +52 -9
- bibcite_cli-0.3.0/src/bibcite/cache.py +48 -0
- bibcite_cli-0.3.0/src/bibcite/cli.py +431 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/normalize.py +22 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/resolve.py +38 -11
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/sources.py +42 -5
- bibcite_cli-0.3.0/tests/test_bugfixes.py +91 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/uv.lock +1 -1
- bibcite_cli-0.1.0/src/bibcite/cli.py +0 -272
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/.gitignore +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/LICENSE +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/data/strings.bib +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/src/bibcite/venues.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/tests/test_bibfile.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/tests/test_entry_types.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/tests/test_normalize.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/tests/test_strings_override.py +0 -0
- {bibcite_cli-0.1.0 → bibcite_cli-0.3.0}/tests/test_venues.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bibcite-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans
|
|
5
|
+
Project-URL: Repository, https://github.com/leo1oel/bibcite
|
|
5
6
|
License-Expression: MIT
|
|
6
7
|
License-File: LICENSE
|
|
7
8
|
Keywords: arxiv,bibliography,bibtex,citations,dblp
|
|
@@ -32,7 +33,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
|
|
|
32
33
|
uv tool install --editable .
|
|
33
34
|
|
|
34
35
|
# from git, no checkout needed
|
|
35
|
-
uv tool install git+https://github.com
|
|
36
|
+
uv tool install git+https://github.com/leo1oel/bibcite
|
|
36
37
|
|
|
37
38
|
# once published to PyPI (package name bibcite-cli, command name bibcite)
|
|
38
39
|
uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
|
|
@@ -57,16 +58,29 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
57
58
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
58
59
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
59
60
|
|
|
61
|
+
# Batch add (one query per line; shares rate-limit state, tidies once)
|
|
62
|
+
bibcite add refs.bib --from ids.txt
|
|
63
|
+
|
|
64
|
+
# Overwrite a bad existing entry (keeps its key), or delete one
|
|
65
|
+
bibcite add refs.bib <query> --replace
|
|
66
|
+
bibcite remove refs.bib <key>
|
|
67
|
+
|
|
68
|
+
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
69
|
+
bibcite fix refs.bib
|
|
70
|
+
|
|
60
71
|
# Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
|
|
61
72
|
bibcite upgrade refs.bib --dry-run
|
|
62
73
|
|
|
63
|
-
# Just format, or just lint
|
|
74
|
+
# Just format, or just lint (check is read-only)
|
|
64
75
|
bibcite tidy refs.bib
|
|
65
76
|
bibcite check refs.bib
|
|
66
77
|
```
|
|
67
78
|
|
|
68
|
-
|
|
79
|
+
`add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
|
|
69
80
|
`add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
|
|
81
|
+
Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
|
|
82
|
+
Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
|
|
83
|
+
Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
|
|
70
84
|
|
|
71
85
|
## For agents
|
|
72
86
|
|
|
@@ -20,7 +20,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
|
|
|
20
20
|
uv tool install --editable .
|
|
21
21
|
|
|
22
22
|
# from git, no checkout needed
|
|
23
|
-
uv tool install git+https://github.com
|
|
23
|
+
uv tool install git+https://github.com/leo1oel/bibcite
|
|
24
24
|
|
|
25
25
|
# once published to PyPI (package name bibcite-cli, command name bibcite)
|
|
26
26
|
uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
|
|
@@ -45,16 +45,29 @@ bibcite add refs.bib 2103.14030 --json
|
|
|
45
45
|
# Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
|
|
46
46
|
bibcite add refs.bib --bibtex "$(pbpaste)"
|
|
47
47
|
|
|
48
|
+
# Batch add (one query per line; shares rate-limit state, tidies once)
|
|
49
|
+
bibcite add refs.bib --from ids.txt
|
|
50
|
+
|
|
51
|
+
# Overwrite a bad existing entry (keeps its key), or delete one
|
|
52
|
+
bibcite add refs.bib <query> --replace
|
|
53
|
+
bibcite remove refs.bib <key>
|
|
54
|
+
|
|
55
|
+
# One-shot cleanup: upgrade preprints → tidy → lint
|
|
56
|
+
bibcite fix refs.bib
|
|
57
|
+
|
|
48
58
|
# Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
|
|
49
59
|
bibcite upgrade refs.bib --dry-run
|
|
50
60
|
|
|
51
|
-
# Just format, or just lint
|
|
61
|
+
# Just format, or just lint (check is read-only)
|
|
52
62
|
bibcite tidy refs.bib
|
|
53
63
|
bibcite check refs.bib
|
|
54
64
|
```
|
|
55
65
|
|
|
56
|
-
|
|
66
|
+
`add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
|
|
57
67
|
`add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
|
|
68
|
+
Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
|
|
69
|
+
Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
|
|
70
|
+
Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
|
|
58
71
|
|
|
59
72
|
## For agents
|
|
60
73
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "bibcite-cli"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans"
|
|
5
5
|
readme = "Readme.md"
|
|
6
6
|
license = "MIT"
|
|
@@ -11,6 +11,9 @@ dependencies = [
|
|
|
11
11
|
]
|
|
12
12
|
keywords = ["bibtex", "arxiv", "citations", "dblp", "bibliography"]
|
|
13
13
|
|
|
14
|
+
[project.urls]
|
|
15
|
+
Repository = "https://github.com/leo1oel/bibcite"
|
|
16
|
+
|
|
14
17
|
[project.scripts]
|
|
15
18
|
bibcite = "bibcite.cli:main"
|
|
16
19
|
|
|
@@ -14,7 +14,10 @@ from bibtexparser.bwriter import BibTexWriter
|
|
|
14
14
|
from .normalize import norm_title
|
|
15
15
|
|
|
16
16
|
# The exact bibtex-tidy invocation requested by the user; keep in sync with
|
|
17
|
-
# their LaTeX workflow.
|
|
17
|
+
# their LaTeX workflow. NOTE: no --generate-keys — bibcite owns key
|
|
18
|
+
# generation (make_key ASCII-folds names, so Hyvärinen -> hyvarinen2000...,
|
|
19
|
+
# where tidy would emit hyv_arinen2000...), and stable keys keep existing
|
|
20
|
+
# \cite{} commands valid.
|
|
18
21
|
TIDY_ARGS = [
|
|
19
22
|
"--modify",
|
|
20
23
|
"--omit=pages,publisher,doi,timestamp,biburl,bibsource,abstract,month,series,volume,editor,note,date,number,address",
|
|
@@ -27,10 +30,26 @@ TIDY_ARGS = [
|
|
|
27
30
|
"--sort-fields=author,title,booktitle,journal,year,url,pdf",
|
|
28
31
|
"--strip-enclosing-braces",
|
|
29
32
|
"--tidy-comments",
|
|
30
|
-
"--generate-keys",
|
|
31
33
|
]
|
|
32
34
|
|
|
33
|
-
NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref")
|
|
35
|
+
NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref", "month")
|
|
36
|
+
|
|
37
|
+
# BibTeX month macros. bibtexparser's common_strings only defines jan..dec;
|
|
38
|
+
# CrossRef's transform endpoint emits bare full names (month=June), which
|
|
39
|
+
# otherwise KeyError during string interpolation.
|
|
40
|
+
MONTH_STRINGS = {
|
|
41
|
+
m[:3]: m.capitalize()
|
|
42
|
+
for m in (
|
|
43
|
+
"january february march april may june july august september "
|
|
44
|
+
"october november december"
|
|
45
|
+
).split()
|
|
46
|
+
} | {
|
|
47
|
+
m: m.capitalize()
|
|
48
|
+
for m in (
|
|
49
|
+
"january february march april may june july august september "
|
|
50
|
+
"october november december"
|
|
51
|
+
).split()
|
|
52
|
+
}
|
|
34
53
|
|
|
35
54
|
ARXIV_ID_RE = re.compile(r"(\d{4}\.\d{4,5})(v\d+)?")
|
|
36
55
|
|
|
@@ -42,11 +61,18 @@ def _log(msg: str):
|
|
|
42
61
|
def _parser() -> BibTexParser:
|
|
43
62
|
p = BibTexParser(common_strings=True)
|
|
44
63
|
p.ignore_nonstandard_types = False
|
|
64
|
+
p.bib_database.strings.update(MONTH_STRINGS)
|
|
45
65
|
return p
|
|
46
66
|
|
|
47
67
|
|
|
48
68
|
def parse_bib(text: str) -> BibDatabase:
|
|
49
|
-
|
|
69
|
+
try:
|
|
70
|
+
return bibtexparser.loads(text, parser=_parser())
|
|
71
|
+
except Exception as e:
|
|
72
|
+
# Undefined @string macros raise bare KeyError('macro'); rewrap so
|
|
73
|
+
# callers see a real message and KeyError never masquerades as a
|
|
74
|
+
# LookupError "not found" upstream.
|
|
75
|
+
raise ValueError(f"BibTeX parse failed: {type(e).__name__}: {e}") from e
|
|
50
76
|
|
|
51
77
|
|
|
52
78
|
def parse_bibtex_entry(text: str) -> dict:
|
|
@@ -117,10 +143,12 @@ def find_existing(db: BibDatabase, title: str, arxiv_id: str = "", doi: str = ""
|
|
|
117
143
|
return None
|
|
118
144
|
|
|
119
145
|
|
|
120
|
-
def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
|
|
146
|
+
def upsert_entry(path: Path, entry: dict, replace: bool = False) -> tuple[str, str]:
|
|
121
147
|
"""Insert or upgrade ``entry`` in ``path``.
|
|
122
148
|
|
|
123
|
-
Returns (action, key)
|
|
149
|
+
Returns (action, key), action in "added" | "upgraded" | "exists" |
|
|
150
|
+
"replaced". With ``replace``, an existing matching entry is overwritten
|
|
151
|
+
(its citation key is kept so existing \\cite{} commands stay valid).
|
|
124
152
|
"""
|
|
125
153
|
db = load_bib_file(path)
|
|
126
154
|
if db is None: # unparseable file: append blindly
|
|
@@ -132,13 +160,14 @@ def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
|
|
|
132
160
|
db, entry.get("title", ""), entry_arxiv_id(entry), entry.get("doi", "")
|
|
133
161
|
)
|
|
134
162
|
if existing is not None:
|
|
135
|
-
|
|
163
|
+
upgrade = is_preprint(existing) and not is_preprint(entry)
|
|
164
|
+
if replace or upgrade:
|
|
136
165
|
key = existing["ID"]
|
|
137
166
|
existing.clear()
|
|
138
|
-
existing.update(entry)
|
|
167
|
+
existing.update({k: str(v) for k, v in entry.items() if v})
|
|
139
168
|
existing["ID"] = key # keep the key the user may already \cite
|
|
140
169
|
_write_db(path, db)
|
|
141
|
-
return "upgraded", key
|
|
170
|
+
return ("replaced" if replace else "upgraded"), key
|
|
142
171
|
return "exists", existing["ID"]
|
|
143
172
|
|
|
144
173
|
db.entries.append({k: str(v) for k, v in entry.items() if v})
|
|
@@ -146,6 +175,20 @@ def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
|
|
|
146
175
|
return "added", entry["ID"]
|
|
147
176
|
|
|
148
177
|
|
|
178
|
+
def remove_entry(path: Path, key: str) -> bool:
|
|
179
|
+
"""Delete the entry with citation key ``key``. True if something was
|
|
180
|
+
removed."""
|
|
181
|
+
db = load_bib_file(path)
|
|
182
|
+
if db is None:
|
|
183
|
+
return False
|
|
184
|
+
before = len(db.entries)
|
|
185
|
+
db.entries = [e for e in db.entries if e.get("ID") != key]
|
|
186
|
+
if len(db.entries) == before:
|
|
187
|
+
return False
|
|
188
|
+
_write_db(path, db)
|
|
189
|
+
return True
|
|
190
|
+
|
|
191
|
+
|
|
149
192
|
def _write_db(path: Path, db: BibDatabase):
|
|
150
193
|
writer = BibTexWriter()
|
|
151
194
|
writer.indent = " "
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Local cache of successful publication matches.
|
|
2
|
+
|
|
3
|
+
Keyed by normalized title. Only *published* matches are stored — a paper that
|
|
4
|
+
is published stays published, while a preprint may get published tomorrow, so
|
|
5
|
+
negative/preprint results are never cached. Re-running `fix`/`upgrade` or
|
|
6
|
+
re-adding known papers therefore costs zero API calls.
|
|
7
|
+
|
|
8
|
+
Disable with --no-cache or BIBCITE_NO_CACHE=1. Lives at
|
|
9
|
+
$XDG_CACHE_HOME/bibcite/published.json (~/.cache/bibcite/published.json).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
DISABLED = os.environ.get("BIBCITE_NO_CACHE", "") == "1"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _path() -> Path:
|
|
21
|
+
root = os.environ.get("XDG_CACHE_HOME") or "~/.cache"
|
|
22
|
+
return Path(root).expanduser() / "bibcite" / "published.json"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _load() -> dict:
|
|
26
|
+
try:
|
|
27
|
+
return json.loads(_path().read_text())
|
|
28
|
+
except Exception:
|
|
29
|
+
return {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get(key: str) -> dict | None:
|
|
33
|
+
if DISABLED or not key:
|
|
34
|
+
return None
|
|
35
|
+
return _load().get(key)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def put(key: str, value: dict):
|
|
39
|
+
if DISABLED or not key:
|
|
40
|
+
return
|
|
41
|
+
try:
|
|
42
|
+
data = _load()
|
|
43
|
+
data[key] = value
|
|
44
|
+
p = _path()
|
|
45
|
+
p.parent.mkdir(parents=True, exist_ok=True)
|
|
46
|
+
p.write_text(json.dumps(data, ensure_ascii=False))
|
|
47
|
+
except Exception as e: # cache must never break resolution
|
|
48
|
+
print(f"[cache] write failed: {e}", file=sys.stderr)
|