bibcite-cli 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bibcite-cli
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans
5
+ Project-URL: Repository, https://github.com/leo1oel/bibcite
5
6
  License-Expression: MIT
6
7
  License-File: LICENSE
7
8
  Keywords: arxiv,bibliography,bibtex,citations,dblp
@@ -32,7 +33,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
32
33
  uv tool install --editable .
33
34
 
34
35
  # from git, no checkout needed
35
- uv tool install git+https://github.com/<you>/bibcite
36
+ uv tool install git+https://github.com/leo1oel/bibcite
36
37
 
37
38
  # once published to PyPI (package name bibcite-cli, command name bibcite)
38
39
  uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
@@ -57,16 +58,29 @@ bibcite add refs.bib 2103.14030 --json
57
58
  # Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
58
59
  bibcite add refs.bib --bibtex "$(pbpaste)"
59
60
 
61
+ # Batch add (one query per line; shares rate-limit state, tidies once)
62
+ bibcite add refs.bib --from ids.txt
63
+
64
+ # Overwrite a bad existing entry (keeps its key), or delete one
65
+ bibcite add refs.bib <query> --replace
66
+ bibcite remove refs.bib <key>
67
+
68
+ # One-shot cleanup: upgrade preprints → tidy → lint
69
+ bibcite fix refs.bib
70
+
60
71
  # Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
61
72
  bibcite upgrade refs.bib --dry-run
62
73
 
63
- # Just format, or just lint
74
+ # Just format, or just lint (check is read-only)
64
75
  bibcite tidy refs.bib
65
76
  bibcite check refs.bib
66
77
  ```
67
78
 
68
- `--json` prints a machine-readable result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
79
+ `add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
69
80
  `add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
81
+ Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
82
+ Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
83
+ Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
70
84
 
71
85
  ## For agents
72
86
 
@@ -20,7 +20,7 @@ After every write, the file is formatted with [bibtex-tidy](https://github.com/F
20
20
  uv tool install --editable .
21
21
 
22
22
  # from git, no checkout needed
23
- uv tool install git+https://github.com/<you>/bibcite
23
+ uv tool install git+https://github.com/leo1oel/bibcite
24
24
 
25
25
  # once published to PyPI (package name bibcite-cli, command name bibcite)
26
26
  uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
@@ -45,16 +45,29 @@ bibcite add refs.bib 2103.14030 --json
45
45
  # Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
46
46
  bibcite add refs.bib --bibtex "$(pbpaste)"
47
47
 
48
+ # Batch add (one query per line; shares rate-limit state, tidies once)
49
+ bibcite add refs.bib --from ids.txt
50
+
51
+ # Overwrite a bad existing entry (keeps its key), or delete one
52
+ bibcite add refs.bib <query> --replace
53
+ bibcite remove refs.bib <key>
54
+
55
+ # One-shot cleanup: upgrade preprints → tidy → lint
56
+ bibcite fix refs.bib
57
+
48
58
  # Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
49
59
  bibcite upgrade refs.bib --dry-run
50
60
 
51
- # Just format, or just lint
61
+ # Just format, or just lint (check is read-only)
52
62
  bibcite tidy refs.bib
53
63
  bibcite check refs.bib
54
64
  ```
55
65
 
56
- `--json` prints a machine-readable result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
66
+ `add`/`upgrade`/`check`/`fix`/`remove` print a machine-readable JSON result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
57
67
  `add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
68
+ Exit codes: 0 success, 2 paper not found (ask for a better identifier), 3 sources/tool failure (retry later).
69
+ Successful matches are cached at `~/.cache/bibcite/published.json` (published papers only — preprint status is never cached); bypass with `--no-cache` or `BIBCITE_NO_CACHE=1`.
70
+ Entries marked `pubstate = {preprint}` are treated as confirmed preprint-only and muted from `check`/`upgrade`.
58
71
 
59
72
  ## For agents
60
73
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "bibcite-cli"
3
- version = "0.1.0"
3
+ version = "0.3.0"
4
4
  description = "Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans"
5
5
  readme = "Readme.md"
6
6
  license = "MIT"
@@ -11,6 +11,9 @@ dependencies = [
11
11
  ]
12
12
  keywords = ["bibtex", "arxiv", "citations", "dblp", "bibliography"]
13
13
 
14
+ [project.urls]
15
+ Repository = "https://github.com/leo1oel/bibcite"
16
+
14
17
  [project.scripts]
15
18
  bibcite = "bibcite.cli:main"
16
19
 
@@ -1,3 +1,3 @@
1
1
  """bibcite: canonical BibTeX resolution for papers (arXiv id / DOI / title)."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.3.0"
@@ -14,7 +14,10 @@ from bibtexparser.bwriter import BibTexWriter
14
14
  from .normalize import norm_title
15
15
 
16
16
  # The exact bibtex-tidy invocation requested by the user; keep in sync with
17
- # their LaTeX workflow.
17
+ # their LaTeX workflow. NOTE: no --generate-keys — bibcite owns key
18
+ # generation (make_key ASCII-folds names, so Hyvärinen -> hyvarinen2000...,
19
+ # where tidy would emit hyv_arinen2000...), and stable keys keep existing
20
+ # \cite{} commands valid.
18
21
  TIDY_ARGS = [
19
22
  "--modify",
20
23
  "--omit=pages,publisher,doi,timestamp,biburl,bibsource,abstract,month,series,volume,editor,note,date,number,address",
@@ -27,10 +30,26 @@ TIDY_ARGS = [
27
30
  "--sort-fields=author,title,booktitle,journal,year,url,pdf",
28
31
  "--strip-enclosing-braces",
29
32
  "--tidy-comments",
30
- "--generate-keys",
31
33
  ]
32
34
 
33
- NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref")
35
+ NOISE_FIELDS = ("timestamp", "biburl", "bibsource", "crossref", "month")
36
+
37
# Extra @string macros for month names. bibtexparser's common_strings only
# defines the three-letter forms (jan..dec), whereas CrossRef's transform
# endpoint emits bare full names (month=June); without a definition those
# raise KeyError during string interpolation.
_MONTH_NAMES = (
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
)

# Abbreviations first, then full names, each mapped to the capitalized full
# name ("jan" -> "January", "january" -> "January"). "may" is its own
# abbreviation, so the table ends up with 23 keys.
MONTH_STRINGS = dict(
    [(name[:3], name.capitalize()) for name in _MONTH_NAMES]
    + [(name, name.capitalize()) for name in _MONTH_NAMES]
)
34
53
 
35
54
  ARXIV_ID_RE = re.compile(r"(\d{4}\.\d{4,5})(v\d+)?")
36
55
 
@@ -42,11 +61,18 @@ def _log(msg: str):
42
61
  def _parser() -> BibTexParser:
43
62
  p = BibTexParser(common_strings=True)
44
63
  p.ignore_nonstandard_types = False
64
+ p.bib_database.strings.update(MONTH_STRINGS)
45
65
  return p
46
66
 
47
67
 
48
68
  def parse_bib(text: str) -> BibDatabase:
49
- return bibtexparser.loads(text, parser=_parser())
69
+ try:
70
+ return bibtexparser.loads(text, parser=_parser())
71
+ except Exception as e:
72
+ # Undefined @string macros raise bare KeyError('macro'); rewrap so
73
+ # callers see a real message and KeyError never masquerades as a
74
+ # LookupError "not found" upstream.
75
+ raise ValueError(f"BibTeX parse failed: {type(e).__name__}: {e}") from e
50
76
 
51
77
 
52
78
  def parse_bibtex_entry(text: str) -> dict:
@@ -117,10 +143,12 @@ def find_existing(db: BibDatabase, title: str, arxiv_id: str = "", doi: str = ""
117
143
  return None
118
144
 
119
145
 
120
- def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
146
+ def upsert_entry(path: Path, entry: dict, replace: bool = False) -> tuple[str, str]:
121
147
  """Insert or upgrade ``entry`` in ``path``.
122
148
 
123
- Returns (action, key) where action is "added" | "upgraded" | "exists".
149
+ Returns (action, key), action in "added" | "upgraded" | "exists" |
150
+ "replaced". With ``replace``, an existing matching entry is overwritten
151
+ (its citation key is kept so existing \\cite{} commands stay valid).
124
152
  """
125
153
  db = load_bib_file(path)
126
154
  if db is None: # unparseable file: append blindly
@@ -132,13 +160,14 @@ def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
132
160
  db, entry.get("title", ""), entry_arxiv_id(entry), entry.get("doi", "")
133
161
  )
134
162
  if existing is not None:
135
- if is_preprint(existing) and not is_preprint(entry):
163
+ upgrade = is_preprint(existing) and not is_preprint(entry)
164
+ if replace or upgrade:
136
165
  key = existing["ID"]
137
166
  existing.clear()
138
- existing.update(entry)
167
+ existing.update({k: str(v) for k, v in entry.items() if v})
139
168
  existing["ID"] = key # keep the key the user may already \cite
140
169
  _write_db(path, db)
141
- return "upgraded", key
170
+ return ("replaced" if replace else "upgraded"), key
142
171
  return "exists", existing["ID"]
143
172
 
144
173
  db.entries.append({k: str(v) for k, v in entry.items() if v})
@@ -146,6 +175,20 @@ def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
146
175
  return "added", entry["ID"]
147
176
 
148
177
 
178
def remove_entry(path: Path, key: str) -> bool:
    """Remove every entry whose citation key equals ``key`` from ``path``.

    Returns:
        True when at least one entry was dropped and the file was rewritten;
        False when the file could not be loaded (``load_bib_file`` returned
        None) or no entry carries that key, in which case nothing is written.
    """
    db = load_bib_file(path)
    if db is None:
        return False

    kept = [entry for entry in db.entries if entry.get("ID") != key]
    if len(kept) == len(db.entries):
        # Nothing matched: leave the file untouched.
        return False

    db.entries = kept
    _write_db(path, db)
    return True
190
+
191
+
149
192
  def _write_db(path: Path, db: BibDatabase):
150
193
  writer = BibTexWriter()
151
194
  writer.indent = " "
@@ -0,0 +1,48 @@
1
+ """Local cache of successful publication matches.
2
+
3
+ Keyed by normalized title. Only *published* matches are stored — a paper that
4
+ is published stays published, while a preprint may get published tomorrow, so
5
+ negative/preprint results are never cached. Re-running `fix`/`upgrade` or
6
+ re-adding known papers therefore costs zero API calls.
7
+
8
+ Disable with --no-cache or BIBCITE_NO_CACHE=1. Lives at
9
+ $XDG_CACHE_HOME/bibcite/published.json (~/.cache/bibcite/published.json).
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+
17
# Read once at import time; only the exact value "1" switches the cache off.
# NOTE(review): the module docstring also mentions --no-cache — presumably the
# CLI flips this flag at runtime; confirm in the CLI module.
DISABLED = os.getenv("BIBCITE_NO_CACHE", "") == "1"
18
+
19
+
20
def _path() -> Path:
    """Return the location of the cache file.

    ``$XDG_CACHE_HOME`` is honoured when it is set, non-empty and (after
    ``~`` expansion) absolute. The XDG Base Directory spec requires relative
    values to be ignored; following it also keeps the cache from landing in
    whatever directory the command happens to run from. In every other case
    the cache lives under ``~/.cache``.
    """
    tail = Path("bibcite") / "published.json"
    configured = os.environ.get("XDG_CACHE_HOME")
    if configured:
        root = Path(configured).expanduser()
        if root.is_absolute():
            return root / tail
    return Path("~/.cache").expanduser() / tail
23
+
24
+
25
def _load() -> dict:
    """Read the whole cache file and return it as a dict.

    Best effort by design: a missing, unreadable or corrupt file yields an
    empty dict instead of an error. The same applies to a file holding valid
    JSON that is not an object (``[]``, ``null``, a number, ...), because
    callers use the result with ``.get()`` and item assignment.
    """
    try:
        data = json.loads(_path().read_text())
    except Exception:
        # Deliberately broad: any failure to read simply means "no cache".
        return {}
    return data if isinstance(data, dict) else {}
30
+
31
+
32
def get(key: str) -> dict | None:
    """Look up ``key`` in the on-disk cache.

    Returns None when caching is disabled, when ``key`` is empty, or when
    the cache has no entry for it. The cache file is re-read on every call.
    """
    usable = not DISABLED and bool(key)
    return _load().get(key) if usable else None
36
+
37
+
38
def put(key: str, value: dict):
    """Store ``value`` under ``key`` in the on-disk cache (best effort).

    Does nothing when caching is disabled or ``key`` is empty. Any failure is
    reported on stderr and swallowed: the cache must never break resolution.

    The file is rewritten through a temporary sibling that is renamed over
    the target, so a failed or interrupted write leaves the previous cache
    intact instead of a truncated file.
    """
    if DISABLED or not key:
        return
    try:
        data = _load()
        if not isinstance(data, dict):
            # A cache file holding a non-object payload cannot be updated;
            # start over rather than failing on every write.
            data = {}
        data[key] = value
        p = _path()
        p.parent.mkdir(parents=True, exist_ok=True)
        # Per-process temp name so two concurrent runs don't share a file.
        tmp = p.with_name(f"{p.name}.{os.getpid()}.tmp")
        try:
            tmp.write_text(json.dumps(data, ensure_ascii=False))
            tmp.replace(p)
        finally:
            # No-op after a successful rename; cleans up after a failed write.
            tmp.unlink(missing_ok=True)
    except Exception as e:  # cache must never break resolution
        print(f"[cache] write failed: {e}", file=sys.stderr)