bibcite-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ dist/
5
+ .pytest_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Leonardo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: bibcite-cli
3
+ Version: 0.1.0
4
+ Summary: Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Keywords: arxiv,bibliography,bibtex,citations,dblp
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: bibtexparser<2,>=1.4
10
+ Requires-Dist: httpx>=0.27
11
+ Description-Content-Type: text/markdown
12
+
13
+ # bibcite
14
+
15
+ Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX, and manage `.bib` files so agents never hand-edit them.
16
+
17
+ The publication-matching cascade is ported from [PaperMemory](https://github.com/vict0rsch/PaperMemory)'s bibMatcher:
18
+ DBLP → Semantic Scholar → Google Scholar → CrossRef → Unpaywall.
19
+ A match must have an identical normalized title, a plausible year, and a non-preprint venue.
20
+
21
+ Venue names are canonicalized against the `@string` table vendored in `src/bibcite/data/strings.bib` (journals / conferences / workshops), including year-aware rules (NIPS before 2018 vs NeurIPS, WACV before 2017).
22
+
23
+ Entry types are strict: conference/workshop papers become `@inproceedings` + `booktitle`, journal papers `@article` + `journal`, and unpublished arXiv preprints `@misc` + `howpublished = {arXiv preprint arXiv:ID}`.
24
+ Types coming from authoritative source BibTeX (DBLP) are preserved.
25
+
26
+ After every write, the file is formatted with [bibtex-tidy](https://github.com/FlamingTempura/bibtex-tidy) using the canonical flags in `bibfile.TIDY_ARGS`. This needs `bibtex-tidy` on PATH or `npx`; if neither is found, the tidy step is skipped with a warning on stderr.
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ # from a local checkout (development)
32
+ uv tool install --editable .
33
+
34
+ # from git, no checkout needed
35
+ uv tool install git+https://github.com/<you>/bibcite
36
+
37
+ # once published to PyPI (package name bibcite-cli, command name bibcite)
38
+ uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
39
+
40
+ # plus, once (required for the tidy step):
41
+ npm install -g bibtex-tidy
42
+ ```
43
+
44
+ To use your own venue table instead of the vendored one, set `BIBCITE_STRINGS=/path/to/strings.bib` or place it at `~/.config/bibcite/strings.bib`.
45
+
46
+ ## Usage
47
+
48
+ ```bash
49
+ # Preview the BibTeX for a paper (nothing written)
50
+ bibcite get 1706.03762
51
+ bibcite get "Attention is all you need"
52
+ bibcite get 10.1109/CVPR52688.2022.01167
53
+
54
+ # Resolve and write into a .bib file, dedupe, then bibtex-tidy; prints the final key
55
+ bibcite add refs.bib 2103.14030 --json
56
+
57
+ # Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
58
+ bibcite add refs.bib --bibtex "$(pbpaste)"
59
+
60
+ # Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
61
+ bibcite upgrade refs.bib --dry-run
62
+
63
+ # Just format, or just lint
64
+ bibcite tidy refs.bib
65
+ bibcite check refs.bib
66
+ ```
67
+
68
+ `--json` prints a machine-readable result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
69
+ `add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
70
+
71
+ ## For agents
72
+
73
+ Never edit `.bib` files by hand.
74
+ Call `bibcite add <file> <query> --json` and use the returned `key` in `\cite{...}`.
@@ -0,0 +1,62 @@
1
+ # bibcite
2
+
3
+ Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX, and manage `.bib` files so agents never hand-edit them.
4
+
5
+ The publication-matching cascade is ported from [PaperMemory](https://github.com/vict0rsch/PaperMemory)'s bibMatcher:
6
+ DBLP → Semantic Scholar → Google Scholar → CrossRef → Unpaywall.
7
+ A match must have an identical normalized title, a plausible year, and a non-preprint venue.
8
+
9
+ Venue names are canonicalized against the `@string` table vendored in `src/bibcite/data/strings.bib` (journals / conferences / workshops), including year-aware rules (NIPS before 2018 vs NeurIPS, WACV before 2017).
10
+
11
+ Entry types are strict: conference/workshop papers become `@inproceedings` + `booktitle`, journal papers `@article` + `journal`, and unpublished arXiv preprints `@misc` + `howpublished = {arXiv preprint arXiv:ID}`.
12
+ Types coming from authoritative source BibTeX (DBLP) are preserved.
13
+
14
+ After every write, the file is formatted with [bibtex-tidy](https://github.com/FlamingTempura/bibtex-tidy) using the canonical flags in `bibfile.TIDY_ARGS`. This needs `bibtex-tidy` on PATH or `npx`; if neither is found, the tidy step is skipped with a warning on stderr.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ # from a local checkout (development)
20
+ uv tool install --editable .
21
+
22
+ # from git, no checkout needed
23
+ uv tool install git+https://github.com/<you>/bibcite
24
+
25
+ # once published to PyPI (package name bibcite-cli, command name bibcite)
26
+ uv tool install bibcite-cli # or: uvx --from bibcite-cli bibcite ...
27
+
28
+ # plus, once (required for the tidy step):
29
+ npm install -g bibtex-tidy
30
+ ```
31
+
32
+ To use your own venue table instead of the vendored one, set `BIBCITE_STRINGS=/path/to/strings.bib` or place it at `~/.config/bibcite/strings.bib`.
33
+
34
+ ## Usage
35
+
36
+ ```bash
37
+ # Preview the BibTeX for a paper (nothing written)
38
+ bibcite get 1706.03762
39
+ bibcite get "Attention is all you need"
40
+ bibcite get 10.1109/CVPR52688.2022.01167
41
+
42
+ # Resolve and write into a .bib file, dedupe, then bibtex-tidy; prints the final key
43
+ bibcite add refs.bib 2103.14030 --json
44
+
45
+ # Add a raw BibTeX entry you already have (venue still canonicalized, file still tidied)
46
+ bibcite add refs.bib --bibtex "$(pbpaste)"
47
+
48
+ # Upgrade every arXiv entry in a file to its published version (bibMatcher, CLI-style)
49
+ bibcite upgrade refs.bib --dry-run
50
+
51
+ # Just format, or just lint
52
+ bibcite tidy refs.bib
53
+ bibcite check refs.bib
54
+ ```
55
+
56
+ `--json` prints a machine-readable result on stdout (`action`, `key`, `venue`, `source`, ...); all diagnostics go to stderr.
57
+ `add` is idempotent: an existing entry returns `action: exists` with its key, and an existing arXiv entry matched to a published version is upgraded in place, keeping its citation key.
58
+
59
+ ## For agents
60
+
61
+ Never edit `.bib` files by hand.
62
+ Call `bibcite add <file> <query> --json` and use the returned `key` in `\cite{...}`.
@@ -0,0 +1,27 @@
1
+ [project]
2
+ name = "bibcite-cli"
3
+ version = "0.1.0"
4
+ description = "Resolve papers (arXiv id / DOI / title) to canonical, normalized BibTeX for agents and humans"
5
+ readme = "Readme.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ dependencies = [
9
+ "bibtexparser>=1.4,<2",
10
+ "httpx>=0.27",
11
+ ]
12
+ keywords = ["bibtex", "arxiv", "citations", "dblp", "bibliography"]
13
+
14
+ [project.scripts]
15
+ bibcite = "bibcite.cli:main"
16
+
17
+ [dependency-groups]
18
+ dev = [
19
+ "pytest>=8",
20
+ ]
21
+
22
+ [build-system]
23
+ requires = ["hatchling"]
24
+ build-backend = "hatchling.build"
25
+
26
+ [tool.hatch.build.targets.wheel]
27
+ packages = ["src/bibcite"]
@@ -0,0 +1,3 @@
1
+ """bibcite: canonical BibTeX resolution for papers (arXiv id / DOI / title)."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,194 @@
1
+ """Reading/writing .bib files, deduplication, and the bibtex-tidy runner."""
2
+
3
+ import re
4
+ import shutil
5
+ import subprocess
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ import bibtexparser
10
+ from bibtexparser.bibdatabase import BibDatabase
11
+ from bibtexparser.bparser import BibTexParser
12
+ from bibtexparser.bwriter import BibTexWriter
13
+
14
+ from .normalize import norm_title
15
+
16
# Canonical bibtex-tidy flags: this is the exact invocation requested by the
# user, so keep it in sync with their LaTeX workflow.
TIDY_ARGS = [
    "--modify",
    # Fields that bibtex-tidy is asked to drop from every entry.
    "--omit="
    "pages,publisher,doi,timestamp,biburl,bibsource,abstract,month,"
    "series,volume,editor,note,date,number,address",
    "--curly",
    "--blank-lines",
    "--trailing-commas",
    "--sort=-year",
    "--duplicates=citation",
    "--merge=first",
    "--sort-fields=author,title,booktitle,journal,year,url,pdf",
    "--strip-enclosing-braces",
    "--tidy-comments",
    "--generate-keys",
]

# Fields removed from an entry right after it is parsed
# (see parse_bibtex_entry).
NOISE_FIELDS = (
    "timestamp",
    "biburl",
    "bibsource",
    "crossref",
)

# Four digits, a dot, four or five digits, then an optional version suffix.
# Group 1 is the identifier without the version.
ARXIV_ID_RE = re.compile(
    r"(\d{4}\.\d{4,5})(v\d+)?"
)
36
+
37
+
38
+ def _log(msg: str):
39
+ print(msg, file=sys.stderr)
40
+
41
+
42
def _parser() -> BibTexParser:
    """Build a fresh BibTexParser configured the way this module needs it.

    A new parser is created per call; ``common_strings=True`` and
    ``ignore_nonstandard_types = False`` are the only settings changed.
    """
    parser = BibTexParser(common_strings=True)
    # NOTE(review): presumably keeps entries whose type is not a standard
    # BibTeX type instead of dropping them — confirm against bibtexparser docs.
    parser.ignore_nonstandard_types = False
    return parser
46
+
47
+
48
def parse_bib(text: str) -> BibDatabase:
    """Parse BibTeX source ``text`` into a BibDatabase using a fresh parser."""
    parser = _parser()
    return bibtexparser.loads(text, parser=parser)
50
+
51
+
52
def parse_bibtex_entry(text: str) -> dict:
    """Return the first entry of a BibTeX string as a dict.

    The dict holds the entry's fields plus ``ID`` and ``ENTRYTYPE``; every
    field listed in ``NOISE_FIELDS`` is left out.

    Raises:
        ValueError: if ``text`` contains no parseable entry.
    """
    entries = parse_bib(text).entries
    if not entries:
        raise ValueError("No BibTeX entry could be parsed")
    # Copy into a new dict so the parsed database entry is not modified.
    return {
        name: value
        for name, value in entries[0].items()
        if name not in NOISE_FIELDS
    }
61
+
62
+
63
def entry_to_bibtex(entry: dict) -> str:
    """Serialize a single entry dict to BibTeX text ending in one newline.

    Fields with a falsy value are omitted; all remaining values are converted
    to ``str`` before being handed to the writer.
    """
    fields = {}
    for name, value in entry.items():
        if value:
            fields[name] = str(value)

    single = BibDatabase()
    single.entries = [fields]

    out = BibTexWriter()
    out.indent = " "
    rendered = bibtexparser.dumps(single, out)
    return rendered.strip() + "\n"
69
+
70
+
71
def entry_arxiv_id(entry: dict) -> str:
    """Extract an arXiv id from the eprint/url/journal/note/doi fields, if any.

    Fields are inspected in that order. ``eprint`` is always searched; the
    other fields are searched only when their value mentions "arxiv"
    (case-insensitive), so that unrelated digit patterns are not mistaken
    for an arXiv id.

    Args:
        entry: BibTeX entry as a dict of field name to value. Missing,
            ``None`` or non-string values are tolerated.

    Returns:
        The id matched by ``ARXIV_ID_RE`` without its version suffix, or
        ``""`` when no field yields one.
    """
    for field in ("eprint", "url", "journal", "note", "doi"):
        # Entries built from API responses may carry None or non-string
        # values; coerce so .lower() and the regex search cannot raise.
        value = str(entry.get(field) or "")
        if field == "eprint" or "arxiv" in value.lower():
            m = ARXIV_ID_RE.search(value)
            if m:
                return m.group(1)
    return ""
80
+
81
+
82
def is_preprint(entry: dict) -> bool:
    """Tell whether ``entry`` should be treated as a preprint.

    An entry is a preprint when its journal/booktitle/howpublished text
    mentions "arxiv" or "preprint", contains the standalone word "corr", or
    when it has neither a journal nor a booktitle.

    eprint/archiveprefix/url fields are deliberately ignored: published
    entries keep their arXiv pointers.
    """
    venue_parts = [
        str(entry.get(name, ""))
        for name in ("journal", "booktitle", "howpublished")
    ]
    venue_text = " ".join(venue_parts).lower()

    if "arxiv" in venue_text:
        return True
    if "preprint" in venue_text:
        return True
    # "corr" must be a whole word so venues merely containing it do not match.
    if "corr" in venue_text.split():
        return True

    has_venue = bool(entry.get("journal") or entry.get("booktitle"))
    return not has_venue
94
+
95
+
96
def load_bib_file(path: Path) -> BibDatabase | None:
    """Parse an existing .bib file.

    Args:
        path: Location of the .bib file; it does not have to exist.

    Returns:
        An empty ``BibDatabase`` when the file is missing or contains only
        whitespace, the parsed database otherwise, or ``None`` when the
        content cannot be parsed (callers then degrade to append-only mode).
    """
    # Read the file exactly once: the emptiness check and the parse must see
    # the same content, and a file deleted in between must not raise.
    try:
        text = path.read_text()
    except FileNotFoundError:
        return BibDatabase()
    if not text.strip():
        return BibDatabase()
    try:
        return parse_bib(text)
    except Exception as e:
        _log(f"[bibcite] warning: could not parse {path} ({e}); appending without dedup")
        return None
106
+
107
+
108
def find_existing(db: BibDatabase, title: str, arxiv_id: str = "", doi: str = "") -> dict | None:
    """Return the first entry of ``db`` that refers to the same paper.

    Entries are scanned in file order; for each one the arXiv id, then the
    DOI (case-insensitive), then the normalized title is compared. A
    criterion is skipped when the corresponding argument is empty. Returns
    ``None`` when nothing matches.
    """
    wanted_title = norm_title(title)
    wanted_doi = doi.lower() if doi else ""

    def same_paper(candidate: dict) -> bool:
        if arxiv_id and entry_arxiv_id(candidate) == arxiv_id:
            return True
        if doi and candidate.get("doi", "").lower() == wanted_doi:
            return True
        return bool(wanted_title) and norm_title(candidate.get("title", "")) == wanted_title

    for candidate in db.entries:
        if same_paper(candidate):
            return candidate
    return None
118
+
119
+
120
def upsert_entry(path: Path, entry: dict) -> tuple[str, str]:
    """Insert or upgrade ``entry`` in ``path``.

    Args:
        path: Target .bib file (created by the write if missing).
        entry: Entry dict with at least ``ID``; field values may be
            non-strings or empty and are normalized before being stored.

    Returns:
        ``(action, key)`` where action is ``"added"``, ``"upgraded"`` or
        ``"exists"`` and key is the citation key of the affected entry.

    Raises:
        KeyError: if a new entry has no ``ID``.
    """
    db = load_bib_file(path)
    if db is None:  # unparseable file: append blindly
        with path.open("a") as f:
            f.write("\n" + entry_to_bibtex(entry))
        return "added", entry["ID"]

    # Normalize once so the "added" and "upgraded" paths store the same
    # shape: empty fields dropped and every value a string, as the writer
    # expects.
    clean = {k: str(v) for k, v in entry.items() if v}

    existing = find_existing(
        db, clean.get("title", ""), entry_arxiv_id(clean), clean.get("doi", "")
    )
    if existing is not None:
        if is_preprint(existing) and not is_preprint(clean):
            key = existing["ID"]
            existing.clear()
            existing.update(clean)
            existing["ID"] = key  # keep the key the user may already \cite
            _write_db(path, db)
            return "upgraded", key
        return "exists", existing["ID"]

    db.entries.append(clean)
    _write_db(path, db)
    return "added", entry["ID"]
147
+
148
+
149
def _write_db(path: Path, db: BibDatabase) -> None:
    """Serialize ``db`` and overwrite ``path`` with the result."""
    bib_writer = BibTexWriter()
    bib_writer.indent = " "
    # Preserve file order; tidy re-sorts anyway.
    bib_writer.order_entries_by = None
    serialized = bibtexparser.dumps(db, bib_writer)
    path.write_text(serialized)
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # bibtex-tidy
158
+ # ---------------------------------------------------------------------------
159
+
160
+ def tidy_command() -> list[str] | None:
161
+ exe = shutil.which("bibtex-tidy")
162
+ if exe:
163
+ return [exe]
164
+ if shutil.which("npx"):
165
+ return ["npx", "--yes", "bibtex-tidy"]
166
+ return None
167
+
168
+
169
def run_tidy(path: Path) -> bool:
    """Format ``path`` with bibtex-tidy using ``TIDY_ARGS``.

    Returns ``True`` when the tool ran and exited with status 0, ``False``
    when it is unavailable or failed. The outcome is logged to stderr in
    every case.
    """
    launcher = tidy_command()
    if launcher is None:
        _log("[bibcite] bibtex-tidy not found (npm i -g bibtex-tidy); skipping tidy")
        return False

    argv = launcher + [str(path)] + TIDY_ARGS
    proc = subprocess.run(argv, capture_output=True, text=True)
    if proc.returncode != 0:
        _log(f"[bibcite] bibtex-tidy failed:\n{proc.stderr.strip()}")
        return False

    # Report only the last line of the tool's output, or "ok" when silent.
    output = proc.stdout.strip()
    summary = output.splitlines()[-1] if output else "ok"
    _log(f"[bibcite] bibtex-tidy: {summary}")
    return True
182
+
183
+
184
def key_after_tidy(path: Path, title: str, fallback_key: str) -> str:
    """Report the final citation key of the entry with this title.

    bibtex-tidy --generate-keys rewrites keys, so the file is re-read and the
    entry is located by its normalized title.

    Args:
        path: The .bib file that was just tidied.
        title: Title of the entry whose key is wanted.
        fallback_key: Key to return when the entry cannot be identified.

    Returns:
        The ``ID`` of the first entry whose normalized title equals that of
        ``title``; ``fallback_key`` when the file cannot be parsed, the title
        normalizes to an empty string, or no entry matches.
    """
    ref = norm_title(title)
    if not ref:
        # An empty reference would "match" the first title-less entry and
        # report an unrelated key.
        return fallback_key
    db = load_bib_file(path)
    if db is None:
        return fallback_key
    for entry in db.entries:
        if norm_title(entry.get("title", "")) == ref:
            return entry["ID"]
    return fallback_key
@@ -0,0 +1,272 @@
1
+ """bibcite CLI.
2
+
3
+ Designed to be called by agents: never hand-edit a .bib file — let
4
+ ``bibcite add`` resolve, canonicalize, dedupe, write, and tidy, then use the
5
+ citation key it prints.
6
+ """
7
+
8
+ import argparse
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ from . import bibfile
14
+ from .normalize import first_author_last_name, norm_title
15
+ from .resolve import Resolved, guess_entry_type, resolve
16
+ from .sources import find_published
17
+ from .venues import canonicalize
18
+
19
+
20
+ def _log(msg: str):
21
+ print(msg, file=sys.stderr)
22
+
23
+
24
+ def _emit(payload: dict, as_json: bool = True):
25
+ """File-mutating commands always print one JSON object on stdout — the
26
+ agent-facing contract. Only `get` has a plain mode (BibTeX on stdout for
27
+ previewing/piping)."""
28
+ if as_json:
29
+ print(json.dumps(payload, ensure_ascii=False, indent=2))
30
+ else:
31
+ for k, v in payload.items():
32
+ if k != "bibtex":
33
+ _log(f"{k}: {v}")
34
+ if payload.get("bibtex"):
35
+ print(payload["bibtex"], end="")
36
+ elif payload.get("key"):
37
+ print(payload["key"])
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # get
42
+ # ---------------------------------------------------------------------------
43
+
44
def _resolve_or_none(query: str, require_published: bool):
    """Resolve ``query``; on any failure log the reason and return ``None``.

    ``LookupError`` and ``ValueError`` are logged with their message only;
    every other exception is logged as a network error together with its
    type name.
    """
    try:
        resolved = resolve(query, require_published=require_published)
    except (LookupError, ValueError) as e:
        _log(f"[bibcite] {e}")
        return None
    except Exception as e:
        _log(f"[bibcite] network error: {type(e).__name__}: {e}")
        return None
    return resolved
52
+
53
+
54
def cmd_get(args) -> int:
    """Handle ``bibcite get``: resolve a query and print it, writing nothing.

    Returns 0 on success and 2 when the query could not be resolved.
    """
    res = _resolve_or_none(" ".join(args.query), args.require_published)
    if res is None:
        return 2

    entry = res.entry
    payload = {
        "action": "resolved",
        "key": entry["ID"],
        "title": entry.get("title", ""),
        "venue": res.venue or "arXiv (preprint, no published venue found)",
        "published": res.published,
        "source": res.source,
        "bibtex": res.bibtex,
    }
    _emit(payload, args.json)
    return 0
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # add
76
+ # ---------------------------------------------------------------------------
77
+
78
def cmd_add(args) -> int:
    """Handle ``bibcite add``: resolve or parse an entry and store it.

    With ``--bibtex`` the given raw entry (or stdin when the value is ``-``)
    is parsed and its venue canonicalized; otherwise the query is resolved.
    The entry is then inserted into / upgraded in the target file, the file
    is tidied unless ``--no-tidy`` is set or nothing changed, and one JSON
    object describing the outcome is printed on stdout.

    Returns:
        0 on success; 2 when no usable input was given, the raw BibTeX could
        not be parsed, or the query could not be resolved.
    """
    path = Path(args.file)
    if args.bibtex:
        text = sys.stdin.read() if args.bibtex == "-" else args.bibtex
        # Bad user input must produce a diagnostic and exit code 2, like an
        # unresolvable query, not a traceback. The parser can raise more than
        # ValueError, hence the broad catch at this CLI boundary.
        try:
            entry = bibfile.parse_bibtex_entry(text)
        except Exception as e:
            _log(f"[bibcite] could not parse --bibtex input: {e}")
            return 2
        raw_venue = entry.get("booktitle", "") or entry.get("journal", "")
        canonical = canonicalize(raw_venue, entry.get("year"))
        if canonical:
            # Replace whatever venue field the user supplied with the
            # canonical field/name pair and the matching entry type.
            entry.pop("booktitle", None)
            entry.pop("journal", None)
            entry["ENTRYTYPE"] = canonical.entry_type
            entry[canonical.bib_field] = canonical.name
        res = Resolved(entry, "user-bibtex", canonical.name if canonical else raw_venue, True)
    else:
        if not args.query:
            _log("[bibcite] provide a query (arXiv id / DOI / title) or --bibtex")
            return 2
        query = " ".join(args.query)
        res = _resolve_or_none(query, args.require_published)
        if res is None:
            return 2

    action, key = bibfile.upsert_entry(path, res.entry)
    tidied = False
    if action != "exists" and not args.no_tidy:
        tidied = bibfile.run_tidy(path)
        if tidied:
            # Tidy may have regenerated keys; report the one now in the file.
            key = bibfile.key_after_tidy(path, res.entry.get("title", ""), key)

    _emit(
        {
            "action": action,
            "key": key,
            "title": res.entry.get("title", ""),
            "venue": res.venue or "arXiv (preprint)",
            "published": res.published,
            "source": res.source,
            "file": str(path),
            "tidied": tidied,
        }
    )
    return 0
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # upgrade: batch-match arXiv entries in an existing file (bibMatcher, CLI-style)
124
+ # ---------------------------------------------------------------------------
125
+
126
def cmd_upgrade(args) -> int:
    """Handle ``bibcite upgrade``: match preprint entries to published versions.

    Every entry of the file that ``bibfile.is_preprint`` flags and that has a
    title is looked up with ``find_published``. Unless ``--dry-run`` is set,
    matched entries get their venue field, entry type, year and (if absent)
    DOI updated in place, keeping their citation key; the file is then
    rewritten and tidied (unless ``--no-tidy``). A per-entry report is logged
    to stderr and one JSON summary is printed on stdout.

    A lookup that raises is logged and reported as unmatched (with an
    ``error`` item) so one failing entry does not discard the rest of the
    batch.

    Returns:
        Always 0.
    """
    path = Path(args.file)
    db = bibfile.load_bib_file(path)
    if db is None or not db.entries:
        _log(f"[bibcite] nothing to do in {path}")
        return 0

    report = []
    changed = 0
    for entry in db.entries:
        if not bibfile.is_preprint(entry):
            continue
        title = entry.get("title", "").replace("{", "").replace("}", "")
        if not title:
            continue
        _log(f"[upgrade] matching: {title[:80]}")
        aid = bibfile.entry_arxiv_id(entry)
        hint = (
            first_author_last_name(entry["author"]) if entry.get("author") else ""
        )
        try:
            match = find_published(title, entry.get("year", ""), aid, hint)
        except Exception as e:
            # Nothing is written until the loop ends, so letting this escape
            # would throw away every match found so far.
            _log(f"[bibcite] lookup failed: {type(e).__name__}: {e}")
            report.append(
                {
                    "key": entry["ID"],
                    "title": title,
                    "matched": False,
                    "error": f"{type(e).__name__}: {e}",
                }
            )
            continue
        if not match:
            report.append({"key": entry["ID"], "title": title, "matched": False})
            continue
        canonical = canonicalize(match.venue, match.year or entry.get("year"))
        venue_name = canonical.name if canonical else match.venue
        if not args.dry_run:
            # Drop every preprint-style venue field before setting the
            # published one.
            entry.pop("journal", None)
            entry.pop("booktitle", None)
            entry.pop("howpublished", None)
            if canonical:
                entry["ENTRYTYPE"] = canonical.entry_type
                entry[canonical.bib_field] = canonical.name
            else:
                entry["ENTRYTYPE"] = guess_entry_type(match.venue)
                field = (
                    "booktitle"
                    if entry["ENTRYTYPE"] == "inproceedings"
                    else "journal"
                )
                entry[field] = match.venue
            # The writer needs string values; str() leaves strings unchanged.
            if match.year:
                entry["year"] = str(match.year)
            if match.doi and not entry.get("doi"):
                entry["doi"] = str(match.doi)
            changed += 1
        report.append(
            {
                "key": entry["ID"],
                "title": title,
                "matched": True,
                "venue": venue_name,
                "source": match.source,
            }
        )

    if changed and not args.dry_run:
        bibfile._write_db(path, db)
        if not args.no_tidy:
            bibfile.run_tidy(path)

    matched = sum(1 for r in report if r["matched"])
    for r in report:
        mark = "✓" if r["matched"] else "✗"
        _log(f"{mark} {r['key']}: {r.get('venue', 'no match')}")
    _log(f"[bibcite] {matched} matched, {changed} upgraded{' (dry-run)' if args.dry_run else ''}")
    _emit({"upgraded": changed, "matched": matched, "dry_run": args.dry_run, "entries": report})
    return 0
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # tidy / check
198
+ # ---------------------------------------------------------------------------
199
+
200
def cmd_tidy(args) -> int:
    """Handle ``bibcite tidy``: exit code 0 when tidy succeeded, 1 otherwise."""
    target = Path(args.file)
    if bibfile.run_tidy(target):
        return 0
    return 1
202
+
203
+
204
def cmd_check(args) -> int:
    """Handle ``bibcite check``: offline lint of a .bib file.

    Reports duplicate normalized titles, missing author/title/year fields,
    and entries that ``bibfile.is_preprint`` flags. Findings are logged to
    stderr and printed as one JSON object on stdout.

    Returns 1 when the file cannot be parsed, 0 otherwise (even when
    problems were found).
    """
    path = Path(args.file)
    db = bibfile.load_bib_file(path)
    if db is None:
        _log(f"[bibcite] {path} could not be parsed")
        return 1

    problems = []
    first_key_by_title: dict[str, str] = {}

    def flag(entry_key: str, issue: str) -> None:
        problems.append({"key": entry_key, "issue": issue})

    for entry in db.entries:
        key = entry.get("ID", "?")

        normalized = norm_title(entry.get("title", ""))
        if normalized and normalized in first_key_by_title:
            flag(key, f"duplicate title of {first_key_by_title[normalized]}")
        # Only the first key seen for a title is remembered.
        first_key_by_title.setdefault(normalized, key)

        for required in ("author", "title", "year"):
            if not entry.get(required):
                flag(key, f"missing {required}")

        if bibfile.is_preprint(entry):
            flag(key, "arXiv preprint (try `bibcite upgrade`)")

    for problem in problems:
        _log(f"{problem['key']}: {problem['issue']}")
    total = len(db.entries)
    _log(f"[bibcite] {total} entries, {len(problems)} issues")
    _emit({"entries": total, "problems": problems})
    return 0
228
+
229
+
230
+ # ---------------------------------------------------------------------------
231
+
232
def main(argv=None) -> int:
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Argument list without the program name; ``None`` makes
            argparse read ``sys.argv``.

    Returns:
        The exit code returned by the sub-command handler.
    """
    p = argparse.ArgumentParser(
        prog="bibcite",
        description="Resolve papers to canonical BibTeX and manage .bib files (agents: use `add`, never hand-edit).",
    )
    sub = p.add_subparsers(dest="cmd", required=True)

    # `add`, `upgrade` and `check` always print JSON. The documented agent
    # call is `bibcite add <file> <query> --json`, so they must accept the
    # flag as a no-op instead of rejecting it as an unknown argument.
    always_json_help = "accepted for compatibility; this command always prints JSON"

    g = sub.add_parser("get", help="resolve a query and print BibTeX to stdout")
    g.add_argument("query", nargs="+", help="arXiv id / arXiv URL / DOI / title")
    g.add_argument("--json", action="store_true", help="print a JSON object instead of BibTeX")
    g.add_argument("--require-published", action="store_true", help="fail instead of falling back to an arXiv entry")
    g.set_defaults(fn=cmd_get)

    a = sub.add_parser("add", help="resolve and write into a .bib file, then run bibtex-tidy (prints JSON)")
    a.add_argument("file", help="target .bib file (created if missing)")
    a.add_argument("query", nargs="*", help="arXiv id / arXiv URL / DOI / title")
    a.add_argument("--bibtex", help="raw BibTeX entry to add instead of a query ('-' reads stdin)")
    a.add_argument("--no-tidy", action="store_true")
    a.add_argument("--require-published", action="store_true")
    a.add_argument("--json", action="store_true", help=always_json_help)
    a.set_defaults(fn=cmd_add)

    u = sub.add_parser("upgrade", help="match all arXiv entries in a file to their published versions (prints JSON)")
    u.add_argument("file")
    u.add_argument("--dry-run", action="store_true")
    u.add_argument("--no-tidy", action="store_true")
    u.add_argument("--json", action="store_true", help=always_json_help)
    u.set_defaults(fn=cmd_upgrade)

    t = sub.add_parser("tidy", help="run bibtex-tidy with the canonical flags")
    t.add_argument("file")
    t.set_defaults(fn=cmd_tidy)

    c = sub.add_parser("check", help="offline sanity check of a .bib file (prints JSON)")
    c.add_argument("file")
    c.add_argument("--json", action="store_true", help=always_json_help)
    c.set_defaults(fn=cmd_check)

    args = p.parse_args(argv)
    return args.fn(args)
269
+
270
+
271
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())