fetchbib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fetchbib-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Matthew R. DeVerna
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: fetchbib
3
+ Version: 0.1.0
4
+ Summary: Resolve DOIs and search queries into formatted BibTeX entries.
5
+ Requires-Python: >=3.9
6
+ License-File: LICENSE
7
+ Requires-Dist: requests
8
+ Dynamic: license-file
@@ -0,0 +1,130 @@
1
+ # fetchbib
2
+
3
+ A command-line tool to resolve DOIs and free-text search queries into formatted BibTeX entries.
4
+ Powered by [doi.org](https://www.doi.org/) and the [Crossref API](https://api.crossref.org/).
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ pip install fetchbib
10
+ ```
11
+
12
+ Requires Python 3.9+.
13
+
14
+ ## Quick start
15
+
16
+ Fetch BibTeX by DOI (bare or full URL):
17
+
18
+ ```bash
19
+ fbib 10.2196/jmir.1933
20
+ fbib https://doi.org/10.2196/jmir.1933
21
+ ```
22
+
23
+ ```bibtex
24
+ @article{Eysenbach2011,
25
+ author = {Eysenbach, Gunther},
26
+ doi = {10.2196/jmir.1933},
27
+ journal = {Journal of Medical Internet Research},
28
+ title = {Can Tweets Predict Citations? Metrics of Social Impact Based on Twitter and Correlation with Traditional Metrics of Scientific Impact},
29
+ year = {2011}
30
+ }
31
+ ```
32
+
33
+ Search by free text:
34
+
35
+ ```bash
36
+ fbib "Eysenbach JMIR 2011"
37
+ ```
38
+
39
+ ## Usage
40
+
41
+ ```
42
+ fbib [-h] [-f FILE] [-o OUTPUT] [-a] [-v] [--config-email EMAIL]
43
+ [inputs ...]
44
+ ```
45
+
46
+ ### Flexible input
47
+
48
+ `fbib` accepts bare DOIs, full DOI URLs, and free-text search queries, and you can mix them freely.
49
+ Inputs can be passed as separate arguments or as comma-separated lists, so all of the following work:
50
+
51
+ ```bash
52
+ # Multiple positional arguments
53
+ fbib 10.2196/jmir.1933 10.1038/nature12373
54
+
55
+ # Comma-separated string
56
+ fbib "10.2196/jmir.1933, 10.1038/nature12373"
57
+
58
+ # Full DOI URLs
59
+ fbib "https://doi.org/10.2196/jmir.1933, https://doi.org/10.1038/nature12373"
60
+
61
+ # Mix DOIs, URLs, and search queries
62
+ fbib 10.2196/jmir.1933 "Eysenbach JMIR 2011"
63
+ ```
64
+
65
+ From a file (`--file`), each line is treated the same way — one entry per line, or comma-separated on a single line:
66
+
67
+ ```bash
68
+ fbib --file dois.txt
69
+ ```
70
+
71
+ Duplicate inputs are automatically removed.
72
+
73
+ ### Write to a file
74
+
75
+ Overwrite (default):
76
+
77
+ ```bash
78
+ fbib --output refs.bib 10.2196/jmir.1933
79
+ ```
80
+
81
+ Append to an existing `.bib` file:
82
+
83
+ ```bash
84
+ fbib --append --output refs.bib 10.1038/nature12373
85
+ ```
86
+
87
+ ### Verbose mode
88
+
89
+ See which DOI was matched when searching by free text:
90
+
91
+ ```bash
92
+ fbib -v "Eysenbach JMIR 2011"
93
+ # stderr: Searching for: "Eysenbach JMIR 2011" -> DOI: 10.2196/jmir.1933
94
+ ```
95
+
96
+ ### Configure email
97
+
98
+ Crossref gives better rate limits to requests that include a contact email. Set yours once and it will be used for all future requests:
99
+
100
+ ```bash
101
+ fbib --config-email you@example.com
102
+ ```
103
+
104
+ The email is stored in `~/.config/fetchbib/config.json`. If no email is set, the placeholder `fetchbib@example.com` is used.
105
+
106
+ ## Development
107
+
108
+ Clone the repo and sync dependencies with [uv](https://docs.astral.sh/uv/):
109
+
110
+ ```bash
111
+ git clone https://github.com/mr-devs/fetchbib.git
112
+ cd fetchbib
113
+ uv sync
114
+ ```
115
+
116
+ Run unit tests:
117
+
118
+ ```bash
119
+ uv run pytest
120
+ ```
121
+
122
+ Run integration tests (hits live APIs):
123
+
124
+ ```bash
125
+ uv run pytest -m integration
126
+ ```
127
+
128
+ ## License
129
+
130
+ MIT
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "fetchbib"
7
+ version = "0.1.0"
8
+ description = "Resolve DOIs and search queries into formatted BibTeX entries."
9
+ requires-python = ">=3.9"
10
+ dependencies = ["requests"]
11
+
12
+ [project.scripts]
13
+ fbib = "fetchbib.cli:main"
14
+
15
+ [tool.setuptools.packages.find]
16
+ where = ["src"]
17
+
18
+ [dependency-groups]
19
+ dev = ["pytest"]
20
+
21
+ [tool.pytest.ini_options]
22
+ markers = ["integration: tests that hit live APIs (deselect with '-m not integration')"]
23
+ addopts = "-m 'not integration'"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,149 @@
1
+ """Command-line interface for fetchbib.
2
+
3
+ Entry point: ``fbib``
4
+ """
5
+
6
+ import argparse
7
+ import sys
8
+
9
+ from fetchbib import config
10
+ from fetchbib.formatter import format_bibtex
11
+ from fetchbib.resolver import (
12
+ ResolverError,
13
+ is_doi,
14
+ normalize_doi_input,
15
+ resolve_doi,
16
+ search_crossref,
17
+ )
18
+
19
+
20
def main() -> None:
    """Parse arguments and resolve each input to formatted BibTeX.

    Entry point for the ``fbib`` console script. Inputs are gathered from the
    positional arguments and ``--file``; each one is resolved independently,
    so a failing input is reported on stderr without stopping the others.
    Successful entries are separated by a blank line and written to stdout,
    or to ``--output`` when it is given.

    Exit status:
        * 0 after ``--config-email`` has stored the address.
        * 1 when no inputs were supplied, when any input failed to resolve,
          or when the output file could not be written.
        * 2 (via ``parser.error``) when ``--append`` is used without
          ``--output``.
    """
    parser = argparse.ArgumentParser(
        prog="fbib",
        description="Resolve DOIs or search queries into formatted BibTeX.",
    )
    parser.add_argument(
        "inputs",
        nargs="*",
        help="DOIs or search queries (comma-separated values are split)",
    )
    parser.add_argument(
        "-f",
        "--file",
        help="Path to a text file with one input per line",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Write results to this file instead of stdout",
    )
    parser.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Append to the output file instead of overwriting (requires --output)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Print which DOI was selected for search queries",
    )
    parser.add_argument(
        "--config-email",
        metavar="EMAIL",
        help="Set the email used in the User-Agent header and exit",
    )

    args = parser.parse_args()

    # --config-email: save and exit immediately
    if args.config_email:
        config.set_email(args.config_email)
        sys.exit(0)

    # The help text promises that --append requires --output; enforce it
    # instead of silently printing to stdout.
    if args.append and not args.output:
        parser.error("--append requires --output")

    # Collect inputs
    queries = _collect_inputs(args)
    if not queries:
        print(
            "Error: no inputs provided. Pass DOIs/queries as arguments or use --file.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Resolve each input; one failure must not prevent the rest from running.
    results: list[str] = []
    had_error = False

    for query in queries:
        try:
            results.append(_resolve_single(query, verbose=args.verbose))
        except ResolverError as exc:
            print(f"Error resolving '{query}': {exc}", file=sys.stderr)
            had_error = True

    # Only touch the destination when there is something to write, so that a
    # run in which every input failed does not truncate an existing .bib file.
    if results:
        output_text = "\n\n".join(results) + "\n"
        if args.output:
            mode = "a" if args.append else "w"
            try:
                # BibTeX routinely contains non-ASCII author names; do not
                # depend on the platform's default encoding.
                with open(args.output, mode, encoding="utf-8") as f:
                    f.write(output_text)
            except OSError as exc:
                print(
                    f"Error: could not write to {args.output}: {exc}",
                    file=sys.stderr,
                )
                sys.exit(1)
        else:
            print(output_text, end="")

    if had_error:
        sys.exit(1)
101
+
102
+
103
def _collect_inputs(args: argparse.Namespace) -> list[str]:
    """Gather inputs from positional args and --file, deduplicated in order.

    Args:
        args: Parsed CLI namespace; ``args.inputs`` and ``args.file`` are read.

    Returns:
        The non-empty, whitespace-stripped inputs in first-seen order with
        exact duplicates removed.

    Exits with status 1 (after printing to stderr) when ``--file`` cannot be
    read.
    """
    raw: list[str] = []

    # Positional args (each may be comma-separated)
    for arg in args.inputs or []:
        raw.extend(_split_and_strip(arg))

    # File input (each line may also be comma-separated)
    if args.file:
        try:
            # utf-8-sig transparently drops a leading BOM, which would
            # otherwise be glued to the first input on the first line.
            with open(args.file, encoding="utf-8-sig") as f:
                for line in f:
                    raw.extend(_split_and_strip(line))
        except FileNotFoundError:
            print(f"Error: file not found: {args.file}", file=sys.stderr)
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as exc:
            # Permission problems, directories, undecodable bytes, ...
            print(
                f"Error: could not read file {args.file}: {exc}",
                file=sys.stderr,
            )
            sys.exit(1)

    # dict preserves insertion order, so this deduplicates while keeping the
    # first occurrence of each input.
    return list(dict.fromkeys(raw))
129
+
130
+
131
+ def _split_and_strip(value: str) -> list[str]:
132
+ """Split a string on commas and return non-empty stripped parts."""
133
+ return [part.strip() for part in value.split(",") if part.strip()]
134
+
135
+
136
def _resolve_single(query: str, *, verbose: bool) -> str:
    """Turn one input (DOI, DOI URL, or search text) into formatted BibTeX.

    The input is first normalized; anything that then does not look like a
    DOI is sent to the Crossref search, and with *verbose* the chosen DOI is
    reported on stderr.
    """
    candidate = normalize_doi_input(query)

    if not is_doi(candidate):
        found = search_crossref(candidate)
        if verbose:
            print(
                f'Searching for: "{candidate}" -> DOI: {found}',
                file=sys.stderr,
            )
        target = found
    else:
        target = candidate

    return format_bibtex(resolve_doi(target))
@@ -0,0 +1,40 @@
1
+ """User configuration for fetchbib.
2
+
3
+ Reads and writes a JSON config file at ~/.config/fetchbib/config.json.
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+
9
+ CONFIG_DIR = Path.home() / ".config" / "fetchbib"
10
+ CONFIG_FILE = CONFIG_DIR / "config.json"
11
+
12
+ DEFAULT_EMAIL = "fetchbib@example.com"
13
+
14
+
15
def get_email() -> str:
    """Look up the stored email, falling back to DEFAULT_EMAIL when absent."""
    settings = _read_config()
    if "email" in settings:
        return settings["email"]
    return DEFAULT_EMAIL
19
+
20
+
21
def set_email(email: str) -> None:
    """Store *email* in the config file, keeping any other saved settings."""
    settings = _read_config()
    settings.update(email=email)
    _write_config(settings)
26
+
27
+
28
def _read_config() -> dict:
    """Read the config file, returning an empty dict when it is unusable.

    Returns:
        The parsed JSON object, or ``{}`` when the file does not exist, does
        not contain valid JSON, or holds something other than a JSON object.
    """
    try:
        with open(CONFIG_FILE, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        # No config written yet.
        return {}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A damaged preferences file is treated as empty so that
        # `--config-email` can still overwrite it instead of crashing here.
        return {}
    # Callers use .get() and item assignment, so only a JSON object is usable.
    return cfg if isinstance(cfg, dict) else {}
34
+
35
+
36
def _write_config(cfg: dict) -> None:
    """Serialize *cfg* as indented JSON into CONFIG_FILE.

    The parent directory is created first when it is missing.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    with CONFIG_FILE.open("w") as handle:
        json.dump(cfg, handle, indent=2)
@@ -0,0 +1,102 @@
1
+ """BibTeX string formatter.
2
+
3
+ Transforms raw (often single-line) BibTeX into a clean, readable format
4
+ with alphabetized fields, 2-space indentation, and proper line breaks.
5
+ """
6
+
7
+
8
def format_bibtex(raw: str) -> str:
    """Format a raw BibTeX entry into a clean, readable string.

    Rules:
    - Entry header (@type{key,) stays on the first line.
    - Each field is on its own indented line.
    - Fields are alphabetized (case-insensitively) by name.
    - Closing brace is on its own line.
    - Trailing commas are removed.

    Commas inside braced values (e.g. author names) are preserved — only
    top-level commas are treated as field separators.

    Args:
        raw: A single BibTeX entry, possibly on one line.

    Returns:
        The reformatted entry. Input in which no ``name = value`` fields can
        be found (empty text, ``@article{Key}``, ...) is returned stripped
        but otherwise unchanged.
    """
    entry = raw.strip()
    header, fields_block = _split_header(entry)
    fields = _parse_fields(fields_block)

    if not fields:
        # Nothing to lay out. Rebuilding the entry here would append a stray
        # closing brace or drop unparsed content, so hand it back untouched.
        return entry

    fields.sort(key=lambda kv: kv[0].lower())

    # NOTE(review): the module docstring promises 2-space indentation while
    # this literal shows a single space — confirm the whitespace was not
    # collapsed by the diff rendering before changing it.
    field_lines = [f" {key} = {value}" for key, value in fields]
    return header + "\n" + ",\n".join(field_lines) + "\n}"
27
+
28
+
29
+ def _split_header(entry: str) -> tuple[str, str]:
30
+ """Split a BibTeX entry into header and fields block.
31
+
32
+ The header is everything up to and including the first top-level comma
33
+ after the citation key (e.g. '@article{Key2020,').
34
+ The fields block is the rest, minus the final closing '}'.
35
+ """
36
+ # Find the first comma that is not inside braces — this ends the key.
37
+ depth = 0
38
+ for i, ch in enumerate(entry):
39
+ if ch == "{":
40
+ depth += 1
41
+ elif ch == "}":
42
+ depth -= 1
43
+ elif ch == "," and depth == 1:
44
+ header = entry[: i + 1]
45
+ rest = entry[i + 1 :]
46
+ # Strip the outermost closing brace from the rest
47
+ rest = rest.strip()
48
+ if rest.endswith("}"):
49
+ rest = rest[:-1].strip()
50
+ return header, rest
51
+ # Fallback: no fields found
52
+ return entry, ""
53
+
54
+
55
def _parse_fields(block: str) -> list[tuple[str, str]]:
    """Turn the field text of an entry into ``(name, value)`` tuples.

    The text is cut at top-level commas only, so commas nested in braces stay
    inside their value. Each piece is divided at its first ``=``; pieces that
    are blank or contain no ``=`` are ignored.
    """
    if not block.strip():
        return []

    pairs = []
    for chunk in _split_top_level(block, ","):
        name, separator, content = chunk.strip().partition("=")
        if not separator:
            # Blank piece, or text without a name/value divider.
            continue
        pairs.append((name.strip(), content.strip()))
    return pairs
76
+
77
+
78
+ def _split_top_level(text: str, delimiter: str) -> list[str]:
79
+ """Split text on a delimiter, but only at brace depth 0."""
80
+ parts = []
81
+ depth = 0
82
+ current: list[str] = []
83
+
84
+ for ch in text:
85
+ if ch == "{":
86
+ depth += 1
87
+ current.append(ch)
88
+ elif ch == "}":
89
+ depth -= 1
90
+ current.append(ch)
91
+ elif ch == delimiter and depth == 0:
92
+ parts.append("".join(current))
93
+ current = []
94
+ else:
95
+ current.append(ch)
96
+
97
+ # Append whatever is left
98
+ trailing = "".join(current).strip()
99
+ if trailing:
100
+ parts.append(trailing)
101
+
102
+ return parts
@@ -0,0 +1,95 @@
1
+ """DOI resolution and Crossref search.
2
+
3
+ Provides functions to check whether a string is a DOI, resolve a DOI to BibTeX,
4
+ and search Crossref for a DOI, plus an orchestrator that combines them.
5
+ """
6
+
7
+ import re
8
+
9
+ import requests
10
+
11
+ from fetchbib import config
12
+
13
+ DOI_PATTERN = re.compile(r"^10\.\d{4,9}/[-._;()/:A-Z0-9]+$", re.IGNORECASE)
14
+ DOI_URL_PREFIXES = (
15
+ "https://doi.org/",
16
+ "http://doi.org/",
17
+ "https://dx.doi.org/",
18
+ "http://dx.doi.org/",
19
+ )
20
+
21
+ DOI_BASE_URL = "https://doi.org/"
22
+ CROSSREF_API_URL = "https://api.crossref.org/works"
23
+
24
+
25
class ResolverError(Exception):
    """Signals that resolving a DOI or searching Crossref did not succeed."""
27
+
28
+
29
def normalize_doi_input(value: str) -> str:
    """Strip common DOI URL prefixes, returning a bare DOI if possible.

    For example, ``https://doi.org/10.2196/jmir.1933`` becomes
    ``10.2196/jmir.1933``. The prefix is matched case-insensitively because
    URL schemes and host names are not case-sensitive, so
    ``HTTPS://DOI.ORG/10.2196/jmir.1933`` is handled too. Strings that do not
    start with a known prefix are returned unchanged.
    """
    for prefix in DOI_URL_PREFIXES:
        # Compare only the leading slice so the remainder keeps its original
        # casing and character positions.
        if value[: len(prefix)].lower() == prefix:
            return value[len(prefix) :]
    return value
39
+
40
+
41
def is_doi(value: str) -> bool:
    """Return True if *value* looks like a bare DOI (e.g. 10.xxxx/yyyy).

    The whole string has to match. ``fullmatch`` is used rather than
    ``match`` because ``$`` also matches just before a trailing newline,
    which would let ``"10.1000/xyz\\n"`` through and into the request URL.
    """
    return DOI_PATTERN.fullmatch(value) is not None
44
+
45
+
46
def get_user_agent() -> str:
    """Build the User-Agent string using the configured email.

    The product token carries the installed package version instead of a
    hard-coded number, so it cannot drift from the released version (the
    literal previously said ``1.0`` while the package metadata says 0.1.0).
    """
    # Local import: keeps this change self-contained within the function.
    from importlib.metadata import PackageNotFoundError, version

    try:
        release = version("fetchbib")
    except PackageNotFoundError:
        # Running from a source tree that has not been installed.
        release = "unknown"

    email = config.get_email()
    return f"fetchbib/{release} (mailto:{email})"
50
+
51
+
52
def resolve_doi(doi: str, *, timeout: float = 30.0) -> str:
    """Fetch BibTeX for a DOI from doi.org.

    Args:
        doi: The bare DOI to resolve.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body, i.e. the raw BibTeX text.

    Raises:
        ResolverError: On a transport failure (connection error, timeout,
            ...) or a non-200 response.
    """
    headers = {
        "Accept": "text/bibliography; style=bibtex",
        "User-Agent": get_user_agent(),
    }
    try:
        resp = requests.get(f"{DOI_BASE_URL}{doi}", headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Surface network problems as the error type the CLI already handles,
        # so one unreachable DOI does not abort the whole run.
        raise ResolverError(f"DOI resolution failed for '{doi}': {exc}") from exc

    if resp.status_code != 200:
        raise ResolverError(
            f"DOI resolution failed for '{doi}': HTTP {resp.status_code}"
        )

    # For text/* responses that declare no charset, requests falls back to
    # ISO-8859-1, which garbles non-ASCII author names. Assume UTF-8 in that
    # case; an explicitly declared charset is left alone.
    content_type = resp.headers.get("Content-Type", "")
    if "charset" not in content_type.lower():
        resp.encoding = "utf-8"
    return resp.text
67
+
68
+
69
def search_crossref(query: str, *, timeout: float = 30.0) -> str:
    """Search Crossref and return the DOI of the first result.

    Args:
        query: Free-text search string.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``DOI`` value of the first item in the response.

    Raises:
        ResolverError: On a transport failure, a non-200 response, a
            response body that is not the expected JSON structure, or an
            empty result list.
    """
    headers = {"User-Agent": get_user_agent()}
    try:
        resp = requests.get(
            CROSSREF_API_URL,
            params={"query": query},
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        raise ResolverError(f"Crossref search failed: {exc}") from exc

    if resp.status_code != 200:
        raise ResolverError(f"Crossref search failed: HTTP {resp.status_code}")

    try:
        items = resp.json()["message"]["items"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers a body that is not JSON; KeyError/TypeError cover
        # JSON that lacks the message/items structure.
        raise ResolverError(
            "Crossref search failed: unexpected response format"
        ) from exc

    if not items:
        raise ResolverError(f"No results found for query: '{query}'")

    try:
        return items[0]["DOI"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ResolverError(
            "Crossref search failed: first result has no DOI"
        ) from exc
82
+
83
+
84
def resolve(query: str) -> str:
    """Produce raw BibTeX for a DOI, a DOI URL, or free-text search terms.

    A DOI URL prefix is removed first. If what remains is a DOI it is fetched
    directly; otherwise Crossref is searched and the DOI of its top result is
    fetched instead.
    """
    candidate = normalize_doi_input(query)
    doi = candidate if is_doi(candidate) else search_crossref(candidate)
    return resolve_doi(doi)
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: fetchbib
3
+ Version: 0.1.0
4
+ Summary: Resolve DOIs and search queries into formatted BibTeX entries.
5
+ Requires-Python: >=3.9
6
+ License-File: LICENSE
7
+ Requires-Dist: requests
8
+ Dynamic: license-file
@@ -0,0 +1,18 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/fetchbib/__init__.py
5
+ src/fetchbib/cli.py
6
+ src/fetchbib/config.py
7
+ src/fetchbib/formatter.py
8
+ src/fetchbib/resolver.py
9
+ src/fetchbib.egg-info/PKG-INFO
10
+ src/fetchbib.egg-info/SOURCES.txt
11
+ src/fetchbib.egg-info/dependency_links.txt
12
+ src/fetchbib.egg-info/entry_points.txt
13
+ src/fetchbib.egg-info/requires.txt
14
+ src/fetchbib.egg-info/top_level.txt
15
+ tests/test_cli.py
16
+ tests/test_formatter.py
17
+ tests/test_integration.py
18
+ tests/test_resolver.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ fbib = fetchbib.cli:main
@@ -0,0 +1 @@
1
+ requests
@@ -0,0 +1 @@
1
+ fetchbib
@@ -0,0 +1,257 @@
1
+ """Tests for the CLI (Phase 3).
2
+
3
+ All resolver calls are mocked — no network access needed.
4
+ """
5
+
6
+ import json
7
+ import sys
8
+ import tempfile
9
+ from io import StringIO
10
+ from pathlib import Path
11
+ from unittest.mock import patch
12
+
13
+ import pytest
14
+
15
+ from fetchbib.resolver import ResolverError
16
+
17
+ # Sample raw BibTeX that the mock resolver returns (unformatted).
18
+ RAW_BIBTEX_A = "@article{Key1,author={Alice},year={2020}}"
19
+ RAW_BIBTEX_B = "@article{Key2,author={Bob},year={2021}}"
20
+
21
+
22
def run_cli(args: list[str]) -> tuple[int, str, str]:
    """Invoke the CLI's main() with *args* and capture what it produces.

    Returns ``(exit_code, stdout, stderr)``. The exit code is 0 when main()
    returns normally or exits with ``None``.
    """
    from fetchbib.cli import main

    out_buffer, err_buffer = StringIO(), StringIO()
    status = 0

    # patch.object puts the previous sys.argv back on exit, even on errors.
    with patch.object(sys, "argv", ["fbib"] + args):
        try:
            with patch("sys.stdout", out_buffer), patch("sys.stderr", err_buffer):
                main()
        except SystemExit as stop:
            status = 0 if stop.code is None else stop.code

    return status, out_buffer.getvalue(), err_buffer.getvalue()
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Input parsing
46
+ # ---------------------------------------------------------------------------
47
+
48
+
49
class TestInputParsing:
    """Tests for how the CLI collects and processes inputs.

    File-based tests use pytest's ``tmp_path`` fixture so that no temporary
    files are left behind and the input file is closed before the CLI reads
    it.
    """

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_single_positional_doi(self, mock_resolve):
        code, stdout, _ = run_cli(["10.2196/jmir.1933"])

        mock_resolve.assert_called_once_with("10.2196/jmir.1933")
        assert "@article{Key1," in stdout
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
    def test_multiple_positional_arguments(self, mock_resolve):
        code, stdout, _ = run_cli(["10.2196/jmir.1933", "10.1000/xyz123"])

        assert mock_resolve.call_count == 2
        assert "Key1" in stdout
        assert "Key2" in stdout
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
    def test_comma_separated_string_is_split(self, mock_resolve):
        code, stdout, _ = run_cli(["10.2196/jmir.1933, 10.1000/xyz123"])

        assert mock_resolve.call_count == 2
        calls = [c.args[0] for c in mock_resolve.call_args_list]
        assert "10.2196/jmir.1933" in calls
        assert "10.1000/xyz123" in calls
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
    def test_file_input_reads_lines(self, mock_resolve, tmp_path):
        input_file = tmp_path / "dois.txt"
        # The blank line in the middle must be ignored.
        input_file.write_text("10.2196/jmir.1933\n\n10.1000/xyz123\n")

        code, stdout, _ = run_cli(["--file", str(input_file)])

        assert mock_resolve.call_count == 2
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
    def test_file_input_splits_comma_separated_line(self, mock_resolve, tmp_path):
        input_file = tmp_path / "dois.txt"
        input_file.write_text("10.2196/jmir.1933, 10.1000/xyz123\n")

        code, stdout, _ = run_cli(["--file", str(input_file)])

        assert mock_resolve.call_count == 2
        calls = [c.args[0] for c in mock_resolve.call_args_list]
        assert "10.2196/jmir.1933" in calls
        assert "10.1000/xyz123" in calls
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_doi_url_is_normalized(self, mock_resolve):
        code, stdout, _ = run_cli(["https://doi.org/10.2196/jmir.1933"])

        mock_resolve.assert_called_once_with("10.2196/jmir.1933")
        assert "@article{Key1," in stdout
        assert code == 0

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_duplicate_inputs_are_deduplicated(self, mock_resolve):
        code, _, _ = run_cli(["10.2196/jmir.1933", "10.2196/jmir.1933"])

        mock_resolve.assert_called_once()
        assert code == 0
116
+
117
+
118
+ # ---------------------------------------------------------------------------
119
+ # Error handling
120
+ # ---------------------------------------------------------------------------
121
+
122
+
123
class TestErrorHandling:
    """Failure scenarios: what lands on stderr and which status is returned."""

    def test_nonexistent_file_exits_1(self):
        """A --file path that does not exist is named on stderr; status is 1."""
        status, _, err = run_cli(["--file", "nonexistent_file.txt"])

        assert "nonexistent_file.txt" in err
        assert status == 1

    @patch(
        "fetchbib.cli.resolve_doi",
        side_effect=[ResolverError("fail"), RAW_BIBTEX_B],
    )
    def test_resolution_error_does_not_stop_others(self, mock_resolve):
        """The second input is still resolved after the first one fails."""
        status, out, err = run_cli(["10.1234/bad", "10.1234/good"])

        assert status == 1
        assert "fail" in err
        assert "Key2" in out

    def test_no_inputs_exits_1(self):
        """Running with no arguments prints something to stderr; status is 1."""
        status, _, err = run_cli([])

        assert err  # should contain some usage hint
        assert status == 1
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Verbose mode
154
+ # ---------------------------------------------------------------------------
155
+
156
+
157
class TestVerbose:
    """Checks for the -v / --verbose switch."""

    def test_verbose_prints_search_mapping(self):
        """With -v, stderr shows both the search text and the DOI chosen."""
        doi = "10.2196/jmir.1933"
        search_text = "Eysenbach JMIR 2011"

        with patch("fetchbib.cli.search_crossref", return_value=doi), patch(
            "fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A
        ):
            status, _, err = run_cli(["-v", search_text])

        assert search_text in err
        assert doi in err
        assert status == 0
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Output file
172
+ # ---------------------------------------------------------------------------
173
+
174
+
175
class TestOutputFile:
    """Tests for --output and --append flags.

    Output paths live under pytest's ``tmp_path`` so nothing is left behind
    after the run.
    """

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_output_writes_to_file(self, mock_resolve, tmp_path):
        path = tmp_path / "refs.bib"

        code, stdout, _ = run_cli(["--output", str(path), "10.1234/test"])

        assert code == 0
        assert stdout == ""  # nothing to stdout
        content = path.read_text()
        assert "Key1" in content

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_output_overwrites_by_default(self, mock_resolve, tmp_path):
        path = tmp_path / "refs.bib"
        path.write_text("OLD CONTENT\n")

        run_cli(["--output", str(path), "10.1234/test"])

        content = path.read_text()
        assert "OLD CONTENT" not in content
        assert "Key1" in content

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_B)
    def test_append_flag_preserves_existing(self, mock_resolve, tmp_path):
        path = tmp_path / "refs.bib"
        path.write_text("EXISTING ENTRY\n\n")

        code, stdout, _ = run_cli(["--append", "--output", str(path), "10.1234/test"])

        assert code == 0
        assert stdout == ""
        content = path.read_text()
        assert "EXISTING ENTRY" in content
        assert "Key2" in content
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # Config email
219
+ # ---------------------------------------------------------------------------
220
+
221
+
222
class TestConfigEmail:
    """Tests for --config-email."""

    def test_config_email_saves_and_exits(self, tmp_path):
        """--config-email writes the address to the config file and exits 0."""
        config_file = tmp_path / "config.json"
        # Comma-separated context managers: the parenthesized form is only
        # official grammar from Python 3.10, and the package declares >=3.9.
        with patch("fetchbib.config.CONFIG_FILE", config_file), patch(
            "fetchbib.config.CONFIG_DIR", tmp_path
        ):
            code, _, _ = run_cli(["--config-email", "user@university.edu"])

        assert code == 0
        saved = json.loads(config_file.read_text())
        assert saved["email"] == "user@university.edu"

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_config_email_used_by_resolver(self, mock_resolve, tmp_path):
        """The User-Agent string contains the email stored in the config."""
        from fetchbib.resolver import get_user_agent

        config_file = tmp_path / "config.json"
        config_file.write_text(json.dumps({"email": "custom@uni.edu"}))

        with patch("fetchbib.config.CONFIG_FILE", config_file), patch(
            "fetchbib.config.CONFIG_DIR", tmp_path
        ):
            run_cli(["10.1234/test"])
            # resolve_doi is mocked, so the request headers cannot be
            # inspected; check the function that builds the header instead.
            ua = get_user_agent()

        mock_resolve.assert_called_once_with("10.1234/test")
        assert "custom@uni.edu" in ua
@@ -0,0 +1,72 @@
1
+ """Tests for the BibTeX formatter (Phase 1)."""
2
+
3
+ from fetchbib.formatter import format_bibtex
4
+
5
+
6
class TestFormatBibtex:
    """Behavioral checks for format_bibtex()."""

    def test_single_line_is_formatted(self):
        """A one-line entry comes back with one sorted, indented field per line."""
        source = "".join(
            [
                "@article{Eysenbach2011,",
                "doi={10.2196/jmir.1933},",
                "title={Can Tweets Predict Citations?},",
                "author={Eysenbach, Gunther},",
                "year={2011},",
                "journal={JMIR}}",
            ]
        )
        wanted = "\n".join(
            [
                "@article{Eysenbach2011,",
                " author = {Eysenbach, Gunther},",
                " doi = {10.2196/jmir.1933},",
                " journal = {JMIR},",
                " title = {Can Tweets Predict Citations?},",
                " year = {2011}",
                "}",
            ]
        )
        assert format_bibtex(source) == wanted

    def test_idempotent(self):
        """Formatting text that is already in the target layout changes nothing."""
        already_clean = "\n".join(
            [
                "@article{Eysenbach2011,",
                " author = {Eysenbach, Gunther},",
                " doi = {10.2196/jmir.1933},",
                " journal = {JMIR},",
                " title = {Can Tweets Predict Citations?},",
                " year = {2011}",
                "}",
            ]
        )
        assert format_bibtex(already_clean) == already_clean

    def test_author_commas_preserved(self):
        """A comma within a braced value does not start a new field."""
        source = (
            "@article{Key2020,"
            + "author={Last, First and Last2, First2},"
            + "title={A Title},"
            + "year={2020}}"
        )
        formatted = format_bibtex(source)
        assert "author = {Last, First and Last2, First2}" in formatted

    def test_trailing_comma_removed(self):
        """The last field line carries no comma even if the input had one."""
        source = "@article{Key2020,author={Someone},year={2020},}"
        formatted = format_bibtex(source)
        # Second-to-last line is the final field; the last line is '}'.
        final_field = formatted.strip().split("\n")[-2]
        assert not final_field.rstrip().endswith(",")

    def test_nested_braces_preserved(self):
        """Braces nested inside a value survive formatting untouched."""
        source = (
            "@inproceedings{Key2021,"
            + "title={A {GPU}-Accelerated Approach},"
            + "author={Smith, John},"
            + "year={2021}}"
        )
        formatted = format_bibtex(source)
        assert "title = {A {GPU}-Accelerated Approach}" in formatted
@@ -0,0 +1,58 @@
1
+ """Integration tests that hit live APIs.
2
+
3
+ Skipped by default. Run with:
4
+ pytest -m integration
5
+ """
6
+
7
+ import tempfile
8
+
9
+ import pytest
10
+
11
+ from fetchbib.formatter import format_bibtex
12
+ from fetchbib.resolver import resolve, resolve_doi, search_crossref
13
+
14
+ pytestmark = pytest.mark.integration
15
+
16
+
17
class TestLiveResolution:
    """End-to-end tests against doi.org and Crossref."""

    def test_doi_resolution(self):
        raw = resolve_doi("10.2196/jmir.1933")
        result = format_bibtex(raw)
        assert "Eysenbach" in result
        assert "2011" in result

    def test_free_text_search(self):
        doi = search_crossref("Eysenbach JMIR 2011")
        raw = resolve_doi(doi)
        result = format_bibtex(raw)
        assert "Eysenbach" in result

    def test_file_input_via_cli(self, tmp_path):
        """Resolve a DOI through the full CLI path."""
        import sys
        from io import StringIO
        from unittest.mock import patch

        from fetchbib.cli import main

        # tmp_path is removed by pytest, so no temporary file is left behind.
        input_file = tmp_path / "dois.txt"
        input_file.write_text("10.2196/jmir.1933\n")

        stdout_capture = StringIO()

        # patch.object restores sys.argv automatically, even if main() raises.
        with patch.object(sys, "argv", ["fbib", "--file", str(input_file)]):
            try:
                with patch("sys.stdout", stdout_capture):
                    main()
            except SystemExit:
                # The exit status is not under test; the captured output is.
                pass

        output = stdout_capture.getvalue()
        assert "Eysenbach" in output
@@ -0,0 +1,233 @@
1
+ """Tests for the resolver (Phase 2).
2
+
3
+ All HTTP calls are mocked — no network access needed.
4
+ """
5
+
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import pytest
9
+
10
+ from fetchbib.resolver import (
11
+ ResolverError,
12
+ is_doi,
13
+ normalize_doi_input,
14
+ resolve,
15
+ resolve_doi,
16
+ search_crossref,
17
+ )
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # is_doi
21
+ # ---------------------------------------------------------------------------
22
+
23
+
24
class TestIsDoi:
    """Checks which strings is_doi() recognises as a DOI."""

    # Inputs that must be accepted.
    _ACCEPTED = [
        "10.2196/jmir.1933",
        "10.1000/xyz123",
        "10.1234/some-thing_(here)",
    ]

    # Inputs that must be rejected.
    _REJECTED = [
        "not a doi",
        "10.12345",  # no suffix after slash
        "",
        "http://doi.org/10.2196/jmir.1933",  # full URL
    ]

    @pytest.mark.parametrize("value", _ACCEPTED)
    def test_valid_dois(self, value):
        """Each accepted input makes is_doi() return exactly True."""
        verdict = is_doi(value)
        assert verdict is True

    @pytest.mark.parametrize("value", _REJECTED)
    def test_invalid_inputs(self, value):
        """Each rejected input makes is_doi() return exactly False."""
        verdict = is_doi(value)
        assert verdict is False
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # normalize_doi_input
53
+ # ---------------------------------------------------------------------------
54
+
55
+
56
class TestNormalizeDOIInput:
    """Checks that normalize_doi_input() removes DOI URL prefixes."""

    # The bare DOI every URL form below should reduce to.
    _DOI = "10.2196/jmir.1933"

    @pytest.mark.parametrize(
        "url,expected",
        [
            ("https://doi.org/" + _DOI, _DOI),
            ("http://doi.org/" + _DOI, _DOI),
            ("https://dx.doi.org/" + _DOI, _DOI),
            ("http://dx.doi.org/" + _DOI, _DOI),
        ],
    )
    def test_strips_doi_url_prefixes(self, url, expected):
        """http/https and doi.org/dx.doi.org prefixes are all stripped."""
        stripped = normalize_doi_input(url)
        assert stripped == expected

    def test_bare_doi_unchanged(self):
        """A DOI without any URL prefix is returned as given."""
        bare = self._DOI
        assert normalize_doi_input(bare) == bare

    def test_non_doi_string_unchanged(self):
        """Free text that is not a DOI is returned as given."""
        query = "Eysenbach JMIR 2011"
        assert normalize_doi_input(query) == query
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # resolve_doi
80
+ # ---------------------------------------------------------------------------
81
+
82
+
83
class TestResolveDoi:
    """Checks resolve_doi() against a mocked requests.get."""

    @patch("fetchbib.resolver.requests.get")
    def test_returns_bibtex_on_success(self, mock_get):
        """A 200 response's text is returned, and the expected headers are sent."""
        bibtex = "@article{Key, author={Someone}, year={2020}}"
        mock_get.return_value = MagicMock(status_code=200, text=bibtex)

        assert resolve_doi("10.1234/test") == bibtex

        # Inspect the headers passed to requests.get.
        recorded = mock_get.call_args
        headers = recorded.kwargs.get("headers")
        if not headers:
            headers = recorded[1].get("headers", {})
        assert headers["Accept"] == "text/bibliography; style=bibtex"
        assert "fetchbib" in headers["User-Agent"]

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_http_failure(self, mock_get):
        """A 404 response raises ResolverError mentioning the status code."""
        mock_get.return_value = MagicMock(status_code=404)

        with pytest.raises(ResolverError, match="404"):
            resolve_doi("10.1234/missing")
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # search_crossref
115
+ # ---------------------------------------------------------------------------
116
+
117
+
118
class TestSearchCrossref:
    """Checks search_crossref() against a mocked requests.get."""

    @patch("fetchbib.resolver.requests.get")
    def test_extracts_doi_from_first_result(self, mock_get):
        """With several items in the response, the first item's DOI is returned."""
        items = [
            {"DOI": "10.2196/jmir.1933"},
            {"DOI": "10.9999/other"},
        ]
        reply = MagicMock(status_code=200)
        reply.json.return_value = {"message": {"items": items}}
        mock_get.return_value = reply

        found = search_crossref("Eysenbach JMIR 2011")
        assert found == "10.2196/jmir.1933"

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_empty_results(self, mock_get):
        """An empty item list raises ResolverError with a 'no results' message."""
        reply = MagicMock(status_code=200)
        reply.json.return_value = {"message": {"items": []}}
        mock_get.return_value = reply

        with pytest.raises(ResolverError, match="[Nn]o results"):
            search_crossref("nonexistent gibberish query")

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_http_failure(self, mock_get):
        """A 503 response raises ResolverError mentioning the status code."""
        mock_get.return_value = MagicMock(status_code=503)

        with pytest.raises(ResolverError, match="503"):
            search_crossref("anything")
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # resolve (orchestrator)
159
+ # ---------------------------------------------------------------------------
160
+
161
+
162
class TestResolve:
    """Checks how resolve() routes input between resolve_doi and search_crossref."""

    @patch("fetchbib.resolver.resolve_doi", return_value="@article{...}")
    @patch("fetchbib.resolver.search_crossref")
    def test_routes_doi_directly(self, mock_search, mock_resolve_doi):
        """A bare DOI goes straight to resolve_doi; search is never called."""
        doi = "10.2196/jmir.1933"

        resolve(doi)

        mock_resolve_doi.assert_called_once_with("10.2196/jmir.1933")
        mock_search.assert_not_called()

    @patch("fetchbib.resolver.resolve_doi", return_value="@article{...}")
    @patch("fetchbib.resolver.search_crossref")
    def test_routes_doi_url_directly(self, mock_search, mock_resolve_doi):
        """A doi.org URL is reduced to its DOI and resolved without searching."""
        doi_url = "https://doi.org/10.2196/jmir.1933"

        resolve(doi_url)

        mock_resolve_doi.assert_called_once_with("10.2196/jmir.1933")
        mock_search.assert_not_called()

    @patch("fetchbib.resolver.resolve_doi", return_value="@article{...}")
    @patch("fetchbib.resolver.search_crossref", return_value="10.2196/jmir.1933")
    def test_routes_non_doi_through_search(self, mock_search, mock_resolve_doi):
        """Free text is searched first, then the DOI it returns is resolved."""
        query = "Eysenbach JMIR 2011"

        resolve(query)

        mock_search.assert_called_once_with("Eysenbach JMIR 2011")
        mock_resolve_doi.assert_called_once_with("10.2196/jmir.1933")
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Config / User-Agent
199
+ # ---------------------------------------------------------------------------
200
+
201
+
202
class TestUserAgentConfig:
    """Checks that the email from config.get_email appears in the User-Agent."""

    @staticmethod
    def _headers_sent(mock_get):
        """Return the headers passed to the mocked requests.get call."""
        recorded = mock_get.call_args
        return recorded.kwargs.get("headers") or recorded[1]["headers"]

    @patch("fetchbib.resolver.requests.get")
    @patch("fetchbib.resolver.config.get_email", return_value="custom@university.edu")
    def test_custom_email_in_user_agent(self, _mock_email, mock_get):
        """The patched custom address appears in the User-Agent header."""
        mock_get.return_value = MagicMock(status_code=200, text="@article{...}")

        resolve_doi("10.1234/test")

        user_agent = self._headers_sent(mock_get)["User-Agent"]
        assert "custom@university.edu" in user_agent

    @patch("fetchbib.resolver.requests.get")
    @patch("fetchbib.resolver.config.get_email", return_value="fetchbib@example.com")
    def test_default_email_in_user_agent(self, _mock_email, mock_get):
        """The patched fetchbib@example.com address appears in the User-Agent header."""
        mock_get.return_value = MagicMock(status_code=200, text="@article{...}")

        resolve_doi("10.1234/test")

        user_agent = self._headers_sent(mock_get)["User-Agent"]
        assert "fetchbib@example.com" in user_agent