fetchbib 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fetchbib-0.1.0/LICENSE +21 -0
- fetchbib-0.1.0/PKG-INFO +8 -0
- fetchbib-0.1.0/README.md +130 -0
- fetchbib-0.1.0/pyproject.toml +23 -0
- fetchbib-0.1.0/setup.cfg +4 -0
- fetchbib-0.1.0/src/fetchbib/__init__.py +0 -0
- fetchbib-0.1.0/src/fetchbib/cli.py +149 -0
- fetchbib-0.1.0/src/fetchbib/config.py +40 -0
- fetchbib-0.1.0/src/fetchbib/formatter.py +102 -0
- fetchbib-0.1.0/src/fetchbib/resolver.py +95 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/PKG-INFO +8 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/SOURCES.txt +18 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/dependency_links.txt +1 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/entry_points.txt +2 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/requires.txt +1 -0
- fetchbib-0.1.0/src/fetchbib.egg-info/top_level.txt +1 -0
- fetchbib-0.1.0/tests/test_cli.py +257 -0
- fetchbib-0.1.0/tests/test_formatter.py +72 -0
- fetchbib-0.1.0/tests/test_integration.py +58 -0
- fetchbib-0.1.0/tests/test_resolver.py +233 -0
fetchbib-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Matthew R. DeVerna
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
fetchbib-0.1.0/PKG-INFO
ADDED
fetchbib-0.1.0/README.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# fetchbib
|
|
2
|
+
|
|
3
|
+
A command-line tool to resolve DOIs and free-text search queries into formatted BibTeX entries.
|
|
4
|
+
Powered by [doi.org](https://www.doi.org/) and the [Crossref API](https://api.crossref.org/).
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install fetchbib
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Requires Python 3.9+.
|
|
13
|
+
|
|
14
|
+
## Quick start
|
|
15
|
+
|
|
16
|
+
Fetch BibTeX by DOI (bare or full URL):
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
fbib 10.2196/jmir.1933
|
|
20
|
+
fbib https://doi.org/10.2196/jmir.1933
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
```bibtex
|
|
24
|
+
@article{Eysenbach2011,
|
|
25
|
+
author = {Eysenbach, Gunther},
|
|
26
|
+
doi = {10.2196/jmir.1933},
|
|
27
|
+
journal = {Journal of Medical Internet Research},
|
|
28
|
+
title = {Can Tweets Predict Citations? Metrics of Social Impact Based on Twitter and Correlation with Traditional Metrics of Scientific Impact},
|
|
29
|
+
year = {2011}
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Search by free text:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
fbib "Eysenbach JMIR 2011"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
fbib [-h] [-f FILE] [-o OUTPUT] [-a] [-v] [--config-email EMAIL]
|
|
43
|
+
[inputs ...]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Flexible input
|
|
47
|
+
|
|
48
|
+
`fbib` accepts DOIs in any format — bare, full URL, or free-text search queries — and you can mix them freely.
|
|
49
|
+
Inputs are comma-separated, so all of the following work:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Multiple positional arguments
|
|
53
|
+
fbib 10.2196/jmir.1933 10.1038/nature12373
|
|
54
|
+
|
|
55
|
+
# Comma-separated string
|
|
56
|
+
fbib "10.2196/jmir.1933, 10.1038/nature12373"
|
|
57
|
+
|
|
58
|
+
# Full DOI URLs
|
|
59
|
+
fbib "https://doi.org/10.2196/jmir.1933, https://doi.org/10.1038/nature12373"
|
|
60
|
+
|
|
61
|
+
# Mix DOIs, URLs, and search queries
|
|
62
|
+
fbib 10.2196/jmir.1933 "Eysenbach JMIR 2011"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
From a file (`--file`), each line is treated the same way — one entry per line, or comma-separated on a single line:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
fbib --file dois.txt
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Duplicate inputs are automatically removed.
|
|
72
|
+
|
|
73
|
+
### Write to a file
|
|
74
|
+
|
|
75
|
+
Overwrite (default):
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
fbib --output refs.bib 10.2196/jmir.1933
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Append to an existing `.bib` file:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
fbib --append --output refs.bib 10.1038/nature12373
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Verbose mode
|
|
88
|
+
|
|
89
|
+
See which DOI was matched when searching by free text:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
fbib -v "Eysenbach JMIR 2011"
|
|
93
|
+
# stderr: Searching for: "Eysenbach JMIR 2011" -> DOI: 10.2196/jmir.1933
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Configure email
|
|
97
|
+
|
|
98
|
+
Crossref gives better rate limits to requests that include a contact email. Set yours once and it will be used for all future requests:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
fbib --config-email you@example.com
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The email is stored in `~/.config/fetchbib/config.json`. If not set, a default placeholder is used.
|
|
105
|
+
|
|
106
|
+
## Development
|
|
107
|
+
|
|
108
|
+
Clone the repo and sync dependencies with [uv](https://docs.astral.sh/uv/):
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/mr-devs/fetchbib.git
|
|
112
|
+
cd fetchbib
|
|
113
|
+
uv sync
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Run unit tests:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
uv run pytest
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Run integration tests (hits live APIs):
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
uv run pytest -m integration
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fetchbib"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Resolve DOIs and search queries into formatted BibTeX entries."
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = ["requests"]
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
fbib = "fetchbib.cli:main"
|
|
14
|
+
|
|
15
|
+
[tool.setuptools.packages.find]
|
|
16
|
+
where = ["src"]
|
|
17
|
+
|
|
18
|
+
[dependency-groups]
|
|
19
|
+
dev = ["pytest"]
|
|
20
|
+
|
|
21
|
+
[tool.pytest.ini_options]
|
|
22
|
+
markers = ["integration: tests that hit live APIs (deselect with '-m not integration')"]
|
|
23
|
+
addopts = "-m 'not integration'"
|
fetchbib-0.1.0/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Command-line interface for fetchbib.
|
|
2
|
+
|
|
3
|
+
Entry point: ``fbib``
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from fetchbib import config
|
|
10
|
+
from fetchbib.formatter import format_bibtex
|
|
11
|
+
from fetchbib.resolver import (
|
|
12
|
+
ResolverError,
|
|
13
|
+
is_doi,
|
|
14
|
+
normalize_doi_input,
|
|
15
|
+
resolve_doi,
|
|
16
|
+
search_crossref,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def main() -> None:
    """Parse arguments and resolve each input to formatted BibTeX.

    Exit status: 0 on full success, 1 if any input failed to resolve
    (successfully resolved entries are still written), 2 on argument
    errors (argparse convention).
    """
    parser = argparse.ArgumentParser(
        prog="fbib",
        description="Resolve DOIs or search queries into formatted BibTeX.",
    )
    parser.add_argument(
        "inputs",
        nargs="*",
        help="DOIs or search queries (comma-separated values are split)",
    )
    parser.add_argument(
        "-f",
        "--file",
        help="Path to a text file with one input per line",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Write results to this file instead of stdout",
    )
    parser.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="Append to the output file instead of overwriting (requires --output)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Print which DOI was selected for search queries",
    )
    parser.add_argument(
        "--config-email",
        metavar="EMAIL",
        help="Set the email used in the User-Agent header and exit",
    )

    args = parser.parse_args()

    # --config-email: save and exit immediately
    if args.config_email:
        config.set_email(args.config_email)
        sys.exit(0)

    # --append is documented as requiring --output; fail fast instead of
    # silently ignoring the flag when no output file was given.
    if args.append and not args.output:
        parser.error("--append requires --output")

    # Collect inputs
    queries = _collect_inputs(args)
    if not queries:
        print(
            "Error: no inputs provided. Pass DOIs/queries as arguments or use --file.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Resolve each input; keep going on failures so one bad DOI does not
    # discard the rest of the batch.
    results: list[str] = []
    had_error = False

    for query in queries:
        try:
            results.append(_resolve_single(query, verbose=args.verbose))
        except ResolverError as exc:
            print(f"Error resolving '{query}': {exc}", file=sys.stderr)
            had_error = True

    # Entries are separated by a blank line; a trailing newline is added
    # only when there is at least one entry.
    output_text = "\n\n".join(results)
    if results:
        output_text += "\n"

    # Write output
    if args.output:
        mode = "a" if args.append else "w"
        with open(args.output, mode) as f:
            f.write(output_text)
    else:
        print(output_text, end="")

    if had_error:
        sys.exit(1)
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _collect_inputs(args: argparse.Namespace) -> list[str]:
    """Gather inputs from positional args and --file, deduplicate.

    First-seen order is preserved: positional arguments come before the
    contents of --file.
    """
    gathered: list[str] = []

    # Positional arguments; each token may itself be comma-separated.
    for token in args.inputs or []:
        gathered.extend(_split_and_strip(token))

    # Lines read from --file are treated exactly like positional tokens.
    if args.file:
        try:
            with open(args.file) as handle:
                for line in handle:
                    gathered.extend(_split_and_strip(line))
        except FileNotFoundError:
            print(f"Error: file not found: {args.file}", file=sys.stderr)
            sys.exit(1)

    # Deduplicate while keeping first-seen order; dict keys are
    # insertion-ordered, so this matches a manual seen-set loop.
    return list(dict.fromkeys(gathered))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _split_and_strip(value: str) -> list[str]:
|
|
132
|
+
"""Split a string on commas and return non-empty stripped parts."""
|
|
133
|
+
return [part.strip() for part in value.split(",") if part.strip()]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _resolve_single(query: str, *, verbose: bool) -> str:
    """Resolve one DOI, DOI URL, or search query to formatted BibTeX."""
    query = normalize_doi_input(query)
    if not is_doi(query):
        # Free text: find the best-matching DOI on Crossref first.
        doi = search_crossref(query)
        if verbose:
            print(
                f'Searching for: "{query}" -> DOI: {doi}',
                file=sys.stderr,
            )
        raw = resolve_doi(doi)
    else:
        raw = resolve_doi(query)
    return format_bibtex(raw)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""User configuration for fetchbib.
|
|
2
|
+
|
|
3
|
+
Reads and writes a JSON config file at ~/.config/fetchbib/config.json.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
CONFIG_DIR = Path.home() / ".config" / "fetchbib"
|
|
10
|
+
CONFIG_FILE = CONFIG_DIR / "config.json"
|
|
11
|
+
|
|
12
|
+
DEFAULT_EMAIL = "fetchbib@example.com"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_email() -> str:
    """Return the configured email, or the default if none is set."""
    return _read_config().get("email", DEFAULT_EMAIL)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def set_email(email: str) -> None:
    """Persist *email* to the config file, keeping other settings."""
    updated = _read_config()
    updated["email"] = email
    _write_config(updated)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _read_config() -> dict:
    """Read the config file, returning an empty dict if it doesn't exist.

    Uses EAFP instead of an ``exists()`` pre-check to avoid a race between
    the check and the open. A present-but-malformed file still raises
    ``json.JSONDecodeError`` so corruption is not silently discarded.
    """
    try:
        with open(CONFIG_FILE) as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _write_config(cfg: dict) -> None:
    """Write *cfg* to disk as JSON, creating the directory if needed."""
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(cfg, indent=2)
    with open(CONFIG_FILE, "w") as f:
        f.write(payload)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""BibTeX string formatter.
|
|
2
|
+
|
|
3
|
+
Transforms raw (often single-line) BibTeX into a clean, readable format
|
|
4
|
+
with alphabetized fields, 2-space indentation, and proper line breaks.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def format_bibtex(raw: str) -> str:
|
|
9
|
+
"""Format a raw BibTeX entry into a clean, readable string.
|
|
10
|
+
|
|
11
|
+
Rules:
|
|
12
|
+
- Entry header (@type{key,) stays on the first line.
|
|
13
|
+
- Each field is on its own line with 2-space indentation.
|
|
14
|
+
- Fields are alphabetized.
|
|
15
|
+
- Closing brace is on its own line.
|
|
16
|
+
- Trailing commas are removed.
|
|
17
|
+
|
|
18
|
+
Commas inside braced values (e.g. author names) are preserved — only
|
|
19
|
+
top-level commas are treated as field separators.
|
|
20
|
+
"""
|
|
21
|
+
header, fields_block = _split_header(raw.strip())
|
|
22
|
+
fields = _parse_fields(fields_block)
|
|
23
|
+
fields.sort(key=lambda kv: kv[0].lower())
|
|
24
|
+
|
|
25
|
+
field_lines = [f" {key} = {value}" for key, value in fields]
|
|
26
|
+
return header + "\n" + ",\n".join(field_lines) + "\n}"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _split_header(entry: str) -> tuple[str, str]:
|
|
30
|
+
"""Split a BibTeX entry into header and fields block.
|
|
31
|
+
|
|
32
|
+
The header is everything up to and including the first top-level comma
|
|
33
|
+
after the citation key (e.g. '@article{Key2020,').
|
|
34
|
+
The fields block is the rest, minus the final closing '}'.
|
|
35
|
+
"""
|
|
36
|
+
# Find the first comma that is not inside braces — this ends the key.
|
|
37
|
+
depth = 0
|
|
38
|
+
for i, ch in enumerate(entry):
|
|
39
|
+
if ch == "{":
|
|
40
|
+
depth += 1
|
|
41
|
+
elif ch == "}":
|
|
42
|
+
depth -= 1
|
|
43
|
+
elif ch == "," and depth == 1:
|
|
44
|
+
header = entry[: i + 1]
|
|
45
|
+
rest = entry[i + 1 :]
|
|
46
|
+
# Strip the outermost closing brace from the rest
|
|
47
|
+
rest = rest.strip()
|
|
48
|
+
if rest.endswith("}"):
|
|
49
|
+
rest = rest[:-1].strip()
|
|
50
|
+
return header, rest
|
|
51
|
+
# Fallback: no fields found
|
|
52
|
+
return entry, ""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _parse_fields(block: str) -> list[tuple[str, str]]:
|
|
56
|
+
"""Parse a block of BibTeX fields into (key, value) pairs.
|
|
57
|
+
|
|
58
|
+
Splits on top-level commas only (not commas inside braces).
|
|
59
|
+
"""
|
|
60
|
+
if not block.strip():
|
|
61
|
+
return []
|
|
62
|
+
|
|
63
|
+
fields = []
|
|
64
|
+
for raw_field in _split_top_level(block, ","):
|
|
65
|
+
raw_field = raw_field.strip()
|
|
66
|
+
if not raw_field:
|
|
67
|
+
continue
|
|
68
|
+
# Split on the first '=' to get key and value
|
|
69
|
+
eq_pos = raw_field.find("=")
|
|
70
|
+
if eq_pos == -1:
|
|
71
|
+
continue
|
|
72
|
+
key = raw_field[:eq_pos].strip()
|
|
73
|
+
value = raw_field[eq_pos + 1 :].strip()
|
|
74
|
+
fields.append((key, value))
|
|
75
|
+
return fields
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _split_top_level(text: str, delimiter: str) -> list[str]:
|
|
79
|
+
"""Split text on a delimiter, but only at brace depth 0."""
|
|
80
|
+
parts = []
|
|
81
|
+
depth = 0
|
|
82
|
+
current: list[str] = []
|
|
83
|
+
|
|
84
|
+
for ch in text:
|
|
85
|
+
if ch == "{":
|
|
86
|
+
depth += 1
|
|
87
|
+
current.append(ch)
|
|
88
|
+
elif ch == "}":
|
|
89
|
+
depth -= 1
|
|
90
|
+
current.append(ch)
|
|
91
|
+
elif ch == delimiter and depth == 0:
|
|
92
|
+
parts.append("".join(current))
|
|
93
|
+
current = []
|
|
94
|
+
else:
|
|
95
|
+
current.append(ch)
|
|
96
|
+
|
|
97
|
+
# Append whatever is left
|
|
98
|
+
trailing = "".join(current).strip()
|
|
99
|
+
if trailing:
|
|
100
|
+
parts.append(trailing)
|
|
101
|
+
|
|
102
|
+
return parts
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""DOI resolution and Crossref search.
|
|
2
|
+
|
|
3
|
+
Provides functions to check if a string is a DOI, resolve a DOI to BibTeX,
|
|
4
|
+
search Crossref for a DOI, and an orchestrator that combines them.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from fetchbib import config
|
|
12
|
+
|
|
13
|
+
# DOI syntax: "10." + a 4-9 digit registrant code + "/" + suffix.
DOI_PATTERN = re.compile(r"^10\.\d{4,9}/[-._;()/:A-Z0-9]+$", re.IGNORECASE)

# URL wrappers around a bare DOI; stripped by normalize_doi_input().
DOI_URL_PREFIXES = (
    "https://doi.org/",
    "http://doi.org/",
    "https://dx.doi.org/",
    "http://dx.doi.org/",
)

DOI_BASE_URL = "https://doi.org/"
CROSSREF_API_URL = "https://api.crossref.org/works"


class ResolverError(Exception):
    """Raised when DOI resolution or Crossref search fails."""


def normalize_doi_input(value: str) -> str:
    """Strip common DOI URL prefixes, returning a bare DOI if possible.

    For example, ``https://doi.org/10.2196/jmir.1933`` becomes
    ``10.2196/jmir.1933``. Non-URL strings are returned unchanged.
    """
    matched = next((p for p in DOI_URL_PREFIXES if value.startswith(p)), None)
    if matched is None:
        return value
    return value[len(matched):]


def is_doi(value: str) -> bool:
    """Return True if *value* looks like a bare DOI (e.g. 10.xxxx/yyyy)."""
    return DOI_PATTERN.match(value) is not None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_user_agent() -> str:
    """Build the User-Agent string using the configured email."""
    # NOTE(review): the advertised version ("1.0") differs from the
    # package version ("0.1.0") — confirm this is intentional.
    contact = config.get_email()
    return f"fetchbib/1.0 (mailto:{contact})"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def resolve_doi(doi: str) -> str:
    """Fetch BibTeX for a DOI from doi.org.

    Raises ResolverError on network failures or non-200 responses.
    """
    headers = {
        "Accept": "text/bibliography; style=bibtex",
        "User-Agent": get_user_agent(),
    }
    try:
        # A timeout keeps the CLI from hanging forever on a stalled
        # connection; network errors are mapped onto ResolverError so
        # callers only need to catch one exception type.
        resp = requests.get(f"{DOI_BASE_URL}{doi}", headers=headers, timeout=30)
    except requests.RequestException as exc:
        raise ResolverError(f"DOI resolution failed for '{doi}': {exc}") from exc
    if resp.status_code != 200:
        raise ResolverError(
            f"DOI resolution failed for '{doi}': HTTP {resp.status_code}"
        )
    return resp.text
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def search_crossref(query: str) -> str:
    """Search Crossref and return the DOI of the first result.

    Raises ResolverError on network failures, non-200 responses,
    malformed response payloads, or empty results.
    """
    headers = {"User-Agent": get_user_agent()}
    try:
        # Timeout and RequestException mapping: see resolve_doi().
        resp = requests.get(
            CROSSREF_API_URL, params={"query": query}, headers=headers, timeout=30
        )
    except requests.RequestException as exc:
        raise ResolverError(f"Crossref search failed: {exc}") from exc
    if resp.status_code != 200:
        raise ResolverError(f"Crossref search failed: HTTP {resp.status_code}")
    try:
        items = resp.json()["message"]["items"]
    except (ValueError, KeyError) as exc:
        # Non-JSON body or missing keys should surface as a ResolverError,
        # not an unhandled traceback.
        raise ResolverError(f"Crossref returned an unexpected payload: {exc}") from exc
    if not items:
        raise ResolverError(f"No results found for query: '{query}'")
    return items[0]["DOI"]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def resolve(query: str) -> str:
    """Resolve a DOI, DOI URL, or free-text query to raw BibTeX.

    DOI URLs (e.g. https://doi.org/10.xxxx/yyyy) are normalized to bare
    DOIs before resolution. If the input is a DOI, fetches directly.
    Otherwise searches Crossref for the top result and resolves that DOI.
    """
    normalized = normalize_doi_input(query)
    doi = normalized if is_doi(normalized) else search_crossref(normalized)
    return resolve_doi(doi)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/fetchbib/__init__.py
|
|
5
|
+
src/fetchbib/cli.py
|
|
6
|
+
src/fetchbib/config.py
|
|
7
|
+
src/fetchbib/formatter.py
|
|
8
|
+
src/fetchbib/resolver.py
|
|
9
|
+
src/fetchbib.egg-info/PKG-INFO
|
|
10
|
+
src/fetchbib.egg-info/SOURCES.txt
|
|
11
|
+
src/fetchbib.egg-info/dependency_links.txt
|
|
12
|
+
src/fetchbib.egg-info/entry_points.txt
|
|
13
|
+
src/fetchbib.egg-info/requires.txt
|
|
14
|
+
src/fetchbib.egg-info/top_level.txt
|
|
15
|
+
tests/test_cli.py
|
|
16
|
+
tests/test_formatter.py
|
|
17
|
+
tests/test_integration.py
|
|
18
|
+
tests/test_resolver.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fetchbib
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Tests for the CLI (Phase 3).
|
|
2
|
+
|
|
3
|
+
All resolver calls are mocked — no network access needed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
import tempfile
|
|
9
|
+
from io import StringIO
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from unittest.mock import patch
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from fetchbib.resolver import ResolverError
|
|
16
|
+
|
|
17
|
+
# Sample raw BibTeX that the mock resolver returns (unformatted).
|
|
18
|
+
RAW_BIBTEX_A = "@article{Key1,author={Alice},year={2020}}"
|
|
19
|
+
RAW_BIBTEX_B = "@article{Key2,author={Bob},year={2021}}"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def run_cli(args: list[str]) -> tuple[int, str, str]:
|
|
23
|
+
"""Run the CLI main() with the given args, returning (exit_code, stdout, stderr)."""
|
|
24
|
+
from fetchbib.cli import main
|
|
25
|
+
|
|
26
|
+
old_argv = sys.argv
|
|
27
|
+
sys.argv = ["fbib"] + args
|
|
28
|
+
|
|
29
|
+
stdout_capture = StringIO()
|
|
30
|
+
stderr_capture = StringIO()
|
|
31
|
+
|
|
32
|
+
exit_code = 0
|
|
33
|
+
try:
|
|
34
|
+
with patch("sys.stdout", stdout_capture), patch("sys.stderr", stderr_capture):
|
|
35
|
+
main()
|
|
36
|
+
except SystemExit as e:
|
|
37
|
+
exit_code = e.code if e.code is not None else 0
|
|
38
|
+
finally:
|
|
39
|
+
sys.argv = old_argv
|
|
40
|
+
|
|
41
|
+
return exit_code, stdout_capture.getvalue(), stderr_capture.getvalue()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Input parsing
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class TestInputParsing:
|
|
50
|
+
"""Tests for how the CLI collects and processes inputs."""
|
|
51
|
+
|
|
52
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
53
|
+
def test_single_positional_doi(self, mock_resolve):
|
|
54
|
+
code, stdout, _ = run_cli(["10.2196/jmir.1933"])
|
|
55
|
+
|
|
56
|
+
mock_resolve.assert_called_once_with("10.2196/jmir.1933")
|
|
57
|
+
assert "@article{Key1," in stdout
|
|
58
|
+
assert code == 0
|
|
59
|
+
|
|
60
|
+
@patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
|
|
61
|
+
def test_multiple_positional_arguments(self, mock_resolve):
|
|
62
|
+
code, stdout, _ = run_cli(["10.2196/jmir.1933", "10.1000/xyz123"])
|
|
63
|
+
|
|
64
|
+
assert mock_resolve.call_count == 2
|
|
65
|
+
assert "Key1" in stdout
|
|
66
|
+
assert "Key2" in stdout
|
|
67
|
+
assert code == 0
|
|
68
|
+
|
|
69
|
+
@patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
|
|
70
|
+
def test_comma_separated_string_is_split(self, mock_resolve):
|
|
71
|
+
code, stdout, _ = run_cli(["10.2196/jmir.1933, 10.1000/xyz123"])
|
|
72
|
+
|
|
73
|
+
assert mock_resolve.call_count == 2
|
|
74
|
+
calls = [c.args[0] for c in mock_resolve.call_args_list]
|
|
75
|
+
assert "10.2196/jmir.1933" in calls
|
|
76
|
+
assert "10.1000/xyz123" in calls
|
|
77
|
+
assert code == 0
|
|
78
|
+
|
|
79
|
+
@patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
|
|
80
|
+
def test_file_input_reads_lines(self, mock_resolve):
|
|
81
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
|
82
|
+
f.write("10.2196/jmir.1933\n\n10.1000/xyz123\n")
|
|
83
|
+
f.flush()
|
|
84
|
+
code, stdout, _ = run_cli(["--file", f.name])
|
|
85
|
+
|
|
86
|
+
assert mock_resolve.call_count == 2
|
|
87
|
+
assert code == 0
|
|
88
|
+
|
|
89
|
+
@patch("fetchbib.cli.resolve_doi", side_effect=[RAW_BIBTEX_A, RAW_BIBTEX_B])
|
|
90
|
+
def test_file_input_splits_comma_separated_line(self, mock_resolve):
|
|
91
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
|
92
|
+
f.write("10.2196/jmir.1933, 10.1000/xyz123\n")
|
|
93
|
+
f.flush()
|
|
94
|
+
code, stdout, _ = run_cli(["--file", f.name])
|
|
95
|
+
|
|
96
|
+
assert mock_resolve.call_count == 2
|
|
97
|
+
calls = [c.args[0] for c in mock_resolve.call_args_list]
|
|
98
|
+
assert "10.2196/jmir.1933" in calls
|
|
99
|
+
assert "10.1000/xyz123" in calls
|
|
100
|
+
assert code == 0
|
|
101
|
+
|
|
102
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
103
|
+
def test_doi_url_is_normalized(self, mock_resolve):
|
|
104
|
+
code, stdout, _ = run_cli(["https://doi.org/10.2196/jmir.1933"])
|
|
105
|
+
|
|
106
|
+
mock_resolve.assert_called_once_with("10.2196/jmir.1933")
|
|
107
|
+
assert "@article{Key1," in stdout
|
|
108
|
+
assert code == 0
|
|
109
|
+
|
|
110
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
111
|
+
def test_duplicate_inputs_are_deduplicated(self, mock_resolve):
|
|
112
|
+
code, _, _ = run_cli(["10.2196/jmir.1933", "10.2196/jmir.1933"])
|
|
113
|
+
|
|
114
|
+
mock_resolve.assert_called_once()
|
|
115
|
+
assert code == 0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Error handling
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestErrorHandling:
|
|
124
|
+
"""Tests for error conditions and exit codes."""
|
|
125
|
+
|
|
126
|
+
def test_nonexistent_file_exits_1(self):
|
|
127
|
+
code, _, stderr = run_cli(["--file", "nonexistent_file.txt"])
|
|
128
|
+
|
|
129
|
+
assert code == 1
|
|
130
|
+
assert "nonexistent_file.txt" in stderr
|
|
131
|
+
|
|
132
|
+
@patch("fetchbib.cli.resolve_doi")
|
|
133
|
+
def test_resolution_error_does_not_stop_others(self, mock_resolve):
|
|
134
|
+
mock_resolve.side_effect = [
|
|
135
|
+
ResolverError("fail"),
|
|
136
|
+
RAW_BIBTEX_B,
|
|
137
|
+
]
|
|
138
|
+
|
|
139
|
+
code, stdout, stderr = run_cli(["10.1234/bad", "10.1234/good"])
|
|
140
|
+
|
|
141
|
+
assert "Key2" in stdout
|
|
142
|
+
assert "fail" in stderr
|
|
143
|
+
assert code == 1
|
|
144
|
+
|
|
145
|
+
def test_no_inputs_exits_1(self):
|
|
146
|
+
code, _, stderr = run_cli([])
|
|
147
|
+
|
|
148
|
+
assert code == 1
|
|
149
|
+
assert stderr # should contain some usage hint
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# Verbose mode
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class TestVerbose:
|
|
158
|
+
"""Tests for the --verbose flag."""
|
|
159
|
+
|
|
160
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
161
|
+
@patch("fetchbib.cli.search_crossref", return_value="10.2196/jmir.1933")
|
|
162
|
+
def test_verbose_prints_search_mapping(self, mock_search, mock_resolve):
|
|
163
|
+
code, _, stderr = run_cli(["-v", "Eysenbach JMIR 2011"])
|
|
164
|
+
|
|
165
|
+
assert "Eysenbach JMIR 2011" in stderr
|
|
166
|
+
assert "10.2196/jmir.1933" in stderr
|
|
167
|
+
assert code == 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Output file
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class TestOutputFile:
|
|
176
|
+
"""Tests for --output and --append flags."""
|
|
177
|
+
|
|
178
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
179
|
+
def test_output_writes_to_file(self, mock_resolve):
|
|
180
|
+
with tempfile.NamedTemporaryFile(suffix=".bib", delete=False) as f:
|
|
181
|
+
path = f.name
|
|
182
|
+
|
|
183
|
+
code, stdout, _ = run_cli(["--output", path, "10.1234/test"])
|
|
184
|
+
|
|
185
|
+
assert code == 0
|
|
186
|
+
assert stdout == "" # nothing to stdout
|
|
187
|
+
content = Path(path).read_text()
|
|
188
|
+
assert "Key1" in content
|
|
189
|
+
|
|
190
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
|
|
191
|
+
def test_output_overwrites_by_default(self, mock_resolve):
|
|
192
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".bib", delete=False) as f:
|
|
193
|
+
f.write("OLD CONTENT\n")
|
|
194
|
+
path = f.name
|
|
195
|
+
|
|
196
|
+
run_cli(["--output", path, "10.1234/test"])
|
|
197
|
+
|
|
198
|
+
content = Path(path).read_text()
|
|
199
|
+
assert "OLD CONTENT" not in content
|
|
200
|
+
assert "Key1" in content
|
|
201
|
+
|
|
202
|
+
@patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_B)
|
|
203
|
+
def test_append_flag_preserves_existing(self, mock_resolve):
|
|
204
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".bib", delete=False) as f:
|
|
205
|
+
f.write("EXISTING ENTRY\n\n")
|
|
206
|
+
path = f.name
|
|
207
|
+
|
|
208
|
+
code, stdout, _ = run_cli(["--append", "--output", path, "10.1234/test"])
|
|
209
|
+
|
|
210
|
+
assert code == 0
|
|
211
|
+
assert stdout == ""
|
|
212
|
+
content = Path(path).read_text()
|
|
213
|
+
assert "EXISTING ENTRY" in content
|
|
214
|
+
assert "Key2" in content
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
# Config email
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class TestConfigEmail:
    """Tests for --config-email."""

    def test_config_email_saves_and_exits(self, tmp_path):
        """--config-email persists the address to the config file and exits 0."""
        config_file = tmp_path / "config.json"
        with (
            patch("fetchbib.config.CONFIG_FILE", config_file),
            patch("fetchbib.config.CONFIG_DIR", tmp_path),
        ):
            code, _, _ = run_cli(["--config-email", "user@university.edu"])

        assert code == 0
        saved = json.loads(config_file.read_text())
        assert saved["email"] == "user@university.edu"

    @patch("fetchbib.cli.resolve_doi", return_value=RAW_BIBTEX_A)
    def test_config_email_used_by_resolver(self, mock_resolve, tmp_path):
        """A configured email is picked up by the resolver's User-Agent."""
        config_file = tmp_path / "config.json"
        config_file.write_text(json.dumps({"email": "custom@uni.edu"}))

        with (
            patch("fetchbib.config.CONFIG_FILE", config_file),
            patch("fetchbib.config.CONFIG_DIR", tmp_path),
        ):
            run_cli(["10.1234/test"])

            # resolve_doi is mocked above, so the request headers cannot be
            # inspected directly; instead verify that get_user_agent() reads
            # the configured address from the patched config file.
            from fetchbib.resolver import get_user_agent

            ua = get_user_agent()
            assert "custom@uni.edu" in ua
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Tests for the BibTeX formatter (Phase 1)."""
|
|
2
|
+
|
|
3
|
+
from fetchbib.formatter import format_bibtex
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestFormatBibtex:
    """Tests for format_bibtex()."""

    def test_single_line_is_formatted(self):
        """Single-line BibTeX is split into indented, alphabetized fields."""
        compact = (
            "@article{Eysenbach2011,"
            "doi={10.2196/jmir.1933},"
            "title={Can Tweets Predict Citations?},"
            "author={Eysenbach, Gunther},"
            "year={2011},"
            "journal={JMIR}}"
        )
        want = "\n".join(
            [
                "@article{Eysenbach2011,",
                " author = {Eysenbach, Gunther},",
                " doi = {10.2196/jmir.1933},",
                " journal = {JMIR},",
                " title = {Can Tweets Predict Citations?},",
                " year = {2011}",
                "}",
            ]
        )
        assert format_bibtex(compact) == want

    def test_idempotent(self):
        """Already-formatted BibTeX passes through unchanged."""
        pretty = "\n".join(
            [
                "@article{Eysenbach2011,",
                " author = {Eysenbach, Gunther},",
                " doi = {10.2196/jmir.1933},",
                " journal = {JMIR},",
                " title = {Can Tweets Predict Citations?},",
                " year = {2011}",
                "}",
            ]
        )
        assert format_bibtex(pretty) == pretty

    def test_author_commas_preserved(self):
        """Commas inside braces (author names) are not treated as separators."""
        entry = (
            "@article{Key2020,"
            "author={Last, First and Last2, First2},"
            "title={A Title},"
            "year={2020}}"
        )
        out = format_bibtex(entry)
        assert "author = {Last, First and Last2, First2}" in out

    def test_trailing_comma_removed(self):
        """A trailing comma before the closing brace is stripped."""
        entry = "@article{Key2020,author={Someone},year={2020},}"
        out = format_bibtex(entry)
        # Inspect the line just above the closing '}': no comma allowed.
        body_lines = out.strip().split("\n")
        final_field = body_lines[-2]
        assert not final_field.rstrip().endswith(",")

    def test_nested_braces_preserved(self):
        """Nested braces in field values are kept intact."""
        entry = (
            "@inproceedings{Key2021,"
            "title={A {GPU}-Accelerated Approach},"
            "author={Smith, John},"
            "year={2021}}"
        )
        out = format_bibtex(entry)
        assert "title = {A {GPU}-Accelerated Approach}" in out
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Integration tests that hit live APIs.
|
|
2
|
+
|
|
3
|
+
Skipped by default. Run with:
|
|
4
|
+
pytest -m integration
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import tempfile
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
from fetchbib.formatter import format_bibtex
|
|
12
|
+
from fetchbib.resolver import resolve, resolve_doi, search_crossref
|
|
13
|
+
|
|
14
|
+
pytestmark = pytest.mark.integration
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestLiveResolution:
    """End-to-end tests against doi.org and Crossref."""

    def test_doi_resolution(self):
        """A known DOI resolves to formatted BibTeX with expected fields."""
        raw = resolve_doi("10.2196/jmir.1933")
        result = format_bibtex(raw)
        assert "Eysenbach" in result
        assert "2011" in result

    def test_free_text_search(self):
        """A free-text query finds a DOI that resolves to the right entry."""
        doi = search_crossref("Eysenbach JMIR 2011")
        raw = resolve_doi(doi)
        result = format_bibtex(raw)
        assert "Eysenbach" in result

    def test_file_input_via_cli(self):
        """Resolve a DOI through the full CLI path."""
        import os
        import sys  # noqa: F401 - patched below via "sys.argv"/"sys.stdout"
        from io import StringIO
        from unittest.mock import patch

        from fetchbib.cli import main

        # Closing the with-block flushes and closes the file, so an explicit
        # flush() is unnecessary; delete=False keeps it around for the CLI.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
            f.write("10.2196/jmir.1933\n")
            path = f.name

        stdout_capture = StringIO()
        try:
            # patch() restores argv/stdout even if main() raises.
            with (
                patch("sys.argv", ["fbib", "--file", path]),
                patch("sys.stdout", stdout_capture),
            ):
                main()
        except SystemExit:
            # main() may terminate via sys.exit(); that is fine here.
            pass
        finally:
            # delete=False means we must remove the temp file ourselves.
            os.unlink(path)

        output = stdout_capture.getvalue()
        assert "Eysenbach" in output
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Tests for the resolver (Phase 2).
|
|
2
|
+
|
|
3
|
+
All HTTP calls are mocked — no network access needed.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from fetchbib.resolver import (
|
|
11
|
+
ResolverError,
|
|
12
|
+
is_doi,
|
|
13
|
+
normalize_doi_input,
|
|
14
|
+
resolve,
|
|
15
|
+
resolve_doi,
|
|
16
|
+
search_crossref,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# is_doi
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestIsDoi:
    """Tests for DOI pattern matching."""

    @pytest.mark.parametrize(
        "candidate",
        [
            "10.2196/jmir.1933",
            "10.1000/xyz123",
            "10.1234/some-thing_(here)",
        ],
    )
    def test_valid_dois(self, candidate):
        # Well-formed bare DOIs must be recognized.
        assert is_doi(candidate) is True

    @pytest.mark.parametrize(
        "candidate",
        [
            "not a doi",
            "10.12345",  # missing suffix after the slash
            "",
            "http://doi.org/10.2196/jmir.1933",  # full URL, not a bare DOI
        ],
    )
    def test_invalid_inputs(self, candidate):
        # Anything that is not a bare DOI must be rejected.
        assert is_doi(candidate) is False
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# normalize_doi_input
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TestNormalizeDOIInput:
    """Tests for stripping DOI URL prefixes."""

    @pytest.mark.parametrize(
        "prefixed,bare",
        [
            ("https://doi.org/10.2196/jmir.1933", "10.2196/jmir.1933"),
            ("http://doi.org/10.2196/jmir.1933", "10.2196/jmir.1933"),
            ("https://dx.doi.org/10.2196/jmir.1933", "10.2196/jmir.1933"),
            ("http://dx.doi.org/10.2196/jmir.1933", "10.2196/jmir.1933"),
        ],
    )
    def test_strips_doi_url_prefixes(self, prefixed, bare):
        # Every common doi.org / dx.doi.org URL form reduces to the bare DOI.
        assert normalize_doi_input(prefixed) == bare

    def test_bare_doi_unchanged(self):
        # A DOI with no URL scheme passes through untouched.
        assert normalize_doi_input("10.2196/jmir.1933") == "10.2196/jmir.1933"

    def test_non_doi_string_unchanged(self):
        # Free-text queries are left as-is (they go to Crossref search).
        assert normalize_doi_input("Eysenbach JMIR 2011") == "Eysenbach JMIR 2011"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
# resolve_doi
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TestResolveDoi:
    """Tests for fetching BibTeX from doi.org."""

    @patch("fetchbib.resolver.requests.get")
    def test_returns_bibtex_on_success(self, mock_get):
        entry = "@article{Key, author={Someone}, year={2020}}"
        response = MagicMock()
        response.status_code = 200
        response.text = entry
        mock_get.return_value = response

        assert resolve_doi("10.1234/test") == entry

        # The request must ask doi.org for BibTeX and identify the tool.
        call = mock_get.call_args
        sent_headers = call.kwargs.get("headers") or call[1].get("headers", {})
        assert sent_headers["Accept"] == "text/bibliography; style=bibtex"
        assert "fetchbib" in sent_headers["User-Agent"]

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_http_failure(self, mock_get):
        response = MagicMock()
        response.status_code = 404
        mock_get.return_value = response

        # A non-200 status surfaces as a ResolverError mentioning the code.
        with pytest.raises(ResolverError, match="404"):
            resolve_doi("10.1234/missing")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
# search_crossref
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class TestSearchCrossref:
    """Tests for the Crossref search API."""

    @patch("fetchbib.resolver.requests.get")
    def test_extracts_doi_from_first_result(self, mock_get):
        payload = {
            "message": {
                "items": [
                    {"DOI": "10.2196/jmir.1933"},
                    {"DOI": "10.9999/other"},
                ]
            }
        }
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = payload
        mock_get.return_value = response

        # Only the first hit's DOI is returned; later hits are ignored.
        assert search_crossref("Eysenbach JMIR 2011") == "10.2196/jmir.1933"

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_empty_results(self, mock_get):
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"message": {"items": []}}
        mock_get.return_value = response

        # An empty item list raises rather than returning None.
        with pytest.raises(ResolverError, match="[Nn]o results"):
            search_crossref("nonexistent gibberish query")

    @patch("fetchbib.resolver.requests.get")
    def test_raises_on_http_failure(self, mock_get):
        response = MagicMock()
        response.status_code = 503
        mock_get.return_value = response

        # HTTP errors surface as ResolverError mentioning the status code.
        with pytest.raises(ResolverError, match="503"):
            search_crossref("anything")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
# resolve (orchestrator)
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class TestResolve:
    """Tests for the top-level resolve() orchestrator."""

    @patch("fetchbib.resolver.resolve_doi")
    @patch("fetchbib.resolver.search_crossref")
    def test_routes_doi_directly(self, mock_search, mock_resolve_doi):
        mock_resolve_doi.return_value = "@article{...}"
        doi = "10.2196/jmir.1933"

        resolve(doi)

        # A bare DOI goes straight to doi.org — no Crossref search.
        mock_resolve_doi.assert_called_once_with(doi)
        mock_search.assert_not_called()

    @patch("fetchbib.resolver.resolve_doi")
    @patch("fetchbib.resolver.search_crossref")
    def test_routes_doi_url_directly(self, mock_search, mock_resolve_doi):
        mock_resolve_doi.return_value = "@article{...}"

        resolve("https://doi.org/10.2196/jmir.1933")

        # The URL prefix is stripped before resolution; still no search.
        mock_resolve_doi.assert_called_once_with("10.2196/jmir.1933")
        mock_search.assert_not_called()

    @patch("fetchbib.resolver.resolve_doi")
    @patch("fetchbib.resolver.search_crossref")
    def test_routes_non_doi_through_search(self, mock_search, mock_resolve_doi):
        mock_search.return_value = "10.2196/jmir.1933"
        mock_resolve_doi.return_value = "@article{...}"
        query = "Eysenbach JMIR 2011"

        resolve(query)

        # Free text is searched on Crossref first, then the DOI is resolved.
        mock_search.assert_called_once_with(query)
        mock_resolve_doi.assert_called_once_with("10.2196/jmir.1933")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Config / User-Agent
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class TestUserAgentConfig:
    """Tests for configurable User-Agent email."""

    @staticmethod
    def _mock_ok_response(mock_get):
        """Configure mock_get to return a successful BibTeX response."""
        resp = MagicMock()
        resp.status_code = 200
        resp.text = "@article{...}"
        mock_get.return_value = resp

    @staticmethod
    def _sent_user_agent(mock_get):
        """Return the User-Agent header sent on the last requests.get call."""
        call = mock_get.call_args
        headers = call.kwargs.get("headers") or call[1]["headers"]
        return headers["User-Agent"]

    @patch("fetchbib.resolver.requests.get")
    @patch("fetchbib.resolver.config.get_email", return_value="custom@university.edu")
    def test_custom_email_in_user_agent(self, _mock_email, mock_get):
        self._mock_ok_response(mock_get)

        resolve_doi("10.1234/test")

        assert "custom@university.edu" in self._sent_user_agent(mock_get)

    @patch("fetchbib.resolver.requests.get")
    @patch("fetchbib.resolver.config.get_email", return_value="fetchbib@example.com")
    def test_default_email_in_user_agent(self, _mock_email, mock_get):
        self._mock_ok_response(mock_get)

        resolve_doi("10.1234/test")

        assert "fetchbib@example.com" in self._sent_user_agent(mock_get)
|