antifile 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .venv/
5
+ dist/
6
+ build/
7
+ .pytest_cache/
8
+ .ruff_cache/
@@ -0,0 +1,94 @@
1
+ Metadata-Version: 2.4
2
+ Name: antifile
3
+ Version: 0.1.0
4
+ Summary: Extract BibTeX metadata from PDFs, EPUBs, URLs, and identifiers
5
+ Project-URL: Homepage, https://github.com/vxvware/antifile
6
+ Project-URL: Repository, https://github.com/vxvware/antifile
7
+ Project-URL: Issues, https://github.com/vxvware/antifile/issues
8
+ Author-email: VxVware <trevor.j.vincent@gmail.com>
9
+ License-Expression: MIT
10
+ Keywords: arxiv,bibliography,bibtex,citations,doi,epub,isbn,metadata,pdf
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: End Users/Desktop
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Text Processing :: Markup :: LaTeX
21
+ Classifier: Topic :: Utilities
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: bibtexparser>=2.0.0b7
24
+ Requires-Dist: httpx>=0.27
25
+ Requires-Dist: pymupdf>=1.24
26
+ Requires-Dist: rapidfuzz>=3.0
27
+ Requires-Dist: rich>=13.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.4; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # antifile
34
+
35
+ Extract BibTeX metadata from PDFs, EPUBs, URLs, and identifiers (DOI, arXiv, ISBN)
36
+ and append it to a `.bib` file.
37
+
38
+ ## Install
39
+
40
+ ```bash
41
+ pip install antifile
42
+ # or
43
+ uv tool install antifile
44
+ ```
45
+
46
+ ## Usage
47
+
48
+ ```bash
49
+ antifile INPUT -o refs.bib
50
+ ```
51
+
52
+ `INPUT` can be:
53
+
54
+ - a **PDF** or **EPUB** file — `antifile paper.pdf -o refs.bib`
55
+ - a **folder** of PDFs/EPUBs — `antifile ~/Downloads/papers -o refs.bib` (add `--recursive` to descend into subfolders)
56
+ - a **URL** — `antifile https://example.com/article -o refs.bib`
57
+ - a **DOI** — `antifile 10.1145/3292500 -o refs.bib`
58
+ - an **arXiv ID** — `antifile arXiv:1706.03762 -o refs.bib`
59
+ - an **ISBN** — `antifile 9780262033848 -o refs.bib`
60
+
61
+ Entries are appended with de-duplication: a new entry matching an existing one
62
+ (by DOI, arXiv ID, ISBN, or normalized title+author) fills in any missing fields
63
+ rather than creating a duplicate. Pass `--no-merge` to skip on duplicate, or
64
+ `--force` to append anyway with an auto-suffixed key.
65
+
66
+ ### Options
67
+
68
+ | flag | effect |
69
+ |------|--------|
70
+ | `-o, --output FILE` | target `.bib` (required; created if missing) |
71
+ | `--method {auto,doi,arxiv,isbn,crossref,llm,claude-code}` | force a PDF extraction method (default: `auto`) |
72
+ | `--recursive` | recurse into subfolders for folder input |
73
+ | `--no-preview` | skip the first-page PDF preview |
74
+ | `--no-merge` | on duplicate, skip instead of filling missing fields |
75
+ | `--force` | append even if a duplicate exists |
76
+
77
+ ### LLM-assisted extraction
78
+
79
+ When a PDF has no resolvable identifier, antifile can fall back to an LLM to read
80
+ the first page. Set whichever API key you have — it's picked up from the
81
+ environment:
82
+
83
+ ```bash
84
+ export ANTHROPIC_API_KEY=... # or OPENAI_API_KEY, or GEMINI_API_KEY / GOOGLE_API_KEY
85
+ ```
86
+
87
+ ## Related
88
+
89
+ - [antilibrary](https://github.com/vxvware/antilibrary) — manage BibTeX libraries from the terminal (can call antifile via `--add-from-files`).
90
+ - [antifind](https://github.com/vxvware/antifind) — online metadata search → BibTeX.
91
+
92
+ ## License
93
+
94
+ MIT
@@ -0,0 +1,62 @@
1
+ # antifile
2
+
3
+ Extract BibTeX metadata from PDFs, EPUBs, URLs, and identifiers (DOI, arXiv, ISBN)
4
+ and append it to a `.bib` file.
5
+
6
+ ## Install
7
+
8
+ ```bash
9
+ pip install antifile
10
+ # or
11
+ uv tool install antifile
12
+ ```
13
+
14
+ ## Usage
15
+
16
+ ```bash
17
+ antifile INPUT -o refs.bib
18
+ ```
19
+
20
+ `INPUT` can be:
21
+
22
+ - a **PDF** or **EPUB** file — `antifile paper.pdf -o refs.bib`
23
+ - a **folder** of PDFs/EPUBs — `antifile ~/Downloads/papers -o refs.bib` (add `--recursive` to descend into subfolders)
24
+ - a **URL** — `antifile https://example.com/article -o refs.bib`
25
+ - a **DOI** — `antifile 10.1145/3292500 -o refs.bib`
26
+ - an **arXiv ID** — `antifile arXiv:1706.03762 -o refs.bib`
27
+ - an **ISBN** — `antifile 9780262033848 -o refs.bib`
28
+
29
+ Entries are appended with de-duplication: a new entry matching an existing one
30
+ (by DOI, arXiv ID, ISBN, or normalized title+author) fills in any missing fields
31
+ rather than creating a duplicate. Pass `--no-merge` to skip on duplicate, or
32
+ `--force` to append anyway with an auto-suffixed key.
33
+
34
+ ### Options
35
+
36
+ | flag | effect |
37
+ |------|--------|
38
+ | `-o, --output FILE` | target `.bib` (required; created if missing) |
39
+ | `--method {auto,doi,arxiv,isbn,crossref,llm,claude-code}` | force a PDF extraction method (default: `auto`) |
40
+ | `--recursive` | recurse into subfolders for folder input |
41
+ | `--no-preview` | skip the first-page PDF preview |
42
+ | `--no-merge` | on duplicate, skip instead of filling missing fields |
43
+ | `--force` | append even if a duplicate exists |
44
+
45
+ ### LLM-assisted extraction
46
+
47
+ When a PDF has no resolvable identifier, antifile can fall back to an LLM to read
48
+ the first page. Set whichever API key you have — it's picked up from the
49
+ environment:
50
+
51
+ ```bash
52
+ export ANTHROPIC_API_KEY=... # or OPENAI_API_KEY, or GEMINI_API_KEY / GOOGLE_API_KEY
53
+ ```
54
+
55
+ ## Related
56
+
57
+ - [antilibrary](https://github.com/vxvware/antilibrary) — manage BibTeX libraries from the terminal (can call antifile via `--add-from-files`).
58
+ - [antifind](https://github.com/vxvware/antifind) — online metadata search → BibTeX.
59
+
60
+ ## License
61
+
62
+ MIT
@@ -0,0 +1,55 @@
1
+ [project]
2
+ name = "antifile"
3
+ version = "0.1.0"
4
+ description = "Extract BibTeX metadata from PDFs, EPUBs, URLs, and identifiers"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "MIT"
8
+ authors = [
9
+ { name = "VxVware", email = "trevor.j.vincent@gmail.com" },
10
+ ]
11
+ keywords = ["bibtex", "bibliography", "citations", "pdf", "epub", "doi", "arxiv", "isbn", "metadata"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Science/Research",
16
+ "Intended Audience :: End Users/Desktop",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Text Processing :: Markup :: LaTeX",
23
+ "Topic :: Utilities",
24
+ ]
25
+ dependencies = [
26
+ "bibtexparser>=2.0.0b7",
27
+ "rapidfuzz>=3.0",
28
+ "rich>=13.0",
29
+ "httpx>=0.27",
30
+ "pymupdf>=1.24",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest>=8.0",
36
+ "ruff>=0.4",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/vxvware/antifile"
41
+ Repository = "https://github.com/vxvware/antifile"
42
+ Issues = "https://github.com/vxvware/antifile/issues"
43
+
44
+ [project.scripts]
45
+ antifile = "antifile.cli:main"
46
+
47
+ [build-system]
48
+ requires = ["hatchling"]
49
+ build-backend = "hatchling.build"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/antifile"]
53
+
54
+ [tool.ruff]
55
+ line-length = 100
@@ -0,0 +1,25 @@
1
+ """antifile — extract BibTeX metadata from PDFs, URLs, and identifiers."""
2
+
3
+ from antifile.entry import Entry
4
+ from antifile._base import (
5
+ ExtractionResult,
6
+ Identifiers,
7
+ ExtractorError,
8
+ ExtractorUnavailableError,
9
+ NetworkError,
10
+ PDFError,
11
+ )
12
+ from antifile.dispatch import extract
13
+ from antifile import bibfile
14
+
15
+ __all__ = [
16
+ "extract",
17
+ "bibfile",
18
+ "Entry",
19
+ "ExtractionResult",
20
+ "Identifiers",
21
+ "ExtractorError",
22
+ "ExtractorUnavailableError",
23
+ "NetworkError",
24
+ "PDFError",
25
+ ]
@@ -0,0 +1,49 @@
1
+ """Base types for the extractor system."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ from antifile.entry import Entry
9
+
10
+
11
class ExtractorError(Exception):
    """Base exception for extractors.

    Root of this module's exception hierarchy: ``ExtractorUnavailableError``,
    ``NetworkError`` and ``PDFError`` all derive from it, so catching this
    type handles any of them.
    """
13
+
14
+
15
class ExtractorUnavailableError(ExtractorError):
    """Raised when a required dependency is missing.

    Subclass of ``ExtractorError``.
    """
17
+
18
+
19
class NetworkError(ExtractorError):
    """Raised on API/network failures.

    Subclass of ``ExtractorError``.
    """
21
+
22
+
23
class PDFError(ExtractorError):
    """Raised on PDF read failures.

    Subclass of ``ExtractorError``.
    """
25
+
26
+
27
@dataclass
class Identifiers:
    """Bundle of the identifiers found in a document's text.

    Each attribute is an independent list (a fresh empty list per instance
    by default), one per identifier family: DOIs, arXiv IDs and ISBNs.
    """

    dois: list[str] = field(default_factory=list)
    arxiv_ids: list[str] = field(default_factory=list)
    isbns: list[str] = field(default_factory=list)

    @property
    def has_any(self) -> bool:
        """True when at least one of the three identifier lists is non-empty."""
        return any((self.dois, self.arxiv_ids, self.isbns))
38
+
39
+
40
@dataclass
class ExtractionResult:
    """Result from an extraction attempt.

    Carries the extracted entry (if any) together with where it came from,
    how much it is trusted, and any diagnostics gathered along the way.
    """

    # Extracted bibliography entry, or None when nothing could be extracted.
    entry: Entry | None
    # Name of the method/source that produced the result; the CLI passes its
    # --method value here and treats "unknown" as unrecognized input.
    source: str
    # Trust score; the CLI renders it as a percentage and uses 0.0 when no
    # entry was produced.
    confidence: float
    # Identifiers found in the document text (empty by default).
    identifiers: Identifiers = field(default_factory=Identifiers)
    # Non-fatal problems; the CLI prints each one as a warning.
    errors: list[str] = field(default_factory=list)
    # PDF the result was extracted from, when the input was a PDF file.
    pdf_path: Path | None = None
@@ -0,0 +1,137 @@
1
+ """Read, dedup, and append BibTeX entries to a .bib file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import bibtexparser
8
+
9
+ from antifile.entry import Entry
10
+
11
+
12
def _btp_to_entry(e, source: Path | None = None) -> Entry:
    """Convert a parsed bibtexparser entry into an ``Entry``.

    The parsed entry's fields are flattened into a ``{key: value}`` mapping
    (a later field with the same key overwrites an earlier one), and
    ``source`` is recorded as the entry's originating file.
    """
    field_map = {}
    for item in e.fields:
        field_map[item.key] = item.value
    return Entry(
        key=e.key,
        entry_type=e.entry_type,
        fields=field_map,
        source_file=source,
    )
15
+
16
+
17
# .bib files are read and written as UTF-8 regardless of the platform's locale
# encoding, so non-ASCII author names and titles round-trip identically on
# every OS. All file access in this module must use the same encoding.
_BIB_ENCODING = "utf-8"

# Fields holding hard identifiers: equal values prove two entries describe the
# same work, differing values prove they do not.
_ID_FIELDS = ("doi", "eprint", "isbn")


def load(path: Path) -> list[Entry]:
    """Load all entries from a .bib file. Returns [] if missing/empty.

    The file is decoded as UTF-8. A file that contains only whitespace is
    treated as empty and is not handed to the parser.
    """
    if not path.exists():
        return []
    text = path.read_text(encoding=_BIB_ENCODING)
    if not text.strip():
        return []
    lib = bibtexparser.parse_string(text)
    return [_btp_to_entry(e, path) for e in lib.entries]


def _norm(s: str) -> str:
    """Normalize a field value for comparison: trim whitespace, lowercase."""
    return s.strip().lower()


def find_duplicate(entry: Entry, existing: list[Entry]) -> Entry | None:
    """Return the first existing entry that describes the same work, or None.

    An existing entry matches when any of the following holds (all
    comparisons are on whitespace-trimmed, lowercased values):

    - it shares a non-empty DOI, arXiv ID (``eprint``) or ISBN with ``entry``;
    - both title and author are non-empty on ``entry`` and equal;
    - it has the same citation key and no hard identifier contradicts the
      match. Two entries that merely collide on the key but carry different
      DOIs/arXiv IDs/ISBNs are different works and are NOT duplicates, so the
      caller can append the new one under a suffixed key instead of merging
      unrelated records.
    """
    new_ids = [_norm(entry.fields.get(name, "")) for name in _ID_FIELDS]
    new_title = _norm(entry.fields.get("title", ""))
    new_author = _norm(entry.fields.get("author", ""))

    for e in existing:
        id_pairs = [
            (new, _norm(e.fields.get(name, "")))
            for name, new in zip(_ID_FIELDS, new_ids)
        ]
        if any(new and new == old for new, old in id_pairs):
            return e
        if (
            new_title
            and new_author
            and _norm(e.fields.get("title", "")) == new_title
            and _norm(e.fields.get("author", "")) == new_author
        ):
            return e
        if e.key == entry.key:
            # A key collision alone is only trusted when no identifier that
            # is present on both sides disagrees.
            conflicting = any(new and old and new != old for new, old in id_pairs)
            if not conflicting:
                return e
    return None


def make_unique_key(base: str, existing: list[Entry]) -> str:
    """Suffix a, b, c... if `base` collides with any key in existing.

    Returns ``base`` unchanged when it is free. Once all 26 letter suffixes
    are taken, falls back to the first free numeric suffix (1, 2, ...).
    """
    keys = {e.key for e in existing}
    if base not in keys:
        return base
    for suffix in "abcdefghijklmnopqrstuvwxyz":
        candidate = f"{base}{suffix}"
        if candidate not in keys:
            return candidate
    n = 1
    while f"{base}{n}" in keys:
        n += 1
    return f"{base}{n}"


def _append_entry(path: Path, entry: Entry) -> None:
    """Append ``entry`` to the file at ``path`` and record it as its source.

    Missing parent directories are created. When the file already has
    content, a blank line separates the new entry from it. The file is
    written as UTF-8, matching what ``load`` reads.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    text = path.read_text(encoding=_BIB_ENCODING) if path.exists() else ""
    separator = "\n\n" if text.strip() else ""
    with open(path, "a", encoding=_BIB_ENCODING) as f:
        f.write(separator + entry.to_bibtex() + "\n")
    entry.source_file = path


def _rewrite(path: Path, entries: list[Entry]) -> None:
    """Replace the file's content with ``entries``, blank-line separated.

    Writes UTF-8; an empty ``entries`` list produces an empty file.
    """
    content = "\n\n".join(e.to_bibtex() for e in entries)
    path.write_text((content + "\n") if content else "", encoding=_BIB_ENCODING)
86
+
87
+
88
def merge_missing(target: Entry, source: Entry) -> bool:
    """Copy values from ``source`` into blank slots of ``target``.

    A field is copied only when ``source`` has a truthy value for it and
    ``target`` either lacks the field or holds only whitespace there.
    ``target`` is mutated in place; the return value reports whether any
    field was written.
    """
    filled = False
    destination = target.fields
    for name, value in source.fields.items():
        if value and not destination.get(name, "").strip():
            destination[name] = value
            filled = True
    return filled
98
+
99
+
100
class AppendResult:
    """Outcome of one ``append`` call.

    ``status`` is one of "added", "skipped" or "merged"; ``entry`` is the
    entry the outcome refers to; ``duplicate_of`` is the pre-existing entry
    that was matched, or None when there was no match.
    """

    __slots__ = ("status", "entry", "duplicate_of")

    def __init__(self, status: str, entry: Entry, duplicate_of: Entry | None = None):
        self.duplicate_of = duplicate_of
        self.entry = entry
        # One of: "added" | "skipped" | "merged"
        self.status = status
107
+
108
+
109
def append(
    path: Path,
    entry: Entry,
    *,
    merge: bool = True,
    force: bool = False,
) -> AppendResult:
    """Add `entry` to the .bib file at `path`, de-duplicating first.

    Outcomes, reported through the returned ``AppendResult.status``:

    - "added": no duplicate was found, or `force` is True (the duplicate
      search is skipped entirely). The entry's key is made unique against
      the keys already in the file and the entry is appended.
    - "merged": a duplicate was found, `merge` is True, and at least one
      missing/empty field of the existing entry was filled from `entry`;
      the whole file is rewritten.
    - "skipped": a duplicate was found and either `merge` is False or the
      existing entry had nothing missing.

    For "merged" and "skipped" the result's ``entry`` and ``duplicate_of``
    both refer to the existing entry.
    """
    existing = load(path)
    dup = None if force else find_duplicate(entry, existing)

    # No duplicate to reconcile with: plain append under a collision-free key.
    if dup is None:
        entry.key = make_unique_key(entry.key, existing)
        _append_entry(path, entry)
        return AppendResult("added", entry)

    # Duplicate found: enrich it when allowed and when there is something new.
    if merge and merge_missing(dup, entry):
        _rewrite(path, existing)
        return AppendResult("merged", dup, duplicate_of=dup)

    return AppendResult("skipped", dup, duplicate_of=dup)
@@ -0,0 +1,256 @@
1
+ """antifile CLI — extract BibTeX from PDFs / URLs / identifiers and append to a .bib file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from rich.console import Console
11
+ from rich.syntax import Syntax
12
+
13
+ from antifile import bibfile
14
+ from antifile._base import ExtractionResult, ExtractorUnavailableError, NetworkError
15
+ from antifile.dispatch import extract as dispatch_extract
16
+ from antifile.pdf import (
17
+ extract_from_pdf,
18
+ extract_text,
19
+ extract_probable_title,
20
+ extract_via_claude_code,
21
+ extract_via_llm,
22
+ find_identifiers,
23
+ resolve_arxiv,
24
+ resolve_doi,
25
+ resolve_isbn,
26
+ search_crossref_by_title,
27
+ )
28
+ from antifile.preview import show_pdf_first_page
29
+
30
# Shared Rich console used for all output printed by this module.
console = Console()
# Accepted values for --method; "auto" hands the PDF to extract_from_pdf.
_METHODS = ("auto", "doi", "arxiv", "isbn", "crossref", "llm", "claude-code")
32
+
33
+
34
def _api_keys_from_env() -> dict[str, str]:
    """Collect LLM provider API keys from the environment.

    Returns a mapping with any of the keys "anthropic", "openai" and
    "gemini" whose environment variable is set to a non-empty value. For
    Gemini, GEMINI_API_KEY takes precedence over GOOGLE_API_KEY.
    """
    # provider name -> environment variables to try, in priority order
    lookup = (
        ("anthropic", ("ANTHROPIC_API_KEY",)),
        ("openai", ("OPENAI_API_KEY",)),
        ("gemini", ("GEMINI_API_KEY", "GOOGLE_API_KEY")),
    )
    found: dict[str, str] = {}
    for provider, variables in lookup:
        for variable in variables:
            value = os.environ.get(variable)
            if value:
                found[provider] = value
                break
    return found
43
+
44
+
45
def _extract_pdf_with_method(
    pdf_path: Path, method: str, api_keys: dict[str, str]
) -> ExtractionResult:
    """Apply one explicitly requested extraction strategy to a PDF.

    "auto" is delegated wholesale to ``extract_from_pdf``. For every other
    method the text of the first three pages is read once, identifiers are
    searched in it (skipped when the text is blank), and only the requested
    strategy is tried. Identifier-based strategies try at most the first
    three candidates of their kind.

    The result's confidence is 0.0 when nothing was extracted, 0.7 for the
    "llm", "claude-code" and "crossref" strategies, and 0.95 otherwise. A
    successful entry gets its "file" field set to the PDF path.
    """
    if method == "auto":
        return extract_from_pdf(pdf_path, api_keys=api_keys)

    from antifile._base import Identifiers

    text = extract_text(pdf_path, max_pages=3)
    identifiers = find_identifiers(text) if text.strip() else Identifiers()

    def first_resolved(candidates, resolver):
        # Stop at the first truthy resolution; if none succeeds, hand back
        # whatever the last attempt returned (None when nothing was tried).
        outcome = None
        for candidate in candidates[:3]:
            outcome = resolver(candidate)
            if outcome:
                break
        return outcome

    def via_crossref():
        title = extract_probable_title(text)
        return search_crossref_by_title(title) if title else None

    # Lambdas keep each strategy lazy: only the selected one runs.
    strategies = {
        "doi": lambda: first_resolved(identifiers.dois, resolve_doi),
        "arxiv": lambda: first_resolved(identifiers.arxiv_ids, resolve_arxiv),
        "isbn": lambda: first_resolved(identifiers.isbns, resolve_isbn),
        "crossref": via_crossref,
        "llm": lambda: extract_via_llm(text, api_keys),
        "claude-code": lambda: extract_via_claude_code(text),
    }
    strategy = strategies.get(method)
    entry = strategy() if strategy is not None else None

    if entry:
        entry.fields["file"] = str(pdf_path)
        score = 0.7 if method in ("llm", "claude-code", "crossref") else 0.95
    else:
        score = 0.0

    return ExtractionResult(
        entry=entry,
        source=method,
        confidence=score,
        identifiers=identifiers,
        pdf_path=pdf_path,
    )
91
+
92
+
93
def _extract_one(
    input_str: str, method: str, api_keys: dict[str, str]
) -> ExtractionResult:
    """Route one input to the right extractor, honoring --method for PDFs.

    A non-"auto" method only applies when the input is an existing regular
    file with a ``.pdf`` suffix (case-insensitive); every other input goes
    through the general dispatcher.
    """
    candidate = Path(input_str).expanduser()
    method_forced = method != "auto"
    is_pdf_file = (
        method_forced
        and candidate.exists()
        and candidate.is_file()
        and candidate.suffix.lower() == ".pdf"
    )
    if not is_pdf_file:
        return dispatch_extract(input_str, api_keys=api_keys)
    return _extract_pdf_with_method(candidate, method, api_keys)
101
+
102
+
103
def _looks_like_path(s: str) -> bool:
    """Return True when ``s`` reads as a filesystem path, not a URL/identifier.

    Used to report "path not found" for mistyped paths before the input is
    classified. A bare "contains a slash" test is not enough, because URLs,
    DOIs (``10.1145/3292500``) and old-style arXiv IDs (``hep-th/9901001``)
    all contain slashes and must reach the classifier instead.

    Rules, in order:

    - an explicit path prefix (``/``, ``~``, ``./``, ``../``) is always a path;
    - without any ``/`` it is not a path;
    - URLs (``scheme://``), ``doi:``/``arxiv:`` prefixed values, anything
      containing a DOI (``10.<registrant>/<suffix>``) and old-style arXiv IDs
      are not paths;
    - any other value containing ``/`` is a (relative) path.
    """
    import re  # function-scope import: the module does not import `re`

    if s.startswith(("/", "~", "./", "../")):
        return True
    if "/" not in s:
        return False
    if "://" in s or s.lower().startswith(("doi:", "arxiv:")):
        return False
    # DOI at the start, after a "doi:"-style prefix, or inside a schemeless
    # resolver URL such as "doi.org/10.1145/3292500".
    if re.search(r"(?:^|[/:])10\.\d{4,9}/\S", s):
        return False
    # Old-style arXiv identifier: archive[.SUBJECT]/YYMMNNN[vN]
    if re.fullmatch(r"[a-z][a-z-]*(?:\.[a-z]{2})?/\d{7}(?:v\d+)?", s, re.IGNORECASE):
        return False
    return True
105
+
106
+
107
def _process(
    input_str: str,
    output: Path,
    method: str,
    api_keys: dict[str, str],
    show_preview: bool,
    merge: bool,
    force: bool,
    counters: dict[str, int],
    label: str | None = None,
) -> None:
    """Extract one input, report the outcome and append the entry to `output`.

    Exactly one of the `counters` buckets ("added", "merged", "duplicates",
    "skipped", "failed") is incremented per call. Extraction errors are
    reported and counted as "failed" rather than raised, so a batch run keeps
    going; ``KeyboardInterrupt`` is not caught here.

    Args:
        input_str: Path, URL or identifier given by the user.
        output: Target .bib file.
        method: Extraction method (only honored for PDF files).
        api_keys: Provider API keys passed through to the extractors.
        show_preview: Show the first page of PDF inputs before extracting.
        merge: On duplicate, fill missing fields of the existing entry.
        force: Append even when a duplicate exists.
        counters: Outcome tally, mutated in place.
        label: Text shown instead of `input_str` in the progress line.
    """
    # File names, paths and exception messages are arbitrary text; escape
    # them so square brackets are printed literally instead of being parsed
    # as Rich markup (which can drop text or raise MarkupError).
    from rich.markup import escape

    display = label or input_str
    console.print(f"[cyan]→[/] {escape(display)}")

    path = Path(input_str).expanduser()

    # Detect path-like inputs that don't exist before trying to classify them
    # as URLs/identifiers — saves a confusing "Could not classify" message.
    if _looks_like_path(input_str) and not path.exists():
        console.print(f" [yellow]skipped:[/] path not found ({escape(str(path))})")
        counters["skipped"] += 1
        return

    is_pdf = path.exists() and path.is_file() and path.suffix.lower() == ".pdf"

    if show_preview and is_pdf:
        show_pdf_first_page(path)

    try:
        result = _extract_one(input_str, method, api_keys)
    except Exception as e:
        # Per-item boundary: one bad input must not abort a folder run.
        # Anticipated failures carry a self-explanatory message; for anything
        # else include the exception type, since str(e) alone (e.g. a bare
        # KeyError key) is often meaningless.
        if isinstance(e, (ExtractorUnavailableError, NetworkError)):
            detail = str(e)
        else:
            detail = f"{type(e).__name__}: {e}"
        console.print(f" [red]error:[/] {escape(detail)}")
        counters["failed"] += 1
        return

    for err in result.errors:
        console.print(f" [yellow]warning:[/] {escape(str(err))}")

    if result.entry is None:
        if result.source == "unknown":
            console.print(" [yellow]skipped:[/] unrecognized input")
            counters["skipped"] += 1
        else:
            console.print(f" [red]no entry extracted[/] (source={result.source})")
            counters["failed"] += 1
        return

    conf_color = "green" if result.confidence >= 0.9 else "yellow"
    console.print(
        f" [dim]source:[/] {result.source} "
        f"[{conf_color}]confidence: {result.confidence:.0%}[/]"
    )
    console.print(
        Syntax(result.entry.to_bibtex(), "bibtex", theme="monokai", line_numbers=False, padding=(0, 1))
    )

    res = bibfile.append(output, result.entry, merge=merge, force=force)
    if res.status == "added":
        console.print(
            f" [green]added[/] [bold]{escape(str(res.entry.key))}[/] → {escape(str(output))}"
        )
        counters["added"] += 1
    elif res.status == "merged":
        console.print(
            f" [green]merged[/] missing fields into [bold]{escape(str(res.entry.key))}[/]"
        )
        counters["merged"] += 1
    else:
        # Duplicate with nothing new to merge: intentionally printed nowhere,
        # it only shows up in the "duplicates" total of the final summary.
        counters["duplicates"] += 1
177
+
178
+
179
# File suffixes (lowercase) picked up when the input is a folder.
_SUPPORTED_SUFFIXES = (".pdf", ".epub")


def _iter_supported(folder: Path, recursive: bool) -> list[Path]:
    """List the supported documents under `folder`, sorted by path.

    Only regular files whose suffix (compared case-insensitively) is in
    ``_SUPPORTED_SUFFIXES`` are returned. With `recursive` the whole tree
    below `folder` is searched, otherwise only its direct children.
    """
    if recursive:
        pattern = "**/*"
    else:
        pattern = "*"

    matches: list[Path] = []
    for candidate in folder.glob(pattern):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() in _SUPPORTED_SUFFIXES:
            matches.append(candidate)
    matches.sort()
    return matches
188
+
189
+
190
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the ``antifile`` program."""
    parser = argparse.ArgumentParser(
        prog="antifile",
        description="Extract BibTeX from PDFs, EPUBs, URLs, DOIs, arXiv IDs, or ISBNs into a .bib file.",
    )
    parser.add_argument(
        "input",
        help="PDF/EPUB path, folder, URL, DOI, arXiv ID, or ISBN",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=True,
        type=Path,
        help="Target .bib file (created if missing; entries appended with dedup)",
    )
    parser.add_argument(
        "--method",
        choices=_METHODS,
        default="auto",
        help="Force a specific extraction method (PDF only). Default: auto.",
    )
    parser.add_argument(
        "--recursive",
        action="store_true",
        help="Recurse into subfolders for folder input",
    )
    parser.add_argument(
        "--no-preview",
        action="store_true",
        help="Skip first-page PDF preview",
    )
    parser.add_argument(
        "--no-merge",
        action="store_true",
        help="On duplicate, skip without filling missing fields (default: fill missing)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Append even if a duplicate exists (auto-suffixes the key)",
    )
    return parser


def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return the process exit status.

    A folder input is expanded to its supported files, which are processed
    one by one (Ctrl-C stops the batch but still prints the summary); any
    other input is processed as a single item. A folder without supported
    files returns 0 immediately. Otherwise a summary of the outcome counters
    is printed and the status is 0 when nothing failed, 1 otherwise.
    """
    args = _build_parser().parse_args(argv)

    api_keys = _api_keys_from_env()
    show_preview = not args.no_preview
    merge = not args.no_merge
    counters = {"added": 0, "merged": 0, "duplicates": 0, "failed": 0, "skipped": 0}

    def run(target: str, label: str | None = None) -> None:
        # All per-item calls share everything except the input and its label.
        _process(
            target, args.output, args.method, api_keys,
            show_preview, merge, args.force, counters,
            label=label,
        )

    input_str: str = args.input
    root = Path(input_str).expanduser()
    is_folder = root.exists() and root.is_dir()

    if not is_folder:
        run(input_str)
    else:
        files = _iter_supported(root, args.recursive)
        if not files:
            console.print(f"[dim]No PDF/EPUB files found in {root}[/]")
            return 0
        total = len(files)
        console.print(f"[cyan]Found {total} file(s) in {root}[/]")
        for position, file_path in enumerate(files, 1):
            try:
                run(str(file_path), label=f"[{position}/{total}] {file_path.name}")
            except KeyboardInterrupt:
                console.print("\n[dim]stopped[/]")
                break

    summary = " ".join(
        (
            f"[green]added: {counters['added']}[/]",
            f"[green]merged: {counters['merged']}[/]",
            f"[dim]duplicates: {counters['duplicates']}[/]",
            f"[yellow]skipped: {counters['skipped']}[/]",
            f"[red]failed: {counters['failed']}[/]",
        )
    )
    console.print()
    console.print(summary)
    return 1 if counters["failed"] else 0
253
+
254
+
255
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())