pdf-file-renamer 0.4.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. pdf_file_renamer-0.6.0/.env.example +9 -0
  2. pdf_file_renamer-0.6.0/.github/workflows/ci.yml +78 -0
  3. pdf_file_renamer-0.6.0/.github/workflows/release.yml +69 -0
  4. pdf_file_renamer-0.6.0/.gitignore +55 -0
  5. pdf_file_renamer-0.6.0/.python-version +1 -0
  6. {pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info → pdf_file_renamer-0.6.0}/PKG-INFO +50 -23
  7. pdf_file_renamer-0.4.2/PKG-INFO → pdf_file_renamer-0.6.0/README.md +36 -35
  8. pdf_file_renamer-0.6.0/REFACTORING_SUMMARY.md +288 -0
  9. pdf_file_renamer-0.6.0/coverage.xml +854 -0
  10. {pdf_file_renamer-0.4.2 → pdf_file_renamer-0.6.0}/pyproject.toml +13 -4
  11. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/__init__.py +1 -1
  12. pdf_file_renamer-0.6.0/src/pdf_file_renamer/application/__init__.py +7 -0
  13. pdf_file_renamer-0.6.0/src/pdf_file_renamer/application/filename_service.py +172 -0
  14. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/application/pdf_rename_workflow.py +29 -4
  15. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/application/rename_service.py +1 -1
  16. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/domain/__init__.py +2 -2
  17. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/domain/models.py +29 -0
  18. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/domain/ports.py +18 -1
  19. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/__init__.py +1 -1
  20. pdf_file_renamer-0.6.0/src/pdf_file_renamer/infrastructure/doi/__init__.py +5 -0
  21. pdf_file_renamer-0.6.0/src/pdf_file_renamer/infrastructure/doi/pdf2doi_extractor.py +129 -0
  22. pdf_file_renamer-0.6.0/src/pdf_file_renamer/infrastructure/llm/__init__.py +5 -0
  23. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/llm/pydantic_ai_provider.py +2 -2
  24. pdf_file_renamer-0.6.0/src/pdf_file_renamer/infrastructure/pdf/__init__.py +7 -0
  25. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/pdf/composite.py +2 -2
  26. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/pdf/docling_extractor.py +2 -2
  27. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/pdf/pymupdf_extractor.py +2 -2
  28. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/main.py +1 -1
  29. pdf_file_renamer-0.6.0/src/pdf_file_renamer/presentation/__init__.py +6 -0
  30. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/presentation/cli.py +10 -5
  31. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/presentation/formatters.py +1 -1
  32. pdf_file_renamer-0.6.0/tests/__init__.py +1 -0
  33. pdf_file_renamer-0.6.0/tests/data/2025-dennis-managing-complexity.pdf +0 -0
  34. pdf_file_renamer-0.6.0/tests/data/Camp_of_the_Saints.pdf +0 -0
  35. pdf_file_renamer-0.6.0/tests/data/s43588-025-00854-1.pdf +13838 -22
  36. {pdf_file_renamer-0.4.2 → pdf_file_renamer-0.6.0}/tests/test_domain_models.py +1 -1
  37. {pdf_file_renamer-0.4.2 → pdf_file_renamer-0.6.0}/tests/test_filename_service.py +3 -3
  38. {pdf_file_renamer-0.4.2 → pdf_file_renamer-0.6.0}/tests/test_rename_service.py +1 -1
  39. pdf_file_renamer-0.4.2/README.md +0 -219
  40. pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info/SOURCES.txt +0 -32
  41. pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info/dependency_links.txt +0 -1
  42. pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info/entry_points.txt +0 -2
  43. pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info/requires.txt +0 -18
  44. pdf_file_renamer-0.4.2/pdf_file_renamer.egg-info/top_level.txt +0 -1
  45. pdf_file_renamer-0.4.2/pdf_renamer/application/__init__.py +0 -7
  46. pdf_file_renamer-0.4.2/pdf_renamer/application/filename_service.py +0 -70
  47. pdf_file_renamer-0.4.2/pdf_renamer/infrastructure/llm/__init__.py +0 -5
  48. pdf_file_renamer-0.4.2/pdf_renamer/infrastructure/pdf/__init__.py +0 -7
  49. pdf_file_renamer-0.4.2/pdf_renamer/presentation/__init__.py +0 -6
  50. pdf_file_renamer-0.4.2/setup.cfg +0 -4
  51. {pdf_file_renamer-0.4.2 → pdf_file_renamer-0.6.0}/LICENSE +0 -0
  52. {pdf_file_renamer-0.4.2/pdf_renamer → pdf_file_renamer-0.6.0/src/pdf_file_renamer}/infrastructure/config.py +0 -0
@@ -0,0 +1,9 @@
1
+ # OpenAI API Key (required for OpenAI, optional for custom endpoints)
2
+ OPENAI_API_KEY=your_api_key_here
3
+
4
+ # Optional: Custom base URL for OpenAI-compatible APIs
5
+ # Examples:
6
+ # - Ollama: http://localhost:11434/v1
7
+ # - LM Studio: http://localhost:1234/v1
8
+ # - vLLM: http://your-server:8000/v1
9
+ # LLM_BASE_URL=http://localhost:11434/v1
@@ -0,0 +1,78 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main, develop]
8
+
9
+ jobs:
10
+ test:
11
+ name: Test Python ${{ matrix.python-version }}
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ matrix:
15
+ python-version: ["3.11", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v4
22
+ with:
23
+ version: "latest"
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ run: uv python install ${{ matrix.python-version }}
27
+
28
+ - name: Install dependencies
29
+ run: uv sync --all-extras
30
+
31
+ - name: Run ruff linting
32
+ run: uv run ruff check src/pdf_file_renamer tests
33
+
34
+ - name: Run ruff formatting check
35
+ run: uv run ruff format --check src/pdf_file_renamer tests
36
+
37
+ - name: Run mypy type checking
38
+ run: uv run mypy src/pdf_file_renamer
39
+
40
+ - name: Run tests with coverage
41
+ run: uv run pytest tests/ --cov=pdf_file_renamer --cov-report=xml --cov-report=term
42
+
43
+ - name: Upload coverage to Codecov
44
+ uses: codecov/codecov-action@v4
45
+ if: matrix.python-version == '3.11'
46
+ with:
47
+ file: ./coverage.xml
48
+ fail_ci_if_error: false
49
+
50
+ build:
51
+ name: Build distribution
52
+ runs-on: ubuntu-latest
53
+ needs: test
54
+
55
+ steps:
56
+ - uses: actions/checkout@v4
57
+
58
+ - name: Install uv
59
+ uses: astral-sh/setup-uv@v4
60
+ with:
61
+ version: "latest"
62
+
63
+ - name: Set up Python
64
+ run: uv python install 3.11
65
+
66
+ - name: Build package
67
+ run: uv build
68
+
69
+ - name: Check build
70
+ run: |
71
+ ls -lh dist/
72
+ uv run twine check dist/*
73
+
74
+ - name: Upload artifacts
75
+ uses: actions/upload-artifact@v4
76
+ with:
77
+ name: dist
78
+ path: dist/
@@ -0,0 +1,69 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ build-and-release:
13
+ name: Build and Release
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Set up Python
25
+ run: uv python install 3.11
26
+
27
+ - name: Install dependencies
28
+ run: uv sync --all-extras
29
+
30
+ - name: Run tests
31
+ run: uv run pytest tests/
32
+
33
+ - name: Build package
34
+ run: uv build
35
+
36
+ - name: Extract version from tag
37
+ id: get_version
38
+ run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
39
+
40
+ - name: Publish to PyPI
41
+ env:
42
+ TWINE_USERNAME: __token__
43
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
44
+ run: |
45
+ uv run twine upload dist/*
46
+
47
+ - name: Create Release
48
+ uses: softprops/action-gh-release@v1
49
+ with:
50
+ files: dist/*
51
+ generate_release_notes: true
52
+ body: |
53
+ ## What's Changed
54
+
55
+ Release version ${{ steps.get_version.outputs.VERSION }}
56
+
57
+ See the [REFACTORING_SUMMARY.md](https://github.com/${{ github.repository }}/blob/${{ github.ref_name }}/REFACTORING_SUMMARY.md) for architecture details.
58
+
59
+ ### Installation
60
+
61
+ **From PyPI:**
62
+ ```bash
63
+ pip install pdf-file-renamer==${{ steps.get_version.outputs.VERSION }}
64
+ ```
65
+
66
+ **Using uvx (no installation required):**
67
+ ```bash
68
+ uvx pdf-renamer@${{ steps.get_version.outputs.VERSION }}
69
+ ```
@@ -0,0 +1,55 @@
1
+ .claude
2
+ # Python
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ *.so
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+
24
+ # Virtual environments
25
+ venv/
26
+ ENV/
27
+ env/
28
+ .venv/
29
+
30
+ # uv
31
+ uv.lock
32
+
33
+ # IDEs
34
+ .vscode/
35
+ .idea/
36
+ *.swp
37
+ *.swo
38
+ *~
39
+ .DS_Store
40
+
41
+ # Environment variables
42
+ .env
43
+ .env.local
44
+
45
+ # Testing
46
+ .pytest_cache/
47
+ .coverage
48
+ htmlcov/
49
+
50
+ # Logs
51
+ *.log
52
+
53
+ # Temporary files
54
+ *.tmp
55
+ .cache/
@@ -0,0 +1 @@
1
+ 3.11
@@ -1,28 +1,28 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdf-file-renamer
3
- Version: 0.4.2
3
+ Version: 0.6.0
4
4
  Summary: Intelligent PDF renaming using LLMs
5
- Requires-Python: >=3.11
6
- Description-Content-Type: text/markdown
7
5
  License-File: LICENSE
8
- Requires-Dist: pydantic>=2.10.6
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: docling-core>=2.0.0
8
+ Requires-Dist: docling-parse>=2.0.0
9
+ Requires-Dist: pdf2doi>=1.7
9
10
  Requires-Dist: pydantic-ai>=1.0.17
10
11
  Requires-Dist: pydantic-settings>=2.7.1
12
+ Requires-Dist: pydantic>=2.10.6
11
13
  Requires-Dist: pymupdf>=1.26.5
12
- Requires-Dist: docling-parse>=2.0.0
13
- Requires-Dist: docling-core>=2.0.0
14
14
  Requires-Dist: python-dotenv>=1.1.1
15
15
  Requires-Dist: rich>=14.2.0
16
- Requires-Dist: typer>=0.19.2
17
16
  Requires-Dist: tenacity>=9.0.0
17
+ Requires-Dist: typer>=0.19.2
18
18
  Provides-Extra: dev
19
- Requires-Dist: pytest>=8.3.4; extra == "dev"
20
- Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
21
- Requires-Dist: pytest-asyncio>=0.25.2; extra == "dev"
22
- Requires-Dist: pytest-mock>=3.14.0; extra == "dev"
23
- Requires-Dist: ruff>=0.9.1; extra == "dev"
24
- Requires-Dist: mypy>=1.14.1; extra == "dev"
25
- Dynamic: license-file
19
+ Requires-Dist: mypy>=1.14.1; extra == 'dev'
20
+ Requires-Dist: pytest-asyncio>=0.25.2; extra == 'dev'
21
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
22
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
23
+ Requires-Dist: pytest>=8.3.4; extra == 'dev'
24
+ Requires-Dist: ruff>=0.9.1; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
26
 
27
27
  # PDF Renamer
28
28
 
@@ -44,9 +44,11 @@ Intelligent PDF file renaming using LLMs. This tool analyzes PDF content and met
44
44
 
45
45
  ## Features
46
46
 
47
+ - **DOI-based naming** - Automatically extracts DOI and fetches authoritative metadata for academic papers
47
48
  - **Advanced PDF parsing** using docling-parse for better structure-aware extraction
48
49
  - **OCR fallback** for scanned PDFs with low text content
49
50
  - **Smart LLM prompting** with multi-pass analysis for improved accuracy
51
+ - **Hybrid approach** - Uses DOI metadata when available, falls back to LLM analysis for other documents
50
52
  - Suggests filenames in format: `Author-Topic-Year.pdf`
51
53
  - Dry-run mode to preview changes before applying
52
54
  - **Enhanced interactive mode** with options to accept, manually edit, retry, or skip each file
@@ -209,19 +211,44 @@ You can use interactive mode with `--dry-run` to preview without actually renami
209
211
 
210
212
  ## How It Works
211
213
 
212
- 1. **Extract**: Uses docling-parse to read first 5 pages with structure-aware parsing, falls back to PyMuPDF if needed
213
- 2. **OCR**: Automatically applies OCR for scanned PDFs with minimal text
214
- 3. **Metadata Enhancement**: Extracts focused hints (years, emails, author sections) to supplement unreliable PDF metadata
215
- 4. **Analyze**: Sends full content excerpt to LLM with enhanced metadata and detailed extraction instructions
216
- 5. **Multi-pass Review**: Low-confidence results trigger a second analysis pass with focused prompts
217
- 6. **Suggest**: LLM returns filename in `Author-Topic-Year` format with confidence level and reasoning
218
- 7. **Interactive Review** (optional): User can accept, edit, retry, or skip each suggestion
219
- 8. **Rename**: Applies suggestions (if not in dry-run mode)
214
+ ### Intelligent Hybrid Approach
215
+
216
+ The tool uses a multi-strategy approach to generate accurate filenames:
217
+
218
+ 1. **DOI Detection** (for academic papers)
219
+ - Searches PDF for DOI identifiers using [pdf2doi](https://github.com/MicheleCotrufo/pdf2doi)
220
+ - If found, queries authoritative metadata (title, authors, year, journal)
221
+ - Generates filename with **very high confidence** from validated metadata
222
+ - **Saves API costs** - no LLM call needed for papers with DOIs
223
+
224
+ 2. **LLM Analysis** (fallback when no DOI is found)
225
+ - **Extract**: Uses docling-parse to read first 5 pages with structure-aware parsing, falls back to PyMuPDF if needed
226
+ - **OCR**: Automatically applies OCR for scanned PDFs with minimal text
227
+ - **Metadata Enhancement**: Extracts focused hints (years, emails, author sections) to supplement unreliable PDF metadata
228
+ - **Analyze**: Sends full content excerpt to LLM with enhanced metadata and detailed extraction instructions
229
+ - **Multi-pass Review**: Low-confidence results trigger a second analysis pass with focused prompts
230
+ - **Suggest**: LLM returns filename in `Author-Topic-Year` format with confidence level and reasoning
231
+
232
+ 3. **Interactive Review** (optional): User can accept, edit, retry, or skip each suggestion
233
+ 4. **Rename**: Applies suggestions (if not in dry-run mode)
234
+
235
+ ### Benefits of DOI Integration
236
+
237
+ - **Accuracy**: DOI metadata is canonical and verified
238
+ - **Speed**: Instant lookup vs. LLM processing time
239
+ - **Cost**: Free DOI lookups save on API costs for academic papers
240
+ - **Reliability**: Works even when PDF text extraction is poor
220
241
 
221
242
  ## Cost Considerations
222
243
 
223
- **OpenAI:**
244
+ **DOI-based Naming (Academic Papers):**
245
+ - **Completely free** - No API costs
246
+ - **No LLM needed** - Direct metadata lookup
247
+ - Works for most academic papers with embedded DOIs
248
+
249
+ **OpenAI (Fallback):**
224
250
  - Uses `gpt-4o-mini` by default (very cost-effective)
251
+ - Only called when DOI not found
225
252
  - Processes first ~4500 characters per PDF
226
253
  - Typical cost: ~$0.001-0.003 per PDF
227
254
 
@@ -1,29 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: pdf-file-renamer
3
- Version: 0.4.2
4
- Summary: Intelligent PDF renaming using LLMs
5
- Requires-Python: >=3.11
6
- Description-Content-Type: text/markdown
7
- License-File: LICENSE
8
- Requires-Dist: pydantic>=2.10.6
9
- Requires-Dist: pydantic-ai>=1.0.17
10
- Requires-Dist: pydantic-settings>=2.7.1
11
- Requires-Dist: pymupdf>=1.26.5
12
- Requires-Dist: docling-parse>=2.0.0
13
- Requires-Dist: docling-core>=2.0.0
14
- Requires-Dist: python-dotenv>=1.1.1
15
- Requires-Dist: rich>=14.2.0
16
- Requires-Dist: typer>=0.19.2
17
- Requires-Dist: tenacity>=9.0.0
18
- Provides-Extra: dev
19
- Requires-Dist: pytest>=8.3.4; extra == "dev"
20
- Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
21
- Requires-Dist: pytest-asyncio>=0.25.2; extra == "dev"
22
- Requires-Dist: pytest-mock>=3.14.0; extra == "dev"
23
- Requires-Dist: ruff>=0.9.1; extra == "dev"
24
- Requires-Dist: mypy>=1.14.1; extra == "dev"
25
- Dynamic: license-file
26
-
27
1
  # PDF Renamer
28
2
 
29
3
  [![PyPI version](https://img.shields.io/pypi/v/pdf-file-renamer.svg)](https://pypi.org/project/pdf-file-renamer/)
@@ -44,9 +18,11 @@ Intelligent PDF file renaming using LLMs. This tool analyzes PDF content and met
44
18
 
45
19
  ## Features
46
20
 
21
+ - **DOI-based naming** - Automatically extracts DOI and fetches authoritative metadata for academic papers
47
22
  - **Advanced PDF parsing** using docling-parse for better structure-aware extraction
48
23
  - **OCR fallback** for scanned PDFs with low text content
49
24
  - **Smart LLM prompting** with multi-pass analysis for improved accuracy
25
+ - **Hybrid approach** - Uses DOI metadata when available, falls back to LLM analysis for other documents
50
26
  - Suggests filenames in format: `Author-Topic-Year.pdf`
51
27
  - Dry-run mode to preview changes before applying
52
28
  - **Enhanced interactive mode** with options to accept, manually edit, retry, or skip each file
@@ -209,19 +185,44 @@ You can use interactive mode with `--dry-run` to preview without actually renami
209
185
 
210
186
  ## How It Works
211
187
 
212
- 1. **Extract**: Uses docling-parse to read first 5 pages with structure-aware parsing, falls back to PyMuPDF if needed
213
- 2. **OCR**: Automatically applies OCR for scanned PDFs with minimal text
214
- 3. **Metadata Enhancement**: Extracts focused hints (years, emails, author sections) to supplement unreliable PDF metadata
215
- 4. **Analyze**: Sends full content excerpt to LLM with enhanced metadata and detailed extraction instructions
216
- 5. **Multi-pass Review**: Low-confidence results trigger a second analysis pass with focused prompts
217
- 6. **Suggest**: LLM returns filename in `Author-Topic-Year` format with confidence level and reasoning
218
- 7. **Interactive Review** (optional): User can accept, edit, retry, or skip each suggestion
219
- 8. **Rename**: Applies suggestions (if not in dry-run mode)
188
+ ### Intelligent Hybrid Approach
189
+
190
+ The tool uses a multi-strategy approach to generate accurate filenames:
191
+
192
+ 1. **DOI Detection** (for academic papers)
193
+ - Searches PDF for DOI identifiers using [pdf2doi](https://github.com/MicheleCotrufo/pdf2doi)
194
+ - If found, queries authoritative metadata (title, authors, year, journal)
195
+ - Generates filename with **very high confidence** from validated metadata
196
+ - **Saves API costs** - no LLM call needed for papers with DOIs
197
+
198
+ 2. **LLM Analysis** (fallback when no DOI is found)
199
+ - **Extract**: Uses docling-parse to read first 5 pages with structure-aware parsing, falls back to PyMuPDF if needed
200
+ - **OCR**: Automatically applies OCR for scanned PDFs with minimal text
201
+ - **Metadata Enhancement**: Extracts focused hints (years, emails, author sections) to supplement unreliable PDF metadata
202
+ - **Analyze**: Sends full content excerpt to LLM with enhanced metadata and detailed extraction instructions
203
+ - **Multi-pass Review**: Low-confidence results trigger a second analysis pass with focused prompts
204
+ - **Suggest**: LLM returns filename in `Author-Topic-Year` format with confidence level and reasoning
205
+
206
+ 3. **Interactive Review** (optional): User can accept, edit, retry, or skip each suggestion
207
+ 4. **Rename**: Applies suggestions (if not in dry-run mode)
208
+
209
+ ### Benefits of DOI Integration
210
+
211
+ - **Accuracy**: DOI metadata is canonical and verified
212
+ - **Speed**: Instant lookup vs. LLM processing time
213
+ - **Cost**: Free DOI lookups save on API costs for academic papers
214
+ - **Reliability**: Works even when PDF text extraction is poor
220
215
 
221
216
  ## Cost Considerations
222
217
 
223
- **OpenAI:**
218
+ **DOI-based Naming (Academic Papers):**
219
+ - **Completely free** - No API costs
220
+ - **No LLM needed** - Direct metadata lookup
221
+ - Works for most academic papers with embedded DOIs
222
+
223
+ **OpenAI (Fallback):**
224
224
  - Uses `gpt-4o-mini` by default (very cost-effective)
225
+ - Only called when DOI not found
225
226
  - Processes first ~4500 characters per PDF
226
227
  - Typical cost: ~$0.001-0.003 per PDF
227
228