pdf-file-renamer 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-file-renamer
3
+ Version: 0.4.2
4
+ Summary: Intelligent PDF renaming using LLMs
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: pydantic>=2.10.6
9
+ Requires-Dist: pydantic-ai>=1.0.17
10
+ Requires-Dist: pydantic-settings>=2.7.1
11
+ Requires-Dist: pymupdf>=1.26.5
12
+ Requires-Dist: docling-parse>=2.0.0
13
+ Requires-Dist: docling-core>=2.0.0
14
+ Requires-Dist: python-dotenv>=1.1.1
15
+ Requires-Dist: rich>=14.2.0
16
+ Requires-Dist: typer>=0.19.2
17
+ Requires-Dist: tenacity>=9.0.0
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=8.3.4; extra == "dev"
20
+ Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
21
+ Requires-Dist: pytest-asyncio>=0.25.2; extra == "dev"
22
+ Requires-Dist: pytest-mock>=3.14.0; extra == "dev"
23
+ Requires-Dist: ruff>=0.9.1; extra == "dev"
24
+ Requires-Dist: mypy>=1.14.1; extra == "dev"
25
+ Dynamic: license-file
26
+
27
+ # PDF Renamer
28
+
29
+ [![PyPI version](https://img.shields.io/pypi/v/pdf-file-renamer.svg)](https://pypi.org/project/pdf-file-renamer/)
30
+ [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
31
+ [![uv](https://img.shields.io/badge/uv-0.5+-orange.svg)](https://docs.astral.sh/uv/)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
33
+ [![pydantic-ai](https://img.shields.io/badge/pydantic--ai-1.0+-green.svg)](https://ai.pydantic.dev/)
34
+ [![GitHub](https://img.shields.io/badge/github-nostoslabs%2Fpdf--renamer-blue?logo=github)](https://github.com/nostoslabs/pdf-renamer)
35
+
36
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen.svg)](https://github.com/nostoslabs/pdf-renamer)
37
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
38
+ [![Type checked: mypy](https://img.shields.io/badge/type%20checked-mypy-blue.svg)](http://mypy-lang.org/)
39
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
40
+
41
+ Intelligent PDF file renaming using LLMs. This tool analyzes PDF content and metadata to suggest descriptive, standardized filenames.
42
+
43
+ > 🚀 Works with **OpenAI**, **Ollama**, **LM Studio**, and any OpenAI-compatible API
44
+
45
+ ## Features
46
+
47
+ - **Advanced PDF parsing** using docling-parse for better structure-aware extraction
48
+ - **OCR fallback** for scanned PDFs with low text content
49
+ - **Smart LLM prompting** with multi-pass analysis for improved accuracy
50
+ - Suggests filenames in format: `Author-Topic-Year.pdf`
51
+ - Dry-run mode to preview changes before applying
52
+ - **Enhanced interactive mode** with options to accept, manually edit, retry, or skip each file
53
+ - **Live progress display** with concurrent processing for speed
54
+ - **Configurable concurrency** limits for API calls and PDF extraction
55
+ - Batch processing of multiple PDFs with optional output directory
56
+
57
+ ## Installation
58
+
59
+ ### Quick Start (No Installation Required)
60
+
61
+ ```bash
62
+ # Run directly with uvx
63
+ uvx --from pdf-file-renamer pdf-renamer --dry-run /path/to/pdfs
64
+ ```
65
+
66
+ ### Install from PyPI
67
+
68
+ ```bash
69
+ # Using pip
70
+ pip install pdf-file-renamer
71
+
72
+ # Using uv
73
+ uv pip install pdf-file-renamer
74
+ ```
75
+
76
+ ### Install from Source
77
+
78
+ ```bash
79
+ # Clone and install
80
+ git clone https://github.com/nostoslabs/pdf-renamer.git
81
+ cd pdf-renamer
82
+ uv sync
83
+ ```
84
+
85
+ ## Configuration
86
+
87
+ Configure your LLM provider:
88
+
89
+ **Option A: OpenAI (Cloud)**
90
+ ```bash
91
+ cp .env.example .env
92
+ # Edit .env and add your OPENAI_API_KEY
93
+ ```
94
+
95
+ **Option B: Ollama or other local models**
96
+ ```bash
97
+ # No API key needed for local models
98
+ # Either set LLM_BASE_URL in .env or use --url flag
99
+ echo "LLM_BASE_URL=http://patmos:11434/v1" > .env
100
+ ```
101
+
102
+ ## Usage
103
+
104
+ ### Quick Start
105
+
106
+ ```bash
107
+ # Preview renames (dry-run mode)
108
+ pdf-renamer --dry-run /path/to/pdf/directory
109
+
110
+ # Actually rename files
111
+ pdf-renamer --no-dry-run /path/to/pdf/directory
112
+
113
+ # Interactive mode - review each file
114
+ pdf-renamer --interactive --no-dry-run /path/to/pdf/directory
115
+ ```
116
+
117
+ ### Using uvx (No Installation)
118
+
119
+ ```bash
120
+ # Run directly without installing
121
+ uvx --from pdf-file-renamer pdf-renamer --dry-run /path/to/pdfs
122
+
123
+ # Run from GitHub
124
+ uvx --from git+https://github.com/nostoslabs/pdf-renamer pdf-renamer --dry-run /path/to/pdfs
125
+ ```
126
+
127
+ ### Options
128
+
129
+ - `--dry-run/--no-dry-run`: Show suggestions without renaming (default: True)
130
+ - `--interactive, -i`: Interactive mode with rich options:
131
+ - **Accept** - Use the suggested filename
132
+ - **Edit** - Manually modify the filename
133
+ - **Retry** - Ask the LLM to generate a new suggestion
134
+ - **Skip** - Skip this file and move to the next
135
+ - `--model`: Model to use (default: llama3.2, works with any OpenAI-compatible API)
136
+ - `--url`: Custom base URL for OpenAI-compatible APIs (default: http://localhost:11434/v1)
137
+ - `--pattern`: Glob pattern for files (default: *.pdf)
138
+ - `--output-dir, -o`: Move renamed files to a different directory
139
+ - `--max-concurrent-api`: Maximum concurrent API calls (default: 3)
140
+ - `--max-concurrent-pdf`: Maximum concurrent PDF extractions (default: 10)
141
+
142
+ ### Examples
143
+
144
+ **Using OpenAI:**
145
+ ```bash
146
+ # Preview all PDFs in current directory
147
+ uvx --from pdf-file-renamer pdf-renamer --dry-run .
148
+
149
+ # Rename PDFs in specific directory
150
+ uvx --from pdf-file-renamer pdf-renamer --no-dry-run ~/Documents/Papers
151
+
152
+ # Use a different OpenAI model
153
+ uvx --from pdf-file-renamer pdf-renamer --model gpt-4o --dry-run .
154
+ ```
155
+
156
+ **Using Ollama (or other local models):**
157
+ ```bash
158
+ # Using Ollama on patmos server with gemma model
159
+ uvx --from pdf-file-renamer pdf-renamer --url http://patmos:11434/v1 --model gemma3:latest --dry-run .
160
+
161
+ # Using local Ollama with qwen model
162
+ uvx --from pdf-file-renamer pdf-renamer --url http://localhost:11434/v1 --model qwen2.5 --dry-run .
163
+
164
+ # Set URL in environment and just use model flag
165
+ export LLM_BASE_URL=http://patmos:11434/v1
166
+ uvx --from pdf-file-renamer pdf-renamer --model gemma3:latest --dry-run .
167
+ ```
168
+
169
+ **Other examples:**
170
+ ```bash
171
+ # Process only specific files
172
+ uvx --from pdf-file-renamer pdf-renamer --pattern "*2020*.pdf" --dry-run .
173
+
174
+ # Interactive mode with local model
175
+ uvx --from pdf-file-renamer pdf-renamer --url http://patmos:11434/v1 --model gemma3:latest --interactive --no-dry-run .
176
+
177
+ # Run directly from GitHub
178
+ uvx --from git+https://github.com/nostoslabs/pdf-renamer pdf-renamer --no-dry-run ~/Documents/Papers
179
+ ```
180
+
181
+ ## Interactive Mode
182
+
183
+ When using `--interactive` mode, you'll be presented with each file one at a time with detailed options:
184
+
185
+ ```
186
+ ================================================================================
187
+ Original: 2024-research-paper.pdf
188
+ Suggested: Smith-Machine-Learning-Applications-2024.pdf
189
+ Confidence: high
190
+ Reasoning: Clear author and topic identified from abstract
191
+ ================================================================================
192
+
193
+ Options:
194
+ y / yes / Enter - Accept suggested name
195
+ e / edit - Manually edit the filename
196
+ r / retry - Ask LLM to generate a new suggestion
197
+ n / no / skip - Skip this file
198
+
199
+ What would you like to do? [y]:
200
+ ```
201
+
202
+ This mode is perfect for:
203
+ - **Reviewing suggestions** before applying them
204
+ - **Fine-tuning filenames** that are close but not quite right
205
+ - **Retrying** when the LLM suggestion isn't good enough
206
+ - **Building confidence** in the tool before batch processing
207
+
208
+ You can use interactive mode with `--dry-run` to preview without actually renaming files, or with `--no-dry-run` to apply changes immediately after confirmation.
209
+
210
+ ## How It Works
211
+
212
+ 1. **Extract**: Uses docling-parse to read first 5 pages with structure-aware parsing, falls back to PyMuPDF if needed
213
+ 2. **OCR**: Automatically applies OCR for scanned PDFs with minimal text
214
+ 3. **Metadata Enhancement**: Extracts focused hints (years, emails, author sections) to supplement unreliable PDF metadata
215
+ 4. **Analyze**: Sends full content excerpt to LLM with enhanced metadata and detailed extraction instructions
216
+ 5. **Multi-pass Review**: Low-confidence results trigger a second analysis pass with focused prompts
217
+ 6. **Suggest**: LLM returns filename in `Author-Topic-Year` format with confidence level and reasoning
218
+ 7. **Interactive Review** (optional): User can accept, edit, retry, or skip each suggestion
219
+ 8. **Rename**: Applies suggestions (if not in dry-run mode)
220
+
221
+ ## Cost Considerations
222
+
223
+ **OpenAI:**
224
+ - `gpt-4o-mini` is a very cost-effective choice (select it with `--model gpt-4o-mini`; see Options for the CLI default model)
225
+ - Processes first ~4500 characters per PDF
226
+ - Typical cost: ~$0.001-0.003 per PDF
227
+
228
+ **Ollama/Local Models:**
229
+ - Completely free (runs on your hardware)
230
+ - Works with any Ollama model (llama3, qwen2.5, mistral, etc.)
231
+ - Also compatible with LM Studio, vLLM, and other OpenAI-compatible endpoints
232
+
233
+ ## Filename Format
234
+
235
+ The tool generates filenames in this format:
236
+ - `Smith-Kalman-Filtering-Applications-2020.pdf`
237
+ - `Adamy-Electronic-Warfare-Modeling-Techniques.pdf`
238
+ - `Blair-Monopulse-Processing-Unresolved-Targets.pdf`
239
+
240
+ Guidelines:
241
+ - First author's last name
242
+ - 3-6 word topic description (prioritizes clarity over brevity)
243
+ - Year (if identifiable)
244
+ - Hyphens between words
245
+ - Target ~80 characters (can be longer if needed for clarity; names are truncated at 100 characters)
@@ -0,0 +1,26 @@
1
+ pdf_file_renamer-0.4.2.dist-info/licenses/LICENSE,sha256=_w08V08WgoMpDMlGNlkIatC5QfQ_Ds_rXOBM8pl7ffE,1068
2
+ pdf_renamer/__init__.py,sha256=3RvsqaTO80Ud1KZZdLL_Lh-HXxagncoqI4m6u3VL_UE,85
3
+ pdf_renamer/main.py,sha256=5eTsrCQaotNwbdwJwandOlzrWODI73-L5mALHUIvqyw,140
4
+ pdf_renamer/application/__init__.py,sha256=EebV66jsZjubnh6PSEeNGs0A_JGeYXFghzGLDQ92eco,348
5
+ pdf_renamer/application/filename_service.py,sha256=Gk-nPnURsJYLDvoG_NZ4o_yHwAqK6bHU8kqzlev0XXM,2029
6
+ pdf_renamer/application/pdf_rename_workflow.py,sha256=MEUmDR6bLRB-ncNgKk3ahIfsIIk3Gsw1048cId6pYv4,4710
7
+ pdf_renamer/application/rename_service.py,sha256=rnScP2JwKMrIJcplFvxC0b2MOLzWqxpPKc3uDLHPjRI,2352
8
+ pdf_renamer/domain/__init__.py,sha256=UPcXunsI30iFK9dupv2Fc_YDreT1tAqsYaGEAK9sJew,493
9
+ pdf_renamer/domain/models.py,sha256=7S2ul3BoWi2aivWtmDa9LRlmeqURrGEV1sfSu8W6x5k,2246
10
+ pdf_renamer/domain/ports.py,sha256=ecnpkFYB3259ZjaZaOVo1sjP8nXD3x1NGR6hN5nn3gc,2550
11
+ pdf_renamer/infrastructure/__init__.py,sha256=CxBinDAuNm2X57-Y7XdXxVL6uHQXQqWpPrlznzu5_1M,182
12
+ pdf_renamer/infrastructure/config.py,sha256=baNL5_6_NNiS50ZNdql7fDwQbeAwf6f58HGYIWFQxQQ,2464
13
+ pdf_renamer/infrastructure/llm/__init__.py,sha256=evEhabaBshvekLO9DlAZvp-pQ_u03zYXqXaDfa9QUww,154
14
+ pdf_renamer/infrastructure/llm/pydantic_ai_provider.py,sha256=FM2Sd3n3lltJC76afrem5QuuS8qApEma52YD-Y8K89Y,9207
15
+ pdf_renamer/infrastructure/pdf/__init__.py,sha256=-WHYNLeBekm7jwIXRj4xpSIXyZz9olDiMIJLUjv2B-U,353
16
+ pdf_renamer/infrastructure/pdf/composite.py,sha256=1tlZ_X9_KVY01GTr1Hg3x_Ag7g3g4ik6_8R0jip8Wx0,1791
17
+ pdf_renamer/infrastructure/pdf/docling_extractor.py,sha256=7UamnbYFMgtD53oMqu1qKAq3FyQTQlq0Uw0k1sNzPw8,3964
18
+ pdf_renamer/infrastructure/pdf/pymupdf_extractor.py,sha256=lwIPr9yhy2hZVnuvoLcZvmjYSzbTra0AyW59UvU7GgU,5455
19
+ pdf_renamer/presentation/__init__.py,sha256=mxIxy8POUwewiMsmrOMVA8z9pe57lOghuwHZ5RAbMo4,201
20
+ pdf_renamer/presentation/cli.py,sha256=ykZx22quR9ye-ui9bLrRinD7BSChjSbGTRsazCafo5s,7819
21
+ pdf_renamer/presentation/formatters.py,sha256=ilUcXZ-7MpBlz7k7cqRAuixfkVT3cuD-pBcy5fsE2Qo,8514
22
+ pdf_file_renamer-0.4.2.dist-info/METADATA,sha256=xSIAQrGaKmT2o2vOT5HlX6ILaTmDyYbn6P8YG8JtK8U,8668
23
+ pdf_file_renamer-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ pdf_file_renamer-0.4.2.dist-info/entry_points.txt,sha256=IvW2oP2SRPv5qqFwDYBRCE53Q3JAyi_chbCo-0rdKQA,53
25
+ pdf_file_renamer-0.4.2.dist-info/top_level.txt,sha256=CFtpWKQjLObHZIssi5I3q7FXfLJZWKpHo7uuAiJ0pVY,12
26
+ pdf_file_renamer-0.4.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pdf-renamer = pdf_renamer.main:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nostos Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pdf_renamer
@@ -0,0 +1,3 @@
1
+ """PDF Renamer - Intelligent PDF file renaming using LLMs."""
2
+
3
+ __version__ = "0.4.2"
@@ -0,0 +1,7 @@
1
+ """Application layer - use cases and business logic orchestration."""
2
+
3
+ from pdf_renamer.application.filename_service import FilenameService
4
+ from pdf_renamer.application.pdf_rename_workflow import PDFRenameWorkflow
5
+ from pdf_renamer.application.rename_service import RenameService
6
+
7
+ __all__ = ["FilenameService", "PDFRenameWorkflow", "RenameService"]
@@ -0,0 +1,70 @@
1
+ """Filename generation service - coordinates PDF extraction and LLM generation."""
2
+
3
+ import re
4
+
5
+ from pdf_renamer.domain.models import FilenameResult, PDFContent
6
+ from pdf_renamer.domain.ports import FilenameGenerator, LLMProvider
7
+
8
+
9
class FilenameService(FilenameGenerator):
    """Service for generating filesystem-safe filenames from PDF content."""

    # Punctuation that is reserved on common filesystems, plus the ASCII
    # control characters that are not whitespace (NUL, BEL, ESC, DEL, ...).
    # Those control characters make the later rename call fail, so they are
    # dropped here. Whitespace control characters (tab, newline, \x1c-\x1f)
    # are deliberately excluded so the separator pass still turns them into
    # hyphens.
    _INVALID_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x08\x0e-\x1b\x7f]')

    # Any run of whitespace and/or hyphens collapses to a single hyphen.
    _SEPARATOR_RUN = re.compile(r"[\s\-]+")

    # Hard upper bound on the sanitized name (extension not included).
    _MAX_LENGTH = 100

    def __init__(self, llm_provider: LLMProvider) -> None:
        """
        Initialize the filename service.

        Args:
            llm_provider: LLM provider for filename generation
        """
        self.llm_provider = llm_provider

    async def generate(self, original_filename: str, content: PDFContent) -> FilenameResult:
        """
        Generate a filename suggestion based on PDF content.

        Args:
            original_filename: Current filename
            content: Extracted PDF content

        Returns:
            The provider's FilenameResult, with its ``filename`` replaced by
            the sanitized form.
        """
        # The provider takes the metadata as a plain dictionary.
        metadata_dict = content.metadata.to_dict()

        result = await self.llm_provider.generate_filename(
            original_filename=original_filename,
            text_excerpt=content.text,
            metadata_dict=metadata_dict,
        )

        # LLM output is free text; never trust it as a filename verbatim.
        result.filename = self.sanitize(result.filename)

        return result

    def sanitize(self, filename: str) -> str:
        """
        Sanitize a filename to be filesystem-safe.

        Reserved punctuation and non-whitespace control characters are
        removed, runs of whitespace/hyphens become a single hyphen, leading
        and trailing hyphens are stripped, and the result is cut to at most
        100 characters.

        Args:
            filename: Raw filename

        Returns:
            Sanitized filename (may be empty if nothing valid remains)
        """
        filename = self._INVALID_CHARS.sub("", filename)
        filename = self._SEPARATOR_RUN.sub("-", filename)
        filename = filename.strip("-")

        if len(filename) > self._MAX_LENGTH:
            # Cutting can land right after a hyphen; don't leave it dangling.
            filename = filename[: self._MAX_LENGTH].rstrip("-")

        return filename
@@ -0,0 +1,144 @@
1
+ """PDF rename workflow - orchestrates the complete process."""
2
+
3
+ import asyncio
4
+ from collections.abc import Callable
5
+ from pathlib import Path
6
+
7
+ from pdf_renamer.domain.models import FileRenameOperation
8
+ from pdf_renamer.domain.ports import (
9
+ FilenameGenerator,
10
+ FileRenamer,
11
+ PDFExtractor,
12
+ )
13
+
14
+
15
class PDFRenameWorkflow:
    """
    Coordinator for the extract -> suggest -> rename pipeline.

    The workflow holds no extraction, LLM, or filesystem logic of its own. It
    forwards each step to the injected collaborators and caps how many
    extractions and API calls may be in flight at the same time.
    """

    def __init__(
        self,
        pdf_extractor: PDFExtractor,
        filename_generator: FilenameGenerator,
        file_renamer: FileRenamer,
        max_concurrent_api: int = 3,
        max_concurrent_pdf: int = 10,
    ) -> None:
        """
        Store the collaborators and create the concurrency limiters.

        Args:
            pdf_extractor: PDF extraction service
            filename_generator: Filename generation service
            file_renamer: File renaming service
            max_concurrent_api: Maximum concurrent API calls
            max_concurrent_pdf: Maximum concurrent PDF extractions
        """
        self.pdf_extractor = pdf_extractor
        self.filename_generator = filename_generator
        self.file_renamer = file_renamer
        self.api_semaphore = asyncio.Semaphore(max_concurrent_api)
        self.pdf_semaphore = asyncio.Semaphore(max_concurrent_pdf)

    async def _extract_with_limit(self, pdf_path: Path):
        """Run one extraction while holding a slot of the PDF semaphore."""
        async with self.pdf_semaphore:
            return await self.pdf_extractor.extract(pdf_path)

    async def _generate_with_limit(self, source_name: str, content):
        """Run one filename generation while holding a slot of the API semaphore."""
        async with self.api_semaphore:
            return await self.filename_generator.generate(source_name, content)

    async def process_pdf(
        self,
        pdf_path: Path,
        status_callback: Callable[[str, dict[str, str]], None] | None = None,
    ) -> FileRenameOperation | None:
        """
        Extract one PDF and ask the generator for a new name.

        Args:
            pdf_path: Path to PDF file
            status_callback: Optional callback for status updates; it is
                called with the file's name and a status dictionary

        Returns:
            A FileRenameOperation describing the suggestion, or None when any
            step raised (the error text is reported through the callback)
        """
        source_name = pdf_path.name

        try:
            if status_callback:
                status_callback(source_name, {"status": "Extracting", "stage": "📄"})
            content = await self._extract_with_limit(pdf_path)

            if status_callback:
                status_callback(source_name, {"status": "Analyzing", "stage": "🤖"})
            suggestion = await self._generate_with_limit(source_name, content)

            if status_callback:
                finished = {
                    "status": "Complete",
                    "stage": "✓",
                    "confidence": suggestion.confidence.value,
                }
                status_callback(source_name, finished)

            return FileRenameOperation(
                original_path=pdf_path,
                suggested_filename=suggestion.filename,
                confidence=suggestion.confidence,
                reasoning=suggestion.reasoning,
                text_excerpt=content.text,
                metadata=content.metadata,
            )
        except Exception as exc:
            # One bad file must not abort the batch: report it and move on.
            if status_callback:
                status_callback(
                    source_name,
                    {"status": "Error", "stage": "✗", "error": str(exc)},
                )
            return None

    async def process_batch(
        self,
        pdf_paths: list[Path],
        status_callback: Callable[[str, dict[str, str]], None] | None = None,
    ) -> list[FileRenameOperation | None]:
        """
        Process several PDFs concurrently.

        Args:
            pdf_paths: List of PDF paths to process
            status_callback: Optional callback for status updates

        Returns:
            One entry per input path, in input order (None for failures)
        """
        return await asyncio.gather(
            *(self.process_pdf(path, status_callback) for path in pdf_paths),
            return_exceptions=False,
        )

    async def execute_rename(
        self,
        operation: FileRenameOperation,
        output_dir: Path | None = None,
        dry_run: bool = True,
    ) -> bool:
        """
        Carry out a single rename operation through the file renamer.

        Args:
            operation: The rename operation to execute
            output_dir: Optional output directory
            dry_run: If True, don't actually rename

        Returns:
            Whatever the file renamer reports (True if successful)

        Raises:
            RuntimeError: If rename fails
        """
        return await self.file_renamer.rename(
            operation.original_path,
            operation.create_new_path(output_dir),
            dry_run,
        )
@@ -0,0 +1,79 @@
1
+ """File rename service - handles the actual file operations."""
2
+
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+ from pdf_renamer.domain.ports import FileRenamer
7
+
8
+
9
class RenameService(FileRenamer):
    """Service for renaming files with duplicate handling."""

    async def rename(self, original_path: Path, new_path: Path, dry_run: bool = True) -> bool:
        """
        Rename a file with duplicate handling.

        If the target path already exists and is a different file, a numeric
        suffix is appended so nothing is overwritten. If the target is the
        source file itself (the suggested name equals the current name), the
        file is left where it is instead of being renamed to ``name-1``.

        Args:
            original_path: Original file path
            new_path: New file path
            dry_run: If True, don't actually rename

        Returns:
            True if successful (or would be successful in dry-run)

        Raises:
            RuntimeError: If rename fails
        """
        try:
            if not original_path.exists():
                msg = f"Source file does not exist: {original_path}"
                raise RuntimeError(msg)

            if self._is_same_entry(original_path, new_path):
                # The "existing" target is the source itself, so there is no
                # conflict to resolve. Only a case-only change (possible on
                # case-insensitive filesystems) still needs a real rename.
                if not dry_run and original_path.name != new_path.name:
                    original_path.rename(new_path)
                return True

            final_path = self._handle_duplicate(new_path)

            if dry_run:
                # Nothing is touched in dry-run; reaching here means the
                # source exists and a free target name was found.
                return True

            if original_path.parent != final_path.parent:
                # Moving to a different directory, which may not exist yet.
                final_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(original_path), str(final_path))
            else:
                # Renaming in the same directory.
                original_path.rename(final_path)

            return True

        except Exception as e:
            msg = f"Failed to rename {original_path} to {new_path}: {e}"
            raise RuntimeError(msg) from e

    @staticmethod
    def _is_same_entry(original_path: Path, new_path: Path) -> bool:
        """
        Tell whether ``new_path`` refers to the source file itself.

        The names must match ignoring case and both paths must resolve to the
        same file on disk.

        Args:
            original_path: Existing source path
            new_path: Desired target path

        Returns:
            True if renaming would target the source file itself
        """
        return (
            new_path.exists()
            and original_path.name.casefold() == new_path.name.casefold()
            and original_path.samefile(new_path)
        )

    def _handle_duplicate(self, path: Path) -> Path:
        """
        Handle duplicate filenames by adding a counter suffix.

        Args:
            path: Desired path

        Returns:
            ``path`` itself if it is free, otherwise the first of
            ``stem-1``, ``stem-2``, ... (same suffix and directory) that does
            not exist yet
        """
        if not path.exists():
            return path

        stem = path.stem
        suffix = path.suffix
        parent = path.parent

        counter = 1
        while True:
            new_path = parent / f"{stem}-{counter}{suffix}"
            if not new_path.exists():
                return new_path
            counter += 1
@@ -0,0 +1,25 @@
1
+ """Domain layer - pure business logic with no external dependencies."""
2
+
3
+ from pdf_renamer.domain.models import (
4
+ FilenameResult,
5
+ FileRenameOperation,
6
+ PDFContent,
7
+ PDFMetadata,
8
+ )
9
+ from pdf_renamer.domain.ports import (
10
+ FilenameGenerator,
11
+ FileRenamer,
12
+ LLMProvider,
13
+ PDFExtractor,
14
+ )
15
+
16
+ __all__ = [
17
+ "FileRenameOperation",
18
+ "FileRenamer",
19
+ "FilenameGenerator",
20
+ "FilenameResult",
21
+ "LLMProvider",
22
+ "PDFContent",
23
+ "PDFExtractor",
24
+ "PDFMetadata",
25
+ ]