pdf-file-renamer 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdf_file_renamer/__init__.py +1 -1
- pdf_file_renamer/application/pdf_rename_workflow.py +8 -2
- pdf_file_renamer/infrastructure/doi/pdf2doi_extractor.py +46 -12
- pdf_file_renamer/presentation/formatters.py +15 -3
- {pdf_file_renamer-0.6.0.dist-info → pdf_file_renamer-0.6.1.dist-info}/METADATA +1 -1
- {pdf_file_renamer-0.6.0.dist-info → pdf_file_renamer-0.6.1.dist-info}/RECORD +9 -9
- {pdf_file_renamer-0.6.0.dist-info → pdf_file_renamer-0.6.1.dist-info}/WHEEL +0 -0
- {pdf_file_renamer-0.6.0.dist-info → pdf_file_renamer-0.6.1.dist-info}/entry_points.txt +0 -0
- {pdf_file_renamer-0.6.0.dist-info → pdf_file_renamer-0.6.1.dist-info}/licenses/LICENSE +0 -0
pdf_file_renamer/__init__.py
CHANGED
@@ -5,7 +5,7 @@ import contextlib
|
|
5
5
|
from collections.abc import Callable
|
6
6
|
from pathlib import Path
|
7
7
|
|
8
|
-
from pdf_file_renamer.domain.models import FileRenameOperation
|
8
|
+
from pdf_file_renamer.domain.models import ConfidenceLevel, FileRenameOperation
|
9
9
|
from pdf_file_renamer.domain.ports import (
|
10
10
|
DOIExtractor,
|
11
11
|
FilenameGenerator,
|
@@ -103,12 +103,18 @@ class PDFRenameWorkflow:
|
|
103
103
|
|
104
104
|
# Mark complete
|
105
105
|
if status_callback:
|
106
|
+
# result.confidence may already be a plain string (use_enum_values=True); accept both enum and string
|
107
|
+
confidence_str = (
|
108
|
+
result.confidence.value
|
109
|
+
if isinstance(result.confidence, ConfidenceLevel)
|
110
|
+
else result.confidence
|
111
|
+
)
|
106
112
|
status_callback(
|
107
113
|
filename,
|
108
114
|
{
|
109
115
|
"status": "Complete",
|
110
116
|
"stage": "✓",
|
111
|
-
"confidence":
|
117
|
+
"confidence": confidence_str,
|
112
118
|
},
|
113
119
|
)
|
114
120
|
|
@@ -31,16 +31,14 @@ class PDF2DOIExtractor(DOIExtractor):
|
|
31
31
|
try:
|
32
32
|
# Run pdf2doi in executor to avoid blocking
|
33
33
|
loop = asyncio.get_event_loop()
|
34
|
-
|
34
|
+
result = await loop.run_in_executor(
|
35
35
|
None, pdf2doi.pdf2doi, str(pdf_path)
|
36
36
|
)
|
37
37
|
|
38
|
-
|
38
|
+
# pdf2doi returns a dict (not a list)
|
39
|
+
if not result or not isinstance(result, dict):
|
39
40
|
return None
|
40
41
|
|
41
|
-
# Get the first result
|
42
|
-
result = results[0]
|
43
|
-
|
44
42
|
# Check if DOI was found
|
45
43
|
identifier = result.get("identifier")
|
46
44
|
if not identifier:
|
@@ -50,15 +48,51 @@ class PDF2DOIExtractor(DOIExtractor):
|
|
50
48
|
if identifier_type.lower() not in ("doi", "arxiv"):
|
51
49
|
return None
|
52
50
|
|
53
|
-
# Extract metadata from validation_info (
|
51
|
+
# Extract metadata from validation_info (JSON string from CrossRef API)
|
54
52
|
validation_info = result.get("validation_info", "")
|
55
53
|
|
56
|
-
# Parse
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
54
|
+
# Parse JSON metadata
|
55
|
+
import json
|
56
|
+
|
57
|
+
metadata = {}
|
58
|
+
if validation_info:
|
59
|
+
try:
|
60
|
+
metadata = json.loads(validation_info)
|
61
|
+
except json.JSONDecodeError:
|
62
|
+
pass
|
63
|
+
|
64
|
+
# Extract title
|
65
|
+
title = metadata.get("title")
|
66
|
+
|
67
|
+
# Extract authors (list of dicts with 'given' and 'family' fields)
|
68
|
+
authors = None
|
69
|
+
if "author" in metadata:
|
70
|
+
author_list = metadata["author"]
|
71
|
+
authors = []
|
72
|
+
for author in author_list:
|
73
|
+
if isinstance(author, dict):
|
74
|
+
family = author.get("family", "")
|
75
|
+
given = author.get("given", "")
|
76
|
+
if family:
|
77
|
+
full_name = f"{given} {family}".strip() if given else family
|
78
|
+
authors.append(full_name)
|
79
|
+
if not authors:
|
80
|
+
authors = None
|
81
|
+
|
82
|
+
# Extract year from the first available of published-online, published, or created
|
83
|
+
year = None
|
84
|
+
for date_field in ["published-online", "published", "created"]:
|
85
|
+
if date_field in metadata and "date-parts" in metadata[date_field]:
|
86
|
+
date_parts = metadata[date_field]["date-parts"]
|
87
|
+
if date_parts and len(date_parts) > 0 and len(date_parts[0]) > 0:
|
88
|
+
year = str(date_parts[0][0])
|
89
|
+
break
|
90
|
+
|
91
|
+
# Extract journal (container-title)
|
92
|
+
journal = metadata.get("container-title")
|
93
|
+
|
94
|
+
# Extract publisher
|
95
|
+
publisher = metadata.get("publisher")
|
62
96
|
|
63
97
|
return DOIMetadata(
|
64
98
|
doi=identifier,
|
@@ -7,7 +7,7 @@ from rich.prompt import Prompt
|
|
7
7
|
from rich.table import Table
|
8
8
|
from rich.text import Text
|
9
9
|
|
10
|
-
from pdf_file_renamer.domain.models import FileRenameOperation
|
10
|
+
from pdf_file_renamer.domain.models import ConfidenceLevel, FileRenameOperation
|
11
11
|
|
12
12
|
|
13
13
|
class ProgressDisplay:
|
@@ -146,7 +146,13 @@ class InteractivePrompt:
|
|
146
146
|
info_text.append("Suggested: ", style="bold green")
|
147
147
|
info_text.append(f"{operation.new_filename}\n", style="green")
|
148
148
|
info_text.append("Confidence: ", style="bold yellow")
|
149
|
-
|
149
|
+
# Handle both enum and string confidence
|
150
|
+
conf_str = (
|
151
|
+
operation.confidence.value
|
152
|
+
if isinstance(operation.confidence, ConfidenceLevel)
|
153
|
+
else operation.confidence
|
154
|
+
)
|
155
|
+
info_text.append(f"{conf_str}\n", style="yellow")
|
150
156
|
info_text.append("Reasoning: ", style="bold white")
|
151
157
|
info_text.append(operation.reasoning, style="dim white")
|
152
158
|
|
@@ -206,10 +212,16 @@ class ResultsTable:
|
|
206
212
|
reasoning = op.reasoning
|
207
213
|
if len(reasoning) > 100:
|
208
214
|
reasoning = reasoning[:100] + "..."
|
215
|
+
# Handle both enum and string confidence
|
216
|
+
conf_str = (
|
217
|
+
op.confidence.value
|
218
|
+
if isinstance(op.confidence, ConfidenceLevel)
|
219
|
+
else op.confidence
|
220
|
+
)
|
209
221
|
table.add_row(
|
210
222
|
op.original_path.name,
|
211
223
|
op.new_filename,
|
212
|
-
|
224
|
+
conf_str,
|
213
225
|
reasoning,
|
214
226
|
)
|
215
227
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
pdf_file_renamer/__init__.py,sha256=
|
1
|
+
pdf_file_renamer/__init__.py,sha256=1hyyq0EM6vqGG8Gxxdkg3MuLU_4Mwj3mc812ikutUB8,85
|
2
2
|
pdf_file_renamer/main.py,sha256=FTEEb-9QmOOsN9SE8L1SZvFVIkVpQDy8xZ5a8t8CWUs,145
|
3
3
|
pdf_file_renamer/application/__init__.py,sha256=riSV7UXBenkDst7Nnf11N1_RuRtM7wpKdwugxOhumS4,363
|
4
4
|
pdf_file_renamer/application/filename_service.py,sha256=IbeCNBwyhFlCMCZveq16nmQ2qvyTdtgLmr6PDWPckOs,4868
|
5
|
-
pdf_file_renamer/application/pdf_rename_workflow.py,sha256=
|
5
|
+
pdf_file_renamer/application/pdf_rename_workflow.py,sha256=WLcGJ4ufEmAnGSxVQcOFDeGG8gXSccs11DaP521YDzo,6144
|
6
6
|
pdf_file_renamer/application/rename_service.py,sha256=vviNQolk_w-qDQvOKTKj8ZhqYyyNWL-VJMfuUnL6WLw,2357
|
7
7
|
pdf_file_renamer/domain/__init__.py,sha256=jxbH3h6xaCnSRuBxclFESl6ZE1pua_I1K4CRAaYxu_I,503
|
8
8
|
pdf_file_renamer/domain/models.py,sha256=QwN79TzWmqvQvz-m9ymebvAx3pWlVpSWXNdSEAk4qq0,3186
|
@@ -10,7 +10,7 @@ pdf_file_renamer/domain/ports.py,sha256=ebOcHptiOK119NCmIwM32_fbRK5xkZP9K67vjL-4
|
|
10
10
|
pdf_file_renamer/infrastructure/__init__.py,sha256=C3ZQ7WCPCa6PMfP00lu4wqb0r57GVyDdiD5EL2DhCeY,187
|
11
11
|
pdf_file_renamer/infrastructure/config.py,sha256=baNL5_6_NNiS50ZNdql7fDwQbeAwf6f58HGYIWFQxQQ,2464
|
12
12
|
pdf_file_renamer/infrastructure/doi/__init__.py,sha256=8N9ZEwfG7q5xomzh187YtP8t4CfEBHM334xNRblPeuI,153
|
13
|
-
pdf_file_renamer/infrastructure/doi/pdf2doi_extractor.py,sha256=
|
13
|
+
pdf_file_renamer/infrastructure/doi/pdf2doi_extractor.py,sha256=mK2Z5oOwN-TgiEHLgoLM5yCSe_-G9kWXLr4Sw3nMkEM,5105
|
14
14
|
pdf_file_renamer/infrastructure/llm/__init__.py,sha256=ToB8__mHvXwaIukGKPEAQ8SeC4ZLiH4auZI1P1yH5PQ,159
|
15
15
|
pdf_file_renamer/infrastructure/llm/pydantic_ai_provider.py,sha256=kVsmj0NIawkj-1WWM0hZXbsNH09GabVZm9HPlYsxGuo,9217
|
16
16
|
pdf_file_renamer/infrastructure/pdf/__init__.py,sha256=uMHqxSXNLZH5WH_e1kXrp9m7uTqPkiI2hXjNo6rCRoo,368
|
@@ -19,9 +19,9 @@ pdf_file_renamer/infrastructure/pdf/docling_extractor.py,sha256=auZrJpK7mMg1mUXK
|
|
19
19
|
pdf_file_renamer/infrastructure/pdf/pymupdf_extractor.py,sha256=C61udZCqGqiVx7T0HWNyjvnhgv5AgMIcCYtrhgHOJwk,5465
|
20
20
|
pdf_file_renamer/presentation/__init__.py,sha256=1VR44GoPGTixk3hG5YzhGyQf7a4BTKsJBd2VP3rHcFM,211
|
21
21
|
pdf_file_renamer/presentation/cli.py,sha256=0t_59-utRWLNCYjFetU0ZHoF1DPTjdNiWM9Au0jFaOg,8013
|
22
|
-
pdf_file_renamer/presentation/formatters.py,sha256=
|
23
|
-
pdf_file_renamer-0.6.
|
24
|
-
pdf_file_renamer-0.6.
|
25
|
-
pdf_file_renamer-0.6.
|
26
|
-
pdf_file_renamer-0.6.
|
27
|
-
pdf_file_renamer-0.6.
|
22
|
+
pdf_file_renamer/presentation/formatters.py,sha256=Es7pZoHw5bEPtNfa_s43eHXa_m0yrTmX6S2aU78JUE0,8978
|
23
|
+
pdf_file_renamer-0.6.1.dist-info/METADATA,sha256=OyZKW601xnQFXR-SDLakLEnasq5rtfP7YO6IYn6f-z4,9912
|
24
|
+
pdf_file_renamer-0.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
25
|
+
pdf_file_renamer-0.6.1.dist-info/entry_points.txt,sha256=0fEGYy60chGE9rECWeCVPxjxzz6vMtIAYdFvmH7xzbw,63
|
26
|
+
pdf_file_renamer-0.6.1.dist-info/licenses/LICENSE,sha256=_w08V08WgoMpDMlGNlkIatC5QfQ_Ds_rXOBM8pl7ffE,1068
|
27
|
+
pdf_file_renamer-0.6.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|