docorient 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docorient/__init__.py +36 -0
- docorient/_imaging.py +46 -0
- docorient/_version.py +1 -0
- docorient/batch/__init__.py +3 -0
- docorient/batch/processor.py +197 -0
- docorient/batch/scanner.py +59 -0
- docorient/batch/worker.py +118 -0
- docorient/cli.py +175 -0
- docorient/config.py +40 -0
- docorient/correction.py +122 -0
- docorient/detection/__init__.py +3 -0
- docorient/detection/engine.py +51 -0
- docorient/detection/osd.py +72 -0
- docorient/detection/projection.py +72 -0
- docorient/exceptions.py +18 -0
- docorient/types.py +42 -0
- docorient-0.1.0.dist-info/METADATA +146 -0
- docorient-0.1.0.dist-info/RECORD +21 -0
- docorient-0.1.0.dist-info/WHEEL +4 -0
- docorient-0.1.0.dist-info/entry_points.txt +2 -0
- docorient-0.1.0.dist-info/licenses/LICENSE +21 -0
docorient/__init__.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from docorient._version import __version__
|
|
2
|
+
from docorient.batch.processor import process_directory
|
|
3
|
+
from docorient.config import OrientationConfig
|
|
4
|
+
from docorient.correction import correct_document_pages, correct_image
|
|
5
|
+
from docorient.detection.engine import detect_orientation
|
|
6
|
+
from docorient.exceptions import (
|
|
7
|
+
BatchProcessingError,
|
|
8
|
+
CorrectionError,
|
|
9
|
+
DetectionError,
|
|
10
|
+
DocorientError,
|
|
11
|
+
TesseractNotAvailableError,
|
|
12
|
+
)
|
|
13
|
+
from docorient.types import (
|
|
14
|
+
BatchSummary,
|
|
15
|
+
CorrectionResult,
|
|
16
|
+
OrientationResult,
|
|
17
|
+
PageResult,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"BatchProcessingError",
|
|
22
|
+
"BatchSummary",
|
|
23
|
+
"CorrectionError",
|
|
24
|
+
"CorrectionResult",
|
|
25
|
+
"DetectionError",
|
|
26
|
+
"DocorientError",
|
|
27
|
+
"OrientationConfig",
|
|
28
|
+
"OrientationResult",
|
|
29
|
+
"PageResult",
|
|
30
|
+
"TesseractNotAvailableError",
|
|
31
|
+
"__version__",
|
|
32
|
+
"correct_document_pages",
|
|
33
|
+
"correct_image",
|
|
34
|
+
"detect_orientation",
|
|
35
|
+
"process_directory",
|
|
36
|
+
]
|
docorient/_imaging.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def open_as_rgb(image_path: str | Path) -> Image.Image:
    """Load the image at ``image_path`` and return an RGB copy of it.

    ``Image.open`` is lazy and keeps the underlying file open. The source
    image is therefore opened in a ``with`` block so its file handle is
    released as soon as the converted copy has been produced, instead of
    being left to the garbage collector.

    Args:
        image_path: Location of the image file to read.

    Returns:
        A new image in ``"RGB"`` mode.
    """
    with Image.open(image_path) as source_image:
        return source_image.convert("RGB")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def downscale_to_max_dimension(image: Image.Image, max_dimension: int) -> Image.Image:
    """Shrink ``image`` so that its longest side is at most ``max_dimension``.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the very same object, not a copy).

    Args:
        image: Image to shrink.
        max_dimension: Upper bound, in pixels, for the longest side.

    Returns:
        ``image`` itself when no scaling is needed, otherwise a resized copy
        produced with the LANCZOS filter.
    """
    image_width, image_height = image.size
    largest_side = max(image_width, image_height)

    if largest_side <= max_dimension:
        return image

    scale_factor = max_dimension / largest_side
    # int() truncates, so the short side of a very elongated image could
    # become 0 px, which resize() rejects. Clamp both sides to one pixel.
    target_width = max(1, int(image_width * scale_factor))
    target_height = max(1, int(image_height * scale_factor))
    return image.resize((target_width, target_height), Image.LANCZOS)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def save_image(
    image: Image.Image,
    output_path: str | Path,
    output_format: str = "JPEG",
    quality: int = 92,
) -> None:
    """Write ``image`` to ``output_path``.

    Args:
        image: Image to store.
        output_path: Destination file.
        output_format: Format name handed to ``image.save`` as its second
            positional argument.
        quality: Value forwarded to ``image.save`` as the ``quality`` option.
    """
    save_options = {"quality": quality}
    image.save(output_path, output_format, **save_options)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def determine_output_format(file_path: str | Path) -> str:
    """Map a file name's extension to the image format name used for saving.

    The extension is compared case-insensitively. Anything that is not one
    of the recognised extensions falls back to ``"JPEG"``.

    Args:
        file_path: Path or file name whose suffix selects the format.

    Returns:
        One of ``"JPEG"``, ``"PNG"``, ``"TIFF"``, ``"BMP"``, ``"GIF"`` or
        ``"WEBP"``.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix in (".jpg", ".jpeg"):
        return "JPEG"
    if suffix in (".tif", ".tiff"):
        return "TIFF"
    if suffix in (".png", ".bmp", ".gif", ".webp"):
        # For these the format name is simply the extension in upper case.
        return suffix[1:].upper()
    return "JPEG"
|
docorient/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from dataclasses import asdict
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from docorient.batch.scanner import ScannedPage, scan_directory
|
|
13
|
+
from docorient.batch.worker import initialize_worker, process_batch
|
|
14
|
+
from docorient.config import RESUME_LOG_FILENAME, OrientationConfig
|
|
15
|
+
from docorient.types import BatchSummary, PageResult
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _load_completed_sources(resume_log_path: Path) -> set[str]:
|
|
19
|
+
if not resume_log_path.exists():
|
|
20
|
+
return set()
|
|
21
|
+
with open(resume_log_path) as resume_log:
|
|
22
|
+
return {line.strip() for line in resume_log if line.strip()}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _distribute_into_batches(
|
|
26
|
+
items: list[tuple[str, list[ScannedPage]]],
|
|
27
|
+
batch_count: int,
|
|
28
|
+
) -> list[list[tuple[str, list[ScannedPage]]]]:
|
|
29
|
+
batches: list[list[tuple[str, list[ScannedPage]]]] = [[] for _ in range(batch_count)]
|
|
30
|
+
for item_index, item in enumerate(items):
|
|
31
|
+
target_batch = item_index % batch_count
|
|
32
|
+
batches[target_batch].append(item)
|
|
33
|
+
return batches
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _build_summary(
    input_directory: str,
    output_directory: str,
    total_files: int,
    all_page_results: dict[str, list[PageResult]],
    source_file_names: list[str],
) -> BatchSummary:
    """Aggregate per-page results into a ``BatchSummary``.

    Pages are collected in the order given by ``source_file_names``; sources
    without an entry in ``all_page_results`` contribute nothing. Every page
    falls into exactly one bucket: error, corrected (non-zero angle) or
    already correct. Corrected pages whose method contains ``"->majority"``
    are additionally counted as corrected by majority vote.

    Args:
        input_directory: Input directory, stored verbatim in the summary.
        output_directory: Output directory, stored verbatim in the summary.
        total_files: Number of source files, stored verbatim in the summary.
        all_page_results: Page results keyed by source name.
        source_file_names: Source names that define the output ordering.

    Returns:
        The populated ``BatchSummary``.
    """
    ordered_pages: list[PageResult] = [
        page
        for source_name in source_file_names
        for page in all_page_results.get(source_name, [])
    ]

    failed_pages = [page for page in ordered_pages if page.error is not None]
    # The error check comes first so a failed page is never counted as rotated.
    rotated_pages = [
        page
        for page in ordered_pages
        if page.error is None and page.orientation.angle != 0
    ]
    majority_total = sum(
        1 for page in rotated_pages if "->majority" in page.orientation.method
    )
    untouched_total = len(ordered_pages) - len(failed_pages) - len(rotated_pages)

    return BatchSummary(
        input_directory=input_directory,
        output_directory=output_directory,
        total_files=total_files,
        total_pages=len(ordered_pages),
        already_correct=untouched_total,
        corrected=len(rotated_pages),
        corrected_by_majority=majority_total,
        errors=len(failed_pages),
        pages=tuple(ordered_pages),
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def process_directory(
    input_dir: str | Path,
    *,
    output_dir: str | Path | None = None,
    config: OrientationConfig | None = None,
    limit: int = 0,
    show_progress: bool = True,
) -> BatchSummary:
    """Process all images in a directory, detecting and correcting orientation.

    Images are grouped by source document, dealt round-robin to a pool of
    worker processes and written to the output directory. When resuming is
    enabled, sources already listed in the resume log inside the output
    directory are skipped.

    Args:
        input_dir: Path to directory containing document images.
        output_dir: Path for corrected output. None generates a UUID-named
            directory next to the input directory.
        config: Processing configuration. Uses defaults if not provided.
        limit: Maximum number of images to process. 0 means all.
        show_progress: Whether to display a tqdm progress bar.

    Returns:
        BatchSummary with statistics and per-page results. Pages belonging
        to a batch whose worker call failed are reported as errors rather
        than being left out.

    Raises:
        SystemExit: With status 1 when interrupted by ``KeyboardInterrupt``
            while waiting for the workers.
    """
    # Function-scope imports: only needed to report failed batches.
    import warnings

    from docorient.types import OrientationResult

    effective_config = config or OrientationConfig()
    input_path = Path(input_dir).resolve()

    if output_dir is None:
        output_path = input_path.parent / str(uuid.uuid4())
    else:
        output_path = Path(output_dir).resolve()

    output_path.mkdir(parents=True, exist_ok=True)

    pages_by_source = scan_directory(
        input_path,
        output_path,
        supported_extensions=effective_config.supported_extensions,
        limit=limit,
    )

    source_file_names = list(pages_by_source.keys())
    total_files = len(source_file_names)
    total_pages = sum(len(pages) for pages in pages_by_source.values())

    if total_pages == 0:
        return _build_summary(str(input_path), str(output_path), 0, {}, [])

    resume_log_path = output_path / RESUME_LOG_FILENAME
    already_completed_sources = set()

    if effective_config.resume_enabled:
        already_completed_sources = _load_completed_sources(resume_log_path)

    pending_sources = [
        (source_name, pages_by_source[source_name])
        for source_name in source_file_names
        if source_name not in already_completed_sources
    ]

    all_page_results: dict[str, list[PageResult]] = {}

    if not pending_sources:
        return _build_summary(
            str(input_path), str(output_path), total_files, all_page_results, source_file_names
        )

    worker_count = min(effective_config.effective_workers, len(pending_sources))
    batches = _distribute_into_batches(pending_sources, worker_count)

    # Shared between parent and workers: the workers bump the counter, the
    # parent polls it to drive the progress bar.
    progress_counter = multiprocessing.Value("i", 0)
    progress_lock = multiprocessing.Lock()

    # The configuration is shipped to the workers as a plain dict.
    config_as_dict = asdict(effective_config)

    worker_pool = multiprocessing.Pool(
        processes=worker_count,
        initializer=initialize_worker,
        initargs=(progress_counter, progress_lock, str(resume_log_path), config_as_dict),
        maxtasksperchild=1,
    )

    async_results = [
        worker_pool.apply_async(process_batch, (batch,)) for batch in batches
    ]
    worker_pool.close()

    if show_progress:
        progress_bar = tqdm(
            total=len(pending_sources),
            desc="Correcting",
            unit="file",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
        )
    else:
        progress_bar = None

    try:
        while not all(async_result.ready() for async_result in async_results):
            if progress_bar is not None:
                progress_bar.n = progress_counter.value
                progress_bar.refresh()
            time.sleep(0.3)
    except KeyboardInterrupt:
        worker_pool.terminate()
        worker_pool.join()
        if progress_bar is not None:
            progress_bar.close()
        sys.exit(1)

    if progress_bar is not None:
        progress_bar.n = progress_counter.value
        progress_bar.refresh()
        progress_bar.close()

    # async_results was built from batches in order, so zip pairs each
    # result with the batch that produced it.
    for batch, async_result in zip(batches, async_results):
        try:
            batch_results = async_result.get(timeout=60)
        except Exception as batch_error:
            # Do not lose the batch silently: tell the caller and account
            # for every page of the batch as an error in the summary.
            warnings.warn(
                f"A worker batch failed and its results are unavailable: {batch_error!r}",
                RuntimeWarning,
                stacklevel=2,
            )
            failure_message = f"batch result unavailable: {batch_error}"
            for source_name, scanned_pages in batch:
                all_page_results[source_name] = [
                    PageResult(
                        source_file=scanned_page.source_file,
                        page_number=scanned_page.page_number,
                        image_name=scanned_page.image_name,
                        input_path=scanned_page.image_path,
                        output_path=scanned_page.output_path,
                        orientation=OrientationResult(
                            angle=0, method="error", reliable=False
                        ),
                        error=failure_message,
                    )
                    for scanned_page in scanned_pages
                ]
            continue

        for source_name, page_results in batch_results:
            all_page_results[source_name] = page_results

    worker_pool.join()

    return _build_summary(
        str(input_path), str(output_path), total_files, all_page_results, source_file_names
    )
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# Matches page image names of the form "<source>_p<number>.<ext>".
# Group 1 is the source document name, group 2 the page number digits.
PAGE_PATTERN = re.compile(r"^(.+)_p(\d+)\.\w+$")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
|
|
11
|
+
class ScannedPage:
|
|
12
|
+
source_file: str
|
|
13
|
+
page_number: int
|
|
14
|
+
image_name: str
|
|
15
|
+
image_path: str
|
|
16
|
+
output_path: str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def scan_directory(
    input_directory: Path,
    output_directory: Path,
    supported_extensions: tuple[str, ...],
    limit: int = 0,
) -> dict[str, list[ScannedPage]]:
    """Find the images directly inside a directory and group them by source.

    Only regular files whose lower-cased suffix is listed in
    ``supported_extensions`` are considered, in sorted path order.
    Sub-directories are not descended into. File names matching
    ``PAGE_PATTERN`` are split into source name and page number; any other
    file counts as page 1 of a source named after its stem.

    Args:
        input_directory: Directory to scan.
        output_directory: Directory used to build each page's output path
            (same file name as the input).
        supported_extensions: Accepted suffixes, including the leading dot.
        limit: Keep only the first ``limit`` images when greater than zero.

    Returns:
        Mapping of source name to its pages, in the order they were found.
    """
    candidates = [
        entry
        for entry in input_directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in supported_extensions
    ]
    candidates.sort()
    selected = candidates[:limit] if limit > 0 else candidates

    grouped: dict[str, list[ScannedPage]] = {}

    for entry in selected:
        file_name = entry.name
        matched = PAGE_PATTERN.match(file_name)

        if matched is None:
            source_name, page_number = entry.stem, 1
        else:
            source_name, page_number = matched.group(1), int(matched.group(2))

        page = ScannedPage(
            source_file=source_name,
            page_number=page_number,
            image_name=file_name,
            image_path=str(entry),
            output_path=str(output_directory / file_name),
        )

        if source_name not in grouped:
            grouped[source_name] = []
        grouped[source_name].append(page)

    return grouped
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import multiprocessing
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from docorient._imaging import determine_output_format, open_as_rgb, save_image
|
|
7
|
+
from docorient.batch.scanner import ScannedPage
|
|
8
|
+
from docorient.config import OrientationConfig
|
|
9
|
+
from docorient.correction import _apply_majority_voting, _apply_rotation
|
|
10
|
+
from docorient.detection.engine import detect_orientation
|
|
11
|
+
from docorient.types import OrientationResult, PageResult
|
|
12
|
+
|
|
13
|
+
_shared_counter: Any = None
|
|
14
|
+
_shared_lock: Any = None
|
|
15
|
+
_shared_resume_log_path: str | None = None
|
|
16
|
+
_shared_config_dict: dict[str, Any] | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def initialize_worker(
    counter: multiprocessing.Value,
    lock: multiprocessing.Lock,
    resume_log_path: str,
    config_dict: dict[str, Any],
) -> None:
    """Store the state handed to a worker in this module's globals.

    Args:
        counter: Shared progress counter.
        lock: Lock guarding the counter and the resume log.
        resume_log_path: Path of the resume log file.
        config_dict: Configuration as a plain dictionary.
    """
    global _shared_counter, _shared_lock, _shared_resume_log_path, _shared_config_dict
    _shared_counter, _shared_lock = counter, lock
    _shared_resume_log_path, _shared_config_dict = resume_log_path, config_dict
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _reconstruct_config() -> OrientationConfig:
    """Rebuild the ``OrientationConfig`` from the dictionary stored at worker start.

    Returns:
        A configuration built from the stored dictionary, or a default
        configuration when no dictionary has been stored.
    """
    settings = _shared_config_dict
    return OrientationConfig() if settings is None else OrientationConfig(**settings)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _process_single_source(
    source_file_name: str,
    pages: list[ScannedPage],
    config: OrientationConfig,
) -> list[PageResult]:
    """Detect, vote on and correct the orientation of every page of one source.

    The work is done in two passes so that majority voting can see all
    detections before anything is written: first each page is opened and
    analysed, then each page is re-opened, rotated and saved. Images are
    handled in ``with`` blocks so they are released even when detection,
    rotation or saving raises.

    Args:
        source_file_name: Name of the source document. Not used by the
            body; every page carries its own source name.
        pages: Pages belonging to the source.
        config: Settings for detection and for the output quality.

    Returns:
        One ``PageResult`` per page, in input order. A page that failed in
        either pass has its ``error`` set to the exception text.
    """
    valid_pages = list(pages)
    detection_results: list[OrientationResult] = []
    page_errors: dict[int, str] = {}

    for page_index, scanned_page in enumerate(valid_pages):
        try:
            with open_as_rgb(scanned_page.image_path) as image:
                orientation = detect_orientation(image, config=config)
        except Exception as detection_error:
            # Keep the lists aligned: a failed page gets a placeholder result.
            orientation = OrientationResult(angle=0, method="error", reliable=False)
            page_errors[page_index] = str(detection_error)
        detection_results.append(orientation)

    if len(valid_pages) > 1:
        detection_results = _apply_majority_voting(detection_results)

    page_results: list[PageResult] = []

    for page_index, (scanned_page, orientation) in enumerate(zip(valid_pages, detection_results)):
        error_message = page_errors.get(page_index)

        # Pages that already failed detection are reported but not written.
        if error_message is None:
            try:
                with open_as_rgb(scanned_page.image_path) as image:
                    with _apply_rotation(image, orientation.angle) as corrected_image:
                        save_image(
                            corrected_image,
                            scanned_page.output_path,
                            output_format=determine_output_format(scanned_page.output_path),
                            quality=config.output_quality,
                        )
            except Exception as save_error:
                error_message = str(save_error)

        page_results.append(
            PageResult(
                source_file=scanned_page.source_file,
                page_number=scanned_page.page_number,
                image_name=scanned_page.image_name,
                input_path=scanned_page.image_path,
                output_path=scanned_page.output_path,
                orientation=orientation,
                error=error_message,
            )
        )

    return page_results
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _record_completion(source_file_name: str, *, log_to_resume: bool = True) -> None:
    """Advance the shared progress counter and optionally log the source as done.

    Both steps happen while holding the shared lock.

    Args:
        source_file_name: Source name to append to the resume log.
        log_to_resume: When False only the progress counter is advanced and
            nothing is written to the resume log.
    """
    with _shared_lock:
        _shared_counter.value += 1
        if not log_to_resume:
            return
        try:
            # Closing the file at the end of the ``with`` block flushes it.
            with open(_shared_resume_log_path, "a") as resume_log:
                resume_log.write(source_file_name + "\n")
        except OSError:
            # Best effort: failing to write the log must not fail the batch.
            pass


def process_batch(
    batch: list[tuple[str, list[ScannedPage]]],
) -> list[tuple[str, list[PageResult]]]:
    """Process every source document in ``batch`` inside a worker process.

    Progress is counted for every source, but a source is written to the
    resume log only when all of its pages succeeded. Otherwise a resumed
    run would skip sources whose output was never written.

    Args:
        batch: ``(source name, pages)`` pairs assigned to this worker.

    Returns:
        ``(source name, page results)`` pairs in the same order as ``batch``.
    """
    config = _reconstruct_config()
    batch_results: list[tuple[str, list[PageResult]]] = []

    for source_file_name, scanned_pages in batch:
        page_results = _process_single_source(source_file_name, scanned_pages, config)
        batch_results.append((source_file_name, page_results))
        fully_succeeded = all(page.error is None for page in page_results)
        _record_completion(source_file_name, log_to_resume=fully_succeeded)

    return batch_results
|
docorient/cli.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from docorient._version import __version__
|
|
8
|
+
from docorient.batch.processor import process_directory
|
|
9
|
+
from docorient.config import OrientationConfig
|
|
10
|
+
from docorient.detection.osd import is_tesseract_available
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _build_argument_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the ``docorient`` program.

    The arguments are declared as data and registered in a loop, in the
    order in which they appear in the help output.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    argument_specs = (
        (
            ("input_dir",),
            {
                "type": str,
                "help": "Directory containing document images to process.",
            },
        ),
        (
            ("--output", "-o"),
            {
                "type": str,
                "default": None,
                "dest": "output_dir",
                "help": "Output directory for corrected images. Default: auto-generated UUID.",
            },
        ),
        (
            ("--workers", "-w"),
            {
                "type": int,
                "default": None,
                "help": "Number of parallel worker processes. Default: cpu_count - 2.",
            },
        ),
        (
            ("--limit", "-l"),
            {
                "type": int,
                "default": 0,
                "help": "Maximum number of images to process. 0 means all.",
            },
        ),
        (
            ("--quality", "-q"),
            {
                "type": int,
                "default": 92,
                "help": "Output JPEG quality (1-100). Default: 92.",
            },
        ),
        (
            ("--confidence",),
            {
                "type": float,
                "default": 2.0,
                "help": "OSD confidence threshold. Default: 2.0.",
            },
        ),
        (
            ("--no-ocr",),
            {
                "action": "store_true",
                "default": False,
                "help": "Disable Tesseract OSD (only projection-based detection).",
            },
        ),
        (
            ("--no-resume",),
            {
                "action": "store_true",
                "default": False,
                "help": "Disable resume from previous run.",
            },
        ),
        (
            ("--dry-run",),
            {
                "action": "store_true",
                "default": False,
                "help": "Only show what would be done, without processing.",
            },
        ),
        (
            ("--version", "-V"),
            {
                "action": "version",
                "version": f"docorient {__version__}",
            },
        ),
    )

    parser = argparse.ArgumentParser(
        prog="docorient",
        description="Detect and correct document image orientation.",
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _print_dry_run_info(input_path: Path, config: OrientationConfig, limit: int) -> None:
    """Print what a real run would process, without touching any file.

    The input directory is scanned exactly as a real run would scan it.
    The output directory passed to the scanner is only a placeholder used
    to build path strings; it is never created.

    Args:
        input_path: Directory that would be processed.
        config: Configuration that would be used.
        limit: Maximum number of images that would be processed; 0 means all.
    """
    from docorient.batch.scanner import scan_directory

    temp_output = input_path.parent / "__dry_run_temp__"
    pages_by_source = scan_directory(
        input_path,
        temp_output,
        supported_extensions=config.supported_extensions,
        limit=limit,
    )

    total_files = len(pages_by_source)
    total_pages = sum(len(pages) for pages in pages_by_source.values())

    # ``main`` expresses --no-ocr as an infinite confidence threshold, so OCR
    # is reported as enabled only when it is both requested and available.
    ocr_requested = config.osd_confidence_threshold != float("inf")
    ocr_enabled = ocr_requested and is_tesseract_available()

    print(f"\n{'=' * 60}")
    print(" DRY RUN - No changes will be made")
    print(f"{'=' * 60}")
    print(f" Input: {input_path}")
    print(f" Files: {total_files} source documents")
    print(f" Pages: {total_pages} images")
    print(f" Workers: {config.effective_workers}")
    print(f" OCR: {'enabled' if ocr_enabled else 'disabled'}")
    print(f" Quality: {config.output_quality}")
    print(f" Resume: {'enabled' if config.resume_enabled else 'disabled'}")
    print(f"{'=' * 60}\n")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _print_summary(summary) -> None:
|
|
108
|
+
print(f"\n{'=' * 60}")
|
|
109
|
+
print(" SUMMARY")
|
|
110
|
+
print(f"{'=' * 60}")
|
|
111
|
+
print(f" Output: {summary.output_directory}")
|
|
112
|
+
print(f" Files processed: {summary.total_files}")
|
|
113
|
+
print(f" Total pages: {summary.total_pages}")
|
|
114
|
+
print(f" Already correct (0°): {summary.already_correct}")
|
|
115
|
+
print(f" Corrected: {summary.corrected}")
|
|
116
|
+
if summary.corrected_by_majority > 0:
|
|
117
|
+
print(f" (majority vote): {summary.corrected_by_majority}")
|
|
118
|
+
print(f" Errors: {summary.errors}")
|
|
119
|
+
print(f"{'=' * 60}\n")
|
|
120
|
+
|
|
121
|
+
corrected_pages = [
|
|
122
|
+
page for page in summary.pages
|
|
123
|
+
if page.orientation.angle != 0 and page.error is None
|
|
124
|
+
]
|
|
125
|
+
if corrected_pages:
|
|
126
|
+
print("Corrections applied:")
|
|
127
|
+
for page in corrected_pages:
|
|
128
|
+
print(f" {page.image_name}: {page.orientation.angle}° ({page.orientation.method})")
|
|
129
|
+
print()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def main() -> None:
    """Run the command-line interface.

    Parses the arguments, validates the input directory, builds the
    configuration and then either prints the dry-run overview or processes
    the directory and prints the summary. Exits with status 1 when the input
    path is not a directory.
    """
    arguments = _build_argument_parser().parse_args()

    input_path = Path(arguments.input_dir)
    if not input_path.is_dir():
        print(f"Error: '{arguments.input_dir}' is not a valid directory.", file=sys.stderr)
        sys.exit(1)

    # --no-ocr is expressed as a confidence threshold of infinity.
    if arguments.no_ocr:
        osd_threshold = float("inf")
    else:
        osd_threshold = arguments.confidence

    config = OrientationConfig(
        osd_confidence_threshold=osd_threshold,
        output_quality=arguments.quality,
        workers=arguments.workers,
        resume_enabled=not arguments.no_resume,
    )

    if arguments.dry_run:
        _print_dry_run_info(input_path, config, arguments.limit)
        return

    rule = "=" * 60
    ocr_state = "disabled" if arguments.no_ocr else "enabled"

    print(f"\n{rule}")
    print(f" docorient v{__version__}")
    print(f"{rule}")
    print(f" Input: {input_path}")
    print(f" Output: {arguments.output_dir or 'auto (UUID)'}")
    print(f" Workers: {config.effective_workers}")
    print(f" OCR: {ocr_state}")
    print(f" Quality: {config.output_quality}")
    print(f"{rule}\n")

    _print_summary(
        process_directory(
            input_dir=input_path,
            output_dir=arguments.output_dir,
            config=config,
            limit=arguments.limit,
        )
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
|
|
175
|
+
main()
|
docorient/config.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import multiprocessing
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
|
|
6
|
+
DEFAULT_SUPPORTED_EXTENSIONS: tuple[str, ...] = (
|
|
7
|
+
".jpg",
|
|
8
|
+
".jpeg",
|
|
9
|
+
".png",
|
|
10
|
+
".tiff",
|
|
11
|
+
".tif",
|
|
12
|
+
".bmp",
|
|
13
|
+
".gif",
|
|
14
|
+
".webp",
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
RESUME_LOG_FILENAME = "_orientation_done.log"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _default_worker_count() -> int:
|
|
21
|
+
return max(1, multiprocessing.cpu_count() - 2)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
|
|
25
|
+
class OrientationConfig:
|
|
26
|
+
osd_confidence_threshold: float = 2.0
|
|
27
|
+
output_quality: int = 92
|
|
28
|
+
max_osd_dimension: int = 1200
|
|
29
|
+
projection_target_dimension: int = 800
|
|
30
|
+
workers: int | None = None
|
|
31
|
+
resume_enabled: bool = True
|
|
32
|
+
supported_extensions: tuple[str, ...] = field(
|
|
33
|
+
default_factory=lambda: DEFAULT_SUPPORTED_EXTENSIONS
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def effective_workers(self) -> int:
|
|
38
|
+
if self.workers is not None:
|
|
39
|
+
return max(1, self.workers)
|
|
40
|
+
return _default_worker_count()
|
docorient/correction.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
from typing import Literal, overload

from PIL import Image

from docorient.config import OrientationConfig
from docorient.detection.engine import detect_orientation
from docorient.types import CorrectionResult, OrientationResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _apply_rotation(image: Image.Image, angle: int) -> Image.Image:
|
|
14
|
+
if angle == 0:
|
|
15
|
+
return image.copy()
|
|
16
|
+
return image.rotate(angle, expand=True)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# The overloads are keyed on the literal value of ``return_metadata`` so type
# checkers can tell which return type applies. With plain ``bool`` on both,
# every call matched the first overload.
@overload
def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = ...,
    return_metadata: Literal[False] = ...,
) -> Image.Image: ...


@overload
def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = ...,
    return_metadata: Literal[True],
) -> CorrectionResult: ...


@overload
def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = ...,
    return_metadata: bool,
) -> Image.Image | CorrectionResult: ...


def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = None,
    return_metadata: bool = False,
) -> Image.Image | CorrectionResult:
    """Detect and correct the orientation of a single document image.

    The input image is left untouched; the result is always a new image,
    even when no rotation is needed.

    Args:
        image: PIL Image to correct.
        config: Optional configuration. Uses defaults if not provided.
        return_metadata: If True, returns CorrectionResult with image and detection metadata.

    Returns:
        Corrected PIL Image, or CorrectionResult if return_metadata is True.
    """
    orientation = detect_orientation(image, config=config)
    corrected_image = _apply_rotation(image, orientation.angle)

    if return_metadata:
        return CorrectionResult(image=corrected_image, orientation=orientation)
    return corrected_image
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _apply_majority_voting(
|
|
62
|
+
detection_results: list[OrientationResult],
|
|
63
|
+
) -> list[OrientationResult]:
|
|
64
|
+
confident_angles = [
|
|
65
|
+
result.angle for result in detection_results if result.reliable
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
if not confident_angles:
|
|
69
|
+
return detection_results
|
|
70
|
+
|
|
71
|
+
majority_angle = Counter(confident_angles).most_common(1)[0][0]
|
|
72
|
+
corrected_results = []
|
|
73
|
+
|
|
74
|
+
for result in detection_results:
|
|
75
|
+
if not result.reliable and result.angle != majority_angle:
|
|
76
|
+
corrected_results.append(
|
|
77
|
+
OrientationResult(
|
|
78
|
+
angle=majority_angle,
|
|
79
|
+
method=f"{result.method}->majority({majority_angle},was={result.angle})",
|
|
80
|
+
reliable=True,
|
|
81
|
+
)
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
corrected_results.append(result)
|
|
85
|
+
|
|
86
|
+
return corrected_results
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def correct_document_pages(
    pages: list[Image.Image],
    *,
    config: OrientationConfig | None = None,
) -> list[CorrectionResult]:
    """Correct the pages of one document, letting reliable pages outvote the rest.

    Orientation is detected for every page on its own. When there is more
    than one page, majority voting then replaces unreliable detections with
    the most common reliable angle before any page is rotated.

    Args:
        pages: List of PIL Images representing pages of the same document.
        config: Optional configuration. Uses defaults if not provided.

    Returns:
        List of CorrectionResult, one per input page, in input order.
    """
    effective_config = config or OrientationConfig()

    orientations = []
    for page_image in pages:
        orientations.append(detect_orientation(page_image, config=effective_config))

    if len(pages) > 1:
        orientations = _apply_majority_voting(orientations)

    return [
        CorrectionResult(
            image=_apply_rotation(page_image, orientation.angle),
            orientation=orientation,
        )
        for page_image, orientation in zip(pages, orientations)
    ]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from PIL import Image
|
|
4
|
+
|
|
5
|
+
from docorient.config import OrientationConfig
|
|
6
|
+
from docorient.detection.osd import detect_orientation_by_osd
|
|
7
|
+
from docorient.detection.projection import detect_orientation_by_projection
|
|
8
|
+
from docorient.types import OrientationResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def detect_orientation(
    image: Image.Image,
    config: OrientationConfig | None = None,
) -> OrientationResult:
    """Detect the orientation of a document image.

    The projection-based detector runs first. If it reports 90° or 270°,
    its result is returned as is. Otherwise the OSD detector is consulted:
    when it yields a result, that angle is returned as reliable with both
    method names combined; when it yields nothing, the projection result
    is returned.

    Args:
        image: PIL Image to analyze.
        config: Optional configuration. Uses defaults if not provided.

    Returns:
        OrientationResult with detected angle, method description, and reliability flag.
    """
    effective_config = config or OrientationConfig()

    projection_result = detect_orientation_by_projection(
        image,
        target_dimension=effective_config.projection_target_dimension,
    )
    if projection_result.angle in (90, 270):
        return projection_result

    osd_result = detect_orientation_by_osd(
        image,
        max_dimension=effective_config.max_osd_dimension,
        confidence_threshold=effective_config.osd_confidence_threshold,
    )
    if osd_result is None:
        return projection_result

    return OrientationResult(
        angle=osd_result.angle,
        method=f"{osd_result.method},{projection_result.method}",
        reliable=True,
    )
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
from docorient._imaging import downscale_to_max_dimension
|
|
8
|
+
from docorient.types import OrientationResult
|
|
9
|
+
|
|
10
|
+
# Cached outcome of the pytesseract import probe; None until the first check.
_tesseract_available: bool | None = None


def is_tesseract_available() -> bool:
    """Report whether the ``pytesseract`` package can be imported.

    The import is attempted only on the first call; the outcome is stored in
    the module-level ``_tesseract_available`` cache and reused afterwards.
    Only the Python package is probed here, not the Tesseract executable.
    """
    global _tesseract_available
    if _tesseract_available is not None:
        return _tesseract_available

    try:
        import pytesseract  # noqa: F401
    except ImportError:
        _tesseract_available = False
    else:
        _tesseract_available = True
    return _tesseract_available
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _query_tesseract_osd(image: Image.Image) -> tuple[int, float]:
    """Run Tesseract OSD on ``image`` and return ``(orientation, confidence)``.

    The values are read from the ``"orientation"`` and ``"orientation_conf"``
    entries of the dict returned by ``pytesseract.image_to_osd``; a missing
    entry counts as ``0`` / ``0.0``. A ``pytesseract.TesseractError`` is
    reported as ``(0, 0.0)`` instead of being propagated.
    """
    # Function-scope import: pytesseract is only needed once OSD is requested.
    import pytesseract

    try:
        osd_fields = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
        angle = int(osd_fields.get("orientation", 0))
        confidence = float(osd_fields.get("orientation_conf", 0.0))
    except pytesseract.TesseractError:
        return 0, 0.0
    return angle, confidence
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def detect_orientation_by_osd(
    image: Image.Image,
    max_dimension: int = 1200,
    confidence_threshold: float = 2.0,
) -> OrientationResult | None:
    """Detect document orientation using Tesseract OSD.

    Args:
        image: PIL image to analyse. It is never closed by this function.
        max_dimension: Forwarded to ``downscale_to_max_dimension`` before the
            OSD query runs.
        confidence_threshold: Minimum OSD orientation confidence; a lower
            confidence yields ``None``.

    Returns:
        An ``OrientationResult`` with ``reliable=True`` when OSD reports 90,
        180 or 270 with sufficient confidence, otherwise ``None``. ``None`` is
        also returned, after a ``UserWarning``, when pytesseract is not
        installed.
    """
    if not is_tesseract_available():
        warnings.warn(
            "pytesseract is not installed. 180° detection is disabled. "
            "Install with: pip install docorient[ocr]",
            UserWarning,
            stacklevel=2,
        )
        return None

    downscaled_image = downscale_to_max_dimension(image, max_dimension)
    try:
        detected_angle, detection_confidence = _query_tesseract_osd(downscaled_image)
    finally:
        # Release the temporary copy even when the OSD query raises; the
        # caller's own image must stay open.
        if downscaled_image is not image:
            downscaled_image.close()

    if detection_confidence < confidence_threshold:
        return None

    # An angle of 0 (or anything unexpected) means OSD found no rotation to report.
    if detected_angle not in (90, 180, 270):
        return None

    return OrientationResult(
        angle=detected_angle,
        method=f"osd(angle={detected_angle},conf={detection_confidence:.1f})",
        reliable=True,
    )
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from PIL import Image
|
|
5
|
+
|
|
6
|
+
from docorient._imaging import downscale_to_max_dimension
|
|
7
|
+
from docorient.types import OrientationResult
|
|
8
|
+
|
|
9
|
+
# Default ``target_dimension`` of ``detect_orientation_by_projection``.
PROJECTION_ANALYSIS_DIMENSION = 800
# Added to the divisor in ``_compute_energy_ratio`` so that a zero vertical
# energy does not cause a division by zero.
ENERGY_EPSILON = 1e-10
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _profile_energy(profile: np.ndarray) -> float:
    """Return the mean squared first difference of a 1-D projection profile.

    A profile with fewer than two entries has no differences; it is reported
    as ``0.0`` rather than letting ``np.mean`` of an empty array yield NaN
    together with a RuntimeWarning.
    """
    if profile.size < 2:
        return 0.0
    return float(np.mean(np.diff(profile) ** 2))


def _compute_projection_energy(pixel_array: np.ndarray, threshold: float) -> tuple[float, float]:
    """Measure how strongly the dark-pixel counts vary along each axis.

    Pixels strictly below ``threshold`` are counted as dark. The dark pixels
    are summed per row (horizontal projection) and per column (vertical
    projection), and each projection is reduced to the mean of its squared
    first differences.

    Args:
        pixel_array: 2-D array of pixel intensities.
        threshold: Intensity below which a pixel counts as dark.

    Returns:
        ``(horizontal_energy, vertical_energy)``. An axis with fewer than two
        entries contributes ``0.0``.
    """
    binary_mask = (pixel_array < threshold).astype(np.float32)
    horizontal_energy = _profile_energy(binary_mask.sum(axis=1))
    vertical_energy = _profile_energy(binary_mask.sum(axis=0))
    return horizontal_energy, vertical_energy
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _compute_energy_ratio(horizontal_energy: float, vertical_energy: float) -> float:
    """Return the horizontal energy divided by the vertical energy.

    ``ENERGY_EPSILON`` is added to the divisor so that a vertical energy of
    zero does not raise ``ZeroDivisionError``.
    """
    guarded_vertical_energy = vertical_energy + ENERGY_EPSILON
    return horizontal_energy / guarded_vertical_energy
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def detect_orientation_by_projection(
    image: Image.Image,
    target_dimension: int = PROJECTION_ANALYSIS_DIMENSION,
) -> OrientationResult:
    """Detect document orientation using horizontal/vertical projection profile energy analysis.

    The image is converted to grayscale, downscaled via
    ``downscale_to_max_dimension`` and thresholded at its mean brightness.
    The horizontal/vertical energy ratio then decides the verdict:

    * ratio above 1.0: angle 0 (horizontal);
    * otherwise the array is rotated with ``np.rot90(k=1)`` and the ratio is
      recomputed; angle 90 if the rotated ratio is larger, else angle 270.

    An image without measurable structure (both energies zero, e.g. a uniform
    page, or non-finite) is reported as angle 0 with ``reliable=False``
    instead of falling through to the 270 branch.

    Args:
        image: PIL image to analyse. It is not closed by this function.
        target_dimension: Forwarded to ``downscale_to_max_dimension``.

    Returns:
        ``OrientationResult`` with angle 0, 90 or 270.
    """
    grayscale_image = image.convert("L")
    downscaled_image = downscale_to_max_dimension(grayscale_image, target_dimension)
    if downscaled_image is not grayscale_image:
        grayscale_image.close()

    try:
        pixel_array = np.array(downscaled_image, dtype=np.float32)
    finally:
        # The pixel data has been copied (or the copy failed); either way the
        # temporary image is no longer needed.
        downscaled_image.close()
    brightness_threshold = float(pixel_array.mean())

    horizontal_energy, vertical_energy = _compute_projection_energy(
        pixel_array, brightness_threshold
    )

    energies_are_finite = bool(np.isfinite(horizontal_energy) and np.isfinite(vertical_energy))
    if not energies_are_finite or max(horizontal_energy, vertical_energy) <= ENERGY_EPSILON:
        # Nothing to compare: do not claim a rotation, and flag the verdict as
        # unreliable so callers can tell it apart from a real detection.
        return OrientationResult(
            angle=0,
            method="projection(h/v=n/a,indeterminate)",
            reliable=False,
        )

    energy_ratio = _compute_energy_ratio(horizontal_energy, vertical_energy)

    if energy_ratio > 1.0:
        return OrientationResult(
            angle=0,
            method=f"projection(h/v={energy_ratio:.2f},horizontal)",
            reliable=True,
        )

    rotated_array = np.rot90(pixel_array, k=1)
    rotated_horizontal_energy, rotated_vertical_energy = _compute_projection_energy(
        rotated_array, brightness_threshold
    )
    rotated_energy_ratio = _compute_energy_ratio(rotated_horizontal_energy, rotated_vertical_energy)

    if rotated_energy_ratio > energy_ratio:
        return OrientationResult(
            angle=90,
            method=f"projection(h/v={energy_ratio:.2f}->90ccw:{rotated_energy_ratio:.2f})",
            reliable=True,
        )

    return OrientationResult(
        angle=270,
        method=f"projection(h/v={energy_ratio:.2f}->270ccw)",
        reliable=True,
    )
|
docorient/exceptions.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class DocorientError(Exception):
    """Common base class of the exception types declared in this module."""


class DetectionError(DocorientError):
    """``DocorientError`` subtype named for orientation-detection failures."""


class CorrectionError(DocorientError):
    """``DocorientError`` subtype named for image-correction failures."""


class BatchProcessingError(DocorientError):
    """``DocorientError`` subtype named for batch-processing failures."""


class TesseractNotAvailableError(DocorientError):
    """``DocorientError`` subtype named for a missing Tesseract dependency."""
|
docorient/types.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from PIL import Image as PILImage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
class OrientationResult:
    """Immutable outcome of an orientation detection."""

    # Detected angle in degrees; the package's detection functions produce
    # 0, 90, 180 or 270.
    angle: int
    # Text describing which detector produced the angle and with what
    # figures, e.g. "projection(h/v=1.50,horizontal)".
    method: str
    # Reliability flag supplied by the detector that built the result.
    reliable: bool
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True, slots=True)
class CorrectionResult:
    """Immutable pairing of an image with the orientation result for it."""

    # The image carried by this result; ``correct_document_pages`` stores the
    # corrected page here.
    image: PILImage.Image
    # Orientation result associated with ``image``.
    orientation: OrientationResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
class PageResult:
    """Immutable per-page record; collected in ``BatchSummary.pages``."""

    # NOTE(review): the field meanings below are inferred from their names;
    # the code that fills them is not visible here — confirm against the
    # batch worker.
    # Presumably the source document the page belongs to.
    source_file: str
    # Presumably the page's position within ``source_file``.
    page_number: int
    # Presumably the file name of the page image.
    image_name: str
    # Presumably where the page image was read from.
    input_path: str
    # Presumably where the processed page image was written.
    output_path: str
    # Orientation result recorded for this page.
    orientation: OrientationResult
    # Optional error text; defaults to None.
    error: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True, slots=True)
class BatchSummary:
    """Immutable summary of a batch run, as returned by ``process_directory``."""

    # NOTE(review): the counters are documented from their names and the
    # README example (``summary.corrected`` / ``summary.total_pages``); the
    # code that computes them is not visible here — confirm against the
    # batch processor.
    # Presumably the directory that was scanned.
    input_directory: str
    # Presumably the directory the results were written to.
    output_directory: str
    # Presumably the number of source files handled.
    total_files: int
    # Presumably the number of pages handled.
    total_pages: int
    # Presumably the pages that needed no rotation.
    already_correct: int
    # Presumably the pages that were rotated.
    corrected: int
    # Presumably the pages whose correction came from majority voting.
    corrected_by_majority: int
    # Presumably the pages that failed.
    errors: int
    # Per-page records, stored as a tuple.
    pages: tuple[PageResult, ...]
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: docorient
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Document image orientation detection and correction using projection profile analysis and optional Tesseract OSD.
|
|
5
|
+
Project-URL: Homepage, https://github.com/cebraspe-lab/docorient
|
|
6
|
+
Project-URL: Repository, https://github.com/cebraspe-lab/docorient
|
|
7
|
+
Project-URL: Issues, https://github.com/cebraspe-lab/docorient/issues
|
|
8
|
+
Author: Cebraspe Lab
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: correction,document,image,ocr,orientation,rotation,tesseract
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: pillow>=10.0
|
|
25
|
+
Requires-Dist: tqdm>=4.60
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: build; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
31
|
+
Requires-Dist: twine; extra == 'dev'
|
|
32
|
+
Provides-Extra: ocr
|
|
33
|
+
Requires-Dist: pytesseract>=0.3.10; extra == 'ocr'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# docorient
|
|
37
|
+
|
|
38
|
+
Document image orientation detection and correction.
|
|
39
|
+
|
|
40
|
+
Detects and fixes rotation (0°, 90°, 180°, 270°) in scanned document images using projection profile analysis and optional Tesseract OSD.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install docorient
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For 180° detection via Tesseract OSD:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install docorient[ocr]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
> **Note:** The `[ocr]` extra requires [Tesseract](https://github.com/tesseract-ocr/tesseract) installed on your system.
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
### Detect orientation
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from PIL import Image
|
|
62
|
+
from docorient import detect_orientation
|
|
63
|
+
|
|
64
|
+
image = Image.open("document.jpg")
|
|
65
|
+
result = detect_orientation(image)
|
|
66
|
+
|
|
67
|
+
print(result.angle) # 0, 90, 180, or 270
|
|
68
|
+
print(result.method) # detection method used
|
|
69
|
+
print(result.reliable) # confidence flag
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Correct a single image
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from docorient import correct_image
|
|
76
|
+
|
|
77
|
+
corrected = correct_image(image)
|
|
78
|
+
corrected.save("fixed.jpg")
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Correct with metadata
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from docorient import correct_image
|
|
85
|
+
|
|
86
|
+
result = correct_image(image, return_metadata=True)
|
|
87
|
+
print(result.orientation.angle)
|
|
88
|
+
result.image.save("fixed.jpg")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Correct multi-page document (majority voting)
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from docorient import correct_document_pages
|
|
95
|
+
|
|
96
|
+
pages = [Image.open(f"page_{i}.jpg") for i in range(5)]
|
|
97
|
+
corrected_pages = correct_document_pages(pages)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Batch process a directory
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from docorient import process_directory, OrientationConfig
|
|
104
|
+
|
|
105
|
+
config = OrientationConfig(workers=4, output_quality=95)
|
|
106
|
+
summary = process_directory("./scans", output_dir="./fixed", config=config)
|
|
107
|
+
|
|
108
|
+
print(f"Corrected: {summary.corrected}/{summary.total_pages}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### CLI
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
docorient ./scans --output ./fixed --workers 4
|
|
115
|
+
docorient ./scans --dry-run
|
|
116
|
+
docorient ./scans --no-ocr --limit 100
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## How It Works
|
|
120
|
+
|
|
121
|
+
1. **Projection profile analysis** detects 90° and 270° rotations by comparing horizontal vs vertical text energy
|
|
122
|
+
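2. **Tesseract OSD** (optional) is consulted when projection analysis does not report a 90° or 270° rotation; it detects 180° rotation, and its result is used only if its confidence reaches `osd_confidence_threshold`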
|
|
123
|
+
3. **Majority voting** across pages of the same document improves reliability
|
|
124
|
+
|
|
125
|
+
## Supported Formats
|
|
126
|
+
|
|
127
|
+
Any format readable by Pillow: JPEG, PNG, TIFF, BMP, GIF, WebP, and more.
|
|
128
|
+
|
|
129
|
+
## Configuration
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from docorient import OrientationConfig
|
|
133
|
+
|
|
134
|
+
config = OrientationConfig(
|
|
135
|
+
osd_confidence_threshold=2.0,
|
|
136
|
+
output_quality=92,
|
|
137
|
+
max_osd_dimension=1200,
|
|
138
|
+
projection_target_dimension=800,
|
|
139
|
+
workers=4,
|
|
140
|
+
resume_enabled=True,
|
|
141
|
+
)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
docorient/__init__.py,sha256=J_B5pjSvSZknWhctdEYtRs2iOwGoVrOAzL7NtRmGQ54,919
|
|
2
|
+
docorient/_imaging.py,sha256=iO0-LjaH5QjiHcTUzHML5LCVcTAYxZlD4oK9lZJ078A,1232
|
|
3
|
+
docorient/_version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
4
|
+
docorient/cli.py,sha256=zNepvIuVt-clLL37XzFZTaCcleV8jiPnIQ2DwmCPDRs,5467
|
|
5
|
+
docorient/config.py,sha256=AVeX4GbsdgJyBC9uIeAD8fvIMKAJuRkeSh-JsR_elPk,951
|
|
6
|
+
docorient/correction.py,sha256=L-uuOcGQKZXqFnTFccl4SvR20dOezNinLndrIx5OSDk,3630
|
|
7
|
+
docorient/exceptions.py,sha256=vdWkgbQH3DJLce5LFlKe9AnIUYmRMho4Sq0p9HdKVRo,257
|
|
8
|
+
docorient/types.py,sha256=qo3KbtyJOo-xxXivMAErOBeX6AnxtBDlELuj4bGFfys,840
|
|
9
|
+
docorient/batch/__init__.py,sha256=tr5WRSoQC0PRNB4N_Z7TwkR3ZzOyCmD-KStlMjAvz6I,89
|
|
10
|
+
docorient/batch/processor.py,sha256=3PvwGAxvKQdJO1yRlxnMtIngvb9oZArlUfL2ajdO6JY,6349
|
|
11
|
+
docorient/batch/scanner.py,sha256=0CGUbRwPnEkDJKYyHYkNCXffy5DZXV3s-cplvjQ-XSo,1555
|
|
12
|
+
docorient/batch/worker.py,sha256=qBegpAqTajoUHOI4QhjxvoZuvnPysvcbgmRTIF8wi_I,4097
|
|
13
|
+
docorient/detection/__init__.py,sha256=1i2bVoFXfpQe8u7O3HjRpkXqAFxi6kNvA8xbYZkVZjA,92
|
|
14
|
+
docorient/detection/engine.py,sha256=iG4CoY1ofoCrXSc5QjzoiB9UHi6RmLQf8lQ2Wz_FTxU,1567
|
|
15
|
+
docorient/detection/osd.py,sha256=EiI0yYPx5VgwoaX4AXXAVAeH2KF5QGaRLAXwmRTGhNA,2148
|
|
16
|
+
docorient/detection/projection.py,sha256=-WugLjtgzqi3fP7XxAAJLpRRTzGzNwSpcWd4MULOUeI,2594
|
|
17
|
+
docorient-0.1.0.dist-info/METADATA,sha256=kp2j0qcfAZJp4-bQ5hl9TFPE5tSWLQgV_38ZtoIHY3k,3890
|
|
18
|
+
docorient-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
19
|
+
docorient-0.1.0.dist-info/entry_points.txt,sha256=yxZkcXy-6woVnA6vxEX3vi_ywPaa2RsQkn282hDrEDc,49
|
|
20
|
+
docorient-0.1.0.dist-info/licenses/LICENSE,sha256=wUKLgf8GreWy4jQrdQOyEUBchTfAns3QvgCNgW_IlNc,1069
|
|
21
|
+
docorient-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Cebraspe Lab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|