docorient 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docorient/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ from docorient._version import __version__
2
+ from docorient.batch.processor import process_directory
3
+ from docorient.config import OrientationConfig
4
+ from docorient.correction import correct_document_pages, correct_image
5
+ from docorient.detection.engine import detect_orientation
6
+ from docorient.exceptions import (
7
+ BatchProcessingError,
8
+ CorrectionError,
9
+ DetectionError,
10
+ DocorientError,
11
+ TesseractNotAvailableError,
12
+ )
13
+ from docorient.types import (
14
+ BatchSummary,
15
+ CorrectionResult,
16
+ OrientationResult,
17
+ PageResult,
18
+ )
19
+
20
+ __all__ = [
21
+ "BatchProcessingError",
22
+ "BatchSummary",
23
+ "CorrectionError",
24
+ "CorrectionResult",
25
+ "DetectionError",
26
+ "DocorientError",
27
+ "OrientationConfig",
28
+ "OrientationResult",
29
+ "PageResult",
30
+ "TesseractNotAvailableError",
31
+ "__version__",
32
+ "correct_document_pages",
33
+ "correct_image",
34
+ "detect_orientation",
35
+ "process_directory",
36
+ ]
docorient/_imaging.py ADDED
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from PIL import Image
6
+
7
+
8
def open_as_rgb(image_path: str | Path) -> Image.Image:
    """Load an image file and return an in-memory RGB copy of it.

    Args:
        image_path: Location of the image file to read.

    Returns:
        A new image in ``"RGB"`` mode. The file opened for reading is
        closed before this function returns.
    """
    # ``convert`` produces a separate image object, so the source image
    # (and the file handle it keeps open) can be released right away
    # instead of lingering until garbage collection.
    with Image.open(image_path) as source_image:
        return source_image.convert("RGB")
10
+
11
+
12
def downscale_to_max_dimension(image: Image.Image, max_dimension: int) -> Image.Image:
    """Shrink ``image`` so that its longest side is at most ``max_dimension``.

    Args:
        image: Image to shrink; only ``size`` and ``resize`` are used.
        max_dimension: Upper bound, in pixels, for the longest side.

    Returns:
        ``image`` itself (the same object) when it already fits, otherwise a
        new image resized with Lanczos resampling, aspect ratio preserved.
    """
    image_width, image_height = image.size
    largest_side = max(image_width, image_height)

    if largest_side <= max_dimension:
        return image

    scale_factor = max_dimension / largest_side
    # Truncation could yield a 0-pixel side for very elongated images,
    # which ``resize`` rejects; clamp each side to at least one pixel.
    target_width = max(1, int(image_width * scale_factor))
    target_height = max(1, int(image_height * scale_factor))
    return image.resize((target_width, target_height), Image.LANCZOS)
23
+
24
+
25
def save_image(
    image: Image.Image,
    output_path: str | Path,
    output_format: str = "JPEG",
    quality: int = 92,
) -> None:
    """Write ``image`` to ``output_path`` in the requested format.

    Args:
        image: Image object whose ``save`` method is invoked.
        output_path: Destination passed through to ``image.save``.
        output_format: Format name passed as the second positional argument.
        quality: Forwarded to ``image.save`` as the ``quality`` keyword.
    """
    encoder_options = {"quality": quality}
    image.save(output_path, output_format, **encoder_options)
32
+
33
+
34
def determine_output_format(file_path: str | Path) -> str:
    """Map a file name's extension to an image format name.

    The extension is compared case-insensitively. Unknown or missing
    extensions fall back to ``"JPEG"``.
    """
    suffix = Path(file_path).suffix.lower()

    if suffix in (".jpg", ".jpeg"):
        return "JPEG"
    if suffix in (".tiff", ".tif"):
        return "TIFF"
    if suffix in (".png", ".bmp", ".gif", ".webp"):
        # For these the format name is simply the upper-cased extension.
        return suffix[1:].upper()
    return "JPEG"
docorient/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,3 @@
1
+ from docorient.batch.processor import process_directory
2
+
3
+ __all__ = ["process_directory"]
@@ -0,0 +1,197 @@
1
+ from __future__ import annotations
2
+
3
+ import multiprocessing
4
+ import sys
5
+ import time
6
+ import uuid
7
+ from dataclasses import asdict
8
+ from pathlib import Path
9
+
10
+ from tqdm import tqdm
11
+
12
+ from docorient.batch.scanner import ScannedPage, scan_directory
13
+ from docorient.batch.worker import initialize_worker, process_batch
14
+ from docorient.config import RESUME_LOG_FILENAME, OrientationConfig
15
+ from docorient.types import BatchSummary, PageResult
16
+
17
+
18
+ def _load_completed_sources(resume_log_path: Path) -> set[str]:
19
+ if not resume_log_path.exists():
20
+ return set()
21
+ with open(resume_log_path) as resume_log:
22
+ return {line.strip() for line in resume_log if line.strip()}
23
+
24
+
25
+ def _distribute_into_batches(
26
+ items: list[tuple[str, list[ScannedPage]]],
27
+ batch_count: int,
28
+ ) -> list[list[tuple[str, list[ScannedPage]]]]:
29
+ batches: list[list[tuple[str, list[ScannedPage]]]] = [[] for _ in range(batch_count)]
30
+ for item_index, item in enumerate(items):
31
+ target_batch = item_index % batch_count
32
+ batches[target_batch].append(item)
33
+ return batches
34
+
35
+
36
def _build_summary(
    input_directory: str,
    output_directory: str,
    total_files: int,
    all_page_results: dict[str, list[PageResult]],
    source_file_names: list[str],
) -> BatchSummary:
    """Aggregate per-page results into a ``BatchSummary``.

    Pages are collected in the order of ``source_file_names``; sources with
    no entry in ``all_page_results`` contribute nothing. Each page is counted
    exactly once: as an error when ``error`` is set, as corrected when its
    angle is non-zero, otherwise as already correct. Corrected pages whose
    method contains ``"->majority"`` are additionally counted as corrected
    by majority.
    """
    ordered_pages: list[PageResult] = [
        page
        for name in source_file_names
        for page in all_page_results.get(name, [])
    ]

    failed_pages = [page for page in ordered_pages if page.error is not None]
    rotated_pages = [
        page
        for page in ordered_pages
        if page.error is None and page.orientation.angle != 0
    ]
    majority_total = sum(
        1 for page in rotated_pages if "->majority" in page.orientation.method
    )
    untouched_total = len(ordered_pages) - len(failed_pages) - len(rotated_pages)

    return BatchSummary(
        input_directory=input_directory,
        output_directory=output_directory,
        total_files=total_files,
        total_pages=len(ordered_pages),
        already_correct=untouched_total,
        corrected=len(rotated_pages),
        corrected_by_majority=majority_total,
        errors=len(failed_pages),
        pages=tuple(ordered_pages),
    )
72
+
73
+
74
def process_directory(
    input_dir: str | Path,
    *,
    output_dir: str | Path | None = None,
    config: OrientationConfig | None = None,
    limit: int = 0,
    show_progress: bool = True,
) -> BatchSummary:
    """Process all images in a directory, detecting and correcting orientation.

    Sources listed in the resume log of the output directory are skipped when
    ``config.resume_enabled`` is true; skipped sources contribute no page
    results to the returned summary.

    Args:
        input_dir: Path to directory containing document images.
        output_dir: Path for corrected output. None generates a UUID-named
            directory next to the input directory.
        config: Processing configuration. Uses defaults if not provided.
        limit: Maximum number of images to process. 0 means all.
        show_progress: Whether to display a tqdm progress bar.

    Returns:
        BatchSummary with statistics and per-page results.

    Raises:
        SystemExit: With status 1 when interrupted by ``KeyboardInterrupt``
            (the worker pool is terminated first).
    """
    import warnings  # local import: only needed to report failed batches

    effective_config = config or OrientationConfig()
    input_path = Path(input_dir).resolve()

    if output_dir is None:
        output_path = input_path.parent / str(uuid.uuid4())
    else:
        output_path = Path(output_dir).resolve()

    output_path.mkdir(parents=True, exist_ok=True)

    pages_by_source = scan_directory(
        input_path,
        output_path,
        supported_extensions=effective_config.supported_extensions,
        limit=limit,
    )

    source_file_names = list(pages_by_source.keys())
    total_files = len(source_file_names)
    total_pages = sum(len(pages) for pages in pages_by_source.values())

    if total_pages == 0:
        return _build_summary(str(input_path), str(output_path), 0, {}, [])

    resume_log_path = output_path / RESUME_LOG_FILENAME
    already_completed_sources: set[str] = set()

    if effective_config.resume_enabled:
        already_completed_sources = _load_completed_sources(resume_log_path)

    pending_sources = [
        (source_name, pages_by_source[source_name])
        for source_name in source_file_names
        if source_name not in already_completed_sources
    ]

    all_page_results: dict[str, list[PageResult]] = {}

    if not pending_sources:
        return _build_summary(
            str(input_path), str(output_path), total_files, all_page_results, source_file_names
        )

    # Never start more workers than there are sources to hand out.
    worker_count = min(effective_config.effective_workers, len(pending_sources))
    batches = _distribute_into_batches(pending_sources, worker_count)

    # Shared counter the workers bump once per finished source; polled below
    # to drive the progress bar.
    progress_counter = multiprocessing.Value("i", 0)
    progress_lock = multiprocessing.Lock()

    config_as_dict = asdict(effective_config)

    worker_pool = multiprocessing.Pool(
        processes=worker_count,
        initializer=initialize_worker,
        initargs=(progress_counter, progress_lock, str(resume_log_path), config_as_dict),
        maxtasksperchild=1,
    )

    if show_progress:
        progress_bar = tqdm(
            total=len(pending_sources),
            desc="Correcting",
            unit="file",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
        )
    else:
        progress_bar = None

    try:
        async_results = [
            worker_pool.apply_async(process_batch, (batch,)) for batch in batches
        ]
        worker_pool.close()

        while not all(async_result.ready() for async_result in async_results):
            if progress_bar is not None:
                progress_bar.n = progress_counter.value
                progress_bar.refresh()
            time.sleep(0.3)

        if progress_bar is not None:
            progress_bar.n = progress_counter.value
            progress_bar.refresh()

        for async_result in async_results:
            try:
                batch_results = async_result.get(timeout=60)
            except Exception as batch_error:
                # Keep the run best-effort, but make the loss visible: the
                # pages of a failed batch are absent from the summary.
                warnings.warn(
                    f"A worker batch failed and its results were discarded: {batch_error!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                continue
            for source_name, page_results in batch_results:
                all_page_results[source_name] = page_results

        worker_pool.join()
    except KeyboardInterrupt:
        worker_pool.terminate()
        worker_pool.join()
        sys.exit(1)
    except BaseException:
        # Do not leave worker processes behind on an unexpected failure.
        worker_pool.terminate()
        worker_pool.join()
        raise
    finally:
        if progress_bar is not None:
            progress_bar.close()

    return _build_summary(
        str(input_path), str(output_path), total_files, all_page_results, source_file_names
    )
@@ -0,0 +1,59 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ PAGE_PATTERN = re.compile(r"^(.+)_p(\d+)\.\w+$")
8
+
9
+
10
+ @dataclass(frozen=True, slots=True)
11
+ class ScannedPage:
12
+ source_file: str
13
+ page_number: int
14
+ image_name: str
15
+ image_path: str
16
+ output_path: str
17
+
18
+
19
def scan_directory(
    input_directory: Path,
    output_directory: Path,
    supported_extensions: tuple[str, ...],
    limit: int = 0,
) -> dict[str, list[ScannedPage]]:
    """List the images directly inside ``input_directory``, grouped by source.

    Files are taken in sorted path order and only regular files whose
    lower-cased suffix is in ``supported_extensions`` are kept. A positive
    ``limit`` keeps only the first ``limit`` files. Names matching
    ``PAGE_PATTERN`` (``<source>_p<number>.<ext>``) are grouped under
    ``<source>`` with that page number; every other file forms its own
    source (its stem) as page 1. The output path of each page is
    ``output_directory`` joined with the unchanged file name.
    """
    candidates = [
        entry
        for entry in input_directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in supported_extensions
    ]
    candidates.sort()

    if limit > 0:
        candidates = candidates[:limit]

    grouped: dict[str, list[ScannedPage]] = {}

    for entry in candidates:
        file_name = entry.name
        matched = PAGE_PATTERN.match(file_name)

        if matched is None:
            source_name, page_index = entry.stem, 1
        else:
            source_name, page_index = matched.group(1), int(matched.group(2))

        page = ScannedPage(
            source_file=source_name,
            page_number=page_index,
            image_name=file_name,
            image_path=str(entry),
            output_path=str(output_directory / file_name),
        )
        grouped.setdefault(source_name, []).append(page)

    return grouped
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import multiprocessing
4
+ from typing import Any
5
+
6
+ from docorient._imaging import determine_output_format, open_as_rgb, save_image
7
+ from docorient.batch.scanner import ScannedPage
8
+ from docorient.config import OrientationConfig
9
+ from docorient.correction import _apply_majority_voting, _apply_rotation
10
+ from docorient.detection.engine import detect_orientation
11
+ from docorient.types import OrientationResult, PageResult
12
+
13
+ _shared_counter: Any = None
14
+ _shared_lock: Any = None
15
+ _shared_resume_log_path: str | None = None
16
+ _shared_config_dict: dict[str, Any] | None = None
17
+
18
+
19
def initialize_worker(
    counter: multiprocessing.Value,
    lock: multiprocessing.Lock,
    resume_log_path: str,
    config_dict: dict[str, Any],
) -> None:
    """Store the shared progress state and settings in module globals.

    Args:
        counter: Shared counter incremented once per completed source.
        lock: Lock guarding the counter and the resume log.
        resume_log_path: Path of the resume log file.
        config_dict: Keyword arguments used to rebuild the configuration.
    """
    global _shared_counter, _shared_lock, _shared_resume_log_path, _shared_config_dict
    _shared_counter, _shared_lock = counter, lock
    _shared_resume_log_path, _shared_config_dict = resume_log_path, config_dict
30
+
31
+
32
def _reconstruct_config() -> OrientationConfig:
    """Rebuild the configuration from the dict stored by ``initialize_worker``.

    Falls back to a default ``OrientationConfig`` when no dict was stored.
    """
    stored_settings = _shared_config_dict
    if stored_settings is not None:
        return OrientationConfig(**stored_settings)
    return OrientationConfig()
36
+
37
+
38
def _save_corrected_page(
    scanned_page: ScannedPage,
    angle: int,
    config: OrientationConfig,
) -> None:
    """Re-open one page, rotate it by ``angle`` and write it to its output path."""
    image = open_as_rgb(scanned_page.image_path)
    try:
        corrected_image = _apply_rotation(image, angle)
        try:
            save_image(
                corrected_image,
                scanned_page.output_path,
                output_format=determine_output_format(scanned_page.output_path),
                quality=config.output_quality,
            )
        finally:
            corrected_image.close()
    finally:
        # Release the image even when rotating or saving fails.
        image.close()


def _process_single_source(
    source_file_name: str,
    pages: list[ScannedPage],
    config: OrientationConfig,
) -> list[PageResult]:
    """Detect and correct the orientation of every page of one source document.

    Runs in two passes: first the orientation of each page is detected, then
    (for multi-page sources) majority voting is applied and each page is
    rotated and saved. A failure on one page is recorded in that page's
    ``error`` field and does not stop the remaining pages.

    Args:
        source_file_name: Name of the source document (not used by the body;
            each result takes its source name from the scanned page).
        pages: Pages belonging to the source.
        config: Detection and output settings.

    Returns:
        One ``PageResult`` per input page, in input order.
    """
    valid_pages = list(pages)
    detection_results: list[OrientationResult] = []
    page_errors: dict[int, str] = {}

    for page_index, scanned_page in enumerate(valid_pages):
        try:
            image = open_as_rgb(scanned_page.image_path)
            try:
                detection_results.append(detect_orientation(image, config=config))
            finally:
                # Release the image even when detection raises.
                image.close()
        except Exception as detection_error:
            # Placeholder keeps detection_results aligned with valid_pages.
            detection_results.append(OrientationResult(angle=0, method="error", reliable=False))
            page_errors[page_index] = str(detection_error)

    if len(valid_pages) > 1:
        detection_results = _apply_majority_voting(detection_results)

    page_results: list[PageResult] = []

    for page_index, (scanned_page, orientation) in enumerate(zip(valid_pages, detection_results)):
        error_message = page_errors.get(page_index)

        # Pages that already failed detection are reported but not written.
        if error_message is None:
            try:
                _save_corrected_page(scanned_page, orientation.angle, config)
            except Exception as save_error:
                error_message = str(save_error)

        page_results.append(
            PageResult(
                source_file=scanned_page.source_file,
                page_number=scanned_page.page_number,
                image_name=scanned_page.image_name,
                input_path=scanned_page.image_path,
                output_path=scanned_page.output_path,
                orientation=orientation,
                error=error_message,
            )
        )

    return page_results
94
+
95
+
96
def _record_completion(source_file_name: str, *, mark_resumable: bool = True) -> None:
    """Count one finished source and optionally append it to the resume log.

    Args:
        source_file_name: Name written as one line of the resume log.
        mark_resumable: When False only the progress counter is advanced, so
            the source is picked up again by a later resumed run.
    """
    import warnings  # local import: only needed on the failure path

    with _shared_lock:
        _shared_counter.value += 1
        if not mark_resumable:
            return
        try:
            with open(_shared_resume_log_path, "a") as resume_log:
                resume_log.write(source_file_name + "\n")
                resume_log.flush()
        except OSError as log_error:
            # Best effort: failing to persist resume state must not abort the
            # batch, but the user should learn that resume may redo work.
            warnings.warn(
                f"Could not update resume log for '{source_file_name}': {log_error}",
                RuntimeWarning,
                stacklevel=2,
            )


def process_batch(
    batch: list[tuple[str, list[ScannedPage]]],
) -> list[tuple[str, list[PageResult]]]:
    """Process every source of a batch and return its per-page results.

    Each source advances the shared progress counter. Only sources whose
    pages all completed without error are written to the resume log, so
    failed sources are retried when the run is resumed.

    Args:
        batch: ``(source name, pages)`` pairs to process, in order.

    Returns:
        ``(source name, page results)`` pairs in the same order.
    """
    config = _reconstruct_config()
    batch_results: list[tuple[str, list[PageResult]]] = []

    for source_file_name, scanned_pages in batch:
        page_results = _process_single_source(source_file_name, scanned_pages, config)
        batch_results.append((source_file_name, page_results))
        fully_succeeded = all(page.error is None for page in page_results)
        _record_completion(source_file_name, mark_resumable=fully_succeeded)

    return batch_results
docorient/cli.py ADDED
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from docorient._version import __version__
8
+ from docorient.batch.processor import process_directory
9
+ from docorient.config import OrientationConfig
10
+ from docorient.detection.osd import is_tesseract_available
11
+
12
+
13
def _build_argument_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the ``docorient`` program.

    Defines the positional ``input_dir``, the valued options ``--output``,
    ``--workers``, ``--limit``, ``--quality`` and ``--confidence``, the
    switches ``--no-ocr``, ``--no-resume`` and ``--dry-run``, and
    ``--version``.
    """
    cli_parser = argparse.ArgumentParser(
        prog="docorient",
        description="Detect and correct document image orientation.",
    )
    cli_parser.add_argument(
        "input_dir",
        type=str,
        help="Directory containing document images to process.",
    )

    # Options that take a value: (flags, add_argument keyword arguments).
    valued_options = (
        (
            ("--output", "-o"),
            {
                "type": str,
                "default": None,
                "dest": "output_dir",
                "help": "Output directory for corrected images. Default: auto-generated UUID.",
            },
        ),
        (
            ("--workers", "-w"),
            {
                "type": int,
                "default": None,
                "help": "Number of parallel worker processes. Default: cpu_count - 2.",
            },
        ),
        (
            ("--limit", "-l"),
            {
                "type": int,
                "default": 0,
                "help": "Maximum number of images to process. 0 means all.",
            },
        ),
        (
            ("--quality", "-q"),
            {
                "type": int,
                "default": 92,
                "help": "Output JPEG quality (1-100). Default: 92.",
            },
        ),
        (
            ("--confidence",),
            {
                "type": float,
                "default": 2.0,
                "help": "OSD confidence threshold. Default: 2.0.",
            },
        ),
    )
    for flags, options in valued_options:
        cli_parser.add_argument(*flags, **options)

    # Plain on/off switches, all defaulting to False.
    boolean_switches = (
        ("--no-ocr", "Disable Tesseract OSD (only projection-based detection)."),
        ("--no-resume", "Disable resume from previous run."),
        ("--dry-run", "Only show what would be done, without processing."),
    )
    for flag, help_text in boolean_switches:
        cli_parser.add_argument(flag, action="store_true", default=False, help=help_text)

    cli_parser.add_argument(
        "--version", "-V",
        action="version",
        version=f"docorient {__version__}",
    )
    return cli_parser
78
+
79
+
80
def _print_dry_run_info(input_path: Path, config: OrientationConfig, limit: int) -> None:
    """Print what a real run would process, without touching any file.

    Args:
        input_path: Directory that would be scanned.
        config: Settings the run would use.
        limit: Maximum number of images to consider. 0 means all.
    """
    from docorient.batch.scanner import scan_directory

    # The scan only needs an output location to compute paths; nothing is
    # created there by this function.
    temp_output = input_path.parent / "__dry_run_temp__"
    pages_by_source = scan_directory(
        input_path,
        temp_output,
        supported_extensions=config.supported_extensions,
        limit=limit,
    )

    total_files = len(pages_by_source)
    total_pages = sum(len(pages) for pages in pages_by_source.values())

    # main() encodes --no-ocr as an infinite confidence threshold, so OCR is
    # only effectively enabled when Tesseract is usable AND the threshold is
    # reachable.
    ocr_enabled = (
        config.osd_confidence_threshold != float("inf") and is_tesseract_available()
    )

    print(f"\n{'=' * 60}")
    print(" DRY RUN - No changes will be made")
    print(f"{'=' * 60}")
    print(f" Input: {input_path}")
    print(f" Files: {total_files} source documents")
    print(f" Pages: {total_pages} images")
    print(f" Workers: {config.effective_workers}")
    print(f" OCR: {'enabled' if ocr_enabled else 'disabled'}")
    print(f" Quality: {config.output_quality}")
    print(f" Resume: {'enabled' if config.resume_enabled else 'disabled'}")
    print(f"{'=' * 60}\n")
105
+
106
+
107
+ def _print_summary(summary) -> None:
108
+ print(f"\n{'=' * 60}")
109
+ print(" SUMMARY")
110
+ print(f"{'=' * 60}")
111
+ print(f" Output: {summary.output_directory}")
112
+ print(f" Files processed: {summary.total_files}")
113
+ print(f" Total pages: {summary.total_pages}")
114
+ print(f" Already correct (0°): {summary.already_correct}")
115
+ print(f" Corrected: {summary.corrected}")
116
+ if summary.corrected_by_majority > 0:
117
+ print(f" (majority vote): {summary.corrected_by_majority}")
118
+ print(f" Errors: {summary.errors}")
119
+ print(f"{'=' * 60}\n")
120
+
121
+ corrected_pages = [
122
+ page for page in summary.pages
123
+ if page.orientation.angle != 0 and page.error is None
124
+ ]
125
+ if corrected_pages:
126
+ print("Corrections applied:")
127
+ for page in corrected_pages:
128
+ print(f" {page.image_name}: {page.orientation.angle}° ({page.orientation.method})")
129
+ print()
130
+
131
+
132
def main() -> None:
    """Command-line entry point: parse arguments, then dry-run or process.

    Exits with status 1 (after printing to stderr) when the input path is
    not a directory.
    """
    arguments = _build_argument_parser().parse_args()

    input_path = Path(arguments.input_dir)
    if not input_path.is_dir():
        print(f"Error: '{arguments.input_dir}' is not a valid directory.", file=sys.stderr)
        sys.exit(1)

    # --no-ocr is expressed as a confidence threshold of infinity.
    if arguments.no_ocr:
        osd_threshold = float("inf")
    else:
        osd_threshold = arguments.confidence

    config = OrientationConfig(
        osd_confidence_threshold=osd_threshold,
        output_quality=arguments.quality,
        workers=arguments.workers,
        resume_enabled=not arguments.no_resume,
    )

    if arguments.dry_run:
        _print_dry_run_info(input_path, config, arguments.limit)
        return

    rule = "=" * 60
    banner_lines = (
        f"\n{rule}",
        f" docorient v{__version__}",
        rule,
        f" Input: {input_path}",
        f" Output: {arguments.output_dir or 'auto (UUID)'}",
        f" Workers: {config.effective_workers}",
        f" OCR: {'disabled' if arguments.no_ocr else 'enabled'}",
        f" Quality: {config.output_quality}",
        f"{rule}\n",
    )
    for line in banner_lines:
        print(line)

    _print_summary(
        process_directory(
            input_dir=input_path,
            output_dir=arguments.output_dir,
            config=config,
            limit=arguments.limit,
        )
    )
172
+
173
+
174
+ if __name__ == "__main__":
175
+ main()
docorient/config.py ADDED
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ import multiprocessing
4
+ from dataclasses import dataclass, field
5
+
6
+ DEFAULT_SUPPORTED_EXTENSIONS: tuple[str, ...] = (
7
+ ".jpg",
8
+ ".jpeg",
9
+ ".png",
10
+ ".tiff",
11
+ ".tif",
12
+ ".bmp",
13
+ ".gif",
14
+ ".webp",
15
+ )
16
+
17
+ RESUME_LOG_FILENAME = "_orientation_done.log"
18
+
19
+
20
+ def _default_worker_count() -> int:
21
+ return max(1, multiprocessing.cpu_count() - 2)
22
+
23
+
24
+ @dataclass(frozen=True, slots=True)
25
+ class OrientationConfig:
26
+ osd_confidence_threshold: float = 2.0
27
+ output_quality: int = 92
28
+ max_osd_dimension: int = 1200
29
+ projection_target_dimension: int = 800
30
+ workers: int | None = None
31
+ resume_enabled: bool = True
32
+ supported_extensions: tuple[str, ...] = field(
33
+ default_factory=lambda: DEFAULT_SUPPORTED_EXTENSIONS
34
+ )
35
+
36
+ @property
37
+ def effective_workers(self) -> int:
38
+ if self.workers is not None:
39
+ return max(1, self.workers)
40
+ return _default_worker_count()
@@ -0,0 +1,122 @@
1
from __future__ import annotations

from collections import Counter
from typing import Literal, overload

from PIL import Image

from docorient.config import OrientationConfig
from docorient.detection.engine import detect_orientation
from docorient.types import CorrectionResult, OrientationResult
11
+
12
+
13
+ def _apply_rotation(image: Image.Image, angle: int) -> Image.Image:
14
+ if angle == 0:
15
+ return image.copy()
16
+ return image.rotate(angle, expand=True)
17
+
18
+
19
# The overloads discriminate on the literal value of ``return_metadata`` so
# that type checkers can tell which return type a given call produces.
@overload
def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = ...,
    return_metadata: Literal[False] = ...,
) -> Image.Image: ...


@overload
def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = ...,
    return_metadata: Literal[True],
) -> CorrectionResult: ...


def correct_image(
    image: Image.Image,
    *,
    config: OrientationConfig | None = None,
    return_metadata: bool = False,
) -> Image.Image | CorrectionResult:
    """Detect and correct the orientation of a single document image.

    The input image is not modified; the corrected image is always a new
    object.

    Args:
        image: PIL Image to correct.
        config: Optional configuration. Uses defaults if not provided.
        return_metadata: If True, returns CorrectionResult with image and detection metadata.

    Returns:
        Corrected PIL Image, or CorrectionResult if return_metadata is True.
    """
    orientation = detect_orientation(image, config=config)
    corrected_image = _apply_rotation(image, orientation.angle)

    if return_metadata:
        return CorrectionResult(image=corrected_image, orientation=orientation)
    return corrected_image
59
+
60
+
61
+ def _apply_majority_voting(
62
+ detection_results: list[OrientationResult],
63
+ ) -> list[OrientationResult]:
64
+ confident_angles = [
65
+ result.angle for result in detection_results if result.reliable
66
+ ]
67
+
68
+ if not confident_angles:
69
+ return detection_results
70
+
71
+ majority_angle = Counter(confident_angles).most_common(1)[0][0]
72
+ corrected_results = []
73
+
74
+ for result in detection_results:
75
+ if not result.reliable and result.angle != majority_angle:
76
+ corrected_results.append(
77
+ OrientationResult(
78
+ angle=majority_angle,
79
+ method=f"{result.method}->majority({majority_angle},was={result.angle})",
80
+ reliable=True,
81
+ )
82
+ )
83
+ else:
84
+ corrected_results.append(result)
85
+
86
+ return corrected_results
87
+
88
+
89
def correct_document_pages(
    pages: list[Image.Image],
    *,
    config: OrientationConfig | None = None,
) -> list[CorrectionResult]:
    """Correct the pages of one document, reconciling detections by majority vote.

    Every page is analysed on its own first. When more than one page is
    given, detections flagged as unreliable are replaced by the most common
    reliable angle before the rotation is applied.

    Args:
        pages: List of PIL Images representing pages of the same document.
        config: Optional configuration. Uses defaults if not provided.

    Returns:
        List of CorrectionResult, one per input page, in input order.
    """
    effective_config = config if config else OrientationConfig()

    detections = []
    for page_image in pages:
        detections.append(detect_orientation(page_image, config=effective_config))

    if len(pages) > 1:
        detections = _apply_majority_voting(detections)

    return [
        CorrectionResult(
            image=_apply_rotation(page_image, orientation.angle),
            orientation=orientation,
        )
        for page_image, orientation in zip(pages, detections)
    ]
@@ -0,0 +1,3 @@
1
+ from docorient.detection.engine import detect_orientation
2
+
3
+ __all__ = ["detect_orientation"]
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+
3
+ from PIL import Image
4
+
5
+ from docorient.config import OrientationConfig
6
+ from docorient.detection.osd import detect_orientation_by_osd
7
+ from docorient.detection.projection import detect_orientation_by_projection
8
+ from docorient.types import OrientationResult
9
+
10
+
11
def detect_orientation(
    image: Image.Image,
    config: OrientationConfig | None = None,
) -> OrientationResult:
    """Detect the orientation of a document image.

    Projection profile analysis runs first; a 90° or 270° verdict from it is
    returned immediately. Otherwise Tesseract OSD is consulted: when it
    yields a result, that angle is returned with a method string combining
    both detectors, and when it yields nothing the projection result stands.

    Args:
        image: PIL Image to analyze.
        config: Optional configuration. Uses defaults if not provided.

    Returns:
        OrientationResult with detected angle, method description, and reliability flag.
    """
    settings = config or OrientationConfig()

    by_projection = detect_orientation_by_projection(
        image,
        target_dimension=settings.projection_target_dimension,
    )
    if by_projection.angle in (90, 270):
        return by_projection

    by_osd = detect_orientation_by_osd(
        image,
        max_dimension=settings.max_osd_dimension,
        confidence_threshold=settings.osd_confidence_threshold,
    )
    if by_osd is None:
        return by_projection

    return OrientationResult(
        angle=by_osd.angle,
        method=f"{by_osd.method},{by_projection.method}",
        reliable=True,
    )
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+
5
+ from PIL import Image
6
+
7
+ from docorient._imaging import downscale_to_max_dimension
8
+ from docorient.types import OrientationResult
9
+
10
+ _tesseract_available: bool | None = None
11
+
12
+
13
def is_tesseract_available() -> bool:
    """Report whether Tesseract OSD can actually be used.

    Checks both that the ``pytesseract`` wrapper can be imported and that it
    can query the version of the Tesseract executable. The answer is computed
    once and cached in the module-level ``_tesseract_available``.

    Returns:
        True when both the wrapper and the executable respond, else False.
    """
    global _tesseract_available
    if _tesseract_available is None:
        try:
            import pytesseract

            # Importing the wrapper is not enough: without the executable
            # every later OSD call would fail with an error that the OSD
            # query helper does not handle.
            pytesseract.get_tesseract_version()
        except (Exception, SystemExit):
            # Any probing failure means "not available".
            # NOTE(review): SystemExit is included because pytesseract
            # appears to raise it for an unparsable version string — confirm.
            _tesseract_available = False
        else:
            _tesseract_available = True
    return _tesseract_available
23
+
24
+
25
def _query_tesseract_osd(image: Image.Image) -> tuple[int, float]:
    """Run Tesseract OSD on ``image`` and return ``(angle, confidence)``.

    Reads the ``orientation`` and ``orientation_conf`` entries of the OSD
    result, defaulting to 0 and 0.0 when absent. Returns ``(0, 0.0)`` when
    Tesseract reports an error.
    """
    import pytesseract

    try:
        osd_fields = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
        angle_value = int(osd_fields.get("orientation", 0))
        confidence_value = float(osd_fields.get("orientation_conf", 0.0))
    except pytesseract.TesseractError:
        return 0, 0.0
    return angle_value, confidence_value
35
+
36
+
37
def detect_orientation_by_osd(
    image: Image.Image,
    max_dimension: int = 1200,
    confidence_threshold: float = 2.0,
) -> OrientationResult | None:
    """Detect document orientation using Tesseract OSD.

    Args:
        image: PIL Image to analyze; it is downscaled for OSD when larger
            than ``max_dimension`` and is never closed by this function.
        max_dimension: Longest side, in pixels, of the image given to OSD.
        confidence_threshold: Minimum OSD confidence to accept a detection.
            An infinite threshold disables OSD without any work or warning.

    Returns:
        OrientationResult if a confident detection of 90, 180 or 270 degrees
        is made, None otherwise. Returns None (after a ``UserWarning``) when
        Tesseract is not available.
    """
    # No confidence can reach an infinite threshold, so skip the expensive
    # OSD call (and the availability warning) entirely.
    if confidence_threshold == float("inf"):
        return None

    if not is_tesseract_available():
        warnings.warn(
            "pytesseract is not installed. 180° detection is disabled. "
            "Install with: pip install docorient[ocr]",
            UserWarning,
            stacklevel=2,
        )
        return None

    downscaled_image = downscale_to_max_dimension(image, max_dimension)
    try:
        detected_angle, detection_confidence = _query_tesseract_osd(downscaled_image)
    finally:
        # Only close what was created here; the caller owns ``image``.
        if downscaled_image is not image:
            downscaled_image.close()

    if detection_confidence < confidence_threshold:
        return None

    if detected_angle not in (90, 180, 270):
        return None

    return OrientationResult(
        angle=detected_angle,
        method=f"osd(angle={detected_angle},conf={detection_confidence:.1f})",
        reliable=True,
    )
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ from PIL import Image
5
+
6
+ from docorient._imaging import downscale_to_max_dimension
7
+ from docorient.types import OrientationResult
8
+
9
+ PROJECTION_ANALYSIS_DIMENSION = 800
10
+ ENERGY_EPSILON = 1e-10
11
+
12
+
13
+ def _compute_projection_energy(pixel_array: np.ndarray, threshold: float) -> tuple[float, float]:
14
+ binary_mask = (pixel_array < threshold).astype(np.float32)
15
+ horizontal_projection = binary_mask.sum(axis=1)
16
+ vertical_projection = binary_mask.sum(axis=0)
17
+ horizontal_energy = float(np.mean(np.diff(horizontal_projection) ** 2))
18
+ vertical_energy = float(np.mean(np.diff(vertical_projection) ** 2))
19
+ return horizontal_energy, vertical_energy
20
+
21
+
22
def _compute_energy_ratio(horizontal_energy: float, vertical_energy: float) -> float:
    """Return horizontal energy divided by vertical energy.

    ``ENERGY_EPSILON`` is added to the divisor so a zero vertical energy
    does not cause a division by zero.
    """
    divisor = vertical_energy + ENERGY_EPSILON
    return horizontal_energy / divisor
24
+
25
+
26
def detect_orientation_by_projection(
    image: Image.Image,
    target_dimension: int = PROJECTION_ANALYSIS_DIMENSION,
) -> OrientationResult:
    """Detect document orientation using horizontal/vertical projection profile energy analysis.

    The image is converted to grayscale, downscaled, and thresholded at its
    mean brightness. A horizontal/vertical energy ratio above 1.0 is taken
    as upright (angle 0); otherwise the ratio of the array rotated by 90°
    is compared to decide between 90 and 270.

    Args:
        image: PIL Image to analyze; it is not modified or closed.
        target_dimension: Longest side, in pixels, of the analysed image.

    Returns:
        OrientationResult with angle 0 (horizontal), 90 or 270 (vertical,
        needs rotation). An image with no measurable structure in either
        direction yields angle 0 with ``reliable=False``.
    """
    grayscale_image = image.convert("L")
    downscaled_image = downscale_to_max_dimension(grayscale_image, target_dimension)
    try:
        pixel_array = np.array(downscaled_image, dtype=np.float32)
    finally:
        # Release the intermediates even if the array conversion fails.
        downscaled_image.close()
        if downscaled_image is not grayscale_image:
            grayscale_image.close()

    brightness_threshold = float(pixel_array.mean())

    horizontal_energy, vertical_energy = _compute_projection_energy(
        pixel_array, brightness_threshold
    )

    # A blank or uniform image has no energy in either direction. Without
    # this guard it would fall through to a "reliable" 270° verdict below.
    if horizontal_energy <= ENERGY_EPSILON and vertical_energy <= ENERGY_EPSILON:
        return OrientationResult(
            angle=0,
            method="projection(h/v=n/a,no-structure)",
            reliable=False,
        )

    energy_ratio = _compute_energy_ratio(horizontal_energy, vertical_energy)

    if energy_ratio > 1.0:
        return OrientationResult(
            angle=0,
            method=f"projection(h/v={energy_ratio:.2f},horizontal)",
            reliable=True,
        )

    rotated_array = np.rot90(pixel_array, k=1)
    rotated_horizontal_energy, rotated_vertical_energy = _compute_projection_energy(
        rotated_array, brightness_threshold
    )
    rotated_energy_ratio = _compute_energy_ratio(rotated_horizontal_energy, rotated_vertical_energy)

    if rotated_energy_ratio > energy_ratio:
        return OrientationResult(
            angle=90,
            method=f"projection(h/v={energy_ratio:.2f}->90ccw:{rotated_energy_ratio:.2f})",
            reliable=True,
        )

    return OrientationResult(
        angle=270,
        method=f"projection(h/v={energy_ratio:.2f}->270ccw)",
        reliable=True,
    )
@@ -0,0 +1,18 @@
1
class DocorientError(Exception):
    """Base class for every exception defined by docorient."""


class DetectionError(DocorientError):
    """Error category for orientation detection."""


class CorrectionError(DocorientError):
    """Error category for orientation correction."""


class BatchProcessingError(DocorientError):
    """Error category for batch (directory) processing."""


class TesseractNotAvailableError(DocorientError):
    """Error category for a missing Tesseract dependency."""
docorient/types.py ADDED
@@ -0,0 +1,42 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from PIL import Image as PILImage
6
+
7
+
8
# Immutable (frozen), slotted record describing one orientation detection.
#   angle    -- orientation in degrees. The projection detector produces 0, 90
#               or 270; the README lists 0, 90, 180 and 270 as possible values.
#               NOTE(review): whether this is the detected rotation or the
#               rotation still to apply is not established here -- confirm.
#   method   -- free-form description of how the result was obtained, e.g.
#               "projection(h/v=1.25,horizontal)" from the projection detector.
#   reliable -- confidence flag; the projection detector always sets it True.
+ @dataclass(frozen=True, slots=True)
9
+ class OrientationResult:
10
+ angle: int
11
+ method: str
12
+ reliable: bool
13
+
14
+
15
# Immutable (frozen), slotted pair of an image and its orientation metadata.
#   image       -- a PIL image.
#   orientation -- the OrientationResult associated with that image.
# NOTE(review): the README reads `.image` and `.orientation` from the value of
# `correct_image(image, return_metadata=True)`, so this is presumably that
# return type and `image` the corrected picture -- confirm in correction.py.
+ @dataclass(frozen=True, slots=True)
16
+ class CorrectionResult:
17
+ image: PILImage.Image
18
+ orientation: OrientationResult
19
+
20
+
21
# Immutable (frozen), slotted per-page record; BatchSummary.pages holds a
# tuple of these.
#   source_file, page_number, image_name -- identify the page.
#       NOTE(review): exact meaning and whether page_number is 0- or 1-based
#       are not visible here -- confirm in docorient/batch.
#   input_path, output_path -- paths stored as plain strings.
#   orientation -- the OrientationResult recorded for the page.
#   error -- the only field with a default (None). Presumably an error message
#       when processing the page failed -- confirm in batch/worker.py.
+ @dataclass(frozen=True, slots=True)
22
+ class PageResult:
23
+ source_file: str
24
+ page_number: int
25
+ image_name: str
26
+ input_path: str
27
+ output_path: str
28
+ orientation: OrientationResult
29
+ error: str | None = None
30
+
31
+
32
# Immutable (frozen), slotted aggregate of a batch run. The README reads
# `summary.corrected` and `summary.total_pages` from the value returned by
# `process_directory(...)`.
#   input_directory, output_directory -- directories stored as plain strings.
#   total_files, total_pages -- integer totals for the run.
#   already_correct, corrected, corrected_by_majority, errors -- integer
#       counters. NOTE(review): how pages are assigned to each counter (and
#       whether the counters overlap) is decided in the batch processor, which
#       is not visible here -- confirm. `corrected_by_majority` presumably
#       relates to the per-document majority voting mentioned in the README.
#   pages -- tuple of the individual PageResult records.
+ @dataclass(frozen=True, slots=True)
33
+ class BatchSummary:
34
+ input_directory: str
35
+ output_directory: str
36
+ total_files: int
37
+ total_pages: int
38
+ already_correct: int
39
+ corrected: int
40
+ corrected_by_majority: int
41
+ errors: int
42
+ pages: tuple[PageResult, ...]
@@ -0,0 +1,146 @@
1
+ Metadata-Version: 2.4
2
+ Name: docorient
3
+ Version: 0.1.0
4
+ Summary: Document image orientation detection and correction using projection profile analysis and optional Tesseract OSD.
5
+ Project-URL: Homepage, https://github.com/cebraspe-lab/docorient
6
+ Project-URL: Repository, https://github.com/cebraspe-lab/docorient
7
+ Project-URL: Issues, https://github.com/cebraspe-lab/docorient/issues
8
+ Author: Cebraspe Lab
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: correction,document,image,ocr,orientation,rotation,tesseract
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: pillow>=10.0
25
+ Requires-Dist: tqdm>=4.60
26
+ Provides-Extra: dev
27
+ Requires-Dist: build; extra == 'dev'
28
+ Requires-Dist: pytest-cov; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.4; extra == 'dev'
31
+ Requires-Dist: twine; extra == 'dev'
32
+ Provides-Extra: ocr
33
+ Requires-Dist: pytesseract>=0.3.10; extra == 'ocr'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # docorient
37
+
38
+ Document image orientation detection and correction.
39
+
40
+ Detects and fixes rotation (0°, 90°, 180°, 270°) in scanned document images using projection profile analysis and optional Tesseract OSD.
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install docorient
46
+ ```
47
+
48
+ For 180° detection via Tesseract OSD:
49
+
50
+ ```bash
51
+ pip install docorient[ocr]
52
+ ```
53
+
54
+ > **Note:** The `[ocr]` extra requires [Tesseract](https://github.com/tesseract-ocr/tesseract) installed on your system.
55
+
56
+ ## Quick Start
57
+
58
+ ### Detect orientation
59
+
60
+ ```python
61
+ from PIL import Image
62
+ from docorient import detect_orientation
63
+
64
+ image = Image.open("document.jpg")
65
+ result = detect_orientation(image)
66
+
67
+ print(result.angle) # 0, 90, 180, or 270
68
+ print(result.method) # detection method used
69
+ print(result.reliable) # confidence flag
70
+ ```
71
+
72
+ ### Correct a single image
73
+
74
+ ```python
75
+ from docorient import correct_image
76
+
77
+ corrected = correct_image(image)
78
+ corrected.save("fixed.jpg")
79
+ ```
80
+
81
+ ### Correct with metadata
82
+
83
+ ```python
84
+ from docorient import correct_image
85
+
86
+ result = correct_image(image, return_metadata=True)
87
+ print(result.orientation.angle)
88
+ result.image.save("fixed.jpg")
89
+ ```
90
+
91
+ ### Correct multi-page document (majority voting)
92
+
93
+ ```python
94
+ from docorient import correct_document_pages
95
+
96
+ pages = [Image.open(f"page_{i}.jpg") for i in range(5)]
97
+ corrected_pages = correct_document_pages(pages)
98
+ ```
99
+
100
+ ### Batch process a directory
101
+
102
+ ```python
103
+ from docorient import process_directory, OrientationConfig
104
+
105
+ config = OrientationConfig(workers=4, output_quality=95)
106
+ summary = process_directory("./scans", output_dir="./fixed", config=config)
107
+
108
+ print(f"Corrected: {summary.corrected}/{summary.total_pages}")
109
+ ```
110
+
111
+ ### CLI
112
+
113
+ ```bash
114
+ docorient ./scans --output ./fixed --workers 4
115
+ docorient ./scans --dry-run
116
+ docorient ./scans --no-ocr --limit 100
117
+ ```
118
+
119
+ ## How It Works
120
+
121
+ 1. **Projection profile analysis** detects 90° and 270° rotations by comparing horizontal vs vertical text energy
122
+ 2. **Tesseract OSD** (optional) detects 180° rotation with confidence thresholding
123
+ 3. **Majority voting** across pages of the same document improves reliability
124
+
125
+ ## Supported Formats
126
+
127
+ Any format readable by Pillow: JPEG, PNG, TIFF, BMP, GIF, WebP, and more.
128
+
129
+ ## Configuration
130
+
131
+ ```python
132
+ from docorient import OrientationConfig
133
+
134
+ config = OrientationConfig(
135
+ osd_confidence_threshold=2.0,
136
+ output_quality=92,
137
+ max_osd_dimension=1200,
138
+ projection_target_dimension=800,
139
+ workers=4,
140
+ resume_enabled=True,
141
+ )
142
+ ```
143
+
144
+ ## License
145
+
146
+ MIT
@@ -0,0 +1,21 @@
1
+ docorient/__init__.py,sha256=J_B5pjSvSZknWhctdEYtRs2iOwGoVrOAzL7NtRmGQ54,919
2
+ docorient/_imaging.py,sha256=iO0-LjaH5QjiHcTUzHML5LCVcTAYxZlD4oK9lZJ078A,1232
3
+ docorient/_version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
4
+ docorient/cli.py,sha256=zNepvIuVt-clLL37XzFZTaCcleV8jiPnIQ2DwmCPDRs,5467
5
+ docorient/config.py,sha256=AVeX4GbsdgJyBC9uIeAD8fvIMKAJuRkeSh-JsR_elPk,951
6
+ docorient/correction.py,sha256=L-uuOcGQKZXqFnTFccl4SvR20dOezNinLndrIx5OSDk,3630
7
+ docorient/exceptions.py,sha256=vdWkgbQH3DJLce5LFlKe9AnIUYmRMho4Sq0p9HdKVRo,257
8
+ docorient/types.py,sha256=qo3KbtyJOo-xxXivMAErOBeX6AnxtBDlELuj4bGFfys,840
9
+ docorient/batch/__init__.py,sha256=tr5WRSoQC0PRNB4N_Z7TwkR3ZzOyCmD-KStlMjAvz6I,89
10
+ docorient/batch/processor.py,sha256=3PvwGAxvKQdJO1yRlxnMtIngvb9oZArlUfL2ajdO6JY,6349
11
+ docorient/batch/scanner.py,sha256=0CGUbRwPnEkDJKYyHYkNCXffy5DZXV3s-cplvjQ-XSo,1555
12
+ docorient/batch/worker.py,sha256=qBegpAqTajoUHOI4QhjxvoZuvnPysvcbgmRTIF8wi_I,4097
13
+ docorient/detection/__init__.py,sha256=1i2bVoFXfpQe8u7O3HjRpkXqAFxi6kNvA8xbYZkVZjA,92
14
+ docorient/detection/engine.py,sha256=iG4CoY1ofoCrXSc5QjzoiB9UHi6RmLQf8lQ2Wz_FTxU,1567
15
+ docorient/detection/osd.py,sha256=EiI0yYPx5VgwoaX4AXXAVAeH2KF5QGaRLAXwmRTGhNA,2148
16
+ docorient/detection/projection.py,sha256=-WugLjtgzqi3fP7XxAAJLpRRTzGzNwSpcWd4MULOUeI,2594
17
+ docorient-0.1.0.dist-info/METADATA,sha256=kp2j0qcfAZJp4-bQ5hl9TFPE5tSWLQgV_38ZtoIHY3k,3890
18
+ docorient-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
19
+ docorient-0.1.0.dist-info/entry_points.txt,sha256=yxZkcXy-6woVnA6vxEX3vi_ywPaa2RsQkn282hDrEDc,49
20
+ docorient-0.1.0.dist-info/licenses/LICENSE,sha256=wUKLgf8GreWy4jQrdQOyEUBchTfAns3QvgCNgW_IlNc,1069
21
+ docorient-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ docorient = docorient.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cebraspe Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.