pysfi 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sfi/pdfsplit/pdfsplit.py CHANGED
@@ -1,173 +1,173 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import logging
5
- from pathlib import Path
6
-
7
- import fitz
8
-
9
- logging.basicConfig(level=logging.INFO, format="%(message)s")
10
- cwd = Path.cwd()
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
def parse_page_ranges(range_str: str, total_pages: int) -> list[int]:
    """Parse a page-range string into a list of 1-indexed page numbers.

    The string is a comma-separated list of tokens. Each token is either a
    single page number (``"7"``) or a range ``"start-end"`` (both ends
    inclusive). An omitted start defaults to 1 and an omitted end defaults
    to ``total_pages``, so ``"-3"`` means pages 1..3 and ``"25-"`` means
    page 25 through the last page. Empty tokens are ignored.

    The result is returned in the order written; it is neither
    de-duplicated nor clamped to ``1..total_pages``. A range whose start is
    greater than its end contributes no pages.

    Args:
        range_str: Range specification, e.g. ``"1,2,4-10,15-20,25-"``.
        total_pages: Page count used as the end of open-ended ranges.

    Returns:
        The requested page numbers (1-indexed).

    Raises:
        ValueError: If a token is not a number or a ``start-end`` range.
            The message names the offending token.
    """
    pages: list[int] = []
    for raw in range_str.split(","):
        token = raw.strip()
        if not token:
            continue
        # partition() splits on the first dash only, so a malformed token
        # such as "1-2-3" reaches int() and is reported with the token text
        # instead of failing with a tuple-unpacking error.
        start_text, dash, end_text = token.partition("-")
        start_text = start_text.strip()
        end_text = end_text.strip()
        try:
            if dash:
                start = int(start_text) if start_text else 1
                end = int(end_text) if end_text else total_pages
                pages.extend(range(start, end + 1))
            else:
                pages.append(int(token))
        except ValueError:
            raise ValueError(
                f"Invalid page range {token!r} in {range_str!r}"
            ) from None
    return pages
29
-
30
-
31
def split_by_number(input_file: Path, output_file: Path, number: int):
    """Split a PDF into ``number`` consecutive parts of near-equal size.

    Pages are distributed as evenly as possible: every part receives
    ``total_pages // number`` pages and the first ``total_pages % number``
    parts receive one extra page. When the document has fewer pages than
    ``number``, the surplus parts are skipped rather than written empty.

    Each part is saved next to ``output_file`` as
    ``<stem>_part<i><suffix>`` with ``i`` starting at 1.

    Args:
        input_file: PDF to split.
        output_file: Template path; its parent, stem and suffix are used to
            build the part file names.
        number: Number of parts to produce; must be at least 1.

    Raises:
        ValueError: If ``number`` is less than 1.
    """
    # Validate before opening anything: zero would otherwise surface as a
    # ZeroDivisionError and a negative value would silently do nothing.
    if number < 1:
        raise ValueError(f"number must be a positive integer, got {number}")

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count
        base_pages, remainder = divmod(total_pages, number)

        logger.debug(
            f"Total pages: {total_pages}, Splitting into {number} parts, {base_pages} base pages per part, {remainder} extra pages"
        )

        current_page = 0
        for i in range(number):
            if current_page >= total_pages:
                logger.debug(f"Skipping part {i + 1}: no more pages remaining")
                continue

            # First 'remainder' parts get one extra page
            pages_in_this_part = base_pages + (1 if i < remainder else 0)
            end_page = min(current_page + pages_in_this_part, total_pages)

            part_file = output_file.parent / f"{output_file.stem}_part{i + 1}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # The slice is contiguous and non-empty here, so one call
                # copies it; to_page is inclusive, hence end_page - 1.
                part_doc.insert_pdf(doc, from_page=current_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {i + 1}: {part_file} (pages {current_page + 1}-{end_page})")

            current_page = end_page
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
66
-
67
-
68
def split_by_size(input_file: Path, output_file: Path, size: int):
    """Split a PDF into consecutive parts of at most ``size`` pages each.

    Every part except possibly the last contains exactly ``size`` pages;
    the last part holds whatever pages remain. Each part is saved next to
    ``output_file`` as ``<stem>_part<i><suffix>`` with ``i`` starting at 1.

    Args:
        input_file: PDF to split.
        output_file: Template path; its parent, stem and suffix are used to
            build the part file names.
        size: Maximum number of pages per part; must be at least 1.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # A non-positive size can never advance through the document, so reject
    # it up front instead of attempting to write empty parts.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size}")

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        logger.debug(f"Total pages: {total_pages}, Splitting with {size} pages per part")

        # Step through the document one chunk at a time; parts are numbered
        # from 1 for file names and log messages.
        for part, start_page in enumerate(range(0, total_pages, size), start=1):
            end_page = min(start_page + size, total_pages)
            part_file = output_file.parent / f"{output_file.stem}_part{part}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # to_page is inclusive, hence end_page - 1.
                part_doc.insert_pdf(doc, from_page=start_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {part}: {part_file} (pages {start_page + 1}-{end_page})")
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
94
-
95
-
96
def split_by_range(input_file: Path, output_file: Path, range_str: str):
    """Extract the pages selected by ``range_str`` into a single new PDF.

    ``range_str`` is parsed with ``parse_page_ranges``. Page numbers outside
    ``1..total_pages`` are dropped, duplicates are removed, and the
    remaining pages are written in ascending page order, regardless of the
    order in which they were listed. If no valid page remains, an error is
    logged and no file is written.

    Args:
        input_file: PDF to read pages from.
        output_file: Path of the PDF to create.
        range_str: Range specification, e.g. ``"1,2,4-10,15-20,25-"``.
    """
    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        requested = parse_page_ranges(range_str, total_pages)
        # Keep in-range pages only, convert to 0-indexed, de-duplicate and
        # sort ascending (the requested order is NOT preserved).
        pages = sorted({p - 1 for p in requested if 1 <= p <= total_pages})

        if not pages:
            logger.error("No valid pages found in the specified range")
            return

        logger.debug(f"Extracting pages: {[p + 1 for p in pages]}")

        new_doc = fitz.open()
        try:
            # Selected pages need not be contiguous, so copy them one by one.
            for page_num in pages:
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
            new_doc.save(output_file)
        finally:
            new_doc.close()
    finally:
        # Release the source document on every path, including a parse
        # error raised for a malformed range string.
        doc.close()
    logger.info(f"Created output file: {output_file} ({len(pages)} pages)")
122
-
123
-
124
def main():
    """Command-line entry point: parse arguments and run one split mode.

    Exactly one of ``-n/--number``, ``-s/--size`` or ``-r/--range`` must be
    given. For ``-n`` and ``-s`` the positional output is optional and
    defaults to ``<output-dir>/<input stem>_split.pdf``; for ``-r`` it is
    required. Invalid paths are reported through the logger and the
    function returns without splitting; non-positive ``-n``/``-s`` values
    are rejected through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Split PDF files")
    parser.add_argument("input", help="Input PDF file")
    parser.add_argument("output", nargs="?", help="Output PDF file (optional for -n and -s modes)")
    parser.add_argument("-o", "--output-dir", default=str(cwd), help="Output directory (default: current directory)")
    # NOTE(review): --output-format is parsed but never used below.
    parser.add_argument("-f", "--output-format", help="Output file format pattern, e.g., 'split_{part:02d}.pdf'")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    # Split by number, size, or range.
    # No option carries a default: a default on -s made args.size truthy in
    # every invocation, which hijacked the dispatch below and meant that
    # -r/--range was never executed.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-n", "--number", type=int, help="Number of splits")
    group.add_argument("-s", "--size", type=int, help="Size of each split in pages")
    group.add_argument("-r", "--range", type=str, help="Range of pages to extract, e.g., '1,2,4-10,15-20,25-'")

    args = parser.parse_args()

    # Reject values that cannot produce a split (parser.error exits).
    if args.number is not None and args.number < 1:
        parser.error("-n/--number must be a positive integer")
    if args.size is not None and args.size < 1:
        parser.error("-s/--size must be a positive integer")

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    output_dir = Path(args.output_dir)
    if not output_dir.is_dir():
        logger.error(f"Output directory {args.output_dir} does not exist, please check the path.")
        return

    input_file = Path(args.input)
    if not input_file.is_file():
        logger.error(f"Input file {args.input} does not exist, please check the path.")
        return

    # For -n and -s modes, output is optional and defaults to base name with suffix
    # For -r mode, output is required
    if args.range is not None and not args.output:
        logger.error("Output file is required for -r/--range mode")
        return

    # An explicit output path is used as given; --output-dir only applies
    # to the default file name.
    if args.output:
        output_file = Path(args.output)
    else:
        output_file = output_dir / (input_file.stem + "_split.pdf")

    logger.info(f"Start splitting {input_file}")
    # Dispatch on "is not None" so the selected mode is the one the user
    # actually passed, independent of the value's truthiness.
    if args.number is not None:
        split_by_number(input_file, output_file, args.number)
    elif args.size is not None:
        split_by_size(input_file, output_file, args.size)
    elif args.range is not None:
        try:
            split_by_range(input_file, output_file, args.range)
        except ValueError as exc:
            # A malformed range string is a user error, not a crash.
            logger.error(f"Invalid page range: {exc}")
            return
    else:
        logger.error("Please specify either -n, -s, or -r")
        return
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ from pathlib import Path
6
+
7
+ import fitz
8
+
9
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
10
+ cwd = Path.cwd()
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def parse_page_ranges(range_str: str, total_pages: int) -> list[int]:
    """Parse a page-range string into a list of 1-indexed page numbers.

    The string is a comma-separated list of tokens. Each token is either a
    single page number (``"7"``) or a range ``"start-end"`` (both ends
    inclusive). An omitted start defaults to 1 and an omitted end defaults
    to ``total_pages``, so ``"-3"`` means pages 1..3 and ``"25-"`` means
    page 25 through the last page. Empty tokens are ignored.

    The result is returned in the order written; it is neither
    de-duplicated nor clamped to ``1..total_pages``. A range whose start is
    greater than its end contributes no pages.

    Args:
        range_str: Range specification, e.g. ``"1,2,4-10,15-20,25-"``.
        total_pages: Page count used as the end of open-ended ranges.

    Returns:
        The requested page numbers (1-indexed).

    Raises:
        ValueError: If a token is not a number or a ``start-end`` range.
            The message names the offending token.
    """
    pages: list[int] = []
    for raw in range_str.split(","):
        token = raw.strip()
        if not token:
            continue
        # partition() splits on the first dash only, so a malformed token
        # such as "1-2-3" reaches int() and is reported with the token text
        # instead of failing with a tuple-unpacking error.
        start_text, dash, end_text = token.partition("-")
        start_text = start_text.strip()
        end_text = end_text.strip()
        try:
            if dash:
                start = int(start_text) if start_text else 1
                end = int(end_text) if end_text else total_pages
                pages.extend(range(start, end + 1))
            else:
                pages.append(int(token))
        except ValueError:
            raise ValueError(
                f"Invalid page range {token!r} in {range_str!r}"
            ) from None
    return pages
29
+
30
+
31
def split_by_number(input_file: Path, output_file: Path, number: int):
    """Split a PDF into ``number`` consecutive parts of near-equal size.

    Pages are distributed as evenly as possible: every part receives
    ``total_pages // number`` pages and the first ``total_pages % number``
    parts receive one extra page. When the document has fewer pages than
    ``number``, the surplus parts are skipped rather than written empty.

    Each part is saved next to ``output_file`` as
    ``<stem>_part<i><suffix>`` with ``i`` starting at 1.

    Args:
        input_file: PDF to split.
        output_file: Template path; its parent, stem and suffix are used to
            build the part file names.
        number: Number of parts to produce; must be at least 1.

    Raises:
        ValueError: If ``number`` is less than 1.
    """
    # Validate before opening anything: zero would otherwise surface as a
    # ZeroDivisionError and a negative value would silently do nothing.
    if number < 1:
        raise ValueError(f"number must be a positive integer, got {number}")

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count
        base_pages, remainder = divmod(total_pages, number)

        logger.debug(
            f"Total pages: {total_pages}, Splitting into {number} parts, {base_pages} base pages per part, {remainder} extra pages"
        )

        current_page = 0
        for i in range(number):
            if current_page >= total_pages:
                logger.debug(f"Skipping part {i + 1}: no more pages remaining")
                continue

            # First 'remainder' parts get one extra page
            pages_in_this_part = base_pages + (1 if i < remainder else 0)
            end_page = min(current_page + pages_in_this_part, total_pages)

            part_file = output_file.parent / f"{output_file.stem}_part{i + 1}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # The slice is contiguous and non-empty here, so one call
                # copies it; to_page is inclusive, hence end_page - 1.
                part_doc.insert_pdf(doc, from_page=current_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {i + 1}: {part_file} (pages {current_page + 1}-{end_page})")

            current_page = end_page
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
66
+
67
+
68
def split_by_size(input_file: Path, output_file: Path, size: int):
    """Split a PDF into consecutive parts of at most ``size`` pages each.

    Every part except possibly the last contains exactly ``size`` pages;
    the last part holds whatever pages remain. Each part is saved next to
    ``output_file`` as ``<stem>_part<i><suffix>`` with ``i`` starting at 1.

    Args:
        input_file: PDF to split.
        output_file: Template path; its parent, stem and suffix are used to
            build the part file names.
        size: Maximum number of pages per part; must be at least 1.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # A non-positive size can never advance through the document, so reject
    # it up front instead of attempting to write empty parts.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size}")

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        logger.debug(f"Total pages: {total_pages}, Splitting with {size} pages per part")

        # Step through the document one chunk at a time; parts are numbered
        # from 1 for file names and log messages.
        for part, start_page in enumerate(range(0, total_pages, size), start=1):
            end_page = min(start_page + size, total_pages)
            part_file = output_file.parent / f"{output_file.stem}_part{part}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # to_page is inclusive, hence end_page - 1.
                part_doc.insert_pdf(doc, from_page=start_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {part}: {part_file} (pages {start_page + 1}-{end_page})")
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
94
+
95
+
96
def split_by_range(input_file: Path, output_file: Path, range_str: str):
    """Extract the pages selected by ``range_str`` into a single new PDF.

    ``range_str`` is parsed with ``parse_page_ranges``. Page numbers outside
    ``1..total_pages`` are dropped, duplicates are removed, and the
    remaining pages are written in ascending page order, regardless of the
    order in which they were listed. If no valid page remains, an error is
    logged and no file is written.

    Args:
        input_file: PDF to read pages from.
        output_file: Path of the PDF to create.
        range_str: Range specification, e.g. ``"1,2,4-10,15-20,25-"``.
    """
    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        requested = parse_page_ranges(range_str, total_pages)
        # Keep in-range pages only, convert to 0-indexed, de-duplicate and
        # sort ascending (the requested order is NOT preserved).
        pages = sorted({p - 1 for p in requested if 1 <= p <= total_pages})

        if not pages:
            logger.error("No valid pages found in the specified range")
            return

        logger.debug(f"Extracting pages: {[p + 1 for p in pages]}")

        new_doc = fitz.open()
        try:
            # Selected pages need not be contiguous, so copy them one by one.
            for page_num in pages:
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
            new_doc.save(output_file)
        finally:
            new_doc.close()
    finally:
        # Release the source document on every path, including a parse
        # error raised for a malformed range string.
        doc.close()
    logger.info(f"Created output file: {output_file} ({len(pages)} pages)")
122
+
123
+
124
def main():
    """Command-line entry point: parse arguments and run one split mode.

    Exactly one of ``-n/--number``, ``-s/--size`` or ``-r/--range`` must be
    given. For ``-n`` and ``-s`` the positional output is optional and
    defaults to ``<output-dir>/<input stem>_split.pdf``; for ``-r`` it is
    required. Invalid paths are reported through the logger and the
    function returns without splitting; non-positive ``-n``/``-s`` values
    are rejected through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Split PDF files")
    parser.add_argument("input", help="Input PDF file")
    parser.add_argument("output", nargs="?", help="Output PDF file (optional for -n and -s modes)")
    parser.add_argument("-o", "--output-dir", default=str(cwd), help="Output directory (default: current directory)")
    # NOTE(review): --output-format is parsed but never used below.
    parser.add_argument("-f", "--output-format", help="Output file format pattern, e.g., 'split_{part:02d}.pdf'")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    # Split by number, size, or range.
    # No option carries a default: a default on -s made args.size truthy in
    # every invocation, which hijacked the dispatch below and meant that
    # -r/--range was never executed.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-n", "--number", type=int, help="Number of splits")
    group.add_argument("-s", "--size", type=int, help="Size of each split in pages")
    group.add_argument("-r", "--range", type=str, help="Range of pages to extract, e.g., '1,2,4-10,15-20,25-'")

    args = parser.parse_args()

    # Reject values that cannot produce a split (parser.error exits).
    if args.number is not None and args.number < 1:
        parser.error("-n/--number must be a positive integer")
    if args.size is not None and args.size < 1:
        parser.error("-s/--size must be a positive integer")

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    output_dir = Path(args.output_dir)
    if not output_dir.is_dir():
        logger.error(f"Output directory {args.output_dir} does not exist, please check the path.")
        return

    input_file = Path(args.input)
    if not input_file.is_file():
        logger.error(f"Input file {args.input} does not exist, please check the path.")
        return

    # For -n and -s modes, output is optional and defaults to base name with suffix
    # For -r mode, output is required
    if args.range is not None and not args.output:
        logger.error("Output file is required for -r/--range mode")
        return

    # An explicit output path is used as given; --output-dir only applies
    # to the default file name.
    if args.output:
        output_file = Path(args.output)
    else:
        output_file = output_dir / (input_file.stem + "_split.pdf")

    logger.info(f"Start splitting {input_file}")
    # Dispatch on "is not None" so the selected mode is the one the user
    # actually passed, independent of the value's truthiness.
    if args.number is not None:
        split_by_number(input_file, output_file, args.number)
    elif args.size is not None:
        split_by_size(input_file, output_file, args.size)
    elif args.range is not None:
        try:
            split_by_range(input_file, output_file, args.range)
        except ValueError as exc:
            # A malformed range string is a user error, not a crash.
            logger.error(f"Invalid page range: {exc}")
            return
    else:
        logger.error("Please specify either -n, -s, or -r")
        return
File without changes
File without changes