pysfi 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {pysfi-0.1.5 → pysfi-0.1.6}/PKG-INFO +5 -1
  2. {pysfi-0.1.5 → pysfi-0.1.6}/pyproject.toml +132 -117
  3. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/__init__.py +1 -1
  4. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/bumpversion/__init__.py +1 -1
  5. pysfi-0.1.6/sfi/pdfsplit/pdfsplit.py +173 -0
  6. pysfi-0.1.6/sfi/pdfsplit/tests/test_pdfsplit.py +333 -0
  7. pysfi-0.1.6/sfi/pyloadergen/__init__.py +0 -0
  8. pysfi-0.1.6/sfi/pyloadergen/tests/__init__.py +0 -0
  9. {pysfi-0.1.5 → pysfi-0.1.6}/.gitignore +0 -0
  10. {pysfi-0.1.5 → pysfi-0.1.6}/README.md +0 -0
  11. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/alarmclock/__init__.py +0 -0
  12. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/alarmclock/alarmclock.py +0 -0
  13. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/bumpversion/README.md +0 -0
  14. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/bumpversion/bumpversion.py +0 -0
  15. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/bumpversion/tests/__init__.py +0 -0
  16. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/bumpversion/tests/test_bumpversion.py +0 -0
  17. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/embedinstall/embedinstall.py +0 -0
  18. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/filedate/README.md +0 -0
  19. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/filedate/__init__.py +0 -0
  20. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/filedate/filedate.py +0 -0
  21. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/makepython/__init__.py +0 -0
  22. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/makepython/makepython.py +0 -0
  23. {pysfi-0.1.5/sfi/pyloadergen → pysfi-0.1.6/sfi/pdfsplit}/tests/__init__.py +0 -0
  24. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/projectparse/projectparse.py +0 -0
  25. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/pyloadergen/pyloadergen.py +0 -0
  26. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/pyloadergen/tests/test_pyloadergen.py +0 -0
  27. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/pypacker/fspacker.py +0 -0
  28. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/taskkill/taskkill.py +0 -0
  29. {pysfi-0.1.5 → pysfi-0.1.6}/sfi/which/which.py +0 -0
@@ -1,9 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pysfi
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Single File commands for Interactive python.
5
5
  Requires-Python: >=3.8
6
6
  Requires-Dist: tomli>=2.4.0; python_version < '3.11'
7
+ Provides-Extra: all
8
+ Requires-Dist: pymupdf>=1.24.11; extra == 'all'
9
+ Provides-Extra: office
10
+ Requires-Dist: pymupdf>=1.24.11; extra == 'office'
7
11
  Description-Content-Type: text/markdown
8
12
 
9
13
  # pysfi
@@ -1,117 +1,132 @@
1
- [build-system]
2
- build-backend = "hatchling.build"
3
- requires = ["hatchling"]
4
-
5
- [project]
6
- dependencies = ["tomli>=2.4.0; python_version<'3.11'"]
7
- description = "Single File commands for Interactive python."
8
- name = "pysfi"
9
- readme = "README.md"
10
- requires-python = ">=3.8"
11
- version = "0.1.5"
12
-
13
- [project.scripts]
14
- alarmclk = "sfi.alarmclock.alarmclock:main"
15
- bumpversion = "sfi.bumpversion.bumpversion:main"
16
- embedinstall = "sfi.embedinstall.embedinstall:main"
17
- filedate = "sfi.filedate.filedate:main"
18
- mkp = "sfi.makepython.makepython:main"
19
- projectparse = "sfi.projectparse.projectparse:main"
20
- pyloadergen = "sfi.pyloadergen.pyloadergen:main"
21
- pypacker = "sfi.pypacker.pypacker:main"
22
- taskk = "sfi.taskkill.taskkill:main"
23
- wch = "sfi.which.which:main"
24
-
25
- [tool.hatch.build.targets.wheel]
26
- exclude = [
27
- "sfi/*/README.md",
28
- "sfi/*/dist",
29
- "sfi/*/pyproject.toml",
30
- "sfi/*/tests",
31
- ]
32
- packages = ["sfi"]
33
-
34
- # Only include necessary source files to minimize package size
35
- [tool.hatch.build]
36
- include = ["README.md", "sfi/**/*.py", "sfi/pyproject.toml"]
37
-
38
- [tool.ruff]
39
- line-length = 120
40
- target-version = "py38"
41
-
42
- # Exclude files and directories
43
- exclude = [
44
- ".bzr",
45
- ".direnv",
46
- ".eggs",
47
- ".git",
48
- ".git-rewrite",
49
- ".hg",
50
- ".mypy_cache",
51
- ".nox",
52
- ".pants.d",
53
- ".pytype",
54
- ".ruff_cache",
55
- ".svn",
56
- ".tox",
57
- ".venv",
58
- "__pypackages__",
59
- "_build",
60
- "buck-out",
61
- "build",
62
- "dist",
63
- "node_modules",
64
- "venv",
65
- ]
66
-
67
- [tool.ruff.lint]
68
- dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
69
- fixable = ["ALL"]
70
- ignore = [
71
- "B008", # Do not perform function calls in function arguments
72
- "E501", # Line too long (handled by formatter)
73
- ]
74
- select = [
75
- "B", # flake8-bugbear
76
- "C4", # flake8-comprehensions
77
- "E", # pycodestyle errors
78
- "F", # Pyflakes
79
- "I", # isort
80
- "N", # pep8-naming
81
- "RUF", # Ruff-specific rules
82
- "SIM", # flake8-simplify
83
- "UP", # pyupgrade
84
- "W", # pycodestyle warnings
85
- ]
86
- unfixable = []
87
-
88
- [tool.ruff.lint.isort]
89
- known-first-party = ["sfi"]
90
-
91
- [tool.ruff.lint.per-file-ignores]
92
- "__init__.py" = ["F401"] # Allow unused imports
93
-
94
- [tool.uv.workspace]
95
- members = [
96
- "sfi/alarmclock",
97
- "sfi/bumpversion",
98
- "sfi/embedinstall",
99
- "sfi/filedate",
100
- "sfi/makepython",
101
- "sfi/projectparse",
102
- "sfi/pyloadergen",
103
- "sfi/pypacker",
104
- "sfi/taskkill",
105
- "sfi/which",
106
- ]
107
-
108
- [dependency-groups]
109
- dev = [
110
- "hatch>=1.14.2",
111
- "pyside2>=5.15.2.1",
112
- "pytest-cov>=5.0.0",
113
- "pytest>=8.3.5",
114
- "qdarkstyle>=3.2.3",
115
- "ruff>=0.14.11",
116
- "tomli>=2.4.0",
117
- ]
1
+ [build-system]
2
+ build-backend = "hatchling.build"
3
+ requires = ["hatchling"]
4
+
5
+ [project]
6
+ dependencies = ["tomli>=2.4.0; python_version<'3.11'"]
7
+ description = "Single File commands for Interactive python."
8
+ name = "pysfi"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ version = "0.1.6"
12
+
13
+ [project.scripts]
14
+ alarmclk = "sfi.alarmclock.alarmclock:main"
15
+ bumpversion = "sfi.bumpversion.bumpversion:main"
16
+ embedinstall = "sfi.embedinstall.embedinstall:main"
17
+ filedate = "sfi.filedate.filedate:main"
18
+ mkp = "sfi.makepython.makepython:main"
19
+ pdfsplit = "sfi.pdfsplit.pdfsplit:main"
20
+ projectparse = "sfi.projectparse.projectparse:main"
21
+ pyloadergen = "sfi.pyloadergen.pyloadergen:main"
22
+ pypacker = "sfi.pypacker.pypacker:main"
23
+ taskk = "sfi.taskkill.taskkill:main"
24
+ wch = "sfi.which.which:main"
25
+
26
+ [project.optional-dependencies]
27
+ all = ["pysfi[office]"]
28
+ office = ["pymupdf>=1.24.11"]
29
+
30
+ [tool.hatch.build.targets.wheel]
31
+ exclude = [
32
+ "sfi/*/README.md",
33
+ "sfi/*/dist",
34
+ "sfi/*/pyproject.toml",
35
+ "sfi/*/tests",
36
+ ]
37
+ packages = ["sfi"]
38
+
39
+ # Only include necessary source files to minimize package size
40
+ [tool.hatch.build]
41
+ include = ["README.md", "sfi/**/*.py", "sfi/pyproject.toml"]
42
+
43
+ [tool.ruff]
44
+ line-length = 120
45
+ target-version = "py38"
46
+
47
+ # Exclude files and directories
48
+ exclude = [
49
+ ".bzr",
50
+ ".direnv",
51
+ ".eggs",
52
+ ".git",
53
+ ".git-rewrite",
54
+ ".hg",
55
+ ".mypy_cache",
56
+ ".nox",
57
+ ".pants.d",
58
+ ".pytype",
59
+ ".ruff_cache",
60
+ ".svn",
61
+ ".tox",
62
+ ".venv",
63
+ "__pypackages__",
64
+ "_build",
65
+ "buck-out",
66
+ "build",
67
+ "dist",
68
+ "node_modules",
69
+ "venv",
70
+ ]
71
+
72
+ [tool.ruff.lint]
73
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
74
+ fixable = ["ALL"]
75
+ ignore = [
76
+ "B008", # Do not perform function calls in function arguments
77
+ "E501", # Line too long (handled by formatter)
78
+ ]
79
+ select = [
80
+ "B", # flake8-bugbear
81
+ "C4", # flake8-comprehensions
82
+ "E", # pycodestyle errors
83
+ "F", # Pyflakes
84
+ "I", # isort
85
+ "N", # pep8-naming
86
+ "RUF", # Ruff-specific rules
87
+ "SIM", # flake8-simplify
88
+ "UP", # pyupgrade
89
+ "W", # pycodestyle warnings
90
+ ]
91
+ unfixable = []
92
+
93
+ [tool.ruff.lint.isort]
94
+ known-first-party = ["sfi"]
95
+
96
+ [tool.ruff.lint.per-file-ignores]
97
+ "__init__.py" = ["F401"] # Allow unused imports
98
+
99
+ [tool.uv.workspace]
100
+ members = [
101
+ "sfi/alarmclock",
102
+ "sfi/bumpversion",
103
+ "sfi/embedinstall",
104
+ "sfi/filedate",
105
+ "sfi/makepython",
106
+ "sfi/pdfsplit",
107
+ "sfi/projectparse",
108
+ "sfi/pyloadergen",
109
+ "sfi/pypacker",
110
+ "sfi/taskkill",
111
+ "sfi/which",
112
+ ]
113
+
114
+ [dependency-groups]
115
+ dev = [
116
+ "hatch>=1.14.2",
117
+ "pysfi[all]",
118
+ "pyside2>=5.15.2.1",
119
+ "pytest-cov>=5.0.0",
120
+ "pytest>=8.3.5",
121
+ "qdarkstyle>=3.2.3",
122
+ "ruff>=0.14.11",
123
+ "tomli>=2.4.0",
124
+ ]
125
+
126
+ [tool.pytest.ini_options]
127
+ addopts = "-v --tb=short"
128
+ python_classes = ["Test*"]
129
+ python_files = ["test_*.py"]
130
+ python_functions = ["test_*"]
131
+ pythonpath = ["."]
132
+ testpaths = ["sfi"]
@@ -1,3 +1,3 @@
1
1
  """Single File commands for Interactive python."""
2
2
 
3
- __version__ = "0.1.5"
3
+ __version__ = "0.1.6"
@@ -1,3 +1,3 @@
1
1
  """Bumpversion - Automated version number management tool."""
2
2
 
3
- __version__ = "0.1.5"
3
+ __version__ = "0.1.6"
@@ -0,0 +1,173 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ from pathlib import Path
6
+
7
+ import fitz
8
+
9
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
10
+ cwd = Path.cwd()
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def parse_page_ranges(range_str: str, total_pages: int) -> list[int]:
    """Expand a page-range expression into a list of 1-indexed page numbers.

    The expression is a comma-separated list of items. Each item is either a
    single page (``"4"``) or a range ``"start-end"`` with both ends inclusive.
    A missing start defaults to page 1 and a missing end defaults to
    ``total_pages``. Empty items and whitespace around items or bounds are
    ignored.

    The result keeps the order in which pages were written and is NOT
    filtered against ``total_pages`` or de-duplicated; callers do that.
    A range whose start exceeds its end contributes no pages.

    Args:
        range_str: Expression such as ``"1,2,4-10,25-"``.
        total_pages: Page count used as the end of an open-ended range.

    Returns:
        The expanded page numbers.

    Raises:
        ValueError: If an item is not an integer or a well-formed range.
    """
    pages: list[int] = []
    for raw_item in range_str.split(","):
        item = raw_item.strip()
        if not item:
            continue

        if "-" not in item:
            pages.append(int(item))
            continue

        # partition() splits on the first dash only, so a malformed item such
        # as "1-2-3" is reported explicitly instead of failing on unpacking.
        first, _, last = item.partition("-")
        first, last = first.strip(), last.strip()
        if "-" in last:
            raise ValueError(f"Invalid page range: {item!r}")

        start = int(first) if first else 1
        end = int(last) if last else total_pages
        pages.extend(range(start, end + 1))
    return pages
29
+
30
+
31
def split_by_number(input_file: Path, output_file: Path, number: int):
    """Split a PDF into ``number`` consecutive parts of near-equal length.

    Pages are dealt out in order; when the page count is not divisible by
    ``number`` the first ``total_pages % number`` parts receive one extra page.
    If ``number`` exceeds the page count, only as many single-page parts as
    there are pages get written.

    Each part is saved next to ``output_file`` as
    ``<stem>_part<k><suffix>`` with ``k`` starting at 1.

    Args:
        input_file: PDF to read.
        output_file: Path whose parent, stem and suffix name the part files.
        number: Requested number of parts; must be at least 1. Smaller values
            are reported through the logger and nothing is written.
    """
    if number < 1:
        logger.error(f"Number of parts must be at least 1, got {number}")
        return

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count
        base_pages, remainder = divmod(total_pages, number)

        logger.debug(
            f"Total pages: {total_pages}, Splitting into {number} parts, {base_pages} base pages per part, {remainder} extra pages"
        )

        current_page = 0
        for i in range(number):
            if current_page >= total_pages:
                logger.debug(f"Skipping part {i + 1}: no more pages remaining")
                continue

            # First 'remainder' parts get one extra page
            pages_in_this_part = base_pages + (1 if i < remainder else 0)
            end_page = min(current_page + pages_in_this_part, total_pages)

            part_file = output_file.parent / f"{output_file.stem}_part{i + 1}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # Copy the whole run in one call; to_page is inclusive.
                part_doc.insert_pdf(doc, from_page=current_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {i + 1}: {part_file} (pages {current_page + 1}-{end_page})")

            current_page = end_page
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
66
+
67
+
68
def split_by_size(input_file: Path, output_file: Path, size: int):
    """Split a PDF into consecutive parts of at most ``size`` pages each.

    Every part except possibly the last holds exactly ``size`` pages. Parts
    are saved next to ``output_file`` as ``<stem>_part<k><suffix>`` with
    ``k`` starting at 1.

    Args:
        input_file: PDF to read.
        output_file: Path whose parent, stem and suffix name the part files.
        size: Pages per part; must be at least 1. Smaller values are reported
            through the logger and nothing is written (a non-positive size
            would otherwise never advance through the document).
    """
    if size < 1:
        logger.error(f"Pages per part must be at least 1, got {size}")
        return

    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        logger.debug(f"Total pages: {total_pages}, Splitting with {size} pages per part")

        part = 0
        start_page = 0
        while start_page < total_pages:
            end_page = min(start_page + size, total_pages)
            part_file = output_file.parent / f"{output_file.stem}_part{part + 1}{output_file.suffix}"
            part_doc = fitz.open()
            try:
                # Copy the whole run in one call; to_page is inclusive.
                part_doc.insert_pdf(doc, from_page=start_page, to_page=end_page - 1)
                part_doc.save(part_file)
            finally:
                part_doc.close()
            logger.info(f"Created part {part + 1}: {part_file} (pages {start_page + 1}-{end_page})")

            start_page = end_page
            part += 1
    finally:
        # Release the source document even if copying or saving failed.
        doc.close()
94
+
95
+
96
def split_by_range(input_file: Path, output_file: Path, range_str: str):
    """Extract the pages named by ``range_str`` into a single new PDF.

    The expression is expanded with ``parse_page_ranges``. Page numbers
    outside ``1..page_count`` are dropped, duplicates are removed, and the
    remaining pages are written in ascending page order regardless of the
    order in which they were listed. If no valid page remains, an error is
    logged and no file is written.

    Args:
        input_file: PDF to read.
        output_file: Path of the PDF to create.
        range_str: Page expression such as ``"1,2,4-10,25-"`` (1-indexed).
    """
    doc = fitz.open(input_file)
    try:
        total_pages = doc.page_count

        requested = parse_page_ranges(range_str, total_pages)
        # Keep only pages that exist, convert to 0-indexed, then de-duplicate.
        # sorted() means the output follows document order, not request order.
        pages = sorted({p - 1 for p in requested if 1 <= p <= total_pages})

        if not pages:
            logger.error("No valid pages found in the specified range")
            return

        logger.debug(f"Extracting pages: {[p + 1 for p in pages]}")

        new_doc = fitz.open()
        try:
            for page_num in pages:
                new_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
            new_doc.save(output_file)
        finally:
            new_doc.close()
    finally:
        # Release the source document even if parsing, copying or saving failed.
        doc.close()
    logger.info(f"Created output file: {output_file} ({len(pages)} pages)")
122
+
123
+
124
def main():
    """Command-line entry point: split a PDF by part count, part size, or page range.

    Exactly one of ``-n/--number``, ``-s/--size`` or ``-r/--range`` must be
    given. Argument problems (missing input file, missing output directory,
    missing output name in range mode, non-positive count or size) are
    reported through the module logger and the function returns without
    splitting anything.
    """
    parser = argparse.ArgumentParser(description="Split PDF files")
    parser.add_argument("input", help="Input PDF file")
    parser.add_argument("output", nargs="?", help="Output PDF file (optional for -n and -s modes)")
    parser.add_argument("-o", "--output-dir", default=str(cwd), help="Output directory (default: current directory)")
    # NOTE(review): --output-format is parsed but nothing below reads it yet.
    parser.add_argument("-f", "--output-format", help="Output file format pattern, e.g., 'split_{part:02d}.pdf'")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    # Split by number, size, or range. None of the three has a default: the
    # group is required, and a default on --size would make it look "selected"
    # whenever -n or -r is used, hiding range mode from the dispatch below.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-n", "--number", type=int, help="Number of splits")
    group.add_argument("-s", "--size", type=int, help="Size of each split in pages")
    group.add_argument("-r", "--range", type=str, help="Range of pages to extract, e.g., '1,2,4-10,15-20,25-'")

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    output_dir = Path(args.output_dir)
    if not output_dir.is_dir():
        logger.error(f"Output directory {args.output_dir} does not exist, please check the path.")
        return

    input_file = Path(args.input)
    if not input_file.is_file():
        logger.error(f"Input file {args.input} does not exist, please check the path.")
        return

    # For -n and -s modes, output is optional and defaults to base name with suffix
    # For -r mode, output is required
    range_mode = args.range is not None
    if range_mode and not args.output:
        logger.error("Output file is required for -r/--range mode")
        return

    if args.output:
        # Joining keeps an absolute output path unchanged and places a
        # relative one inside the chosen output directory.
        output_file = output_dir / args.output
    else:
        output_file = output_dir / (input_file.stem + "_split.pdf")

    # Compare against None rather than truthiness so that "-n 0" / "-s 0" are
    # rejected here instead of silently selecting a different mode.
    if args.number is not None and args.number < 1:
        logger.error("Number of splits must be at least 1")
        return
    if args.size is not None and args.size < 1:
        logger.error("Size of each split must be at least 1")
        return

    logger.info(f"Start splitting {input_file}")
    if args.number is not None:
        split_by_number(input_file, output_file, args.number)
    elif args.size is not None:
        split_by_size(input_file, output_file, args.size)
    else:
        # The required exclusive group guarantees -r was given here.
        split_by_range(input_file, output_file, args.range)
@@ -0,0 +1,333 @@
1
+ """Tests for PDF split functionality."""
2
+
3
+ import logging
4
+ import shutil
5
+ import sys
6
+ import tempfile
7
+ from pathlib import Path
8
+
9
+ import fitz
10
+ import pytest
11
+
12
+ sys.path.insert(0, str(Path(__file__).parent.parent))
13
+
14
+ from sfi.pdfsplit.pdfsplit import parse_page_ranges, split_by_number, split_by_range, split_by_size
15
+
16
+ logging.basicConfig(level=logging.DEBUG)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@pytest.fixture
def temp_dir():
    """Yield a fresh scratch directory and remove it once the test is done."""
    scratch = Path(tempfile.mkdtemp())
    yield scratch
    # Teardown: best-effort removal, errors are ignored.
    shutil.rmtree(scratch, ignore_errors=True)
27
+
28
+
29
@pytest.fixture
def sample_pdf_10(temp_dir):
    """Write a ten-page PDF into the scratch directory and return its path."""
    target = temp_dir / "sample_10.pdf"
    pdf = fitz.open()
    for number in range(1, 11):
        sheet = pdf.new_page()  # pyright: ignore[reportAttributeAccessIssue]
        # Label every page so the pages are distinguishable.
        sheet.insert_text(fitz.Point(50, 72), f"This is page {number}", fontsize=12)
    pdf.save(target)
    pdf.close()
    logger.debug(f"Created test PDF: {target} with 10 pages")
    return target
42
+
43
+
44
@pytest.fixture
def sample_pdf_5(temp_dir):
    """Write a five-page PDF into the scratch directory and return its path."""
    target = temp_dir / "sample_5.pdf"
    pdf = fitz.open()
    for number in range(1, 6):
        sheet = pdf.new_page()  # pyright: ignore[reportAttributeAccessIssue]
        # Label every page so the pages are distinguishable.
        sheet.insert_text(fitz.Point(50, 72), f"This is page {number}", fontsize=12)
    pdf.save(target)
    pdf.close()
    logger.debug(f"Created test PDF: {target} with 5 pages")
    return target
57
+
58
+
59
def get_page_count(pdf_path: Path) -> int:
    """Open the PDF at ``pdf_path``, read its page count, close it, and return the count."""
    pdf = fitz.open(pdf_path)
    n_pages = pdf.page_count
    pdf.close()
    return n_pages
65
+
66
+
67
def test_parse_page_ranges():
    """Check the parser on single pages, closed ranges and open-ended ranges."""
    expectations = [
        ("1,2,3", [1, 2, 3]),  # single pages
        ("1-5", [1, 2, 3, 4, 5]),  # closed range
        ("1,3,5-7", [1, 3, 5, 6, 7]),  # singles mixed with a range
        ("5-", [5, 6, 7, 8, 9, 10]),  # open end runs to the last page
        ("-5", [1, 2, 3, 4, 5]),  # open start begins at page 1
        ("1,2,20", [1, 2, 20]),  # pages past the end are returned unfiltered
    ]
    for expression, expected in expectations:
        assert parse_page_ranges(expression, 10) == expected
86
+
87
+
88
def test_split_by_number_3_parts(sample_pdf_10, temp_dir):
    """Ten pages split three ways give parts of 4, 3 and 3 pages."""
    split_by_number(sample_pdf_10, temp_dir / "output.pdf", 3)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 3

    # The single leftover page lands in the first part.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [4, 3, 3]

    logger.info(f"Split into 3 parts: {page_counts}")
102
+
103
+
104
def test_split_by_number_equal(sample_pdf_10, temp_dir):
    """Ten pages split five ways give five two-page parts."""
    split_by_number(sample_pdf_10, temp_dir / "output.pdf", 5)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 5

    # Even division: every part holds exactly two pages.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [2] * len(part_files)

    logger.info(f"Split into 5 equal parts: {page_counts}")
117
+
118
+
119
def test_split_by_number_more_parts_than_pages(sample_pdf_5, temp_dir):
    """Asking for ten parts of a five-page PDF yields five single-page parts."""
    split_by_number(sample_pdf_5, temp_dir / "output.pdf", 10)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 5

    # No part can be empty, so each one carries a single page.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [1] * len(part_files)

    logger.info(f"Split 5 pages into 10 parts: {page_counts}")
132
+
133
+
134
def test_split_by_size_3_pages(sample_pdf_10, temp_dir):
    """Three pages per part turns ten pages into parts of 3, 3, 3 and 1."""
    split_by_size(sample_pdf_10, temp_dir / "output.pdf", 3)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 4

    # The last part holds the single remaining page.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [3, 3, 3, 1]

    logger.info(f"Split by size 3: {page_counts}")
147
+
148
+
149
def test_split_by_size_1_page(sample_pdf_10, temp_dir):
    """One page per part turns ten pages into ten single-page files."""
    split_by_size(sample_pdf_10, temp_dir / "output.pdf", 1)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 10

    # Every part must contain exactly one page.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [1] * len(part_files)

    logger.info(f"Split by size 1: {page_counts}")
162
+
163
+
164
def test_split_by_size_exact_division(sample_pdf_10, temp_dir):
    """Five pages per part divides ten pages into two full parts."""
    split_by_size(sample_pdf_10, temp_dir / "output.pdf", 5)

    part_files = sorted(temp_dir.glob("output_part*.pdf"))
    assert len(part_files) == 2

    # No short trailing part when the size divides the page count.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [5, 5]

    logger.info(f"Split by size 5 (exact): {page_counts}")
177
+
178
+
179
def test_split_by_range_single_pages(sample_pdf_10, temp_dir):
    """Listing five individual pages produces a five-page file."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "1,3,5,7,9")

    assert extracted.exists()
    # Only the page count is verified here, not the page contents.
    assert get_page_count(extracted) == 5

    logger.info("Extracted 5 pages: 1,3,5,7,9")
189
+
190
+
191
def test_split_by_range_with_dash(sample_pdf_10, temp_dir):
    """Several closed ranges are combined into one output file."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "1-3,5-7,9-10")

    assert extracted.exists()
    # Expected pages: 1,2,3,5,6,7,9,10
    assert get_page_count(extracted) == 8

    logger.info("Extracted 8 pages with ranges: 1-3,5-7,9-10")
200
+
201
+
202
def test_split_by_range_open_end(sample_pdf_10, temp_dir):
    """A range with no end bound runs through the last page."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "5-")

    assert extracted.exists()
    # Expected pages: 5,6,7,8,9,10
    assert get_page_count(extracted) == 6

    logger.info("Extracted pages 5- (6 pages)")
211
+
212
+
213
def test_split_by_range_open_start(sample_pdf_10, temp_dir):
    """A range with no start bound begins at the first page."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "-3")

    assert extracted.exists()
    # Expected pages: 1,2,3
    assert get_page_count(extracted) == 3

    logger.info("Extracted pages -3 (3 pages)")
222
+
223
+
224
def test_split_by_range_complex(sample_pdf_10, temp_dir):
    """Single pages, a closed range and an open-ended range can be mixed."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "1,2,4-6,8-")

    assert extracted.exists()
    # Expected pages: 1,2,4,5,6,8,9,10
    assert get_page_count(extracted) == 8

    logger.info("Extracted complex range: 1,2,4-6,8-")
233
+
234
+
235
def test_split_by_range_out_of_range(sample_pdf_5, temp_dir):
    """Page numbers past the end of the document are ignored."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_5, extracted, "1,3,10,20")

    assert extracted.exists()
    # Of the requested pages only 1 and 3 exist in a five-page PDF.
    assert get_page_count(extracted) == 2

    logger.info("Extracted pages with some out of range: 1,3,10,20")
244
+
245
+
246
def test_split_by_range_all_pages(sample_pdf_10, temp_dir):
    """A range covering the whole document copies every page."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "1-10")

    assert extracted.exists()
    assert get_page_count(extracted) == 10

    logger.info("Extracted all 10 pages")
255
+
256
+
257
def test_split_by_range_duplicate_pages(sample_pdf_10, temp_dir):
    """Pages requested more than once appear only once in the output."""
    extracted = temp_dir / "extracted.pdf"
    split_by_range(sample_pdf_10, extracted, "1,2,1,2,3-5,3-5")

    assert extracted.exists()
    # After de-duplication the output holds pages 1,2,3,4,5.
    assert get_page_count(extracted) == 5

    logger.info("Extracted with duplicates (should deduplicate): 1,2,1,2,3-5,3-5")
267
+
268
+
269
def test_output_file_naming(sample_pdf_10, temp_dir):
    """Part files are named ``<stem>_part<k><suffix>`` starting at k = 1."""
    split_by_number(sample_pdf_10, temp_dir / "test_output.pdf", 3)

    part_files = sorted(temp_dir.glob("test_output_part*.pdf"))
    assert len(part_files) == 3

    # Compare the bare file names, not the full paths.
    filenames = [part.name for part in part_files]
    expected_names = ["test_output_part1.pdf", "test_output_part2.pdf", "test_output_part3.pdf"]
    assert filenames == expected_names

    logger.info(f"File naming test passed: {filenames}")
283
+
284
+
285
def test_large_pdf(temp_dir):
    """A fifty-page PDF split ten pages at a time gives five full parts."""
    # Build the 50-page source document.
    source = temp_dir / "large_50.pdf"
    pdf = fitz.open()
    for number in range(1, 51):
        sheet = pdf.new_page()  # pyright: ignore[reportAttributeAccessIssue]
        sheet.insert_text(fitz.Point(50, 72), f"This is page {number}", fontsize=12)
    pdf.save(source)
    pdf.close()

    # Split it into chunks of ten pages.
    split_by_size(source, temp_dir / "large_output.pdf", 10)

    part_files = sorted(temp_dir.glob("large_output_part*.pdf"))
    assert len(part_files) == 5

    # Every chunk must be full.
    page_counts = list(map(get_page_count, part_files))
    assert page_counts == [10] * len(part_files)

    logger.info("Large PDF (50 pages) split into 5 parts of 10 pages each")
309
+
310
+
311
def test_edge_case_single_page_pdf(temp_dir):
    """Splitting a one-page PDF into two parts writes just one part."""
    source = temp_dir / "single.pdf"
    pdf = fitz.open()
    sheet = pdf.new_page()  # pyright: ignore[reportAttributeAccessIssue]
    sheet.insert_text(fitz.Point(50, 72), "Single page", fontsize=12)
    pdf.save(source)
    pdf.close()

    # Two parts are requested, but there is only one page to hand out.
    split_by_number(source, temp_dir / "single_output.pdf", 2)

    part_files = sorted(temp_dir.glob("single_output_part*.pdf"))
    assert len(part_files) == 1
    assert get_page_count(part_files[0]) == 1

    logger.info("Single-page PDF handled correctly")
330
+
331
+
332
+ if __name__ == "__main__":
333
+ pytest.main([__file__, "-v", "-s"])
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes