diffpdf 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {diffpdf-0.2.0 → diffpdf-0.3.0}/.github/workflows/pypi-publish.yml +2 -2
  2. {diffpdf-0.2.0 → diffpdf-0.3.0}/PKG-INFO +12 -10
  3. {diffpdf-0.2.0 → diffpdf-0.3.0}/README.md +10 -8
  4. {diffpdf-0.2.0 → diffpdf-0.3.0}/pyproject.toml +2 -2
  5. diffpdf-0.3.0/src/diffpdf/__init__.py +27 -0
  6. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/cli.py +2 -2
  7. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/comparators.py +1 -1
  8. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/visual_check.py +18 -12
  9. diffpdf-0.3.0/tests/test_api.py +16 -0
  10. diffpdf-0.3.0/tests/test_cli.py +36 -0
  11. diffpdf-0.2.0/tests/test_cli.py → diffpdf-0.3.0/tests/test_comparators.py +13 -30
  12. diffpdf-0.2.0/src/diffpdf/__init__.py +0 -15
  13. {diffpdf-0.2.0 → diffpdf-0.3.0}/.github/dependabot.yml +0 -0
  14. {diffpdf-0.2.0 → diffpdf-0.3.0}/.github/workflows/build.yml +0 -0
  15. {diffpdf-0.2.0 → diffpdf-0.3.0}/.gitignore +0 -0
  16. {diffpdf-0.2.0 → diffpdf-0.3.0}/.vscode/settings.json +0 -0
  17. {diffpdf-0.2.0 → diffpdf-0.3.0}/LICENSE +0 -0
  18. {diffpdf-0.2.0 → diffpdf-0.3.0}/MANIFEST.in +0 -0
  19. {diffpdf-0.2.0 → diffpdf-0.3.0}/hooks/pre-commit +0 -0
  20. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/hash_check.py +0 -0
  21. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/logger.py +0 -0
  22. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/page_check.py +0 -0
  23. {diffpdf-0.2.0 → diffpdf-0.3.0}/src/diffpdf/text_check.py +0 -0
  24. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/1-letter-diff-A.pdf +0 -0
  25. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/1-letter-diff-B.pdf +0 -0
  26. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/major-color-diff-A.pdf +0 -0
  27. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/major-color-diff-B.pdf +0 -0
  28. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/page-count-diff-A.pdf +0 -0
  29. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/fail/page-count-diff-B.pdf +0 -0
  30. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/hash-diff-A.pdf +0 -0
  31. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/hash-diff-B.pdf +0 -0
  32. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/identical-A.pdf +0 -0
  33. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/identical-B.pdf +0 -0
  34. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/minor-color-diff-A.pdf +0 -0
  35. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/minor-color-diff-B.pdf +0 -0
  36. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/multiplatform-diff-A.pdf +0 -0
  37. {diffpdf-0.2.0 → diffpdf-0.3.0}/tests/assets/pass/multiplatform-diff-B.pdf +0 -0
@@ -11,10 +11,10 @@ jobs:
11
11
  id-token: write
12
12
 
13
13
  steps:
14
- - uses: actions/checkout@v4
14
+ - uses: actions/checkout@v6
15
15
 
16
16
  - name: Set up Python
17
- uses: actions/setup-python@v5
17
+ uses: actions/setup-python@v6
18
18
  with:
19
19
  python-version: '3.10'
20
20
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffpdf
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A tool for comparing PDF files
5
5
  Project-URL: Homepage, https://github.com/JustusRijke/DiffPDF
6
6
  Project-URL: Issues, https://github.com/JustusRijke/DiffPDF/issues
@@ -15,7 +15,7 @@ Requires-Python: >=3.10
15
15
  Requires-Dist: click
16
16
  Requires-Dist: colorlog
17
17
  Requires-Dist: pillow>=10.0.0
18
- Requires-Dist: pixelmatch>=0.3.0
18
+ Requires-Dist: pixelmatch-fast>=1.1.0
19
19
  Requires-Dist: pymupdf>=1.23.0
20
20
  Provides-Extra: dev
21
21
  Requires-Dist: pytest; extra == 'dev'
@@ -39,12 +39,10 @@ DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
39
39
  1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
40
40
  2. **Page Count** - Verify both PDFs have the same number of pages.
41
41
  3. **Text Content** - Extract and compare text from all pages (ignoring whitespace).
42
- 4. **Visual Check** - Render pages to images and compare using pixelmatch.
42
+ 4. **Visual Check** - Render pages to images and compare using [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/).
43
43
 
44
44
  Each stage only runs if all previous stages pass.
45
45
 
46
- **⚠️ Performance Warning:** The Python port of pixelmatch is extremely slow.
47
-
48
46
  ## Installation
49
47
 
50
48
  ```bash
@@ -60,7 +58,7 @@ Usage: diffpdf [OPTIONS] REFERENCE ACTUAL
60
58
  Options:
61
59
  --threshold FLOAT Pixelmatch threshold (0.0-1.0)
62
60
  --dpi INTEGER Render resolution
63
- --output-dir DIRECTORY Diff image output directory
61
+ --output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
64
62
  -v, --verbose Increase verbosity (-v for INFO, -vv for DEBUG)
65
63
  --save-log Write log output to log.txt
66
64
  --version Show the version and exit.
@@ -75,10 +73,14 @@ Options:
75
73
 
76
74
  ## Library Usage
77
75
 
78
- Call the CLI from Python:
79
76
  ```python
80
- from diffpdf import main
81
- main(["-vv","foo.pdf", "bar.pdf"])
77
+ from diffpdf import diffpdf
78
+
79
+ # Basic usage (no diff images saved)
80
+ diffpdf("reference.pdf", "actual.pdf")
81
+
82
+ # With options (save diff images to ./output directory)
83
+ diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbosity=2)
82
84
  ```
83
85
 
84
86
  ## Development
@@ -91,4 +93,4 @@ ruff check .
91
93
 
92
94
  ## Acknowledgements
93
95
 
94
- Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-py](https://github.com/whtsky/pixelmatch-py) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
96
+ Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
@@ -14,12 +14,10 @@ DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
14
14
  1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
15
15
  2. **Page Count** - Verify both PDFs have the same number of pages.
16
16
  3. **Text Content** - Extract and compare text from all pages (ignoring whitespace).
17
- 4. **Visual Check** - Render pages to images and compare using pixelmatch.
17
+ 4. **Visual Check** - Render pages to images and compare using [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/).
18
18
 
19
19
  Each stage only runs if all previous stages pass.
20
20
 
21
- **⚠️ Performance Warning:** The Python port of pixelmatch is extremely slow.
22
-
23
21
  ## Installation
24
22
 
25
23
  ```bash
@@ -35,7 +33,7 @@ Usage: diffpdf [OPTIONS] REFERENCE ACTUAL
35
33
  Options:
36
34
  --threshold FLOAT Pixelmatch threshold (0.0-1.0)
37
35
  --dpi INTEGER Render resolution
38
- --output-dir DIRECTORY Diff image output directory
36
+ --output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
39
37
  -v, --verbose Increase verbosity (-v for INFO, -vv for DEBUG)
40
38
  --save-log Write log output to log.txt
41
39
  --version Show the version and exit.
@@ -50,10 +48,14 @@ Options:
50
48
 
51
49
  ## Library Usage
52
50
 
53
- Call the CLI from Python:
54
51
  ```python
55
- from diffpdf import main
56
- main(["-vv","foo.pdf", "bar.pdf"])
52
+ from diffpdf import diffpdf
53
+
54
+ # Basic usage (no diff images saved)
55
+ diffpdf("reference.pdf", "actual.pdf")
56
+
57
+ # With options (save diff images to ./output directory)
58
+ diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbosity=2)
57
59
  ```
58
60
 
59
61
  ## Development
@@ -66,4 +68,4 @@ ruff check .
66
68
 
67
69
  ## Acknowledgements
68
70
 
69
- Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-py](https://github.com/whtsky/pixelmatch-py) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
71
+ Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
@@ -24,7 +24,7 @@ dependencies = [
24
24
  "click",
25
25
  "colorlog",
26
26
  "pymupdf>=1.23.0",
27
- "pixelmatch>=0.3.0",
27
+ "pixelmatch-fast>=1.1.0",
28
28
  "Pillow>=10.0.0",
29
29
  ]
30
30
 
@@ -40,7 +40,7 @@ dev = [
40
40
  ]
41
41
 
42
42
  [project.scripts]
43
- diffpdf = "diffpdf:main"
43
+ diffpdf = "diffpdf.cli:cli"
44
44
 
45
45
  [tool.hatch.version]
46
46
  source = "vcs"
@@ -0,0 +1,27 @@
1
+ from importlib.metadata import version
2
+ from pathlib import Path
3
+
4
+ from .comparators import compare_pdfs
5
+ from .logger import setup_logging
6
+
7
+ __version__ = version("diffpdf")
8
+
9
+
10
+ def diffpdf(
11
+ reference: str | Path,
12
+ actual: str | Path,
13
+ threshold: float = 0.1,
14
+ dpi: int = 96,
15
+ output_dir: str | Path | None = None,
16
+ verbosity: int = 0,
17
+ save_log: bool = False,
18
+ ) -> None:
19
+ ref_path = Path(reference) if isinstance(reference, str) else reference
20
+ actual_path = Path(actual) if isinstance(actual, str) else actual
21
+ out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
22
+
23
+ logger = setup_logging(verbosity, save_log)
24
+ compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
25
+
26
+
27
+ __all__ = ["diffpdf", "__version__"]
@@ -19,8 +19,8 @@ from .logger import setup_logging
19
19
  @click.option(
20
20
  "--output-dir",
21
21
  type=click.Path(file_okay=False, path_type=Path),
22
- default="./",
23
- help="Diff image output directory",
22
+ default=None,
23
+ help="Diff image output directory (if not specified, no diff images are saved)",
24
24
  )
25
25
  @click.option(
26
26
  "-v",
@@ -8,7 +8,7 @@ from .visual_check import check_visual_content
8
8
 
9
9
 
10
10
  def compare_pdfs(
11
- ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path, logger
11
+ ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
12
12
  ) -> None:
13
13
  check_hash(ref, actual, logger)
14
14
 
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
 
4
4
  import fitz
5
5
  from PIL import Image
6
- from pixelmatch.contrib.PIL import pixelmatch
6
+ from pixelmatch import pixelmatch
7
7
 
8
8
 
9
9
  def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image:
@@ -16,24 +16,28 @@ def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image
16
16
 
17
17
 
18
18
  def compare_images(
19
- ref_img: Image.Image, actual_img: Image.Image, threshold: float, output_path: Path
19
+ ref_img: Image.Image,
20
+ actual_img: Image.Image,
21
+ threshold: float,
22
+ output_path: Path | None,
20
23
  ) -> bool:
21
- diff_img = Image.new("RGB", ref_img.size)
22
- mismatch_count = pixelmatch(ref_img, actual_img, diff_img, threshold=threshold)
24
+ mismatch_count = pixelmatch(
25
+ ref_img, actual_img, diff_path=output_path, threshold=threshold
26
+ )
23
27
 
24
28
  if mismatch_count > 0:
25
- diff_img.save(output_path)
26
29
  return False
27
30
 
28
31
  return True
29
32
 
30
33
 
31
34
  def check_visual_content(
32
- ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path, logger
35
+ ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
33
36
  ) -> None:
34
37
  logger.info("[4/4] Checking visual content...")
35
38
 
36
- output_dir.mkdir(parents=True, exist_ok=True)
39
+ if output_dir is not None:
40
+ output_dir.mkdir(parents=True, exist_ok=True)
37
41
 
38
42
  ref_doc = fitz.open(ref)
39
43
  page_count = len(ref_doc)
@@ -45,11 +49,13 @@ def check_visual_content(
45
49
  ref_img = render_page_to_image(ref, page_num, dpi)
46
50
  actual_img = render_page_to_image(actual, page_num, dpi)
47
51
 
48
- ref_name = ref.stem
49
- actual_name = actual.stem
50
- output_path = (
51
- output_dir / f"{ref_name}_vs_{actual_name}_page{page_num + 1}_diff.png"
52
- )
52
+ output_path = None
53
+ if output_dir is not None:
54
+ ref_name = ref.stem
55
+ actual_name = actual.stem
56
+ output_path = (
57
+ output_dir / f"{ref_name}_vs_{actual_name}_page{page_num + 1}_diff.png"
58
+ )
53
59
 
54
60
  passed = compare_images(ref_img, actual_img, threshold, output_path)
55
61
 
@@ -0,0 +1,16 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from diffpdf import diffpdf
6
+
7
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
8
+
9
+
10
+ def test_diffpdf():
11
+ with pytest.raises(SystemExit) as exc_info:
12
+ diffpdf(
13
+ TEST_ASSETS_DIR / "pass/identical-A.pdf",
14
+ TEST_ASSETS_DIR / "pass/identical-B.pdf",
15
+ )
16
+ assert exc_info.value.code == 0
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ from click.testing import CliRunner
4
+
5
+ from diffpdf.cli import cli
6
+
7
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
8
+
9
+
10
+ def test_verbose_flag():
11
+ runner = CliRunner()
12
+ result = runner.invoke(
13
+ cli,
14
+ [
15
+ str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
16
+ str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
17
+ "-v",
18
+ ],
19
+ )
20
+ assert result.exit_code == 0
21
+ assert "INFO" in result.output
22
+ assert "DEBUG" not in result.output
23
+
24
+
25
+ def test_double_verbose_flag():
26
+ runner = CliRunner()
27
+ result = runner.invoke(
28
+ cli,
29
+ [
30
+ str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
31
+ str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
32
+ "-vv",
33
+ ],
34
+ )
35
+ assert result.exit_code == 0
36
+ assert "DEBUG" in result.output
@@ -8,35 +8,6 @@ from diffpdf.cli import cli
8
8
  TEST_ASSETS_DIR = Path(__file__).parent / "assets"
9
9
 
10
10
 
11
- def test_verbose_flag():
12
- runner = CliRunner()
13
- result = runner.invoke(
14
- cli,
15
- [
16
- str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
17
- str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
18
- "-v",
19
- ],
20
- )
21
- assert result.exit_code == 0
22
- assert "INFO" in result.output
23
- assert "DEBUG" not in result.output
24
-
25
-
26
- def test_double_verbose_flag():
27
- runner = CliRunner()
28
- result = runner.invoke(
29
- cli,
30
- [
31
- str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
32
- str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
33
- "-vv",
34
- ],
35
- )
36
- assert result.exit_code == 0
37
- assert "DEBUG" in result.output
38
-
39
-
40
11
  @pytest.mark.parametrize(
41
12
  "ref_pdf_rel,actual_pdf_rel,expected_exit_code",
42
13
  [
@@ -54,7 +25,6 @@ def test_double_verbose_flag():
54
25
  ],
55
26
  )
56
27
  def test_comparators(ref_pdf_rel, actual_pdf_rel, expected_exit_code):
57
- """Parametric integration test: CLI should exit with correct code for various PDF pairs."""
58
28
  runner = CliRunner()
59
29
 
60
30
  ref_pdf = str(TEST_ASSETS_DIR / ref_pdf_rel)
@@ -63,3 +33,16 @@ def test_comparators(ref_pdf_rel, actual_pdf_rel, expected_exit_code):
63
33
  result = runner.invoke(cli, [ref_pdf, actual_pdf])
64
34
 
65
35
  assert result.exit_code == expected_exit_code
36
+
37
+
38
+ def test_comparators_with_output_dir():
39
+ runner = CliRunner()
40
+
41
+ with runner.isolated_filesystem():
42
+ ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf")
43
+ actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf")
44
+
45
+ result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"])
46
+
47
+ assert result.exit_code == 1
48
+ assert Path("./diff").exists()
@@ -1,15 +0,0 @@
1
- from importlib.metadata import version
2
-
3
- from .cli import cli
4
-
5
- __version__ = version("diffpdf")
6
-
7
-
8
- def main(args=None): # pragma: no cover
9
- if args is None:
10
- cli()
11
- else:
12
- cli(args, standalone_mode=False)
13
-
14
-
15
- __all__ = ["main", "__version__"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes