diffpdf 0.1.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. diffpdf-0.3.0/.github/workflows/build.yml +35 -0
  2. {diffpdf-0.1.2 → diffpdf-0.3.0}/.github/workflows/pypi-publish.yml +2 -2
  3. diffpdf-0.3.0/PKG-INFO +96 -0
  4. diffpdf-0.3.0/README.md +71 -0
  5. {diffpdf-0.1.2 → diffpdf-0.3.0}/pyproject.toml +13 -3
  6. diffpdf-0.3.0/src/diffpdf/__init__.py +27 -0
  7. diffpdf-0.3.0/src/diffpdf/cli.py +43 -0
  8. {diffpdf-0.1.2 → diffpdf-0.3.0}/src/diffpdf/comparators.py +1 -1
  9. diffpdf-0.3.0/src/diffpdf/logger.py +45 -0
  10. {diffpdf-0.1.2 → diffpdf-0.3.0}/src/diffpdf/text_check.py +14 -10
  11. {diffpdf-0.1.2 → diffpdf-0.3.0}/src/diffpdf/visual_check.py +18 -12
  12. diffpdf-0.3.0/tests/test_api.py +16 -0
  13. diffpdf-0.3.0/tests/test_cli.py +36 -0
  14. diffpdf-0.1.2/tests/test_cli.py → diffpdf-0.3.0/tests/test_comparators.py +18 -5
  15. diffpdf-0.1.2/.github/workflows/ci.yml +0 -41
  16. diffpdf-0.1.2/PKG-INFO +0 -82
  17. diffpdf-0.1.2/README.md +0 -58
  18. diffpdf-0.1.2/ruff.toml +0 -2
  19. diffpdf-0.1.2/src/diffpdf/__init__.py +0 -15
  20. diffpdf-0.1.2/src/diffpdf/cli.py +0 -71
  21. {diffpdf-0.1.2 → diffpdf-0.3.0}/.github/dependabot.yml +0 -0
  22. {diffpdf-0.1.2 → diffpdf-0.3.0}/.gitignore +0 -0
  23. {diffpdf-0.1.2 → diffpdf-0.3.0}/.vscode/settings.json +0 -0
  24. {diffpdf-0.1.2 → diffpdf-0.3.0}/LICENSE +0 -0
  25. {diffpdf-0.1.2 → diffpdf-0.3.0}/MANIFEST.in +0 -0
  26. {diffpdf-0.1.2 → diffpdf-0.3.0}/hooks/pre-commit +0 -0
  27. {diffpdf-0.1.2 → diffpdf-0.3.0}/src/diffpdf/hash_check.py +0 -0
  28. {diffpdf-0.1.2 → diffpdf-0.3.0}/src/diffpdf/page_check.py +0 -0
  29. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/1-letter-diff-A.pdf +0 -0
  30. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/1-letter-diff-B.pdf +0 -0
  31. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/major-color-diff-A.pdf +0 -0
  32. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/major-color-diff-B.pdf +0 -0
  33. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/page-count-diff-A.pdf +0 -0
  34. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/fail/page-count-diff-B.pdf +0 -0
  35. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/hash-diff-A.pdf +0 -0
  36. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/hash-diff-B.pdf +0 -0
  37. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/identical-A.pdf +0 -0
  38. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/identical-B.pdf +0 -0
  39. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/minor-color-diff-A.pdf +0 -0
  40. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/minor-color-diff-B.pdf +0 -0
  41. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/multiplatform-diff-A.pdf +0 -0
  42. {diffpdf-0.1.2 → diffpdf-0.3.0}/tests/assets/pass/multiplatform-diff-B.pdf +0 -0
@@ -0,0 +1,35 @@
1
+ name: Build
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ runs-on: ${{ matrix.os }}
10
+ strategy:
11
+ matrix:
12
+ os: [ubuntu-latest, windows-latest]
13
+ python-version: ["3.10", "3.x"]
14
+
15
+ steps:
16
+ - uses: actions/checkout@v6
17
+
18
+ - name: Set up Python ${{ matrix.python-version }}
19
+ uses: actions/setup-python@v6
20
+ with:
21
+ python-version: ${{ matrix.python-version }}
22
+
23
+ - name: Install package with dev dependencies
24
+ run: pip install -e .[dev]
25
+
26
+ - name: Run ruff
27
+ run: ruff check .
28
+
29
+ - name: Run pytest
30
+ run: pytest tests/ -v --cov --cov-branch --cov-report=xml
31
+
32
+ - name: Upload coverage reports to Codecov
33
+ uses: codecov/codecov-action@v5
34
+ with:
35
+ token: ${{ secrets.CODECOV_TOKEN }}
@@ -11,10 +11,10 @@ jobs:
11
11
  id-token: write
12
12
 
13
13
  steps:
14
- - uses: actions/checkout@v4
14
+ - uses: actions/checkout@v6
15
15
 
16
16
  - name: Set up Python
17
- uses: actions/setup-python@v5
17
+ uses: actions/setup-python@v6
18
18
  with:
19
19
  python-version: '3.10'
20
20
 
diffpdf-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffpdf
3
+ Version: 0.3.0
4
+ Summary: A tool for comparing PDF files
5
+ Project-URL: Homepage, https://github.com/JustusRijke/DiffPDF
6
+ Project-URL: Issues, https://github.com/JustusRijke/DiffPDF/issues
7
+ Author-email: Justus Rijke <justusrijke@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Operating System :: Microsoft :: Windows
12
+ Classifier: Operating System :: POSIX :: Linux
13
+ Classifier: Programming Language :: Python :: 3
14
+ Requires-Python: >=3.10
15
+ Requires-Dist: click
16
+ Requires-Dist: colorlog
17
+ Requires-Dist: pillow>=10.0.0
18
+ Requires-Dist: pixelmatch-fast>=1.1.0
19
+ Requires-Dist: pymupdf>=1.23.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest; extra == 'dev'
22
+ Requires-Dist: pytest-cov; extra == 'dev'
23
+ Requires-Dist: ruff; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # DiffPDF
27
+
28
+ [![Build](https://github.com/JustusRijke/DiffPDF/actions/workflows/build.yml/badge.svg)](https://github.com/JustusRijke/DiffPDF/actions/workflows/build.yml)
29
+ [![codecov](https://codecov.io/gh/JustusRijke/DiffPDF/graph/badge.svg?token=O3ZJFG6X7A)](https://codecov.io/gh/JustusRijke/DiffPDF)
30
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue)](https://www.python.org/downloads/)
31
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
32
+
33
+ CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
34
+
35
+ ## How It Works
36
+
37
+ DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
38
+
39
+ 1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
40
+ 2. **Page Count** - Verify both PDFs have the same number of pages.
41
+ 3. **Text Content** - Extract and compare text from all pages (ignoring whitespace).
42
+ 4. **Visual Check** - Render pages to images and compare using [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/).
43
+
44
+ Each stage only runs if all previous stages pass.
45
+
46
+ ## Installation
47
+
48
+ ```bash
49
+ pip install diffpdf
50
+ ```
51
+
52
+ ## CLI Usage
53
+ ```
54
+ Usage: diffpdf [OPTIONS] REFERENCE ACTUAL
55
+
56
+ Compare two PDF files for structural, textual, and visual differences.
57
+
58
+ Options:
59
+ --threshold FLOAT Pixelmatch threshold (0.0-1.0)
60
+ --dpi INTEGER Render resolution
61
+ --output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
62
+ -v, --verbose Increase verbosity (-v for INFO, -vv for DEBUG)
63
+ --save-log Write log output to log.txt
64
+ --version Show the version and exit.
65
+ --help Show this message and exit.
66
+ ```
67
+
68
+ **Exit Codes**
69
+
70
+ - `0` — Pass (PDFs are equivalent)
71
+ - `1` — Fail (differences detected)
72
+ - `2` — Error (invalid input or processing error)
73
+
74
+ ## Library Usage
75
+
76
+ ```python
77
+ from diffpdf import diffpdf
78
+
79
+ # Basic usage (no diff images saved)
80
+ diffpdf("reference.pdf", "actual.pdf")
81
+
82
+ # With options (save diff images to ./output directory)
83
+ diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbosity=2)
84
+ ```
85
+
86
+ ## Development
87
+
88
+ ```bash
89
+ pip install -e .[dev]
90
+ pytest tests/ -v
91
+ ruff check .
92
+ ```
93
+
94
+ ## Acknowledgements
95
+
96
+ Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
@@ -0,0 +1,71 @@
1
+ # DiffPDF
2
+
3
+ [![Build](https://github.com/JustusRijke/DiffPDF/actions/workflows/build.yml/badge.svg)](https://github.com/JustusRijke/DiffPDF/actions/workflows/build.yml)
4
+ [![codecov](https://codecov.io/gh/JustusRijke/DiffPDF/graph/badge.svg?token=O3ZJFG6X7A)](https://codecov.io/gh/JustusRijke/DiffPDF)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
7
+
8
+ CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
9
+
10
+ ## How It Works
11
+
12
+ DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
13
+
14
+ 1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
15
+ 2. **Page Count** - Verify both PDFs have the same number of pages.
16
+ 3. **Text Content** - Extract and compare text from all pages (ignoring whitespace).
17
+ 4. **Visual Check** - Render pages to images and compare using [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/).
18
+
19
+ Each stage only runs if all previous stages pass.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install diffpdf
25
+ ```
26
+
27
+ ## CLI Usage
28
+ ```
29
+ Usage: diffpdf [OPTIONS] REFERENCE ACTUAL
30
+
31
+ Compare two PDF files for structural, textual, and visual differences.
32
+
33
+ Options:
34
+ --threshold FLOAT Pixelmatch threshold (0.0-1.0)
35
+ --dpi INTEGER Render resolution
36
+ --output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
37
+ -v, --verbose Increase verbosity (-v for INFO, -vv for DEBUG)
38
+ --save-log Write log output to log.txt
39
+ --version Show the version and exit.
40
+ --help Show this message and exit.
41
+ ```
42
+
43
+ **Exit Codes**
44
+
45
+ - `0` — Pass (PDFs are equivalent)
46
+ - `1` — Fail (differences detected)
47
+ - `2` — Error (invalid input or processing error)
48
+
49
+ ## Library Usage
50
+
51
+ ```python
52
+ from diffpdf import diffpdf
53
+
54
+ # Basic usage (no diff images saved)
55
+ diffpdf("reference.pdf", "actual.pdf")
56
+
57
+ # With options (save diff images to ./output directory)
58
+ diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbosity=2)
59
+ ```
60
+
61
+ ## Development
62
+
63
+ ```bash
64
+ pip install -e .[dev]
65
+ pytest tests/ -v
66
+ ruff check .
67
+ ```
68
+
69
+ ## Acknowledgements
70
+
71
+ Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-fast](https://pypi.org/project/pixelmatch-fast/) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
@@ -24,7 +24,7 @@ dependencies = [
24
24
  "click",
25
25
  "colorlog",
26
26
  "pymupdf>=1.23.0",
27
- "pixelmatch>=0.3.0",
27
+ "pixelmatch-fast>=1.1.0",
28
28
  "Pillow>=10.0.0",
29
29
  ]
30
30
 
@@ -33,13 +33,23 @@ Homepage = "https://github.com/JustusRijke/DiffPDF"
33
33
  Issues = "https://github.com/JustusRijke/DiffPDF/issues"
34
34
 
35
35
  [project.optional-dependencies]
36
- dev = ["pytest", "ruff"]
36
+ dev = [
37
+ "pytest",
38
+ "pytest-cov",
39
+ "ruff",
40
+ ]
37
41
 
38
42
  [project.scripts]
39
- diffpdf = "diffpdf:main"
43
+ diffpdf = "diffpdf.cli:cli"
40
44
 
41
45
  [tool.hatch.version]
42
46
  source = "vcs"
43
47
 
44
48
  [tool.hatch.version.raw-options]
45
49
  local_scheme = "no-local-version"
50
+
51
+ [tool.ruff]
52
+ target-version = "py310"
53
+
54
+ [tool.ruff.lint]
55
+ extend-select = ["I"] # Sort imports
@@ -0,0 +1,27 @@
1
+ from importlib.metadata import version
2
+ from pathlib import Path
3
+
4
+ from .comparators import compare_pdfs
5
+ from .logger import setup_logging
6
+
7
+ __version__ = version("diffpdf")
8
+
9
+
10
+ def diffpdf(
11
+ reference: str | Path,
12
+ actual: str | Path,
13
+ threshold: float = 0.1,
14
+ dpi: int = 96,
15
+ output_dir: str | Path | None = None,
16
+ verbosity: int = 0,
17
+ save_log: bool = False,
18
+ ) -> None:
19
+ ref_path = Path(reference) if isinstance(reference, str) else reference
20
+ actual_path = Path(actual) if isinstance(actual, str) else actual
21
+ out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
22
+
23
+ logger = setup_logging(verbosity, save_log)
24
+ compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
25
+
26
+
27
+ __all__ = ["diffpdf", "__version__"]
@@ -0,0 +1,43 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ import click
5
+
6
+ from .comparators import compare_pdfs
7
+ from .logger import setup_logging
8
+
9
+
10
+ @click.command()
11
+ @click.argument(
12
+ "reference", type=click.Path(exists=True, dir_okay=False, path_type=Path)
13
+ )
14
+ @click.argument("actual", type=click.Path(exists=True, dir_okay=False, path_type=Path))
15
+ @click.option(
16
+ "--threshold", type=float, default=0.1, help="Pixelmatch threshold (0.0-1.0)"
17
+ )
18
+ @click.option("--dpi", type=int, default=96, help="Render resolution")
19
+ @click.option(
20
+ "--output-dir",
21
+ type=click.Path(file_okay=False, path_type=Path),
22
+ default=None,
23
+ help="Diff image output directory (if not specified, no diff images are saved)",
24
+ )
25
+ @click.option(
26
+ "-v",
27
+ "--verbose",
28
+ "verbosity",
29
+ count=True,
30
+ help="Increase verbosity (-v for INFO, -vv for DEBUG)",
31
+ )
32
+ @click.option("--save-log", is_flag=True, help="Write log output to log.txt")
33
+ @click.version_option(package_name="diffpdf")
34
+ def cli(reference, actual, threshold, dpi, output_dir, verbosity, save_log):
35
+ """Compare two PDF files for structural, textual, and visual differences."""
36
+ logger = setup_logging(verbosity, save_log)
37
+ logger.debug("Debug logging enabled")
38
+
39
+ try:
40
+ compare_pdfs(reference, actual, threshold, dpi, output_dir, logger)
41
+ except Exception as e: # pragma: no cover
42
+ logger.critical(f"Error: {e}", exc_info=True)
43
+ sys.exit(2)
@@ -8,7 +8,7 @@ from .visual_check import check_visual_content
8
8
 
9
9
 
10
10
  def compare_pdfs(
11
- ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path, logger
11
+ ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
12
12
  ) -> None:
13
13
  check_hash(ref, actual, logger)
14
14
 
@@ -0,0 +1,45 @@
1
+ import logging
2
+
3
+ import colorlog
4
+
5
+ LOG_FORMAT = (
6
+ "%(asctime)s %(levelname)-8s %(filename)s:%(lineno)d (%(funcName)s): %(message)s"
7
+ )
8
+ DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
9
+ LOG_COLORS = {
10
+ "DEBUG": "cyan",
11
+ "INFO": "green",
12
+ "WARNING": "yellow",
13
+ "ERROR": "red",
14
+ "CRITICAL": "red,bg_white",
15
+ }
16
+
17
+
18
+ def setup_logging(verbosity, save_log):
19
+ if verbosity == 0:
20
+ level = logging.WARNING
21
+ elif verbosity == 1:
22
+ level = logging.INFO
23
+ else:
24
+ level = logging.DEBUG
25
+
26
+ formatter = colorlog.ColoredFormatter(
27
+ f"%(log_color)s{LOG_FORMAT}%(reset)s",
28
+ datefmt=DATE_FORMAT,
29
+ log_colors=LOG_COLORS,
30
+ )
31
+
32
+ console_handler = logging.StreamHandler()
33
+ console_handler.setFormatter(formatter)
34
+
35
+ logger = logging.getLogger()
36
+ logger.setLevel(level)
37
+ logger.addHandler(console_handler)
38
+
39
+ if save_log: # pragma: no cover
40
+ file_formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
41
+ file_handler = logging.FileHandler("log.txt")
42
+ file_handler.setFormatter(file_formatter)
43
+ logger.addHandler(file_handler)
44
+
45
+ return logger
@@ -1,6 +1,8 @@
1
1
  import difflib
2
+ import re
2
3
  import sys
3
4
  from pathlib import Path
5
+ from typing import Iterable
4
6
 
5
7
  import fitz
6
8
 
@@ -14,32 +16,34 @@ def extract_text(pdf_path: Path) -> str:
14
16
  return text.strip()
15
17
 
16
18
 
17
- def generate_diff(ref_text: str, actual_text: str) -> str:
19
+ def generate_diff(
20
+ ref_text: str, ref: Path, actual_text: str, actual: Path
21
+ ) -> Iterable[str]:
18
22
  ref_lines = ref_text.splitlines(keepends=True)
19
23
  actual_lines = actual_text.splitlines(keepends=True)
20
24
 
21
25
  diff = difflib.unified_diff(
22
26
  ref_lines,
23
27
  actual_lines,
24
- fromfile="reference.pdf",
25
- tofile="actual.pdf",
28
+ fromfile=ref.name,
29
+ tofile=actual.name,
26
30
  lineterm="",
27
31
  )
28
32
 
29
- return "".join(diff)
33
+ return diff
30
34
 
31
35
 
32
36
  def check_text_content(ref: Path, actual: Path, logger) -> None:
33
37
  logger.info("[3/4] Checking text content...")
34
38
 
35
- ref_text = extract_text(ref)
36
- actual_text = extract_text(actual)
39
+ # Extract text and remove whitespace
40
+ ref_text = re.sub(r"\s+", " ", extract_text(ref)).strip()
41
+ actual_text = re.sub(r"\s+", " ", extract_text(actual)).strip()
37
42
 
38
43
  if ref_text != actual_text:
39
- diff = generate_diff(ref_text, actual_text)
40
- logger.error("Text content mismatch")
41
- for line in diff.splitlines():
42
- logger.error(line)
44
+ diff = generate_diff(ref_text, ref, actual_text, actual)
45
+ diff_text = "\n".join(diff)
46
+ logger.error(f"Text content mismatch:\n {diff_text}")
43
47
  sys.exit(1)
44
48
 
45
49
  logger.info("Text content matches")
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
 
4
4
  import fitz
5
5
  from PIL import Image
6
- from pixelmatch.contrib.PIL import pixelmatch
6
+ from pixelmatch import pixelmatch
7
7
 
8
8
 
9
9
  def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image:
@@ -16,24 +16,28 @@ def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image
16
16
 
17
17
 
18
18
  def compare_images(
19
- ref_img: Image.Image, actual_img: Image.Image, threshold: float, output_path: Path
19
+ ref_img: Image.Image,
20
+ actual_img: Image.Image,
21
+ threshold: float,
22
+ output_path: Path | None,
20
23
  ) -> bool:
21
- diff_img = Image.new("RGB", ref_img.size)
22
- mismatch_count = pixelmatch(ref_img, actual_img, diff_img, threshold=threshold)
24
+ mismatch_count = pixelmatch(
25
+ ref_img, actual_img, diff_path=output_path, threshold=threshold
26
+ )
23
27
 
24
28
  if mismatch_count > 0:
25
- diff_img.save(output_path)
26
29
  return False
27
30
 
28
31
  return True
29
32
 
30
33
 
31
34
  def check_visual_content(
32
- ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path, logger
35
+ ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
33
36
  ) -> None:
34
37
  logger.info("[4/4] Checking visual content...")
35
38
 
36
- output_dir.mkdir(parents=True, exist_ok=True)
39
+ if output_dir is not None:
40
+ output_dir.mkdir(parents=True, exist_ok=True)
37
41
 
38
42
  ref_doc = fitz.open(ref)
39
43
  page_count = len(ref_doc)
@@ -45,11 +49,13 @@ def check_visual_content(
45
49
  ref_img = render_page_to_image(ref, page_num, dpi)
46
50
  actual_img = render_page_to_image(actual, page_num, dpi)
47
51
 
48
- ref_name = ref.stem
49
- actual_name = actual.stem
50
- output_path = (
51
- output_dir / f"{ref_name}_vs_{actual_name}_page{page_num + 1}_diff.png"
52
- )
52
+ output_path = None
53
+ if output_dir is not None:
54
+ ref_name = ref.stem
55
+ actual_name = actual.stem
56
+ output_path = (
57
+ output_dir / f"{ref_name}_vs_{actual_name}_page{page_num + 1}_diff.png"
58
+ )
53
59
 
54
60
  passed = compare_images(ref_img, actual_img, threshold, output_path)
55
61
 
@@ -0,0 +1,16 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from diffpdf import diffpdf
6
+
7
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
8
+
9
+
10
+ def test_diffpdf():
11
+ with pytest.raises(SystemExit) as exc_info:
12
+ diffpdf(
13
+ TEST_ASSETS_DIR / "pass/identical-A.pdf",
14
+ TEST_ASSETS_DIR / "pass/identical-B.pdf",
15
+ )
16
+ assert exc_info.value.code == 0
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ from click.testing import CliRunner
4
+
5
+ from diffpdf.cli import cli
6
+
7
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
8
+
9
+
10
+ def test_verbose_flag():
11
+ runner = CliRunner()
12
+ result = runner.invoke(
13
+ cli,
14
+ [
15
+ str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
16
+ str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
17
+ "-v",
18
+ ],
19
+ )
20
+ assert result.exit_code == 0
21
+ assert "INFO" in result.output
22
+ assert "DEBUG" not in result.output
23
+
24
+
25
+ def test_double_verbose_flag():
26
+ runner = CliRunner()
27
+ result = runner.invoke(
28
+ cli,
29
+ [
30
+ str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
31
+ str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
32
+ "-vv",
33
+ ],
34
+ )
35
+ assert result.exit_code == 0
36
+ assert "DEBUG" in result.output
@@ -5,6 +5,8 @@ from click.testing import CliRunner
5
5
 
6
6
  from diffpdf.cli import cli
7
7
 
8
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
9
+
8
10
 
9
11
  @pytest.mark.parametrize(
10
12
  "ref_pdf_rel,actual_pdf_rel,expected_exit_code",
@@ -22,14 +24,25 @@ from diffpdf.cli import cli
22
24
  ("nonexistent.pdf", "another.pdf", 2),
23
25
  ],
24
26
  )
25
- def test_cli(ref_pdf_rel, actual_pdf_rel, expected_exit_code):
26
- """Parametric integration test: CLI should exit with correct code for various PDF pairs."""
27
+ def test_comparators(ref_pdf_rel, actual_pdf_rel, expected_exit_code):
27
28
  runner = CliRunner()
28
- test_assets_dir = Path(__file__).parent / "assets"
29
29
 
30
- ref_pdf = str(test_assets_dir / ref_pdf_rel)
31
- actual_pdf = str(test_assets_dir / actual_pdf_rel)
30
+ ref_pdf = str(TEST_ASSETS_DIR / ref_pdf_rel)
31
+ actual_pdf = str(TEST_ASSETS_DIR / actual_pdf_rel)
32
32
 
33
33
  result = runner.invoke(cli, [ref_pdf, actual_pdf])
34
34
 
35
35
  assert result.exit_code == expected_exit_code
36
+
37
+
38
+ def test_comparators_with_output_dir():
39
+ runner = CliRunner()
40
+
41
+ with runner.isolated_filesystem():
42
+ ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf")
43
+ actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf")
44
+
45
+ result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"])
46
+
47
+ assert result.exit_code == 1
48
+ assert Path("./diff").exists()
@@ -1,41 +0,0 @@
1
- name: CI
2
-
3
- on:
4
- push:
5
- pull_request:
6
-
7
- jobs:
8
- test:
9
- runs-on: ${{ matrix.os }}
10
- strategy:
11
- matrix:
12
- os: [ubuntu-latest, windows-latest]
13
-
14
- steps:
15
- - uses: actions/checkout@v6
16
- with:
17
- fetch-depth: 0
18
-
19
- - name: Set up Python
20
- uses: actions/setup-python@v6
21
- with:
22
- python-version: "3.10"
23
-
24
- - name: Install package with dev dependencies
25
- run: pip install -e .[dev]
26
-
27
- - name: Run ruff
28
- run: ruff check .
29
-
30
- - name: Run pytest
31
- run: pytest tests/ -v
32
-
33
- - name: Verify version detection
34
- run: |
35
- VERSION=$(diffpdf --version | sed -n 's/.*version \([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p')
36
- if [ "$VERSION" = "0.0.0" ]; then
37
- echo "Error: Version is 0.0.0, setuptools-scm failed to detect version"
38
- exit 1
39
- fi
40
- echo "Version detected correctly: $VERSION"
41
- shell: bash
diffpdf-0.1.2/PKG-INFO DELETED
@@ -1,82 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: diffpdf
3
- Version: 0.1.2
4
- Summary: A tool for comparing PDF files
5
- Project-URL: Homepage, https://github.com/JustusRijke/DiffPDF
6
- Project-URL: Issues, https://github.com/JustusRijke/DiffPDF/issues
7
- Author-email: Justus Rijke <justusrijke@gmail.com>
8
- License-Expression: MIT
9
- License-File: LICENSE
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Operating System :: Microsoft :: Windows
12
- Classifier: Operating System :: POSIX :: Linux
13
- Classifier: Programming Language :: Python :: 3
14
- Requires-Python: >=3.10
15
- Requires-Dist: click
16
- Requires-Dist: colorlog
17
- Requires-Dist: pillow>=10.0.0
18
- Requires-Dist: pixelmatch>=0.3.0
19
- Requires-Dist: pymupdf>=1.23.0
20
- Provides-Extra: dev
21
- Requires-Dist: pytest; extra == 'dev'
22
- Requires-Dist: ruff; extra == 'dev'
23
- Description-Content-Type: text/markdown
24
-
25
- # DiffPDF
26
-
27
- [![CI](https://github.com/JustusRijke/DiffPDF/actions/workflows/ci.yml/badge.svg)](https://github.com/JustusRijke/DiffPDF/actions/workflows/ci.yml)
28
-
29
- CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
30
-
31
- ## Installation
32
-
33
- ```bash
34
- pip install diffpdf
35
- ```
36
-
37
- ## Usage
38
-
39
- ```bash
40
- diffpdf <baseline.pdf> <actual.pdf> [OPTIONS]
41
- ```
42
-
43
- ## How It Works
44
-
45
- DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
46
-
47
- 1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
48
- 2. **Page Count** - Verify both PDFs have the same number of pages.
49
- 3. **Text Content** - Extract and compare text from all pages.
50
- 4. **Visual Check** - Render pages to images and compare using pixelmatch.
51
-
52
- Each stage only runs if all previous stages pass.
53
-
54
- **⚠️ Performance Warning:** The Python port of pixelmatch is extremely slow.
55
-
56
- ## Options
57
-
58
- | Option | Default | Description |
59
- |--------|---------|-------------|
60
- | `--threshold` | 0.1 | Pixelmatch threshold (0.0-1.0) |
61
- | `--dpi` | 96 | Render resolution |
62
- | `--output-dir` | ./ | Directory for diff images |
63
- | `--debug` | - | Verbose logging |
64
- | `--save-log` | - | Write log to log.txt |
65
-
66
- ## Exit Codes
67
-
68
- - `0` — Pass (PDFs are equivalent)
69
- - `1` — Fail (differences detected)
70
- - `2` — Error (invalid input or processing error)
71
-
72
- ## Development
73
-
74
- ```bash
75
- pip install -e .[dev]
76
- pytest tests/ -v
77
- ruff check .
78
- ```
79
-
80
- ## Acknowledgements
81
-
82
- Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-py](https://github.com/whtsky/pixelmatch-py) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
diffpdf-0.1.2/README.md DELETED
@@ -1,58 +0,0 @@
1
- # DiffPDF
2
-
3
- [![CI](https://github.com/JustusRijke/DiffPDF/actions/workflows/ci.yml/badge.svg)](https://github.com/JustusRijke/DiffPDF/actions/workflows/ci.yml)
4
-
5
- CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
6
-
7
- ## Installation
8
-
9
- ```bash
10
- pip install diffpdf
11
- ```
12
-
13
- ## Usage
14
-
15
- ```bash
16
- diffpdf <baseline.pdf> <actual.pdf> [OPTIONS]
17
- ```
18
-
19
- ## How It Works
20
-
21
- DiffPDF uses a fail-fast sequential pipeline to compare PDFs:
22
-
23
- 1. **Hash Check** - SHA-256 comparison. If identical, exit immediately with pass.
24
- 2. **Page Count** - Verify both PDFs have the same number of pages.
25
- 3. **Text Content** - Extract and compare text from all pages.
26
- 4. **Visual Check** - Render pages to images and compare using pixelmatch.
27
-
28
- Each stage only runs if all previous stages pass.
29
-
30
- **⚠️ Performance Warning:** The Python port of pixelmatch is extremely slow.
31
-
32
- ## Options
33
-
34
- | Option | Default | Description |
35
- |--------|---------|-------------|
36
- | `--threshold` | 0.1 | Pixelmatch threshold (0.0-1.0) |
37
- | `--dpi` | 96 | Render resolution |
38
- | `--output-dir` | ./ | Directory for diff images |
39
- | `--debug` | - | Verbose logging |
40
- | `--save-log` | - | Write log to log.txt |
41
-
42
- ## Exit Codes
43
-
44
- - `0` — Pass (PDFs are equivalent)
45
- - `1` — Fail (differences detected)
46
- - `2` — Error (invalid input or processing error)
47
-
48
- ## Development
49
-
50
- ```bash
51
- pip install -e .[dev]
52
- pytest tests/ -v
53
- ruff check .
54
- ```
55
-
56
- ## Acknowledgements
57
-
58
- Built with [PyMuPDF](https://pymupdf.readthedocs.io/) for PDF parsing and [pixelmatch-py](https://github.com/whtsky/pixelmatch-py) (Python port of [pixelmatch](https://github.com/mapbox/pixelmatch)) for visual comparison.
diffpdf-0.1.2/ruff.toml DELETED
@@ -1,2 +0,0 @@
1
- target-version = "py312"
2
- lint.select = ["I"]
@@ -1,15 +0,0 @@
1
- from importlib.metadata import version
2
-
3
- from .cli import cli
4
-
5
- __version__ = version("diffpdf")
6
-
7
-
8
- def main(args=None): # pragma: no cover
9
- if args is None:
10
- cli()
11
- else:
12
- cli(args, standalone_mode=False)
13
-
14
-
15
- __all__ = ["main", "__version__"]
@@ -1,71 +0,0 @@
1
- import logging
2
- import sys
3
- from pathlib import Path
4
-
5
- import click
6
- import colorlog
7
-
8
- from .comparators import compare_pdfs
9
-
10
-
11
- def setup_logging(debug, save_log): # pragma: no cover
12
- level = logging.DEBUG if debug else logging.INFO
13
-
14
- formatter = colorlog.ColoredFormatter(
15
- "%(log_color)s%(asctime)s %(levelname)-8s%(reset)s %(message)s",
16
- datefmt="%Y-%m-%d %H:%M:%S",
17
- log_colors={
18
- "DEBUG": "cyan",
19
- "INFO": "green",
20
- "WARNING": "yellow",
21
- "ERROR": "red",
22
- "CRITICAL": "red,bg_white",
23
- },
24
- )
25
-
26
- console_handler = logging.StreamHandler()
27
- console_handler.setFormatter(formatter)
28
-
29
- logger = logging.getLogger()
30
- logger.setLevel(level)
31
- logger.addHandler(console_handler)
32
-
33
- if save_log:
34
- file_formatter = logging.Formatter(
35
- "%(asctime)s %(levelname)-8s %(message)s",
36
- datefmt="%Y-%m-%d %H:%M:%S",
37
- )
38
- file_handler = logging.FileHandler("log.txt")
39
- file_handler.setFormatter(file_formatter)
40
- logger.addHandler(file_handler)
41
-
42
- return logger
43
-
44
-
45
- @click.command()
46
- @click.argument(
47
- "reference", type=click.Path(exists=True, dir_okay=False, path_type=Path)
48
- )
49
- @click.argument("actual", type=click.Path(exists=True, dir_okay=False, path_type=Path))
50
- @click.option(
51
- "--threshold", type=float, default=0.1, help="Pixelmatch threshold (0.0-1.0)"
52
- )
53
- @click.option("--dpi", type=int, default=96, help="Render resolution")
54
- @click.option(
55
- "--output-dir",
56
- type=click.Path(file_okay=False, path_type=Path),
57
- default="./",
58
- help="Diff image output directory",
59
- )
60
- @click.option("--debug", is_flag=True, help="Verbose logging")
61
- @click.option("--save-log", is_flag=True, help="Write log output to log.txt")
62
- @click.version_option(package_name="diffpdf")
63
- def cli(reference, actual, threshold, dpi, output_dir, debug, save_log):
64
- """Compare two PDF files for structural, textual, and visual differences."""
65
- logger = setup_logging(debug, save_log)
66
-
67
- try:
68
- compare_pdfs(reference, actual, threshold, dpi, output_dir, logger)
69
- except Exception as e: # pragma: no cover
70
- logger.critical(f"Error: {e}")
71
- sys.exit(2)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes