diffpdf 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {diffpdf-0.3.0 → diffpdf-0.3.1}/PKG-INFO +1 -1
  2. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/__init__.py +2 -2
  3. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/cli.py +4 -1
  4. diffpdf-0.3.1/src/diffpdf/comparators.py +31 -0
  5. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/hash_check.py +2 -9
  6. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/page_check.py +3 -5
  7. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/text_check.py +4 -6
  8. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/visual_check.py +4 -6
  9. diffpdf-0.3.1/tests/test_api.py +12 -0
  10. diffpdf-0.3.0/src/diffpdf/comparators.py +0 -22
  11. diffpdf-0.3.0/tests/test_api.py +0 -16
  12. {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/dependabot.yml +0 -0
  13. {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/workflows/build.yml +0 -0
  14. {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/workflows/pypi-publish.yml +0 -0
  15. {diffpdf-0.3.0 → diffpdf-0.3.1}/.gitignore +0 -0
  16. {diffpdf-0.3.0 → diffpdf-0.3.1}/.vscode/settings.json +0 -0
  17. {diffpdf-0.3.0 → diffpdf-0.3.1}/LICENSE +0 -0
  18. {diffpdf-0.3.0 → diffpdf-0.3.1}/MANIFEST.in +0 -0
  19. {diffpdf-0.3.0 → diffpdf-0.3.1}/README.md +0 -0
  20. {diffpdf-0.3.0 → diffpdf-0.3.1}/hooks/pre-commit +0 -0
  21. {diffpdf-0.3.0 → diffpdf-0.3.1}/pyproject.toml +0 -0
  22. {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/logger.py +0 -0
  23. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/1-letter-diff-A.pdf +0 -0
  24. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/1-letter-diff-B.pdf +0 -0
  25. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/major-color-diff-A.pdf +0 -0
  26. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/major-color-diff-B.pdf +0 -0
  27. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/page-count-diff-A.pdf +0 -0
  28. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/page-count-diff-B.pdf +0 -0
  29. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/hash-diff-A.pdf +0 -0
  30. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/hash-diff-B.pdf +0 -0
  31. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/identical-A.pdf +0 -0
  32. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/identical-B.pdf +0 -0
  33. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/minor-color-diff-A.pdf +0 -0
  34. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/minor-color-diff-B.pdf +0 -0
  35. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/multiplatform-diff-A.pdf +0 -0
  36. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/multiplatform-diff-B.pdf +0 -0
  37. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/test_cli.py +0 -0
  38. {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/test_comparators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffpdf
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: A tool for comparing PDF files
5
5
  Project-URL: Homepage, https://github.com/JustusRijke/DiffPDF
6
6
  Project-URL: Issues, https://github.com/JustusRijke/DiffPDF/issues
@@ -15,13 +15,13 @@ def diffpdf(
15
15
  output_dir: str | Path | None = None,
16
16
  verbosity: int = 0,
17
17
  save_log: bool = False,
18
- ) -> None:
18
+ ) -> bool:
19
19
  ref_path = Path(reference) if isinstance(reference, str) else reference
20
20
  actual_path = Path(actual) if isinstance(actual, str) else actual
21
21
  out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
22
22
 
23
23
  logger = setup_logging(verbosity, save_log)
24
- compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
24
+ return compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
25
25
 
26
26
 
27
27
  __all__ = ["diffpdf", "__version__"]
@@ -37,7 +37,10 @@ def cli(reference, actual, threshold, dpi, output_dir, verbosity, save_log):
37
37
  logger.debug("Debug logging enabled")
38
38
 
39
39
  try:
40
- compare_pdfs(reference, actual, threshold, dpi, output_dir, logger)
40
+ if compare_pdfs(reference, actual, threshold, dpi, output_dir, logger):
41
+ sys.exit(0)
42
+ else:
43
+ sys.exit(1)
41
44
  except Exception as e: # pragma: no cover
42
45
  logger.critical(f"Error: {e}", exc_info=True)
43
46
  sys.exit(2)
@@ -0,0 +1,31 @@
1
+ from pathlib import Path
2
+
3
+ from .hash_check import check_hash
4
+ from .page_check import check_page_counts
5
+ from .text_check import check_text_content
6
+ from .visual_check import check_visual_content
7
+
8
+
9
+ def compare_pdfs(
10
+ ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
11
+ ) -> bool:
12
+ logger.info("[1/4] Checking file hashes...")
13
+ if check_hash(ref, actual):
14
+ logger.info("Files are identical (hash match)")
15
+ return True
16
+ logger.info("Hashes differ, continuing checks")
17
+
18
+ logger.info("[2/4] Checking page counts...")
19
+ if not check_page_counts(ref, actual, logger):
20
+ return False
21
+
22
+ logger.info("[3/4] Checking text content...")
23
+ if not check_text_content(ref, actual, logger):
24
+ return False
25
+
26
+ logger.info("[4/4] Checking visual content...")
27
+ if not check_visual_content(ref, actual, threshold, dpi, output_dir, logger):
28
+ return False
29
+
30
+ logger.info("PDFs are equivalent")
31
+ return True
@@ -1,5 +1,4 @@
1
1
  import hashlib
2
- import sys
3
2
  from pathlib import Path
4
3
 
5
4
 
@@ -11,14 +10,8 @@ def compute_file_hash(filepath: Path) -> str:
11
10
  return sha256.hexdigest()
12
11
 
13
12
 
14
- def check_hash(ref: Path, actual: Path, logger) -> None:
15
- logger.info("[1/4] Checking file hashes...")
16
-
13
+ def check_hash(ref: Path, actual: Path) -> bool:
17
14
  ref_hash = compute_file_hash(ref)
18
15
  actual_hash = compute_file_hash(actual)
19
16
 
20
- if ref_hash == actual_hash:
21
- logger.info("Files are identical (hash match)")
22
- sys.exit(0)
23
-
24
- logger.info("Hashes differ, continuing checks")
17
+ return ref_hash == actual_hash
@@ -1,4 +1,3 @@
1
- import sys
2
1
  from pathlib import Path
3
2
 
4
3
  import fitz
@@ -11,14 +10,13 @@ def get_page_count(pdf_path: Path) -> int:
11
10
  return count
12
11
 
13
12
 
14
- def check_page_counts(ref: Path, actual: Path, logger) -> None:
15
- logger.info("[2/4] Checking page counts...")
16
-
13
+ def check_page_counts(ref: Path, actual: Path, logger) -> bool:
17
14
  ref_count = get_page_count(ref)
18
15
  actual_count = get_page_count(actual)
19
16
 
20
17
  if ref_count != actual_count:
21
18
  logger.error(f"Page count mismatch: expected {ref_count}, got {actual_count}")
22
- sys.exit(1)
19
+ return False
23
20
 
24
21
  logger.info(f"Page counts match ({ref_count} pages)")
22
+ return True
@@ -1,6 +1,5 @@
1
1
  import difflib
2
2
  import re
3
- import sys
4
3
  from pathlib import Path
5
4
  from typing import Iterable
6
5
 
@@ -33,9 +32,7 @@ def generate_diff(
33
32
  return diff
34
33
 
35
34
 
36
- def check_text_content(ref: Path, actual: Path, logger) -> None:
37
- logger.info("[3/4] Checking text content...")
38
-
35
+ def check_text_content(ref: Path, actual: Path, logger) -> bool:
39
36
  # Extract text and remove whitespace
40
37
  ref_text = re.sub(r"\s+", " ", extract_text(ref)).strip()
41
38
  actual_text = re.sub(r"\s+", " ", extract_text(actual)).strip()
@@ -44,6 +41,7 @@ def check_text_content(ref: Path, actual: Path, logger) -> None:
44
41
  diff = generate_diff(ref_text, ref, actual_text, actual)
45
42
  diff_text = "\n".join(diff)
46
43
  logger.error(f"Text content mismatch:\n {diff_text}")
47
- sys.exit(1)
44
+ return False
48
45
 
49
- logger.info("Text content matches")
46
+ logger.info("Text content identical")
47
+ return True
@@ -1,4 +1,3 @@
1
- import sys
2
1
  from pathlib import Path
3
2
 
4
3
  import fitz
@@ -10,7 +9,7 @@ def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image
10
9
  doc = fitz.open(pdf_path)
11
10
  page = doc[page_num]
12
11
  pix = page.get_pixmap(dpi=dpi)
13
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
12
+ img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
14
13
  doc.close()
15
14
  return img
16
15
 
@@ -33,9 +32,7 @@ def compare_images(
33
32
 
34
33
  def check_visual_content(
35
34
  ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
36
- ) -> None:
37
- logger.info("[4/4] Checking visual content...")
38
-
35
+ ) -> bool:
39
36
  if output_dir is not None:
40
37
  output_dir.mkdir(parents=True, exist_ok=True)
41
38
 
@@ -64,6 +61,7 @@ def check_visual_content(
64
61
 
65
62
  if failing_pages:
66
63
  logger.error(f"Visual mismatch on pages: {', '.join(map(str, failing_pages))}")
67
- sys.exit(1)
64
+ return False
68
65
 
69
66
  logger.info("Visual content matches")
67
+ return True
@@ -0,0 +1,12 @@
1
+ from pathlib import Path
2
+
3
+ from diffpdf import diffpdf
4
+
5
+ TEST_ASSETS_DIR = Path(__file__).parent / "assets"
6
+
7
+
8
+ def test_diffpdf():
9
+ assert diffpdf(
10
+ TEST_ASSETS_DIR / "pass/identical-A.pdf",
11
+ TEST_ASSETS_DIR / "pass/identical-B.pdf",
12
+ )
@@ -1,22 +0,0 @@
1
- import sys
2
- from pathlib import Path
3
-
4
- from .hash_check import check_hash
5
- from .page_check import check_page_counts
6
- from .text_check import check_text_content
7
- from .visual_check import check_visual_content
8
-
9
-
10
- def compare_pdfs(
11
- ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
12
- ) -> None:
13
- check_hash(ref, actual, logger)
14
-
15
- check_page_counts(ref, actual, logger)
16
-
17
- check_text_content(ref, actual, logger)
18
-
19
- check_visual_content(ref, actual, threshold, dpi, output_dir, logger)
20
-
21
- logger.info("PDFs are equivalent")
22
- sys.exit(0)
@@ -1,16 +0,0 @@
1
- from pathlib import Path
2
-
3
- import pytest
4
-
5
- from diffpdf import diffpdf
6
-
7
- TEST_ASSETS_DIR = Path(__file__).parent / "assets"
8
-
9
-
10
- def test_diffpdf():
11
- with pytest.raises(SystemExit) as exc_info:
12
- diffpdf(
13
- TEST_ASSETS_DIR / "pass/identical-A.pdf",
14
- TEST_ASSETS_DIR / "pass/identical-B.pdf",
15
- )
16
- assert exc_info.value.code == 0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes