diffpdf 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffpdf-0.3.0 → diffpdf-0.3.1}/PKG-INFO +1 -1
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/__init__.py +2 -2
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/cli.py +4 -1
- diffpdf-0.3.1/src/diffpdf/comparators.py +31 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/hash_check.py +2 -9
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/page_check.py +3 -5
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/text_check.py +4 -6
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/visual_check.py +4 -6
- diffpdf-0.3.1/tests/test_api.py +12 -0
- diffpdf-0.3.0/src/diffpdf/comparators.py +0 -22
- diffpdf-0.3.0/tests/test_api.py +0 -16
- {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/dependabot.yml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/workflows/build.yml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/.github/workflows/pypi-publish.yml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/.gitignore +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/.vscode/settings.json +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/LICENSE +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/MANIFEST.in +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/README.md +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/hooks/pre-commit +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/pyproject.toml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/src/diffpdf/logger.py +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/1-letter-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/1-letter-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/major-color-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/major-color-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/page-count-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/fail/page-count-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/hash-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/hash-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/identical-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/identical-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/minor-color-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/minor-color-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/multiplatform-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/assets/pass/multiplatform-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/test_cli.py +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.1}/tests/test_comparators.py +0 -0
|
@@ -15,13 +15,13 @@ def diffpdf(
|
|
|
15
15
|
output_dir: str | Path | None = None,
|
|
16
16
|
verbosity: int = 0,
|
|
17
17
|
save_log: bool = False,
|
|
18
|
-
) ->
|
|
18
|
+
) -> bool:
|
|
19
19
|
ref_path = Path(reference) if isinstance(reference, str) else reference
|
|
20
20
|
actual_path = Path(actual) if isinstance(actual, str) else actual
|
|
21
21
|
out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
|
|
22
22
|
|
|
23
23
|
logger = setup_logging(verbosity, save_log)
|
|
24
|
-
compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
|
|
24
|
+
return compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
__all__ = ["diffpdf", "__version__"]
|
|
@@ -37,7 +37,10 @@ def cli(reference, actual, threshold, dpi, output_dir, verbosity, save_log):
|
|
|
37
37
|
logger.debug("Debug logging enabled")
|
|
38
38
|
|
|
39
39
|
try:
|
|
40
|
-
compare_pdfs(reference, actual, threshold, dpi, output_dir, logger)
|
|
40
|
+
if compare_pdfs(reference, actual, threshold, dpi, output_dir, logger):
|
|
41
|
+
sys.exit(0)
|
|
42
|
+
else:
|
|
43
|
+
sys.exit(1)
|
|
41
44
|
except Exception as e: # pragma: no cover
|
|
42
45
|
logger.critical(f"Error: {e}", exc_info=True)
|
|
43
46
|
sys.exit(2)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from .hash_check import check_hash
|
|
4
|
+
from .page_check import check_page_counts
|
|
5
|
+
from .text_check import check_text_content
|
|
6
|
+
from .visual_check import check_visual_content
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def compare_pdfs(
|
|
10
|
+
ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
|
|
11
|
+
) -> bool:
|
|
12
|
+
logger.info("[1/4] Checking file hashes...")
|
|
13
|
+
if check_hash(ref, actual):
|
|
14
|
+
logger.info("Files are identical (hash match)")
|
|
15
|
+
return True
|
|
16
|
+
logger.info("Hashes differ, continuing checks")
|
|
17
|
+
|
|
18
|
+
logger.info("[2/4] Checking page counts...")
|
|
19
|
+
if not check_page_counts(ref, actual, logger):
|
|
20
|
+
return False
|
|
21
|
+
|
|
22
|
+
logger.info("[3/4] Checking text content...")
|
|
23
|
+
if not check_text_content(ref, actual, logger):
|
|
24
|
+
return False
|
|
25
|
+
|
|
26
|
+
logger.info("[4/4] Checking visual content...")
|
|
27
|
+
if not check_visual_content(ref, actual, threshold, dpi, output_dir, logger):
|
|
28
|
+
return False
|
|
29
|
+
|
|
30
|
+
logger.info("PDFs are equivalent")
|
|
31
|
+
return True
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
-
import sys
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
|
|
@@ -11,14 +10,8 @@ def compute_file_hash(filepath: Path) -> str:
|
|
|
11
10
|
return sha256.hexdigest()
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def check_hash(ref: Path, actual: Path
|
|
15
|
-
logger.info("[1/4] Checking file hashes...")
|
|
16
|
-
|
|
13
|
+
def check_hash(ref: Path, actual: Path) -> bool:
|
|
17
14
|
ref_hash = compute_file_hash(ref)
|
|
18
15
|
actual_hash = compute_file_hash(actual)
|
|
19
16
|
|
|
20
|
-
|
|
21
|
-
logger.info("Files are identical (hash match)")
|
|
22
|
-
sys.exit(0)
|
|
23
|
-
|
|
24
|
-
logger.info("Hashes differ, continuing checks")
|
|
17
|
+
return ref_hash == actual_hash
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import sys
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
import fitz
|
|
@@ -11,14 +10,13 @@ def get_page_count(pdf_path: Path) -> int:
|
|
|
11
10
|
return count
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def check_page_counts(ref: Path, actual: Path, logger) -> None:
|
|
15
|
-
logger.info("[2/4] Checking page counts...")
|
|
16
|
-
|
|
13
|
+
def check_page_counts(ref: Path, actual: Path, logger) -> bool:
|
|
17
14
|
ref_count = get_page_count(ref)
|
|
18
15
|
actual_count = get_page_count(actual)
|
|
19
16
|
|
|
20
17
|
if ref_count != actual_count:
|
|
21
18
|
logger.error(f"Page count mismatch: expected {ref_count}, got {actual_count}")
|
|
22
|
-
|
|
19
|
+
return False
|
|
23
20
|
|
|
24
21
|
logger.info(f"Page counts match ({ref_count} pages)")
|
|
22
|
+
return True
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import difflib
|
|
2
2
|
import re
|
|
3
|
-
import sys
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from typing import Iterable
|
|
6
5
|
|
|
@@ -33,9 +32,7 @@ def generate_diff(
|
|
|
33
32
|
return diff
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def check_text_content(ref: Path, actual: Path, logger) -> None:
|
|
37
|
-
logger.info("[3/4] Checking text content...")
|
|
38
|
-
|
|
35
|
+
def check_text_content(ref: Path, actual: Path, logger) -> bool:
|
|
39
36
|
# Extract text and remove whitespace
|
|
40
37
|
ref_text = re.sub(r"\s+", " ", extract_text(ref)).strip()
|
|
41
38
|
actual_text = re.sub(r"\s+", " ", extract_text(actual)).strip()
|
|
@@ -44,6 +41,7 @@ def check_text_content(ref: Path, actual: Path, logger) -> None:
|
|
|
44
41
|
diff = generate_diff(ref_text, ref, actual_text, actual)
|
|
45
42
|
diff_text = "\n".join(diff)
|
|
46
43
|
logger.error(f"Text content mismatch:\n {diff_text}")
|
|
47
|
-
|
|
44
|
+
return False
|
|
48
45
|
|
|
49
|
-
logger.info("Text content
|
|
46
|
+
logger.info("Text content identical")
|
|
47
|
+
return True
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import sys
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
import fitz
|
|
@@ -10,7 +9,7 @@ def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image
|
|
|
10
9
|
doc = fitz.open(pdf_path)
|
|
11
10
|
page = doc[page_num]
|
|
12
11
|
pix = page.get_pixmap(dpi=dpi)
|
|
13
|
-
img = Image.frombytes("RGB",
|
|
12
|
+
img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
|
|
14
13
|
doc.close()
|
|
15
14
|
return img
|
|
16
15
|
|
|
@@ -33,9 +32,7 @@ def compare_images(
|
|
|
33
32
|
|
|
34
33
|
def check_visual_content(
|
|
35
34
|
ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
|
|
36
|
-
) -> None:
|
|
37
|
-
logger.info("[4/4] Checking visual content...")
|
|
38
|
-
|
|
35
|
+
) -> bool:
|
|
39
36
|
if output_dir is not None:
|
|
40
37
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
41
38
|
|
|
@@ -64,6 +61,7 @@ def check_visual_content(
|
|
|
64
61
|
|
|
65
62
|
if failing_pages:
|
|
66
63
|
logger.error(f"Visual mismatch on pages: {', '.join(map(str, failing_pages))}")
|
|
67
|
-
|
|
64
|
+
return False
|
|
68
65
|
|
|
69
66
|
logger.info("Visual content matches")
|
|
67
|
+
return True
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from .hash_check import check_hash
|
|
5
|
-
from .page_check import check_page_counts
|
|
6
|
-
from .text_check import check_text_content
|
|
7
|
-
from .visual_check import check_visual_content
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def compare_pdfs(
|
|
11
|
-
ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
|
|
12
|
-
) -> None:
|
|
13
|
-
check_hash(ref, actual, logger)
|
|
14
|
-
|
|
15
|
-
check_page_counts(ref, actual, logger)
|
|
16
|
-
|
|
17
|
-
check_text_content(ref, actual, logger)
|
|
18
|
-
|
|
19
|
-
check_visual_content(ref, actual, threshold, dpi, output_dir, logger)
|
|
20
|
-
|
|
21
|
-
logger.info("PDFs are equivalent")
|
|
22
|
-
sys.exit(0)
|
diffpdf-0.3.0/tests/test_api.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from diffpdf import diffpdf
|
|
6
|
-
|
|
7
|
-
TEST_ASSETS_DIR = Path(__file__).parent / "assets"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def test_diffpdf():
|
|
11
|
-
with pytest.raises(SystemExit) as exc_info:
|
|
12
|
-
diffpdf(
|
|
13
|
-
TEST_ASSETS_DIR / "pass/identical-A.pdf",
|
|
14
|
-
TEST_ASSETS_DIR / "pass/identical-B.pdf",
|
|
15
|
-
)
|
|
16
|
-
assert exc_info.value.code == 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|