diffpdf 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffpdf-0.3.0 → diffpdf-0.3.2}/.github/workflows/build.yml +12 -2
- diffpdf-0.3.2/.vscode/extensions.json +17 -0
- diffpdf-0.3.2/.vscode/settings.json +18 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/PKG-INFO +7 -6
- {diffpdf-0.3.0 → diffpdf-0.3.2}/README.md +3 -3
- {diffpdf-0.3.0 → diffpdf-0.3.2}/hooks/pre-commit +15 -15
- diffpdf-0.3.2/mypy.ini +4 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/pyproject.toml +3 -2
- diffpdf-0.3.2/src/diffpdf/__init__.py +49 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/src/diffpdf/cli.py +16 -10
- {diffpdf-0.3.0 → diffpdf-0.3.2}/src/diffpdf/hash_check.py +2 -9
- diffpdf-0.3.2/src/diffpdf/logger.py +24 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/src/diffpdf/page_check.py +5 -5
- diffpdf-0.3.2/src/diffpdf/py.typed +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/src/diffpdf/text_check.py +6 -6
- {diffpdf-0.3.0 → diffpdf-0.3.2}/src/diffpdf/visual_check.py +12 -8
- diffpdf-0.3.2/tests/test_api.py +31 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/test_cli.py +14 -15
- diffpdf-0.3.0/.vscode/settings.json +0 -7
- diffpdf-0.3.0/src/diffpdf/__init__.py +0 -27
- diffpdf-0.3.0/src/diffpdf/comparators.py +0 -22
- diffpdf-0.3.0/src/diffpdf/logger.py +0 -45
- diffpdf-0.3.0/tests/test_api.py +0 -16
- diffpdf-0.3.0/tests/test_comparators.py +0 -48
- {diffpdf-0.3.0 → diffpdf-0.3.2}/.github/dependabot.yml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/.github/workflows/pypi-publish.yml +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/.gitignore +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/LICENSE +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/MANIFEST.in +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/1-letter-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/1-letter-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/major-color-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/major-color-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/page-count-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/fail/page-count-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/hash-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/hash-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/identical-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/identical-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/minor-color-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/minor-color-diff-B.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/multiplatform-diff-A.pdf +0 -0
- {diffpdf-0.3.0 → diffpdf-0.3.2}/tests/assets/pass/multiplatform-diff-B.pdf +0 -0
|
@@ -23,13 +23,23 @@ jobs:
|
|
|
23
23
|
- name: Install package with dev dependencies
|
|
24
24
|
run: pip install -e .[dev]
|
|
25
25
|
|
|
26
|
-
- name: Run ruff
|
|
27
|
-
|
|
26
|
+
- name: Run ruff (check)
|
|
27
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
|
|
28
|
+
run: ruff check
|
|
29
|
+
|
|
30
|
+
- name: Run ruff (format)
|
|
31
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
|
|
32
|
+
run: ruff format --check
|
|
33
|
+
|
|
34
|
+
- name: Run mypy
|
|
35
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
|
|
36
|
+
run: mypy .
|
|
28
37
|
|
|
29
38
|
- name: Run pytest
|
|
30
39
|
run: pytest tests/ -v --cov --cov-branch --cov-report=xml
|
|
31
40
|
|
|
32
41
|
- name: Upload coverage reports to Codecov
|
|
42
|
+
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'
|
|
33
43
|
uses: codecov/codecov-action@v5
|
|
34
44
|
with:
|
|
35
45
|
token: ${{ secrets.CODECOV_TOKEN }}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
// See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations.
|
|
3
|
+
// Extension identifier format: ${publisher}.${name}. Example: vscode.csharp
|
|
4
|
+
|
|
5
|
+
// List of extensions which should be recommended for users of this workspace.
|
|
6
|
+
"recommendations": [
|
|
7
|
+
"ms-python.python",
|
|
8
|
+
"matangover.mypy",
|
|
9
|
+
"charliermarsh.ruff",
|
|
10
|
+
"ryanluker.vscode-coverage-gutters",
|
|
11
|
+
"astral-sh.ty"
|
|
12
|
+
],
|
|
13
|
+
// List of extensions recommended by VS Code that should not be recommended for users of this workspace.
|
|
14
|
+
"unwantedRecommendations": [
|
|
15
|
+
"ms-python.vscode-pylance"
|
|
16
|
+
]
|
|
17
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
// Format with Ruff on save & sort imports
|
|
3
|
+
"editor.formatOnSave": true,
|
|
4
|
+
"editor.defaultFormatter": "charliermarsh.ruff",
|
|
5
|
+
"editor.codeActionsOnSave": {
|
|
6
|
+
"source.organizeImports.ruff": "explicit"
|
|
7
|
+
},
|
|
8
|
+
|
|
9
|
+
// Enable MyPy type checker
|
|
10
|
+
"mypy.runUsingActiveInterpreter": true,
|
|
11
|
+
|
|
12
|
+
// Configure Pytest
|
|
13
|
+
"python.testing.pytestArgs": [
|
|
14
|
+
"tests"
|
|
15
|
+
],
|
|
16
|
+
"python.testing.unittestEnabled": false,
|
|
17
|
+
"python.testing.pytestEnabled": true
|
|
18
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diffpdf
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: A tool for comparing PDF files
|
|
5
5
|
Project-URL: Homepage, https://github.com/JustusRijke/DiffPDF
|
|
6
6
|
Project-URL: Issues, https://github.com/JustusRijke/DiffPDF/issues
|
|
@@ -11,13 +11,14 @@ Classifier: Development Status :: 4 - Beta
|
|
|
11
11
|
Classifier: Operating System :: Microsoft :: Windows
|
|
12
12
|
Classifier: Operating System :: POSIX :: Linux
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Typing :: Typed
|
|
14
15
|
Requires-Python: >=3.10
|
|
15
16
|
Requires-Dist: click
|
|
16
|
-
Requires-Dist: colorlog
|
|
17
17
|
Requires-Dist: pillow>=10.0.0
|
|
18
|
-
Requires-Dist: pixelmatch-fast>=1.
|
|
18
|
+
Requires-Dist: pixelmatch-fast>=1.3.0
|
|
19
19
|
Requires-Dist: pymupdf>=1.23.0
|
|
20
20
|
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
21
22
|
Requires-Dist: pytest; extra == 'dev'
|
|
22
23
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
23
24
|
Requires-Dist: ruff; extra == 'dev'
|
|
@@ -29,6 +30,7 @@ Description-Content-Type: text/markdown
|
|
|
29
30
|
[](https://codecov.io/gh/JustusRijke/DiffPDF)
|
|
30
31
|
[](https://www.python.org/downloads/)
|
|
31
32
|
[](LICENSE)
|
|
33
|
+
[](https://pypi.org/project/DiffPDF/)
|
|
32
34
|
|
|
33
35
|
CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
|
|
34
36
|
|
|
@@ -59,8 +61,7 @@ Options:
|
|
|
59
61
|
--threshold FLOAT Pixelmatch threshold (0.0-1.0)
|
|
60
62
|
--dpi INTEGER Render resolution
|
|
61
63
|
--output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
|
|
62
|
-
-v, --verbose Increase verbosity
|
|
63
|
-
--save-log Write log output to log.txt
|
|
64
|
+
-v, --verbose Increase verbosity
|
|
64
65
|
--version Show the version and exit.
|
|
65
66
|
--help Show this message and exit.
|
|
66
67
|
```
|
|
@@ -80,7 +81,7 @@ from diffpdf import diffpdf
|
|
|
80
81
|
diffpdf("reference.pdf", "actual.pdf")
|
|
81
82
|
|
|
82
83
|
# With options (save diff images to ./output directory)
|
|
83
|
-
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150,
|
|
84
|
+
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbose=True)
|
|
84
85
|
```
|
|
85
86
|
|
|
86
87
|
## Development
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
[](https://codecov.io/gh/JustusRijke/DiffPDF)
|
|
5
5
|
[](https://www.python.org/downloads/)
|
|
6
6
|
[](LICENSE)
|
|
7
|
+
[](https://pypi.org/project/DiffPDF/)
|
|
7
8
|
|
|
8
9
|
CLI tool for detecting structural, textual, and visual differences between PDF files, for use in automatic regression tests.
|
|
9
10
|
|
|
@@ -34,8 +35,7 @@ Options:
|
|
|
34
35
|
--threshold FLOAT Pixelmatch threshold (0.0-1.0)
|
|
35
36
|
--dpi INTEGER Render resolution
|
|
36
37
|
--output-dir DIRECTORY Diff image output directory (optional, if not specified no diff images are saved)
|
|
37
|
-
-v, --verbose Increase verbosity
|
|
38
|
-
--save-log Write log output to log.txt
|
|
38
|
+
-v, --verbose Increase verbosity
|
|
39
39
|
--version Show the version and exit.
|
|
40
40
|
--help Show this message and exit.
|
|
41
41
|
```
|
|
@@ -55,7 +55,7 @@ from diffpdf import diffpdf
|
|
|
55
55
|
diffpdf("reference.pdf", "actual.pdf")
|
|
56
56
|
|
|
57
57
|
# With options (save diff images to ./output directory)
|
|
58
|
-
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150,
|
|
58
|
+
diffpdf("reference.pdf", "actual.pdf", output_dir="./output", threshold=0.2, dpi=150, verbose=True)
|
|
59
59
|
```
|
|
60
60
|
|
|
61
61
|
## Development
|
|
@@ -39,21 +39,21 @@ EOF
|
|
|
39
39
|
fi
|
|
40
40
|
|
|
41
41
|
# Ruff checks
|
|
42
|
-
ruff check
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
ruff format --check
|
|
46
|
-
FORMAT_EXIT=$?
|
|
47
|
-
|
|
48
|
-
if [ $CHECK_EXIT -ne 0 ] || [ $FORMAT_EXIT -ne 0 ]; then
|
|
49
|
-
if [ $CHECK_EXIT -ne 0 ]; then
|
|
50
|
-
echo "Ruff found linting errors. Run: ruff check --fix"
|
|
51
|
-
fi
|
|
52
|
-
if [ $FORMAT_EXIT -ne 0 ]; then
|
|
53
|
-
echo "Ruff found formatting issues. Run: ruff format"
|
|
54
|
-
fi
|
|
42
|
+
ruff check -q
|
|
43
|
+
if [ $? -ne 0 ]; then
|
|
44
|
+
echo "Ruff found linting errors. Run: ruff check --fix"
|
|
55
45
|
exit 1
|
|
56
46
|
fi
|
|
57
47
|
|
|
58
|
-
|
|
59
|
-
|
|
48
|
+
ruff format --check -q
|
|
49
|
+
if [ $? -ne 0 ]; then
|
|
50
|
+
echo "Ruff found formatting issues. Run: ruff format"
|
|
51
|
+
exit 1
|
|
52
|
+
fi
|
|
53
|
+
|
|
54
|
+
# Mypy checks
|
|
55
|
+
mypy .
|
|
56
|
+
if [ $? -ne 0 ]; then
|
|
57
|
+
echo "Mypy found type errors. Fix them before committing."
|
|
58
|
+
exit 1
|
|
59
|
+
fi
|
diffpdf-0.3.2/mypy.ini
ADDED
|
@@ -19,12 +19,12 @@ classifiers = [
|
|
|
19
19
|
"Development Status :: 4 - Beta",
|
|
20
20
|
"Operating System :: Microsoft :: Windows",
|
|
21
21
|
"Operating System :: POSIX :: Linux",
|
|
22
|
+
"Typing :: Typed",
|
|
22
23
|
]
|
|
23
24
|
dependencies = [
|
|
24
25
|
"click",
|
|
25
|
-
"colorlog",
|
|
26
26
|
"pymupdf>=1.23.0",
|
|
27
|
-
"pixelmatch-fast>=1.
|
|
27
|
+
"pixelmatch-fast>=1.3.0",
|
|
28
28
|
"Pillow>=10.0.0",
|
|
29
29
|
]
|
|
30
30
|
|
|
@@ -37,6 +37,7 @@ dev = [
|
|
|
37
37
|
"pytest",
|
|
38
38
|
"pytest-cov",
|
|
39
39
|
"ruff",
|
|
40
|
+
"mypy"
|
|
40
41
|
]
|
|
41
42
|
|
|
42
43
|
[project.scripts]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from importlib.metadata import version
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .hash_check import check_hash
|
|
5
|
+
from .logger import setup_logging
|
|
6
|
+
from .page_check import check_page_counts
|
|
7
|
+
from .text_check import check_text_content
|
|
8
|
+
from .visual_check import check_visual_content
|
|
9
|
+
|
|
10
|
+
__version__ = version("diffpdf")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def diffpdf(
|
|
14
|
+
reference: str | Path,
|
|
15
|
+
actual: str | Path,
|
|
16
|
+
threshold: float = 0.1,
|
|
17
|
+
dpi: int = 96,
|
|
18
|
+
output_dir: str | Path | None = None,
|
|
19
|
+
verbose: bool = False,
|
|
20
|
+
) -> bool:
|
|
21
|
+
ref_path = Path(reference) if isinstance(reference, str) else reference
|
|
22
|
+
actual_path = Path(actual) if isinstance(actual, str) else actual
|
|
23
|
+
out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
|
|
24
|
+
|
|
25
|
+
logger = setup_logging(verbose)
|
|
26
|
+
|
|
27
|
+
logger.info("[1/4] Checking file hashes...")
|
|
28
|
+
if check_hash(ref_path, actual_path):
|
|
29
|
+
logger.info("Files are identical (hash match)")
|
|
30
|
+
return True
|
|
31
|
+
logger.info("Hashes differ, continuing checks")
|
|
32
|
+
|
|
33
|
+
logger.info("[2/4] Checking page counts...")
|
|
34
|
+
if not check_page_counts(ref_path, actual_path):
|
|
35
|
+
return False
|
|
36
|
+
|
|
37
|
+
logger.info("[3/4] Checking text content...")
|
|
38
|
+
if not check_text_content(ref_path, actual_path):
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
logger.info("[4/4] Checking visual content...")
|
|
42
|
+
if not check_visual_content(ref_path, actual_path, threshold, dpi, out_path):
|
|
43
|
+
return False
|
|
44
|
+
|
|
45
|
+
logger.info("PDFs are equivalent")
|
|
46
|
+
return True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
__all__ = ["diffpdf", "__version__"]
|
|
@@ -3,7 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
import click
|
|
5
5
|
|
|
6
|
-
from .comparators import compare_pdfs
|
|
6
|
+
from . import diffpdf
|
|
7
7
|
from .logger import setup_logging
|
|
8
8
|
|
|
9
9
|
|
|
@@ -25,19 +25,25 @@ from .logger import setup_logging
|
|
|
25
25
|
@click.option(
|
|
26
26
|
"-v",
|
|
27
27
|
"--verbose",
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
help="Increase verbosity (-v for INFO, -vv for DEBUG)",
|
|
28
|
+
is_flag=True,
|
|
29
|
+
help="Increase verbosity",
|
|
31
30
|
)
|
|
32
|
-
@click.option("--save-log", is_flag=True, help="Write log output to log.txt")
|
|
33
31
|
@click.version_option(package_name="diffpdf")
|
|
34
|
-
def cli(
|
|
32
|
+
def cli(
|
|
33
|
+
reference: Path,
|
|
34
|
+
actual: Path,
|
|
35
|
+
threshold: float,
|
|
36
|
+
dpi: int,
|
|
37
|
+
output_dir: Path | None,
|
|
38
|
+
verbose: bool,
|
|
39
|
+
) -> None:
|
|
35
40
|
"""Compare two PDF files for structural, textual, and visual differences."""
|
|
36
|
-
logger = setup_logging(verbosity, save_log)
|
|
37
|
-
logger.debug("Debug logging enabled")
|
|
38
|
-
|
|
39
41
|
try:
|
|
40
|
-
|
|
42
|
+
if diffpdf(reference, actual, threshold, dpi, output_dir, verbose):
|
|
43
|
+
sys.exit(0)
|
|
44
|
+
else:
|
|
45
|
+
sys.exit(1)
|
|
41
46
|
except Exception as e: # pragma: no cover
|
|
47
|
+
logger = setup_logging(verbose)
|
|
42
48
|
logger.critical(f"Error: {e}", exc_info=True)
|
|
43
49
|
sys.exit(2)
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
-
import sys
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
|
|
@@ -11,14 +10,8 @@ def compute_file_hash(filepath: Path) -> str:
|
|
|
11
10
|
return sha256.hexdigest()
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def check_hash(ref: Path, actual: Path, logger) -> None:
|
|
15
|
-
logger.info("[1/4] Checking file hashes...")
|
|
16
|
-
|
|
13
|
+
def check_hash(ref: Path, actual: Path) -> bool:
|
|
17
14
|
ref_hash = compute_file_hash(ref)
|
|
18
15
|
actual_hash = compute_file_hash(actual)
|
|
19
16
|
|
|
20
|
-
|
|
21
|
-
logger.info("Files are identical (hash match)")
|
|
22
|
-
sys.exit(0)
|
|
23
|
-
|
|
24
|
-
logger.info("Hashes differ, continuing checks")
|
|
17
|
+
return ref_hash == actual_hash
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
LOG_FORMAT = (
|
|
4
|
+
"%(asctime)s %(levelname)-8s %(filename)s:%(lineno)d (%(funcName)s): %(message)s"
|
|
5
|
+
)
|
|
6
|
+
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def setup_logging(verbose: bool) -> logging.Logger:
|
|
10
|
+
if verbose:
|
|
11
|
+
level = logging.INFO
|
|
12
|
+
else:
|
|
13
|
+
level = logging.WARNING
|
|
14
|
+
|
|
15
|
+
formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
|
|
16
|
+
|
|
17
|
+
console_handler = logging.StreamHandler()
|
|
18
|
+
console_handler.setFormatter(formatter)
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger()
|
|
21
|
+
logger.setLevel(level)
|
|
22
|
+
logger.addHandler(console_handler)
|
|
23
|
+
|
|
24
|
+
return logger
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import fitz
|
|
@@ -11,14 +11,14 @@ def get_page_count(pdf_path: Path) -> int:
|
|
|
11
11
|
return count
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def check_page_counts(ref: Path, actual: Path, logger) -> None:
|
|
15
|
-
logger.
|
|
16
|
-
|
|
14
|
+
def check_page_counts(ref: Path, actual: Path) -> bool:
|
|
15
|
+
logger = logging.getLogger()
|
|
17
16
|
ref_count = get_page_count(ref)
|
|
18
17
|
actual_count = get_page_count(actual)
|
|
19
18
|
|
|
20
19
|
if ref_count != actual_count:
|
|
21
20
|
logger.error(f"Page count mismatch: expected {ref_count}, got {actual_count}")
|
|
22
|
-
|
|
21
|
+
return False
|
|
23
22
|
|
|
24
23
|
logger.info(f"Page counts match ({ref_count} pages)")
|
|
24
|
+
return True
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import difflib
|
|
2
|
+
import logging
|
|
2
3
|
import re
|
|
3
|
-
import sys
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Iterable
|
|
6
6
|
|
|
@@ -33,9 +33,8 @@ def generate_diff(
|
|
|
33
33
|
return diff
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def check_text_content(ref: Path, actual: Path, logger) -> None:
|
|
37
|
-
logger.
|
|
38
|
-
|
|
36
|
+
def check_text_content(ref: Path, actual: Path) -> bool:
|
|
37
|
+
logger = logging.getLogger()
|
|
39
38
|
# Extract text and remove whitespace
|
|
40
39
|
ref_text = re.sub(r"\s+", " ", extract_text(ref)).strip()
|
|
41
40
|
actual_text = re.sub(r"\s+", " ", extract_text(actual)).strip()
|
|
@@ -44,6 +43,7 @@ def check_text_content(ref: Path, actual: Path, logger) -> None:
|
|
|
44
43
|
diff = generate_diff(ref_text, ref, actual_text, actual)
|
|
45
44
|
diff_text = "\n".join(diff)
|
|
46
45
|
logger.error(f"Text content mismatch:\n {diff_text}")
|
|
47
|
-
|
|
46
|
+
return False
|
|
48
47
|
|
|
49
|
-
logger.info("Text content
|
|
48
|
+
logger.info("Text content identical")
|
|
49
|
+
return True
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import fitz
|
|
@@ -10,7 +10,7 @@ def render_page_to_image(pdf_path: Path, page_num: int, dpi: int) -> Image.Image
|
|
|
10
10
|
doc = fitz.open(pdf_path)
|
|
11
11
|
page = doc[page_num]
|
|
12
12
|
pix = page.get_pixmap(dpi=dpi)
|
|
13
|
-
img = Image.frombytes("RGB",
|
|
13
|
+
img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
|
|
14
14
|
doc.close()
|
|
15
15
|
return img
|
|
16
16
|
|
|
@@ -22,7 +22,7 @@ def compare_images(
|
|
|
22
22
|
output_path: Path | None,
|
|
23
23
|
) -> bool:
|
|
24
24
|
mismatch_count = pixelmatch(
|
|
25
|
-
ref_img, actual_img,
|
|
25
|
+
ref_img, actual_img, output=output_path, threshold=threshold
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
if mismatch_count > 0:
|
|
@@ -32,10 +32,13 @@ def compare_images(
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def check_visual_content(
|
|
35
|
-
ref: Path,
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
ref: Path,
|
|
36
|
+
actual: Path,
|
|
37
|
+
threshold: float,
|
|
38
|
+
dpi: int,
|
|
39
|
+
output_dir: Path | None,
|
|
40
|
+
) -> bool:
|
|
41
|
+
logger = logging.getLogger()
|
|
39
42
|
if output_dir is not None:
|
|
40
43
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
41
44
|
|
|
@@ -64,6 +67,7 @@ def check_visual_content(
|
|
|
64
67
|
|
|
65
68
|
if failing_pages:
|
|
66
69
|
logger.error(f"Visual mismatch on pages: {', '.join(map(str, failing_pages))}")
|
|
67
|
-
|
|
70
|
+
return False
|
|
68
71
|
|
|
69
72
|
logger.info("Visual content matches")
|
|
73
|
+
return True
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# type: ignore
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from diffpdf import diffpdf
|
|
7
|
+
|
|
8
|
+
TEST_ASSETS_DIR = Path(__file__).parent / "assets"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.parametrize(
|
|
12
|
+
"ref_pdf_rel,actual_pdf_rel,should_pass",
|
|
13
|
+
[
|
|
14
|
+
# Pass cases
|
|
15
|
+
("pass/identical-A.pdf", "pass/identical-B.pdf", True),
|
|
16
|
+
("pass/hash-diff-A.pdf", "pass/hash-diff-B.pdf", True),
|
|
17
|
+
("pass/minor-color-diff-A.pdf", "pass/minor-color-diff-B.pdf", True),
|
|
18
|
+
("pass/multiplatform-diff-A.pdf", "pass/multiplatform-diff-B.pdf", True),
|
|
19
|
+
# Fail cases
|
|
20
|
+
("fail/1-letter-diff-A.pdf", "fail/1-letter-diff-B.pdf", False),
|
|
21
|
+
("fail/major-color-diff-A.pdf", "fail/major-color-diff-B.pdf", False),
|
|
22
|
+
("fail/page-count-diff-A.pdf", "fail/page-count-diff-B.pdf", False),
|
|
23
|
+
],
|
|
24
|
+
)
|
|
25
|
+
def test_api(ref_pdf_rel, actual_pdf_rel, should_pass):
|
|
26
|
+
ref_pdf = TEST_ASSETS_DIR / ref_pdf_rel
|
|
27
|
+
actual_pdf = TEST_ASSETS_DIR / actual_pdf_rel
|
|
28
|
+
|
|
29
|
+
result = diffpdf(ref_pdf, actual_pdf)
|
|
30
|
+
|
|
31
|
+
assert result == should_pass
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# type: ignore
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from click.testing import CliRunner
|
|
@@ -7,30 +8,28 @@ from diffpdf.cli import cli
|
|
|
7
8
|
TEST_ASSETS_DIR = Path(__file__).parent / "assets"
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
def
|
|
11
|
+
def test_cli_with_output_dir():
|
|
11
12
|
runner = CliRunner()
|
|
12
|
-
result = runner.invoke(
|
|
13
|
-
cli,
|
|
14
|
-
[
|
|
15
|
-
str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
|
|
16
|
-
str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
|
|
17
|
-
"-v",
|
|
18
|
-
],
|
|
19
|
-
)
|
|
20
|
-
assert result.exit_code == 0
|
|
21
|
-
assert "INFO" in result.output
|
|
22
|
-
assert "DEBUG" not in result.output
|
|
23
13
|
|
|
14
|
+
with runner.isolated_filesystem():
|
|
15
|
+
ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf")
|
|
16
|
+
actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf")
|
|
17
|
+
|
|
18
|
+
result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"])
|
|
19
|
+
|
|
20
|
+
assert result.exit_code == 1
|
|
21
|
+
assert Path("./diff").exists()
|
|
24
22
|
|
|
25
|
-
|
|
23
|
+
|
|
24
|
+
def test_verbose_flag():
|
|
26
25
|
runner = CliRunner()
|
|
27
26
|
result = runner.invoke(
|
|
28
27
|
cli,
|
|
29
28
|
[
|
|
30
29
|
str(TEST_ASSETS_DIR / "pass/identical-A.pdf"),
|
|
31
30
|
str(TEST_ASSETS_DIR / "pass/identical-B.pdf"),
|
|
32
|
-
"-
|
|
31
|
+
"-v",
|
|
33
32
|
],
|
|
34
33
|
)
|
|
35
34
|
assert result.exit_code == 0
|
|
36
|
-
assert "
|
|
35
|
+
assert "INFO" in result.output
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from importlib.metadata import version
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from .comparators import compare_pdfs
|
|
5
|
-
from .logger import setup_logging
|
|
6
|
-
|
|
7
|
-
__version__ = version("diffpdf")
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def diffpdf(
|
|
11
|
-
reference: str | Path,
|
|
12
|
-
actual: str | Path,
|
|
13
|
-
threshold: float = 0.1,
|
|
14
|
-
dpi: int = 96,
|
|
15
|
-
output_dir: str | Path | None = None,
|
|
16
|
-
verbosity: int = 0,
|
|
17
|
-
save_log: bool = False,
|
|
18
|
-
) -> None:
|
|
19
|
-
ref_path = Path(reference) if isinstance(reference, str) else reference
|
|
20
|
-
actual_path = Path(actual) if isinstance(actual, str) else actual
|
|
21
|
-
out_path = Path(output_dir) if isinstance(output_dir, str) else output_dir
|
|
22
|
-
|
|
23
|
-
logger = setup_logging(verbosity, save_log)
|
|
24
|
-
compare_pdfs(ref_path, actual_path, threshold, dpi, out_path, logger)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
__all__ = ["diffpdf", "__version__"]
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from .hash_check import check_hash
|
|
5
|
-
from .page_check import check_page_counts
|
|
6
|
-
from .text_check import check_text_content
|
|
7
|
-
from .visual_check import check_visual_content
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def compare_pdfs(
|
|
11
|
-
ref: Path, actual: Path, threshold: float, dpi: int, output_dir: Path | None, logger
|
|
12
|
-
) -> None:
|
|
13
|
-
check_hash(ref, actual, logger)
|
|
14
|
-
|
|
15
|
-
check_page_counts(ref, actual, logger)
|
|
16
|
-
|
|
17
|
-
check_text_content(ref, actual, logger)
|
|
18
|
-
|
|
19
|
-
check_visual_content(ref, actual, threshold, dpi, output_dir, logger)
|
|
20
|
-
|
|
21
|
-
logger.info("PDFs are equivalent")
|
|
22
|
-
sys.exit(0)
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import colorlog
|
|
4
|
-
|
|
5
|
-
LOG_FORMAT = (
|
|
6
|
-
"%(asctime)s %(levelname)-8s %(filename)s:%(lineno)d (%(funcName)s): %(message)s"
|
|
7
|
-
)
|
|
8
|
-
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
|
|
9
|
-
LOG_COLORS = {
|
|
10
|
-
"DEBUG": "cyan",
|
|
11
|
-
"INFO": "green",
|
|
12
|
-
"WARNING": "yellow",
|
|
13
|
-
"ERROR": "red",
|
|
14
|
-
"CRITICAL": "red,bg_white",
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def setup_logging(verbosity, save_log):
|
|
19
|
-
if verbosity == 0:
|
|
20
|
-
level = logging.WARNING
|
|
21
|
-
elif verbosity == 1:
|
|
22
|
-
level = logging.INFO
|
|
23
|
-
else:
|
|
24
|
-
level = logging.DEBUG
|
|
25
|
-
|
|
26
|
-
formatter = colorlog.ColoredFormatter(
|
|
27
|
-
f"%(log_color)s{LOG_FORMAT}%(reset)s",
|
|
28
|
-
datefmt=DATE_FORMAT,
|
|
29
|
-
log_colors=LOG_COLORS,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
console_handler = logging.StreamHandler()
|
|
33
|
-
console_handler.setFormatter(formatter)
|
|
34
|
-
|
|
35
|
-
logger = logging.getLogger()
|
|
36
|
-
logger.setLevel(level)
|
|
37
|
-
logger.addHandler(console_handler)
|
|
38
|
-
|
|
39
|
-
if save_log: # pragma: no cover
|
|
40
|
-
file_formatter = logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT)
|
|
41
|
-
file_handler = logging.FileHandler("log.txt")
|
|
42
|
-
file_handler.setFormatter(file_formatter)
|
|
43
|
-
logger.addHandler(file_handler)
|
|
44
|
-
|
|
45
|
-
return logger
|
diffpdf-0.3.0/tests/test_api.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from diffpdf import diffpdf
|
|
6
|
-
|
|
7
|
-
TEST_ASSETS_DIR = Path(__file__).parent / "assets"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def test_diffpdf():
|
|
11
|
-
with pytest.raises(SystemExit) as exc_info:
|
|
12
|
-
diffpdf(
|
|
13
|
-
TEST_ASSETS_DIR / "pass/identical-A.pdf",
|
|
14
|
-
TEST_ASSETS_DIR / "pass/identical-B.pdf",
|
|
15
|
-
)
|
|
16
|
-
assert exc_info.value.code == 0
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
from click.testing import CliRunner
|
|
5
|
-
|
|
6
|
-
from diffpdf.cli import cli
|
|
7
|
-
|
|
8
|
-
TEST_ASSETS_DIR = Path(__file__).parent / "assets"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@pytest.mark.parametrize(
|
|
12
|
-
"ref_pdf_rel,actual_pdf_rel,expected_exit_code",
|
|
13
|
-
[
|
|
14
|
-
# Pass cases (exit code 0)
|
|
15
|
-
("pass/identical-A.pdf", "pass/identical-B.pdf", 0),
|
|
16
|
-
("pass/hash-diff-A.pdf", "pass/hash-diff-B.pdf", 0),
|
|
17
|
-
("pass/minor-color-diff-A.pdf", "pass/minor-color-diff-B.pdf", 0),
|
|
18
|
-
("pass/multiplatform-diff-A.pdf", "pass/multiplatform-diff-B.pdf", 0),
|
|
19
|
-
# Fail cases (exit code 1)
|
|
20
|
-
("fail/1-letter-diff-A.pdf", "fail/1-letter-diff-B.pdf", 1),
|
|
21
|
-
("fail/major-color-diff-A.pdf", "fail/major-color-diff-B.pdf", 1),
|
|
22
|
-
("fail/page-count-diff-A.pdf", "fail/page-count-diff-B.pdf", 1),
|
|
23
|
-
# Critical error cases (exit code 2)
|
|
24
|
-
("nonexistent.pdf", "another.pdf", 2),
|
|
25
|
-
],
|
|
26
|
-
)
|
|
27
|
-
def test_comparators(ref_pdf_rel, actual_pdf_rel, expected_exit_code):
|
|
28
|
-
runner = CliRunner()
|
|
29
|
-
|
|
30
|
-
ref_pdf = str(TEST_ASSETS_DIR / ref_pdf_rel)
|
|
31
|
-
actual_pdf = str(TEST_ASSETS_DIR / actual_pdf_rel)
|
|
32
|
-
|
|
33
|
-
result = runner.invoke(cli, [ref_pdf, actual_pdf])
|
|
34
|
-
|
|
35
|
-
assert result.exit_code == expected_exit_code
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def test_comparators_with_output_dir():
|
|
39
|
-
runner = CliRunner()
|
|
40
|
-
|
|
41
|
-
with runner.isolated_filesystem():
|
|
42
|
-
ref_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-A.pdf")
|
|
43
|
-
actual_pdf = str(TEST_ASSETS_DIR / "fail/major-color-diff-B.pdf")
|
|
44
|
-
|
|
45
|
-
result = runner.invoke(cli, [ref_pdf, actual_pdf, "--output-dir", "./diff"])
|
|
46
|
-
|
|
47
|
-
assert result.exit_code == 1
|
|
48
|
-
assert Path("./diff").exists()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|