rosetta-squint 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: rosetta-squint
3
+ Version: 1.0.0
4
+ Summary: Cross-language byte-exact perceptual image hashing — decode + hash in one call
5
+ Author-email: Will Metcalf <william.metcalf@gmail.com>
6
+ License: BSD-2-Clause
7
+ Project-URL: Homepage, https://github.com/wmetcalf/rosetta-squint
8
+ Project-URL: Repository, https://github.com/wmetcalf/rosetta-squint
9
+ Project-URL: Issues, https://github.com/wmetcalf/rosetta-squint/issues
10
+ Project-URL: Changelog, https://github.com/wmetcalf/rosetta-squint/blob/main/CHANGELOG.md
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: rosetta-squint-hash<2.0,>=1.0.0
14
+ Requires-Dist: Pillow==12.2.*
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest>=7; extra == "test"
17
+
18
+ # rosetta_squint — Python convenience API
19
+
20
+ Point at an image file or pass in raw image bytes; get back the same perceptual hash hex string that every other `rosetta-squint` port produces for the same input.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install -e ../../hash/python # rosetta-squint-hash (wrapper around imagehash)
26
+ pip install -e . # this package
27
+ ```
28
+
29
+ (Not on PyPI yet — both are local.)
30
+
31
+ ## Usage
32
+
33
+ ```python
34
+ import rosetta_squint as rs
35
+
36
+ # Path on disk
37
+ h = rs.phash("photo.jpg", 8)
38
+ print(h) # "c3f8a1b27d0e4f96"
39
+
40
+ # Raw image bytes (from an HTTP response, a database BLOB, a multipart upload)
41
+ with open("photo.jpg", "rb") as f:
42
+ h = rs.phash_bytes(f.read(), 8)
43
+
44
+ # Every algorithm available has both flavors:
45
+ rs.average_hash(path, 8) # rs.average_hash_bytes(bytes, 8)
46
+ rs.phash(path, 8) # rs.phash_bytes(bytes, 8)
47
+ rs.phash_simple(path, 8) # rs.phash_simple_bytes(bytes, 8)
48
+ rs.dhash(path, 8) # rs.dhash_bytes(bytes, 8)
49
+ rs.dhash_vertical(path, 8) # rs.dhash_vertical_bytes(bytes, 8)
50
+ rs.whash_haar(path, 8) # rs.whash_haar_bytes(bytes, 8)
51
+ rs.whash_db4(path, 8) # rs.whash_db4_bytes(bytes, 8)
52
+ rs.whash_db4_robust(path, 8) # rs.whash_db4_robust_bytes(bytes, 8) — cross-port-stable
53
+ rs.colorhash(path, 3) # rs.colorhash_bytes(bytes, 3) — takes binbits
54
+ rs.crop_resistant_hash(path) # rs.crop_resistant_hash_bytes(bytes) — no size, returns ImageMultiHash
55
+
56
+ # Hex round-trips:
57
+ restored = rs.hex_to_hash("c3f8a1b27d0e4f96")
58
+ restored = rs.hex_to_flathash("...", hashsize=3)
59
+ restored = rs.hex_to_multihash("hex1,hex2,hex3")
60
+ ```
61
+
62
+ ## Cross-port equivalence
63
+
64
+ The output of `rs.phash("photo.jpg", 8)` is the same hex string as you'd get from the Rust, Go, Java, JS, or Swift `rosetta-squint` ports given the same byte input. **Verified live for `imagehash.png` at size 8: `ba8c84536bd3c366` across Python, Go, Java, JS, Swift.**
65
+
66
+ ## Decode strategy
67
+
68
+ | Format | Decoder | Why |
69
+ |---|---|---|
70
+ | BMP, PNG, GIF, JPEG, WebP, TIFF | PIL/Pillow (system) | The canonical Python decoders; the goldens used to validate the 5 native ports were generated by PIL itself, so output matches by construction. |
71
+ | HEIC | ctypes wrapper around system `libheif.so.1` | pillow-heif bundles libheif 1.21.2 in its wheel; the 5 native ports link to system libheif 1.17.6. The wrapper avoids the ±1 px divergence. |
72
+
73
+ If you already have a `PIL.Image.Image` (from `PIL.Image.open(...)`, a thumbnailer, etc.), use the `rosetta_squint_hash` lower-level API directly — the squint layer's only job is the decode step:
74
+
75
+ ```python
76
+ import rosetta_squint_hash as rih
77
+ from PIL import Image
78
+ img = Image.open("photo.jpg")
79
+ h = rih.phash(img, hash_size=8)
80
+ ```
81
+
82
+ ## Dependencies
83
+
84
+ - `rosetta_squint_hash` (which re-exports `imagehash==4.3.2` + adds `whash_db4_robust`)
85
+ - `Pillow==12.2.*`
86
+
87
+ Tight pins are intentional. See [`../../hash/python/README.md`](../../hash/python/README.md) under "Version policy" for the upgrade workflow.
88
+
89
+ ## Testing
90
+
91
+ ```bash
92
+ pytest
93
+ ```
94
+
95
+ Tests verify (1) path/bytes parity for every algorithm, (2) chain consistency between `rs.phash(path)` and `imagehash.phash(rs.decode_file(path))`, (3) cross-port byte-exact equality with Go/Java/JS for `imagehash.png`.
@@ -0,0 +1,78 @@
1
+ # rosetta_squint — Python convenience API
2
+
3
+ Point at an image file or pass in raw image bytes; get back the same perceptual hash hex string that every other `rosetta-squint` port produces for the same input.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install -e ../../hash/python # rosetta-squint-hash (wrapper around imagehash)
9
+ pip install -e . # this package
10
+ ```
11
+
12
+ (Not on PyPI yet — both are local.)
13
+
14
+ ## Usage
15
+
16
+ ```python
17
+ import rosetta_squint as rs
18
+
19
+ # Path on disk
20
+ h = rs.phash("photo.jpg", 8)
21
+ print(h) # "c3f8a1b27d0e4f96"
22
+
23
+ # Raw image bytes (from an HTTP response, a database BLOB, a multipart upload)
24
+ with open("photo.jpg", "rb") as f:
25
+ h = rs.phash_bytes(f.read(), 8)
26
+
27
+ # Every algorithm available has both flavors:
28
+ rs.average_hash(path, 8) # rs.average_hash_bytes(bytes, 8)
29
+ rs.phash(path, 8) # rs.phash_bytes(bytes, 8)
30
+ rs.phash_simple(path, 8) # rs.phash_simple_bytes(bytes, 8)
31
+ rs.dhash(path, 8) # rs.dhash_bytes(bytes, 8)
32
+ rs.dhash_vertical(path, 8) # rs.dhash_vertical_bytes(bytes, 8)
33
+ rs.whash_haar(path, 8) # rs.whash_haar_bytes(bytes, 8)
34
+ rs.whash_db4(path, 8) # rs.whash_db4_bytes(bytes, 8)
35
+ rs.whash_db4_robust(path, 8) # rs.whash_db4_robust_bytes(bytes, 8) — cross-port-stable
36
+ rs.colorhash(path, 3) # rs.colorhash_bytes(bytes, 3) — takes binbits
37
+ rs.crop_resistant_hash(path) # rs.crop_resistant_hash_bytes(bytes) — no size, returns ImageMultiHash
38
+
39
+ # Hex round-trips:
40
+ restored = rs.hex_to_hash("c3f8a1b27d0e4f96")
41
+ restored = rs.hex_to_flathash("...", hashsize=3)
42
+ restored = rs.hex_to_multihash("hex1,hex2,hex3")
43
+ ```
44
+
45
+ ## Cross-port equivalence
46
+
47
+ The output of `rs.phash("photo.jpg", 8)` is the same hex string as you'd get from the Rust, Go, Java, JS, or Swift `rosetta-squint` ports given the same byte input. **Verified live for `imagehash.png` at size 8: `ba8c84536bd3c366` across Python, Go, Java, JS, Swift.**
48
+
49
+ ## Decode strategy
50
+
51
+ | Format | Decoder | Why |
52
+ |---|---|---|
53
+ | BMP, PNG, GIF, JPEG, WebP, TIFF | PIL/Pillow (system) | The canonical Python decoders; the goldens used to validate the 5 native ports were generated by PIL itself, so output matches by construction. |
54
+ | HEIC | ctypes wrapper around system `libheif.so.1` | pillow-heif bundles libheif 1.21.2 in its wheel; the 5 native ports link to system libheif 1.17.6. The wrapper avoids the ±1 px divergence. |
55
+
56
+ If you already have a `PIL.Image.Image` (from `PIL.Image.open(...)`, a thumbnailer, etc.), use the `rosetta_squint_hash` lower-level API directly — the squint layer's only job is the decode step:
57
+
58
+ ```python
59
+ import rosetta_squint_hash as rih
60
+ from PIL import Image
61
+ img = Image.open("photo.jpg")
62
+ h = rih.phash(img, hash_size=8)
63
+ ```
64
+
65
+ ## Dependencies
66
+
67
+ - `rosetta_squint_hash` (which re-exports `imagehash==4.3.2` + adds `whash_db4_robust`)
68
+ - `Pillow==12.2.*`
69
+
70
+ Tight pins are intentional. See [`../../hash/python/README.md`](../../hash/python/README.md) under "Version policy" for the upgrade workflow.
71
+
72
+ ## Testing
73
+
74
+ ```bash
75
+ pytest
76
+ ```
77
+
78
+ Tests verify (1) path/bytes parity for every algorithm, (2) chain consistency between `rs.phash(path)` and `imagehash.phash(rs.decode_file(path))`, (3) cross-port byte-exact equality with Go/Java/JS for `imagehash.png`.
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "rosetta-squint"
7
+ version = "1.0.0"
8
+ description = "Cross-language byte-exact perceptual image hashing — decode + hash in one call"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "BSD-2-Clause" }
12
+ authors = [
13
+ { name = "Will Metcalf", email = "william.metcalf@gmail.com" },
14
+ ]
15
+ # Tight upstream pins — bumped only when we cut a new rosetta-squint release
16
+ # after re-validating goldens. See ../../hash/python/README.md for the policy.
17
+ #
18
+ # Note: `rosetta-squint-hash` is a sibling local package in this repo. Until
19
+ # the project is on PyPI, install it manually from ../../hash/python BEFORE
20
+ # installing this package:
21
+ # pip install -e ../../hash/python
22
+ # pip install -e .
23
+ # Once on PyPI, this dependency will be specified normally.
24
+ dependencies = [
25
+ "rosetta-squint-hash>=1.0.0,<2.0",
26
+ "Pillow==12.2.*",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ test = ["pytest>=7"]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/wmetcalf/rosetta-squint"
34
+ Repository = "https://github.com/wmetcalf/rosetta-squint"
35
+ Issues = "https://github.com/wmetcalf/rosetta-squint/issues"
36
+ Changelog = "https://github.com/wmetcalf/rosetta-squint/blob/main/CHANGELOG.md"
37
+
38
+ [tool.setuptools.packages.find]
39
+ include = ["rosetta_squint*"]
40
+ exclude = ["tests*"]
41
+
42
+ # ─── Static analysis config (consumed by the CI lint job) ─────────────────
43
+ [tool.ruff]
44
+ line-length = 120
45
+ target-version = "py39"
46
+
47
+ [tool.ruff.lint]
48
+ select = ["E", "F", "W", "B"]
49
+ ignore = [
50
+ "E203",
51
+ "F403",
52
+ "F401",
53
+ ]
54
+ exclude = ["tests"]
55
+
56
+ [tool.mypy]
57
+ python_version = "3.9"
58
+ ignore_missing_imports = true
59
+ follow_imports = "silent"
@@ -0,0 +1,92 @@
1
+ """rosetta_squint — point at an image (path or bytes), get the same phash
2
+ hex string as every other rosetta-squint port for the same input.
3
+
4
+ This is the Python implementation of the cross-language perceptual-hash
5
+ convenience API. It depends on `rosetta_squint_hash` (which re-exports
6
+ upstream `imagehash` + adds `whash_db4_robust`) and uses PIL/Pillow for
7
+ decoding most formats. HEIC is decoded via a ctypes wrapper around
8
+ system libheif so that output matches the 5 native ports (which all FFI
9
+ to the same system libheif).
10
+
11
+ Each public function comes in two flavors:
12
+ - `phash(path, ...)` — accept a file path (str or Path)
13
+ - `phash_bytes(bytes, ...)` — accept raw image bytes in memory
14
+
15
+ API matches the same names in the non-Python rosetta-squint ports
16
+ (`phash`, `dhash`, `average_hash`, `whash_haar`, `colorhash`,
17
+ `crop_resistant_hash`, plus the extensions `whash_db4`, `whash_db4_robust`,
18
+ `phash_simple`, `dhash_vertical`).
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from ._impl import (
24
+ # Path-based entries
25
+ average_hash,
26
+ colorhash,
27
+ crop_resistant_hash,
28
+ dhash,
29
+ dhash_vertical,
30
+ phash,
31
+ phash_simple,
32
+ whash_db4,
33
+ whash_db4_robust,
34
+ whash_haar,
35
+ # Bytes-based entries
36
+ average_hash_bytes,
37
+ colorhash_bytes,
38
+ crop_resistant_hash_bytes,
39
+ dhash_bytes,
40
+ dhash_vertical_bytes,
41
+ phash_bytes,
42
+ phash_simple_bytes,
43
+ whash_db4_bytes,
44
+ whash_db4_robust_bytes,
45
+ whash_haar_bytes,
46
+ # Decode helpers
47
+ decode_bytes,
48
+ decode_file,
49
+ )
50
+
51
+ # Re-export hash types so callers don't need to import rosetta_squint_hash
52
+ # separately.
53
+ from rosetta_squint_hash import (
54
+ ImageHash,
55
+ ImageMultiHash,
56
+ hex_to_flathash,
57
+ hex_to_hash,
58
+ hex_to_multihash,
59
+ )
60
+
61
# Runtime-visible package version. Must match `version` in pyproject.toml
# (1.0.0 for this release); the previous literal "0.1.0" was stale.
__version__ = "1.0.0"
62
+
63
# Public API surface of the package, in the same order as before:
# hash types and hex helpers, decode helpers, then each algorithm as a
# (path, bytes) pair, and finally the version string.
__all__ = [
    # Re-exported from rosetta_squint_hash
    "ImageHash", "ImageMultiHash",
    "hex_to_flathash", "hex_to_hash", "hex_to_multihash",
    # Decode helpers
    "decode_file", "decode_bytes",
    # Hash algorithms: path flavor, then bytes flavor
    "average_hash", "average_hash_bytes",
    "colorhash", "colorhash_bytes",
    "crop_resistant_hash", "crop_resistant_hash_bytes",
    "dhash", "dhash_bytes",
    "dhash_vertical", "dhash_vertical_bytes",
    "phash", "phash_bytes",
    "phash_simple", "phash_simple_bytes",
    "whash_db4", "whash_db4_bytes",
    "whash_db4_robust", "whash_db4_robust_bytes",
    "whash_haar", "whash_haar_bytes",
    # Metadata
    "__version__",
]
@@ -0,0 +1,462 @@
1
+ """Implementation of the rosetta_squint convenience API.
2
+
3
+ For most formats we use PIL.Image.open() because that's what upstream
4
+ `imagehash` itself uses, so we match imagehash's behavior exactly. For
5
+ HEIC specifically, we decode via a ctypes wrapper around system libheif
6
+ (NOT pillow-heif, which bundles libheif 1.21.2 and diverges ±1 px from
7
+ the system libheif 1.17.6 that the 5 native ports link to).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import ctypes
13
+ import ctypes.util
14
+ import io
15
+ import os
16
+ import stat
17
+ import sys
18
+ from pathlib import Path
19
+ from typing import Union
20
+
21
+ import imagehash
22
+ import rosetta_squint_hash as rih
23
+ from PIL import Image
24
+
25
+
26
+ # Reject path-based decode of files that are too large or are non-regular
27
+ # (e.g., /dev/zero, named pipes, character devices) BEFORE reading bytes.
28
+ # Callers that genuinely need to process images larger than this threshold
29
+ # should decode via rosetta-squint-decode directly after explicit validation.
30
MAX_FILE_SIZE = 256 << 20  # 256 MiB (268_435_456 bytes)
31
+
32
+
33
def _load_libheif_xplat() -> ctypes.CDLL:
    """Load the system libheif shared library on any supported platform.

    Candidate names tried, in order:

    * macOS:   ``libheif.dylib``, ``libheif.1.dylib``
    * Windows: ``libheif.dll``, ``libheif-1.dll``
    * other:   ``libheif.so.1``, ``libheif.so``
    * finally, whatever ``ctypes.util.find_library("heif")`` reports (if
      anything), which can pick up Homebrew/MacPorts/other install paths.

    Returns:
        The first candidate that ``ctypes.CDLL`` manages to load.

    Raises:
        OSError: if none of the candidates can be loaded; the message lists
            every name that was tried.
    """
    names_by_platform = {
        "darwin": ["libheif.dylib", "libheif.1.dylib"],
        "win32": ["libheif.dll", "libheif-1.dll"],
    }
    # Copy so the per-call append below never touches the table above.
    attempts = list(
        names_by_platform.get(sys.platform, ["libheif.so.1", "libheif.so"])
    )
    located = ctypes.util.find_library("heif")
    if located:
        attempts.append(located)

    for library_name in attempts:
        try:
            library = ctypes.CDLL(library_name)
        except OSError:
            # This candidate is not loadable; move on to the next one.
            continue
        return library

    tried = ", ".join(attempts)
    raise OSError(
        f"libheif not found. Tried: {tried}. "
        f"Install via your package manager (apt install libheif-dev, "
        f"brew install libheif, etc.)."
    )
63
+
64
# Type alias for "a filesystem path (str / Path) or an in-memory bytes-like
# buffer (bytes / bytearray / memoryview)".
# NOTE(review): no function in the visible part of this module references
# this alias — confirm whether it is still needed.
PathOrBytes = Union[str, Path, bytes, bytearray, memoryview]
65
+
66
+
67
class HeifError(ctypes.Structure):
    """ctypes mirror of C ``struct heif_error``.

    Layout: ``int code; int subcode; const char* message;``.

    libheif's fallible functions return this struct *by value*. Its size is
    two C ints plus one pointer (16 bytes on typical 64-bit ABIs, 12 bytes
    on 32-bit ones), so declaring the return type as a plain integer such as
    ``ctypes.c_int64`` would read only part of the struct, dropping the
    message pointer and ignoring the subcode. Using this Structure as
    ``restype`` keeps all three fields available.
    """

    _fields_ = [
        ("code", ctypes.c_int),  # 0 means success (see _check_heif)
        ("subcode", ctypes.c_int),
        ("message", ctypes.c_char_p),  # NUL-terminated text, may be NULL
    ]
80
+
81
+
82
def _check_heif(err: "HeifError", op: str) -> None:
    """Turn a failing libheif error struct into a Python exception.

    Args:
        err: the error struct returned by a libheif call; ``code == 0``
            means success.
        op: name of the libheif operation, used only in the error text.

    Raises:
        RuntimeError: if ``err.code`` is non-zero. The message includes the
            code, the subcode and the (UTF-8 decoded, errors replaced)
            libheif message, or an empty string when no message is set.
    """
    if err.code == 0:
        return
    detail = "" if not err.message else err.message.decode("utf-8", "replace")
    raise RuntimeError(
        f"libheif {op} failed: code={err.code} subcode={err.subcode} msg={detail}"
    )
89
+
90
+
91
+ # ─── Decode helpers ──────────────────────────────────────────────────────────
92
+
93
+
94
def _is_heic(path_or_first_bytes) -> bool:
    """Report whether the input starts with a HEIC-family ``ftyp`` box.

    Args:
        path_or_first_bytes: either a bytes-like object (bytes, bytearray,
            memoryview) holding at least the start of the image, or a path
            that is opened in binary mode to read the first 12 bytes.

    Returns:
        True when bytes 4..8 are ``b"ftyp"`` and the brand at bytes 8..12 is
        one of heic, heix, mif1, msf1, hevc, hevx; False otherwise
        (including when fewer than 12 bytes are available).
    """
    heic_brands = (b"heic", b"heix", b"mif1", b"msf1", b"hevc", b"hevx")
    if not isinstance(path_or_first_bytes, (bytes, bytearray, memoryview)):
        with open(path_or_first_bytes, "rb") as stream:
            header = stream.read(12)
    else:
        header = bytes(path_or_first_bytes[:12])
    return (
        len(header) >= 12
        and header[4:8] == b"ftyp"
        and header[8:12] in heic_brands
    )
106
+
107
+
108
def _decode_heic_via_system_libheif(data: bytes) -> Image.Image:
    """Decode HEIC bytes using ctypes around system libheif so the result
    matches the 5 native ports (which all link to system libheif via
    FFI). pillow-heif would bundle libheif 1.21.2 and diverge from
    system libheif 1.17.6 by ±1 px on lossy fixtures.

    Args:
        data: the complete HEIC file contents. It is passed to libheif as a
            ``c_char_p``, so it must be a ``bytes`` object.

    Returns:
        A PIL image in mode "RGBA" when the primary image reports an alpha
        channel, otherwise "RGB", built from a tightly packed copy of the
        decoded interleaved plane.

    Raises:
        OSError: if the libheif shared library cannot be loaded.
        RuntimeError: if any libheif call reports an error, returns a null
            context/handle/image/plane, reports non-positive dimensions, or
            reports a stride smaller than one packed row.
    """
    lib = _load_libheif_xplat()
    # Declare the C signatures used below. Pointer-returning functions need
    # an explicit c_void_p restype (ctypes defaults to C int), and the
    # fallible functions return `struct heif_error` by value (HeifError).
    lib.heif_context_alloc.restype = ctypes.c_void_p
    lib.heif_context_free.argtypes = [ctypes.c_void_p]
    lib.heif_context_read_from_memory_without_copy.argtypes = [
        ctypes.c_void_p,
        ctypes.c_char_p,
        ctypes.c_size_t,
        ctypes.c_void_p,
    ]
    lib.heif_context_read_from_memory_without_copy.restype = HeifError
    lib.heif_context_get_primary_image_handle.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ctypes.c_void_p),
    ]
    lib.heif_context_get_primary_image_handle.restype = HeifError
    lib.heif_image_handle_release.argtypes = [ctypes.c_void_p]
    lib.heif_image_handle_get_width.argtypes = [ctypes.c_void_p]
    lib.heif_image_handle_get_width.restype = ctypes.c_int
    lib.heif_image_handle_get_height.argtypes = [ctypes.c_void_p]
    lib.heif_image_handle_get_height.restype = ctypes.c_int
    lib.heif_image_handle_has_alpha_channel.argtypes = [ctypes.c_void_p]
    lib.heif_image_handle_has_alpha_channel.restype = ctypes.c_int
    lib.heif_decode_image.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ctypes.c_void_p),
        ctypes.c_int,
        ctypes.c_int,
        ctypes.c_void_p,
    ]
    lib.heif_decode_image.restype = HeifError
    lib.heif_image_release.argtypes = [ctypes.c_void_p]
    lib.heif_image_get_plane_readonly.argtypes = [
        ctypes.c_void_p,
        ctypes.c_int,
        ctypes.POINTER(ctypes.c_int),
    ]
    lib.heif_image_get_plane_readonly.restype = ctypes.c_void_p

    # Numeric values of the libheif enums used below.
    # NOTE(review): presumably copied from heif.h (heif_colorspace,
    # heif_chroma, heif_channel) — confirm against the installed header.
    HEIF_COLORSPACE_RGB = 1
    HEIF_CHROMA_INTERLEAVED_RGB = 10
    HEIF_CHROMA_INTERLEAVED_RGBA = 11
    HEIF_CHANNEL_INTERLEAVED = 10

    ctx = lib.heif_context_alloc()
    if not ctx:
        raise RuntimeError("heif_context_alloc failed")
    try:
        # "without_copy": libheif is handed a pointer into `data` rather than
        # its own copy. `data` is a parameter of this function, so it stays
        # referenced until after the context is freed in the outer finally.
        err = lib.heif_context_read_from_memory_without_copy(
            ctx, data, len(data), None
        )
        _check_heif(err, "heif_context_read_from_memory_without_copy")
        handle_ref = ctypes.c_void_p()
        err = lib.heif_context_get_primary_image_handle(
            ctx, ctypes.byref(handle_ref)
        )
        # Register the cleanup BEFORE checking the error so that any
        # partial handle libheif may have written into handle_ref is
        # released even if _check_heif raises.
        try:
            _check_heif(err, "heif_context_get_primary_image_handle")
            if not handle_ref.value:
                raise RuntimeError("heif_context_get_primary_image_handle failed")
            handle = handle_ref.value
            width = lib.heif_image_handle_get_width(handle)
            height = lib.heif_image_handle_get_height(handle)
            if width <= 0 or height <= 0:
                raise RuntimeError(
                    f"invalid HEIC dimensions {width}x{height}"
                )
            # Request interleaved RGBA only when the image has alpha, so the
            # PIL mode and bytes-per-pixel below match the decoded layout.
            has_alpha = lib.heif_image_handle_has_alpha_channel(handle) != 0
            chroma = (
                HEIF_CHROMA_INTERLEAVED_RGBA
                if has_alpha
                else HEIF_CHROMA_INTERLEAVED_RGB
            )
            mode = "RGBA" if has_alpha else "RGB"
            channels = 4 if has_alpha else 3

            img_ref = ctypes.c_void_p()
            err = lib.heif_decode_image(
                handle, ctypes.byref(img_ref), HEIF_COLORSPACE_RGB, chroma, None
            )
            # Same pattern: register cleanup BEFORE the error check so the
            # image is released even if _check_heif raises.
            try:
                _check_heif(err, "heif_decode_image")
                if not img_ref.value:
                    raise RuntimeError("heif_decode_image failed")
                img = img_ref.value
                stride = ctypes.c_int(0)
                plane = lib.heif_image_get_plane_readonly(
                    img, HEIF_CHANNEL_INTERLEAVED, ctypes.byref(stride)
                )
                if not plane:
                    raise RuntimeError("null plane")
                # The plane may be padded: each row occupies `stride` bytes
                # of which only the first `row_bytes` are pixel data. A
                # stride shorter than one packed row would make the copies
                # below read past the row, so reject it.
                row_bytes = width * channels
                if stride.value < row_bytes:
                    raise RuntimeError(
                        f"libheif returned stride {stride.value} smaller than "
                        f"required {row_bytes} (width={width} channels={channels})"
                    )
                # Copy row by row into a tightly packed buffer, dropping any
                # per-row padding, BEFORE the image is released below.
                pixels = bytearray()
                for y in range(height):
                    row = ctypes.string_at(plane + y * stride.value, row_bytes)
                    pixels.extend(row)
                return Image.frombytes(mode, (width, height), bytes(pixels))
            finally:
                if img_ref.value:
                    lib.heif_image_release(img_ref.value)
        finally:
            if handle_ref.value:
                lib.heif_image_handle_release(handle_ref.value)
    finally:
        lib.heif_context_free(ctx)
227
+
228
+
229
def _read_path_safely(path: Union[str, Path]) -> bytes:
    """Open `path`, validate it via fstat on the SAME fd that is read, and
    return its bytes.

    A naive `os.stat(path)` then `open(path).read()` has a classic TOCTOU
    window: an attacker with write access along the path can swap the
    target between the two syscalls (file → symlink to /dev/zero, file →
    larger file, regular file → FIFO). Holding the same fd across stat
    and read defeats this — fstat reports the inode the read will actually
    consume.

    On POSIX the open uses ``O_NOFOLLOW`` so that a symlink at ``path``
    makes the open fail instead of silently resolving, and ``O_NONBLOCK``
    so that opening a FIFO with no writer returns immediately instead of
    blocking forever before the regular-file check can reject it. Callers
    who genuinely want symlink resolution must do it explicitly (e.g.
    ``Path(path).resolve()``) before calling this function. Windows has no
    ``O_NOFOLLOW`` flag; it falls back to an ``lstat`` pre-check.

    Args:
        path: filesystem path of the file to read.

    Returns:
        The full file contents (at most ``MAX_FILE_SIZE`` bytes).

    Raises:
        ValueError: if ``path`` is a symlink.
        RuntimeError: if the target is not a regular file, or is larger
            than ``MAX_FILE_SIZE`` (at fstat time or after reading).
        OSError: for any other failure to stat/open the path.
    """
    nonblock = 0
    if sys.platform == "win32":
        # Windows: pre-check with os.lstat. There's a narrow race
        # between the lstat and the open below, but Windows lacks
        # O_NOFOLLOW and the alternative (reparse-point flags) would
        # require ctypes wrapping of CreateFileW.
        try:
            st_link = os.lstat(path)
        except OSError as e:
            raise OSError(e.errno, f"lstat failed for {path}: {e.strerror}") from e
        if stat.S_ISLNK(st_link.st_mode):
            raise ValueError(f"symlink not allowed: {path}")
        fd = os.open(path, os.O_RDONLY | os.O_BINARY)  # type: ignore[attr-defined]
    else:
        import errno as _errno

        # O_NONBLOCK only matters for the open() itself here: without it,
        # opening a FIFO read-only blocks until a writer appears, so the
        # S_ISREG rejection below would never be reached.
        nonblock = getattr(os, "O_NONBLOCK", 0)
        try:
            fd = os.open(path, os.O_RDONLY | os.O_NOFOLLOW | nonblock)
        except OSError as e:
            # ELOOP is what O_NOFOLLOW raises on Linux/macOS when the final
            # path component is a symlink; FreeBSD reports EMLINK instead.
            # Translate to a clearer error so callers can distinguish
            # symlink rejection from a generic I/O error.
            if e.errno in (_errno.ELOOP, _errno.EMLINK):
                raise ValueError(f"symlink not allowed: {path}") from e
            raise
    # Wrap the bare fd in a Python file object so it is closed with the
    # file object. If `os.fdopen` itself raises (e.g., MemoryError on a
    # stressed system), the fd would leak — close it explicitly to be safe.
    try:
        f = os.fdopen(fd, "rb")
    except BaseException:
        os.close(fd)
        raise
    try:
        st = os.fstat(f.fileno())
        if not stat.S_ISREG(st.st_mode):
            raise RuntimeError(f"not a regular file: {path}")
        if nonblock:
            # The non-blocking flag was only needed to get past open();
            # restore ordinary blocking reads for the regular file.
            os.set_blocking(f.fileno(), True)
        if st.st_size > MAX_FILE_SIZE:
            raise RuntimeError(
                f"input file too large: {st.st_size} bytes "
                f"(max {MAX_FILE_SIZE} bytes / 256 MiB). For images above this "
                f"threshold, decode via rosetta-squint-decode directly after "
                f"explicit validation."
            )
        # Read up to MAX_FILE_SIZE+1 so we detect "file grew between fstat
        # and read" (e.g. concurrent writer appending): getting more than
        # MAX_FILE_SIZE bytes back means the file is now over the limit.
        data = f.read(MAX_FILE_SIZE + 1)
        if len(data) > MAX_FILE_SIZE:
            raise RuntimeError(
                f"input file too large: {len(data)} bytes "
                f"(max {MAX_FILE_SIZE} bytes / 256 MiB). For images above this "
                f"threshold, decode via rosetta-squint-decode directly after "
                f"explicit validation."
            )
        return data
    finally:
        f.close()
307
+
308
+
309
def decode_file(path: Union[str, Path]) -> Image.Image:
    """Read the file at `path` and decode it into a PIL image for hashing.

    The bytes are obtained through ``_read_path_safely``, which refuses
    symlinks (``O_NOFOLLOW`` on POSIX / ``lstat`` on Windows), non-regular
    files (FIFOs, /dev/zero, character devices, etc.) and files larger than
    MAX_FILE_SIZE, with the regular-file and size checks run against the
    same fd as the read. Callers who genuinely want symlink resolution must
    do it explicitly (e.g. ``Path(path).resolve()``) first.

    HEIC content goes through the system-libheif ctypes wrapper; everything
    else is handed to ``PIL.Image.open`` on an in-memory buffer.
    """
    raw = _read_path_safely(path)
    if not _is_heic(raw):
        return Image.open(io.BytesIO(raw))
    return _decode_heic_via_system_libheif(raw)
326
+
327
+
328
def decode_bytes(data: bytes) -> Image.Image:
    """Decode in-memory image bytes into a PIL image.

    ``bytearray`` and ``memoryview`` inputs are first copied into an
    immutable ``bytes`` object. HEIC content is decoded via the ctypes
    wrapper around system libheif; any other content is passed to
    ``PIL.Image.open`` on an in-memory buffer.
    """
    payload = bytes(data) if isinstance(data, (bytearray, memoryview)) else data
    if not _is_heic(payload):
        return Image.open(io.BytesIO(payload))
    return _decode_heic_via_system_libheif(payload)
337
+
338
+
339
+ # ─── Convenience hash functions ──────────────────────────────────────────────
340
+ # Each algorithm gets a (path, size) and a (_bytes, size) variant.
341
+
342
+
343
def average_hash(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.average_hash``."""
    image = decode_file(path)
    return rih.average_hash(image, hash_size=hash_size)
345
+
346
+
347
def average_hash_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.average_hash``."""
    image = decode_bytes(data)
    return rih.average_hash(image, hash_size=hash_size)
349
+
350
+
351
def phash(
    path: Union[str, Path],
    hash_size: int = 8,
    highfreq_factor: int = 4,
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.phash``.

    ``hash_size`` and ``highfreq_factor`` are forwarded unchanged.
    """
    image = decode_file(path)
    return rih.phash(image, hash_size=hash_size, highfreq_factor=highfreq_factor)
359
+
360
+
361
def phash_bytes(
    data: bytes, hash_size: int = 8, highfreq_factor: int = 4
) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.phash``.

    ``hash_size`` and ``highfreq_factor`` are forwarded unchanged.
    """
    image = decode_bytes(data)
    return rih.phash(image, hash_size=hash_size, highfreq_factor=highfreq_factor)
367
+
368
+
369
def phash_simple(
    path: Union[str, Path],
    hash_size: int = 8,
    highfreq_factor: int = 4,
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.phash_simple``.

    ``hash_size`` and ``highfreq_factor`` are forwarded unchanged.
    """
    image = decode_file(path)
    return rih.phash_simple(
        image, hash_size=hash_size, highfreq_factor=highfreq_factor
    )
377
+
378
+
379
def phash_simple_bytes(
    data: bytes, hash_size: int = 8, highfreq_factor: int = 4
) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.phash_simple``.

    ``hash_size`` and ``highfreq_factor`` are forwarded unchanged.
    """
    image = decode_bytes(data)
    return rih.phash_simple(
        image, hash_size=hash_size, highfreq_factor=highfreq_factor
    )
385
+
386
+
387
def dhash(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.dhash``."""
    image = decode_file(path)
    return rih.dhash(image, hash_size=hash_size)
389
+
390
+
391
def dhash_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.dhash``."""
    image = decode_bytes(data)
    return rih.dhash(image, hash_size=hash_size)
393
+
394
+
395
def dhash_vertical(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.dhash_vertical``."""
    image = decode_file(path)
    return rih.dhash_vertical(image, hash_size=hash_size)
399
+
400
+
401
def dhash_vertical_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.dhash_vertical``."""
    image = decode_bytes(data)
    return rih.dhash_vertical(image, hash_size=hash_size)
403
+
404
+
405
def whash_haar(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its Haar wavelet hash.

    Calls ``rih.whash`` with ``mode="haar"`` and ``remove_max_haar_ll=True``.
    """
    options = {
        "hash_size": hash_size,
        "mode": "haar",
        "remove_max_haar_ll": True,
    }
    return rih.whash(decode_file(path), **options)
414
+
415
+
416
def whash_haar_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode raw image bytes and return their Haar wavelet hash.

    Calls ``rih.whash`` with ``mode="haar"`` and ``remove_max_haar_ll=True``.
    """
    options = {
        "hash_size": hash_size,
        "mode": "haar",
        "remove_max_haar_ll": True,
    }
    return rih.whash(decode_bytes(data), **options)
423
+
424
+
425
def whash_db4(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.whash_db4``."""
    # Deliberately rih.whash_db4 — the port-local override that applies the
    # snap-to-threshold tie-break — and NOT rih.whash(mode='db4'), which
    # forwards to upstream imagehash without it. See spec/SPEC.md
    # §"Threshold tie-break".
    image = decode_file(path)
    return rih.whash_db4(image, hash_size=hash_size)
431
+
432
+
433
def whash_db4_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.whash_db4``."""
    image = decode_bytes(data)
    return rih.whash_db4(image, hash_size=hash_size)
435
+
436
+
437
def whash_db4_robust(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.whash_db4_robust``."""
    image = decode_file(path)
    return rih.whash_db4_robust(image, hash_size=hash_size)
441
+
442
+
443
def whash_db4_robust_bytes(
    data: bytes, hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.whash_db4_robust``."""
    image = decode_bytes(data)
    return rih.whash_db4_robust(image, hash_size=hash_size)
447
+
448
+
449
def colorhash(path: Union[str, Path], binbits: int = 3) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.colorhash``.

    Unlike the other algorithms this takes ``binbits`` rather than a hash
    size; the value is forwarded unchanged.
    """
    image = decode_file(path)
    return rih.colorhash(image, binbits=binbits)
451
+
452
+
453
def colorhash_bytes(data: bytes, binbits: int = 3) -> imagehash.ImageHash:
    """Decode raw image bytes and return their ``rih.colorhash``.

    Unlike the other algorithms this takes ``binbits`` rather than a hash
    size; the value is forwarded unchanged.
    """
    image = decode_bytes(data)
    return rih.colorhash(image, binbits=binbits)
455
+
456
+
457
def crop_resistant_hash(path: Union[str, Path]) -> imagehash.ImageMultiHash:
    """Decode the file at `path` and return its ``rih.crop_resistant_hash``.

    Takes no size argument; the result is an ``ImageMultiHash``.
    """
    image = decode_file(path)
    return rih.crop_resistant_hash(image)
459
+
460
+
461
def crop_resistant_hash_bytes(data: bytes) -> imagehash.ImageMultiHash:
    """Decode raw image bytes and return their ``rih.crop_resistant_hash``.

    Takes no size argument; the result is an ``ImageMultiHash``.
    """
    image = decode_bytes(data)
    return rih.crop_resistant_hash(image)
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: rosetta-squint
3
+ Version: 1.0.0
4
+ Summary: Cross-language byte-exact perceptual image hashing — decode + hash in one call
5
+ Author-email: Will Metcalf <william.metcalf@gmail.com>
6
+ License: BSD-2-Clause
7
+ Project-URL: Homepage, https://github.com/wmetcalf/rosetta-squint
8
+ Project-URL: Repository, https://github.com/wmetcalf/rosetta-squint
9
+ Project-URL: Issues, https://github.com/wmetcalf/rosetta-squint/issues
10
+ Project-URL: Changelog, https://github.com/wmetcalf/rosetta-squint/blob/main/CHANGELOG.md
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: rosetta-squint-hash<2.0,>=1.0.0
14
+ Requires-Dist: Pillow==12.2.*
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest>=7; extra == "test"
17
+
18
+ # rosetta_squint — Python convenience API
19
+
20
+ Point at an image file or pass in raw image bytes; get back the same perceptual hash hex string that every other `rosetta-squint` port produces for the same input.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install -e ../../hash/python # rosetta-squint-hash (wrapper around imagehash)
26
+ pip install -e . # this package
27
+ ```
28
+
29
+ (Not on PyPI yet — both are local.)
30
+
31
+ ## Usage
32
+
33
+ ```python
34
+ import rosetta_squint as rs
35
+
36
+ # Path on disk
37
+ h = rs.phash("photo.jpg", 8)
38
+ print(h) # "c3f8a1b27d0e4f96"
39
+
40
+ # Raw image bytes (from an HTTP response, a database BLOB, a multipart upload)
41
+ with open("photo.jpg", "rb") as f:
42
+ h = rs.phash_bytes(f.read(), 8)
43
+
44
+ # Every algorithm available has both flavors:
45
+ rs.average_hash(path, 8) # rs.average_hash_bytes(bytes, 8)
46
+ rs.phash(path, 8) # rs.phash_bytes(bytes, 8)
47
+ rs.phash_simple(path, 8) # rs.phash_simple_bytes(bytes, 8)
48
+ rs.dhash(path, 8) # rs.dhash_bytes(bytes, 8)
49
+ rs.dhash_vertical(path, 8) # rs.dhash_vertical_bytes(bytes, 8)
50
+ rs.whash_haar(path, 8) # rs.whash_haar_bytes(bytes, 8)
51
+ rs.whash_db4(path, 8) # rs.whash_db4_bytes(bytes, 8)
52
+ rs.whash_db4_robust(path, 8) # rs.whash_db4_robust_bytes(bytes, 8) — cross-port-stable
53
+ rs.colorhash(path, 3) # rs.colorhash_bytes(bytes, 3) — takes binbits
54
+ rs.crop_resistant_hash(path) # rs.crop_resistant_hash_bytes(bytes) — no size, returns ImageMultiHash
55
+
56
+ # Hex round-trips:
57
+ restored = rs.hex_to_hash("c3f8a1b27d0e4f96")
58
+ restored = rs.hex_to_flathash("...", hashsize=3)
59
+ restored = rs.hex_to_multihash("hex1,hex2,hex3")
60
+ ```
61
+
62
+ ## Cross-port equivalence
63
+
64
+ The output of `rs.phash("photo.jpg", 8)` is the same hex string as you'd get from the Rust, Go, Java, JS, or Swift `rosetta-squint` ports given the same byte input. **Verified live for `imagehash.png` at size 8: `ba8c84536bd3c366` across Python, Go, Java, JS, Swift.**
65
+
66
+ ## Decode strategy
67
+
68
+ | Format | Decoder | Why |
69
+ |---|---|---|
70
+ | BMP, PNG, GIF, JPEG, WebP, TIFF | PIL/Pillow (system) | The canonical Python decoders; the goldens used to validate the 5 native ports were generated by PIL itself, so output matches by construction. |
71
+ | HEIC | ctypes wrapper around system `libheif.so.1` | pillow-heif bundles libheif 1.21.2 in its wheel; the 5 native ports link to system libheif 1.17.6. The wrapper avoids the ±1 px divergence. |
72
+
73
+ If you already have a `PIL.Image.Image` (from `PIL.Image.open(...)`, a thumbnailer, etc.), use the `rosetta_squint_hash` lower-level API directly — the squint layer's only job is the decode step:
74
+
75
+ ```python
76
+ import rosetta_squint_hash as rih
77
+ from PIL import Image
78
+ img = Image.open("photo.jpg")
79
+ h = rih.phash(img, hash_size=8)
80
+ ```
81
+
82
+ ## Dependencies
83
+
84
+ - `rosetta_squint_hash` (which re-exports `imagehash==4.3.2` + adds `whash_db4_robust`)
85
+ - `Pillow==12.2.*`
86
+
87
+ Tight pins are intentional. See [`../../hash/python/README.md`](../../hash/python/README.md) under "Version policy" for the upgrade workflow.
88
+
89
+ ## Testing
90
+
91
+ ```bash
92
+ pytest
93
+ ```
94
+
95
+ Tests verify (1) path/bytes parity for every algorithm, (2) chain consistency between `rs.phash(path)` and `imagehash.phash(rs.decode_file(path))`, (3) cross-port byte-exact equality with Go/Java/JS for `imagehash.png`.
@@ -0,0 +1,10 @@
1
+ README.md
2
+ pyproject.toml
3
+ rosetta_squint/__init__.py
4
+ rosetta_squint/_impl.py
5
+ rosetta_squint.egg-info/PKG-INFO
6
+ rosetta_squint.egg-info/SOURCES.txt
7
+ rosetta_squint.egg-info/dependency_links.txt
8
+ rosetta_squint.egg-info/requires.txt
9
+ rosetta_squint.egg-info/top_level.txt
10
+ tests/test_squint.py
@@ -0,0 +1,5 @@
1
+ rosetta-squint-hash<2.0,>=1.0.0
2
+ Pillow==12.2.*
3
+
4
+ [test]
5
+ pytest>=7
@@ -0,0 +1 @@
1
+ rosetta_squint
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,261 @@
1
+ """Integration tests for rosetta_squint.
2
+
3
+ Algorithms are tested in up to three ways (chain consistency covers a subset):
4
+ 1. Path input returns a non-empty ImageHash
5
+ 2. Path input == bytes input (both produce same hex)
6
+ 3. Squint output == calling rosetta_squint_hash directly on the decoded
7
+ PIL.Image (chain consistency — no surprise transformations)
8
+
9
+ We do NOT compare against `hash/spec/goldens.json` directly because those
10
+ goldens were generated by PIL.Image.open() which is exactly what
11
+ rosetta_squint uses for non-HEIC formats. So spec/goldens.json values
12
+ *should* match rosetta_squint output for PNG/JPEG fixtures — we assert
13
+ this where it adds confidence.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ from pathlib import Path
20
+
21
+ import pytest
22
+
23
+ import rosetta_squint as rs
24
+
25
+ # Fixtures from the hash side of the merged repo.
26
+ # tests/test_squint.py → tests/ → python/ → squint/ → rosetta-squint/
27
+ REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
28
+ HASH_FIXTURES = REPO_ROOT / "hash" / "spec" / "fixtures"
29
+ HASH_GOLDENS = REPO_ROOT / "hash" / "spec" / "goldens.json"
30
+ DECODE_FIXTURES = REPO_ROOT / "decode" / "spec" / "fixtures"
31
+
32
+ PNG_FIXTURE = HASH_FIXTURES / "imagehash.png"
33
+ PEPPERS = HASH_FIXTURES / "peppers.png"
34
+ JPEG_FIXTURE = DECODE_FIXTURES / "jpeg" / "valid" / "8x8-grayscale.jpg"
35
+
36
+
37
def _read(path: Path) -> bytes:
    """Return the full contents of the fixture at ``path`` as bytes."""
    contents = path.read_bytes()
    return contents
39
+
40
+
41
+ # Sanity: fixtures exist where we expect them.
42
def test_fixtures_exist():
    """Fail early, with the offending path, if a required fixture is absent."""
    for fixture in (PNG_FIXTURE, PEPPERS, JPEG_FIXTURE):
        assert fixture.exists(), f"missing {fixture}"
46
+
47
+
48
+ # Path/bytes parity for all 10 algorithms.
49
+
50
@pytest.mark.parametrize(
    "algo,size_arg,fixture",
    [
        ("phash", 8, PEPPERS),
        ("phash", 16, PEPPERS),
        ("phash_simple", 8, PEPPERS),
        ("dhash", 8, PEPPERS),
        ("dhash_vertical", 8, PEPPERS),
        ("average_hash", 8, PEPPERS),
        ("whash_haar", 8, PEPPERS),
        ("whash_db4", 8, PEPPERS),
        ("whash_db4_robust", 8, PEPPERS),
        # colorhash takes binbits (default 3) rather than a hash size
        ("colorhash", 3, PEPPERS),
        # JPEG goes through the whole pipeline, including libjpeg-turbo via PIL
        ("phash", 8, JPEG_FIXTURE),
        ("dhash", 8, JPEG_FIXTURE),
    ],
)
def test_path_equals_bytes(algo: str, size_arg: int, fixture: Path):
    """The path and ``*_bytes`` entry points must yield the same hex digest."""
    by_path, by_bytes = getattr(rs, algo), getattr(rs, f"{algo}_bytes")
    # The tuning argument is passed positionally, so the same call shape
    # works for hash_size (most algorithms) and binbits (colorhash).
    from_path = by_path(fixture, size_arg)
    from_bytes = by_bytes(_read(fixture), size_arg)
    assert str(from_path) == str(from_bytes), (
        f"{algo} {fixture.name} size={size_arg}: path={from_path} bytes={from_bytes}"
    )
79
+
80
+
81
def test_crop_resistant_hash_path_equals_bytes():
    """Path/bytes parity for crop_resistant_hash, which takes no size argument."""
    from_path = str(rs.crop_resistant_hash(PEPPERS))
    from_bytes = str(rs.crop_resistant_hash_bytes(_read(PEPPERS)))
    assert from_path == from_bytes
86
+
87
+
88
+ # Chain consistency: rs.phash(path) == imagehash.phash(rs.decode_file(path)).
89
+
90
def test_phash_chain_consistency():
    """rs.phash(path) must equal imagehash.phash on the squint-decoded image."""
    import imagehash

    expected = str(imagehash.phash(rs.decode_file(PEPPERS), hash_size=8))
    actual = str(rs.phash(PEPPERS, 8))
    assert expected == actual
96
+
97
+
98
def test_phash_simple_chain_consistency():
    """rs.phash_simple(path) must equal imagehash.phash_simple on the decoded image."""
    import imagehash

    expected = str(imagehash.phash_simple(rs.decode_file(PEPPERS), hash_size=8))
    actual = str(rs.phash_simple(PEPPERS, 8))
    assert expected == actual
104
+
105
+
106
def test_dhash_chain_consistency():
    """rs.dhash(path) must equal imagehash.dhash on the squint-decoded image."""
    import imagehash

    expected = str(imagehash.dhash(rs.decode_file(PEPPERS), hash_size=8))
    actual = str(rs.dhash(PEPPERS, 8))
    assert expected == actual
112
+
113
+
114
def test_average_hash_chain_consistency():
    """rs.average_hash(path) must equal imagehash.average_hash on the decoded image."""
    import imagehash

    expected = str(imagehash.average_hash(rs.decode_file(PEPPERS), hash_size=8))
    actual = str(rs.average_hash(PEPPERS, 8))
    assert expected == actual
120
+
121
+
122
def test_crop_resistant_hash_chain_consistency():
    """rs.crop_resistant_hash(path) must equal imagehash's result on the decoded image."""
    import imagehash

    expected = str(imagehash.crop_resistant_hash(rs.decode_file(PEPPERS)))
    actual = str(rs.crop_resistant_hash(PEPPERS))
    assert expected == actual
128
+
129
+
130
+ # Sanity that the hex format is what we expect.
131
def test_phash_returns_16_hex_chars_for_size_8():
    """A size-8 phash renders as exactly 16 lowercase hex characters."""
    digest = str(rs.phash(PEPPERS, 8))
    assert len(digest) == 16
    assert set(digest) <= set("0123456789abcdef")
135
+
136
+
137
+ # Cross-port live verification: confirm that phash("imagehash.png", 8) yields
138
+ # the same hex string that the Go/Java/JS squint ports report (ba8c84536bd3c366).
139
def test_phash_imagehash_png_matches_other_ports():
    """phash of imagehash.png at size 8 must equal the value the other ports report."""
    digest = rs.phash(PNG_FIXTURE, 8)
    # Expected value was reported by the Go, Java, and JS squint implementations.
    matches = str(digest) == "ba8c84536bd3c366"
    assert matches, (
        f"Python rosetta_squint diverged from Go/Java/JS for phash on imagehash.png. "
        f"Python: {digest}, others: ba8c84536bd3c366"
    )
146
+
147
+
148
def test_crop_resistant_hash_returns_imagemultihash():
    """The result exposes segment_hashes and has a float self-distance of 0.0."""
    multi = rs.crop_resistant_hash(PEPPERS)
    assert hasattr(multi, "segment_hashes")
    # Subtracting a multi-hash from itself must give a float, not an int.
    self_distance = multi - multi
    assert isinstance(self_distance, float)
    assert self_distance == 0.0
155
+
156
+
157
def test_hex_to_multihash_roundtrip():
    """str(multihash) -> hex_to_multihash -> str must reproduce the same text."""
    encoded = str(rs.crop_resistant_hash(PEPPERS))
    decoded = rs.hex_to_multihash(encoded)
    assert str(decoded) == encoded
162
+
163
+
164
+ # ─── HEIC tests — exercise the ctypes-around-system-libheif bridge ──────────
165
+ #
166
+ # rosetta_squint decodes HEIC via a ctypes wrapper around system libheif
167
+ # (instead of pillow-heif), because pillow-heif bundles libheif 1.21.2 and
168
+ # diverges ±1 px from the system libheif 1.17.6 that the 5 native ports link
169
+ # to. These tests confirm:
170
+ # (a) HEIC happy-path: decode and hash succeed
171
+ # (b) HEIC alpha handling: RGBA fixture decodes as RGBA
172
+ # (c) HEIC negative paths: malformed/AVIF inputs raise rather than panic
173
+ #
174
+ # The HEIC bridge has historically been the largest ctypes-FFI surface in
175
+ # the project — these tests guard against regressions in the HeifError
176
+ # struct handling, handle ownership, plane stride trust, etc. (S-M3, S-M4).
177
+
178
+ HEIC_RGB = DECODE_FIXTURES / "heic" / "valid" / "16x16.heic"
179
+ HEIC_RGBA = DECODE_FIXTURES / "heic" / "valid" / "16x16-rgba.heic"
180
+ HEIC_LOSSLESS = DECODE_FIXTURES / "heic" / "valid" / "16x16-lossless.heic"
181
+ HEIC_AVIF = DECODE_FIXTURES / "heic" / "invalid" / "avif.heic"
182
+ HEIC_BAD = DECODE_FIXTURES / "heic" / "invalid" / "bad-magic.heic"
183
+ HEIC_TRUNCATED = DECODE_FIXTURES / "heic" / "invalid" / "truncated.heic"
184
+
185
+
186
def _maybe_skip_heic():
    """Skip the calling test when the system libheif cannot be loaded.

    Only ``OSError`` is treated as "libheif unavailable"; any other failure
    propagates to the caller.
    """
    try:
        # Imported inside the try so an OSError raised while importing the
        # loader is handled the same way as one raised by the loader itself.
        from rosetta_squint._impl import _load_libheif_xplat

        _load_libheif_xplat()
    except OSError as e:
        pytest.skip(f"system libheif unavailable — skipping HEIC tests: {e}")
193
+
194
+
195
def test_heic_decode_rgb_hashes_successfully():
    """HEIC happy path: an RGB fixture decodes and hashes to a valid phash."""
    _maybe_skip_heic()
    if not HEIC_RGB.exists():
        pytest.skip(f"missing fixture {HEIC_RGB}")
    digest = str(rs.phash(HEIC_RGB, 8))
    # A size-8 phash always renders as 16 lowercase hex characters.
    assert len(digest) == 16
    assert set(digest) <= set("0123456789abcdef")
203
+
204
+
205
def test_heic_path_equals_bytes():
    """The path and bytes APIs must produce the same phash for a HEIC input."""
    _maybe_skip_heic()
    if not HEIC_RGB.exists():
        pytest.skip(f"missing fixture {HEIC_RGB}")
    from_path = str(rs.phash(HEIC_RGB, 8))
    from_bytes = str(rs.phash_bytes(HEIC_RGB.read_bytes(), 8))
    assert from_path == from_bytes
213
+
214
+
215
def test_heic_rgba_decodes():
    """An RGBA HEIC fixture decodes to a 16x16 RGB/RGBA image and hashes cleanly."""
    _maybe_skip_heic()
    if not HEIC_RGBA.exists():
        pytest.skip(f"missing fixture {HEIC_RGBA}")
    # Check mode and dimensions on a standalone decode before hashing via squint.
    decoded = rs.decode_file(HEIC_RGBA)
    assert decoded.mode in ("RGB", "RGBA")
    assert decoded.size == (16, 16)
    digest = str(rs.dhash(HEIC_RGBA, 8))
    assert len(digest) == 16
226
+
227
+
228
def test_heic_lossless_decodes():
    """A lossless HEIC fixture decodes and yields a 16-character average_hash."""
    _maybe_skip_heic()
    if not HEIC_LOSSLESS.exists():
        pytest.skip(f"missing fixture {HEIC_LOSSLESS}")
    digest = str(rs.average_hash(HEIC_LOSSLESS, 8))
    assert len(digest) == 16
235
+
236
+
237
def test_heic_invalid_avif_raises():
    """An AVIF brand inside a HEIC ftyp is intentionally unsupported in v1."""
    _maybe_skip_heic()
    fixture = HEIC_AVIF
    if not fixture.exists():
        pytest.skip(f"missing fixture {fixture}")
    with pytest.raises(Exception):  # noqa: BLE001 — any exception is acceptable
        rs.phash(fixture, 8)
244
+
245
+
246
def test_heic_invalid_bad_magic_raises():
    """A file whose ftyp brand is outside the HEIC family must be rejected."""
    _maybe_skip_heic()
    fixture = HEIC_BAD
    if not fixture.exists():
        pytest.skip(f"missing fixture {fixture}")
    with pytest.raises(Exception):  # noqa: BLE001
        rs.phash(fixture, 8)
253
+
254
+
255
def test_heic_invalid_truncated_raises():
    """A truncated HEIC must raise instead of silently producing a garbage hash."""
    _maybe_skip_heic()
    fixture = HEIC_TRUNCATED
    if not fixture.exists():
        pytest.skip(f"missing fixture {fixture}")
    with pytest.raises(Exception):  # noqa: BLE001
        rs.phash(fixture, 8)