rosetta-squint 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ """rosetta_squint — point at an image (path or bytes), get the same phash
2
+ hex string as every other rosetta-squint port for the same input.
3
+
4
+ This is the Python implementation of the cross-language perceptual-hash
5
+ convenience API. It depends on `rosetta_squint_hash` (which re-exports
6
+ upstream `imagehash` + adds `whash_db4_robust`) and uses PIL/Pillow for
7
+ decoding most formats. HEIC is decoded via a ctypes wrapper around
8
+ system libheif so that output matches the 5 native ports (which all FFI
9
+ to the same system libheif).
10
+
11
+ Each public function comes in two flavors:
12
+   - `phash(path, ...)` — accept a file path (str or pathlib.Path)
13
+ - `phash_bytes(bytes, ...)` — accept raw image bytes in memory
14
+
15
+ API matches the same names in the non-Python rosetta-squint ports
16
+ (`phash`, `dhash`, `average_hash`, `whash_haar`, `colorhash`,
17
+ `crop_resistant_hash`, plus the extensions `whash_db4`, `whash_db4_robust`,
18
+ `phash_simple`, `dhash_vertical`).
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from ._impl import (
24
+ # Path-based entries
25
+ average_hash,
26
+ colorhash,
27
+ crop_resistant_hash,
28
+ dhash,
29
+ dhash_vertical,
30
+ phash,
31
+ phash_simple,
32
+ whash_db4,
33
+ whash_db4_robust,
34
+ whash_haar,
35
+ # Bytes-based entries
36
+ average_hash_bytes,
37
+ colorhash_bytes,
38
+ crop_resistant_hash_bytes,
39
+ dhash_bytes,
40
+ dhash_vertical_bytes,
41
+ phash_bytes,
42
+ phash_simple_bytes,
43
+ whash_db4_bytes,
44
+ whash_db4_robust_bytes,
45
+ whash_haar_bytes,
46
+ # Decode helpers
47
+ decode_bytes,
48
+ decode_file,
49
+ )
50
+
51
+ # Re-export hash types so callers don't need to import rosetta_squint_hash
52
+ # separately.
53
+ from rosetta_squint_hash import (
54
+ ImageHash,
55
+ ImageMultiHash,
56
+ hex_to_flathash,
57
+ hex_to_hash,
58
+ hex_to_multihash,
59
+ )
60
+
61
# Must match the distribution version declared in the wheel METADATA (1.0.0).
__version__ = "1.0.0"
62
+
63
+ __all__ = [
64
+ "ImageHash",
65
+ "ImageMultiHash",
66
+ "hex_to_flathash",
67
+ "hex_to_hash",
68
+ "hex_to_multihash",
69
+ "decode_file",
70
+ "decode_bytes",
71
+ "average_hash",
72
+ "average_hash_bytes",
73
+ "colorhash",
74
+ "colorhash_bytes",
75
+ "crop_resistant_hash",
76
+ "crop_resistant_hash_bytes",
77
+ "dhash",
78
+ "dhash_bytes",
79
+ "dhash_vertical",
80
+ "dhash_vertical_bytes",
81
+ "phash",
82
+ "phash_bytes",
83
+ "phash_simple",
84
+ "phash_simple_bytes",
85
+ "whash_db4",
86
+ "whash_db4_bytes",
87
+ "whash_db4_robust",
88
+ "whash_db4_robust_bytes",
89
+ "whash_haar",
90
+ "whash_haar_bytes",
91
+ "__version__",
92
+ ]
@@ -0,0 +1,462 @@
1
+ """Implementation of the rosetta_squint convenience API.
2
+
3
+ For most formats we use PIL.Image.open() because that's what upstream
4
+ `imagehash` itself uses, so we match imagehash's behavior exactly. For
5
+ HEIC specifically, we decode via a ctypes wrapper around system libheif
6
+ (NOT pillow-heif, which bundles libheif 1.21.2 and diverges ±1 px from
7
+ the system libheif 1.17.6 that the 5 native ports link to).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import ctypes
13
+ import ctypes.util
14
+ import io
15
+ import os
16
+ import stat
17
+ import sys
18
+ from pathlib import Path
19
+ from typing import Union
20
+
21
+ import imagehash
22
+ import rosetta_squint_hash as rih
23
+ from PIL import Image
24
+
25
+
26
+ # Reject path-based decode of files that are too large or are non-regular
27
+ # (e.g., /dev/zero, named pipes, character devices) BEFORE reading bytes.
28
+ # Callers that genuinely need to process images larger than this threshold
29
+ # should decode via rosetta-squint-decode directly after explicit validation.
30
+ MAX_FILE_SIZE = 256 * 1024 * 1024 # 256 MiB
31
+
32
+
33
# Handle of the already-loaded libheif, or None until the first successful
# load. Failures are never cached, so a later install is picked up.
_LIBHEIF = None


def _first_loadable(names):
    """Return a CDLL for the first entry of ``names`` that loads, else None."""
    for name in names:
        try:
            return ctypes.CDLL(name)
        except OSError:
            continue
    return None


def _load_libheif_xplat() -> ctypes.CDLL:
    """Cross-platform libheif loader (result is cached after first success).

    Linux:   libheif.so.1, libheif.so
    macOS:   libheif.dylib (Homebrew unversioned) or libheif.1.dylib
    Windows: libheif.dll / libheif-1.dll
    Any:     ctypes.util.find_library("heif") as a last resort

    The well-known names are probed first; ``find_library`` is only
    consulted when none of them load, because on Linux it may shell out
    to ldconfig/gcc and is far too slow to run on every decode.

    Returns:
        The loaded ``ctypes.CDLL`` handle (the same object on every call
        once a load has succeeded).

    Raises:
        OSError: with a clear message if no candidate loads.
    """
    global _LIBHEIF
    if _LIBHEIF is not None:
        return _LIBHEIF

    if sys.platform == "darwin":
        candidates = ["libheif.dylib", "libheif.1.dylib"]
    elif sys.platform == "win32":
        candidates = ["libheif.dll", "libheif-1.dll"]
    else:
        candidates = ["libheif.so.1", "libheif.so"]

    lib = _first_loadable(candidates)
    if lib is None:
        # ctypes.util.find_library lets us pick up homebrew/macports/other
        # paths that the plain names above do not resolve to.
        found = ctypes.util.find_library("heif")
        if found:
            candidates.append(found)
            lib = _first_loadable([found])
    if lib is None:
        raise OSError(
            f"libheif not found. Tried: {', '.join(candidates)}. "
            f"Install via your package manager (apt install libheif-dev, "
            f"brew install libheif, etc.)."
        )
    _LIBHEIF = lib
    return lib
63
+
64
+ PathOrBytes = Union[str, Path, bytes, bytearray, memoryview]
65
+
66
+
67
class HeifError(ctypes.Structure):
    """ctypes mirror of C ``struct heif_error { code; subcode; const char* message; }``.

    libheif's fallible functions return this struct by value: two C ints
    followed by a pointer (16 bytes where pointers are 64-bit, 12 bytes
    where they are 32-bit). Declaring ``restype = ctypes.c_int64``
    instead (the previous behaviour) reads only the first 8 bytes and
    reinterprets them as one integer, which silently drops the message
    pointer and ignores the subcode.
    """

    _fields_ = (
        # Primary error code; _check_heif treats 0 as success.
        ("code", ctypes.c_int),
        # Secondary, more specific error code.
        ("subcode", ctypes.c_int),
        # NUL-terminated message; ctypes exposes it as bytes or None.
        ("message", ctypes.c_char_p),
    )
80
+
81
+
82
def _check_heif(err: "HeifError", op: str) -> None:
    """Turn a failing libheif error struct into a RuntimeError.

    Args:
        err: the error struct returned by a libheif call; ``code == 0``
            means success.
        op: name of the libheif operation, used in the exception text.

    Raises:
        RuntimeError: when ``err.code`` is non-zero; the message carries
            the code, subcode and (lossily UTF-8 decoded) libheif message.
    """
    if err.code == 0:
        return
    raw_message = err.message
    msg = raw_message.decode("utf-8", "replace") if raw_message else ""
    raise RuntimeError(
        f"libheif {op} failed: code={err.code} subcode={err.subcode} msg={msg}"
    )
89
+
90
+
91
+ # ─── Decode helpers ──────────────────────────────────────────────────────────
92
+
93
+
94
def _is_heic(path_or_first_bytes) -> bool:
    """Report whether the input starts with a HEIC-family ``ftyp`` box.

    Accepts either in-memory data (bytes / bytearray / memoryview) or
    anything ``open()`` accepts as a path; only the first 12 bytes are
    examined. Bytes 4..8 must be ``ftyp`` and bytes 8..12 must be one of
    the HEIC-family major brands. Mirrors what the 5 native ports do.
    """
    if isinstance(path_or_first_bytes, (bytes, bytearray, memoryview)):
        header = bytes(path_or_first_bytes[:12])
    else:
        with open(path_or_first_bytes, "rb") as stream:
            header = stream.read(12)

    if len(header) < 12:
        return False
    box_type, brand = header[4:8], header[8:12]
    if box_type != b"ftyp":
        return False
    return brand in {b"heic", b"heix", b"mif1", b"msf1", b"hevc", b"hevx"}
106
+
107
+
108
def _decode_heic_via_system_libheif(data: bytes) -> Image.Image:
    """Decode HEIC bytes through the system libheif via ctypes.

    Using the system library keeps the result identical to the 5 native
    ports (which all link to system libheif via FFI). pillow-heif would
    bundle libheif 1.21.2 and diverge from system libheif 1.17.6 by
    ±1 px on lossy fixtures.

    Args:
        data: the complete HEIC file contents.

    Returns:
        An ``RGBA`` image when the primary image has an alpha channel,
        otherwise an ``RGB`` image.

    Raises:
        OSError: if libheif cannot be loaded.
        RuntimeError: if any libheif call fails or returns unusable data.
    """
    lib = _load_libheif_xplat()
    void_p = ctypes.c_void_p
    c_int = ctypes.c_int

    # Declare prototypes for every entry point used below. Functions
    # returning ``struct heif_error`` by value need restype = HeifError.
    lib.heif_context_alloc.restype = void_p
    lib.heif_context_free.argtypes = [void_p]
    lib.heif_context_read_from_memory_without_copy.argtypes = [
        void_p,
        ctypes.c_char_p,
        ctypes.c_size_t,
        void_p,
    ]
    lib.heif_context_read_from_memory_without_copy.restype = HeifError
    lib.heif_context_get_primary_image_handle.argtypes = [
        void_p,
        ctypes.POINTER(void_p),
    ]
    lib.heif_context_get_primary_image_handle.restype = HeifError
    lib.heif_image_handle_release.argtypes = [void_p]
    lib.heif_image_handle_get_width.argtypes = [void_p]
    lib.heif_image_handle_get_width.restype = c_int
    lib.heif_image_handle_get_height.argtypes = [void_p]
    lib.heif_image_handle_get_height.restype = c_int
    lib.heif_image_handle_has_alpha_channel.argtypes = [void_p]
    lib.heif_image_handle_has_alpha_channel.restype = c_int
    lib.heif_decode_image.argtypes = [
        void_p,
        ctypes.POINTER(void_p),
        c_int,
        c_int,
        void_p,
    ]
    lib.heif_decode_image.restype = HeifError
    lib.heif_image_release.argtypes = [void_p]
    lib.heif_image_get_plane_readonly.argtypes = [
        void_p,
        c_int,
        ctypes.POINTER(c_int),
    ]
    lib.heif_image_get_plane_readonly.restype = void_p

    # Enum values passed to libheif.
    HEIF_COLORSPACE_RGB = 1
    HEIF_CHROMA_INTERLEAVED_RGB = 10
    HEIF_CHROMA_INTERLEAVED_RGBA = 11
    HEIF_CHANNEL_INTERLEAVED = 10

    ctx = lib.heif_context_alloc()
    if not ctx:
        raise RuntimeError("heif_context_alloc failed")
    try:
        # "without_copy": `data` must stay alive while ctx is in use; it
        # does, because it is a parameter of this function.
        status = lib.heif_context_read_from_memory_without_copy(
            ctx, data, len(data), None
        )
        _check_heif(status, "heif_context_read_from_memory_without_copy")

        handle_ref = void_p()
        status = lib.heif_context_get_primary_image_handle(
            ctx, ctypes.byref(handle_ref)
        )
        # The try starts BEFORE the error check so that whatever libheif
        # wrote into handle_ref is released even if _check_heif raises.
        try:
            _check_heif(status, "heif_context_get_primary_image_handle")
            handle = handle_ref.value
            if not handle:
                raise RuntimeError("heif_context_get_primary_image_handle failed")

            width = lib.heif_image_handle_get_width(handle)
            height = lib.heif_image_handle_get_height(handle)
            if width <= 0 or height <= 0:
                raise RuntimeError(f"invalid HEIC dimensions {width}x{height}")

            if lib.heif_image_handle_has_alpha_channel(handle) != 0:
                chroma, mode, channels = HEIF_CHROMA_INTERLEAVED_RGBA, "RGBA", 4
            else:
                chroma, mode, channels = HEIF_CHROMA_INTERLEAVED_RGB, "RGB", 3

            img_ref = void_p()
            status = lib.heif_decode_image(
                handle, ctypes.byref(img_ref), HEIF_COLORSPACE_RGB, chroma, None
            )
            # Same pattern: cleanup is armed before the error check.
            try:
                _check_heif(status, "heif_decode_image")
                img = img_ref.value
                if not img:
                    raise RuntimeError("heif_decode_image failed")

                stride_ref = c_int(0)
                plane = lib.heif_image_get_plane_readonly(
                    img, HEIF_CHANNEL_INTERLEAVED, ctypes.byref(stride_ref)
                )
                if not plane:
                    raise RuntimeError("null plane")

                row_bytes = width * channels
                stride = stride_ref.value
                if stride < row_bytes:
                    raise RuntimeError(
                        f"libheif returned stride {stride} smaller than "
                        f"required {row_bytes} (width={width} channels={channels})"
                    )

                # Rows may be padded to `stride`; copy only the pixel
                # bytes of each row so the buffer is tightly packed.
                packed = b"".join(
                    ctypes.string_at(plane + y * stride, row_bytes)
                    for y in range(height)
                )
                return Image.frombytes(mode, (width, height), packed)
            finally:
                if img_ref.value:
                    lib.heif_image_release(img_ref.value)
        finally:
            if handle_ref.value:
                lib.heif_image_handle_release(handle_ref.value)
    finally:
        lib.heif_context_free(ctx)
227
+
228
+
229
def _read_path_safely(path: Union[str, Path]) -> bytes:
    """Read the file at `path` after validating it on the same fd.

    The file is opened once and the regular-file / size guards run via
    ``fstat`` on that very descriptor, so the checks describe the inode
    the read will actually consume. A naive ``os.stat(path)`` followed by
    ``open(path).read()`` has a classic TOCTOU window: an attacker with
    write access along the path can swap the target between the two
    syscalls (file → symlink to /dev/zero, file → larger file, regular
    file → FIFO). Mirrors the fd-based stat-and-read pattern used in the
    other 5 squint ports.

    On POSIX the open uses ``O_NOFOLLOW`` so that a symlink at ``path``
    makes the open fail instead of silently resolving to whatever the
    link currently points at, and ``O_NONBLOCK`` so that opening a FIFO
    (which would otherwise block until a writer appears) returns
    immediately and can be rejected by the ``fstat`` check. Callers who
    genuinely want symlink resolution must do it explicitly (e.g.
    ``Path(path).resolve()``) before calling this function. Windows has
    no ``O_NOFOLLOW`` flag; an ``lstat`` pre-check is used instead.

    Args:
        path: location of the file to read.

    Returns:
        The full file contents (at most ``MAX_FILE_SIZE`` bytes).

    Raises:
        ValueError: if ``path`` is a symlink.
        RuntimeError: if the target is not a regular file or is larger
            than ``MAX_FILE_SIZE``.
        OSError: for any other failure to open or stat the file.
    """
    if sys.platform == "win32":
        # Windows: pre-check with os.lstat. There's a narrow race
        # between the lstat and the open below, but Windows lacks
        # O_NOFOLLOW and the alternative (reparse-point flags) would
        # require ctypes wrapping of CreateFileW.
        try:
            st_link = os.lstat(path)
        except OSError as e:
            raise OSError(e.errno, f"lstat failed for {path}: {e.strerror}") from e
        if stat.S_ISLNK(st_link.st_mode):
            raise ValueError(f"symlink not allowed: {path}")
        fd = os.open(path, os.O_RDONLY | os.O_BINARY)  # type: ignore[attr-defined]
    else:
        import errno as _errno

        # O_NONBLOCK has no effect on regular files, but without it a
        # read-only open of a FIFO blocks indefinitely — before we ever
        # reach the S_ISREG check that is supposed to reject it.
        flags = os.O_RDONLY | os.O_NOFOLLOW | getattr(os, "O_NONBLOCK", 0)
        try:
            fd = os.open(path, flags)
        except OSError as e:
            # O_NOFOLLOW reports a symlink in the final path component
            # as ELOOP (Linux, macOS) or EMLINK (FreeBSD). Translate to
            # a clearer error so callers can distinguish symlink
            # rejection from a generic I/O error.
            if e.errno in (_errno.ELOOP, _errno.EMLINK):
                raise ValueError(f"symlink not allowed: {path}") from e
            raise

    # Wrap the bare fd in a file object so it is closed with the `with`
    # block. If os.fdopen itself raises (e.g. the fd is a directory, or
    # MemoryError), the fd is still ours to close.
    try:
        f = os.fdopen(fd, "rb")
    except BaseException:
        os.close(fd)
        raise

    def _too_large(size: int) -> RuntimeError:
        return RuntimeError(
            f"input file too large: {size} bytes "
            f"(max {MAX_FILE_SIZE} bytes / 256 MiB). For images above this "
            f"threshold, decode via rosetta-squint-decode directly after "
            f"explicit validation."
        )

    with f:
        st = os.fstat(f.fileno())
        if not stat.S_ISREG(st.st_mode):
            raise RuntimeError(f"not a regular file: {path}")
        if st.st_size > MAX_FILE_SIZE:
            raise _too_large(st.st_size)
        # Read one byte more than the limit so that a file which grew
        # between fstat and read (e.g. a concurrent writer appending) is
        # detected: getting more than MAX_FILE_SIZE bytes means reject.
        data = f.read(MAX_FILE_SIZE + 1)
        if len(data) > MAX_FILE_SIZE:
            raise _too_large(len(data))
        return data
307
+
308
+
309
def decode_file(path: Union[str, Path]) -> Image.Image:
    """Load the file at `path` and decode it into a PIL.Image for hashing.

    The bytes are obtained through ``_read_path_safely``, which refuses
    symlinks (``O_NOFOLLOW`` on POSIX / ``lstat`` on Windows),
    non-regular files (FIFOs, /dev/zero, character devices, etc.) and
    files larger than MAX_FILE_SIZE, with the regular-file and size
    checks running against the same fd as the read. Callers who
    genuinely want symlink resolution must do it explicitly (e.g.
    ``Path(path).resolve()``) before calling this function.

    HEIC content goes to the system-libheif ctypes wrapper; everything
    else is handed to PIL.Image.open.
    """
    raw = _read_path_safely(path)
    if not _is_heic(raw):
        return Image.open(io.BytesIO(raw))
    return _decode_heic_via_system_libheif(raw)
326
+
327
+
328
def decode_bytes(data: bytes) -> Image.Image:
    """Decode raw image bytes into a PIL.Image.

    HEIC bytes use the ctypes wrapper around system libheif; everything
    else goes through PIL.Image.open.

    Args:
        data: the encoded image as ``bytes``, ``bytearray`` or
            ``memoryview`` (the latter two are copied to ``bytes``).

    Raises:
        TypeError: if ``data`` is not one of the accepted types. Without
            this guard a ``str``/path argument would be treated as a
            filename by the HEIC sniffing step and opened without any of
            the safety checks that ``decode_file`` applies.
    """
    if not isinstance(data, (bytes, bytearray, memoryview)):
        raise TypeError(
            "decode_bytes expects bytes, bytearray or memoryview, "
            f"got {type(data).__name__}"
        )
    if isinstance(data, (bytearray, memoryview)):
        data = bytes(data)
    if _is_heic(data):
        return _decode_heic_via_system_libheif(data)
    return Image.open(io.BytesIO(data))
337
+
338
+
339
+ # ─── Convenience hash functions ──────────────────────────────────────────────
340
+ # Each algorithm gets a (path, size) and a (_bytes, size) variant.
341
+
342
+
343
def average_hash(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.average_hash``."""
    image = decode_file(path)
    return rih.average_hash(image, hash_size=hash_size)
345
+
346
+
347
def average_hash_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.average_hash``."""
    image = decode_bytes(data)
    return rih.average_hash(image, hash_size=hash_size)
349
+
350
+
351
def phash(
    path: Union[str, Path],
    hash_size: int = 8,
    highfreq_factor: int = 4,
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.phash``."""
    image = decode_file(path)
    options = {"hash_size": hash_size, "highfreq_factor": highfreq_factor}
    return rih.phash(image, **options)
359
+
360
+
361
def phash_bytes(
    data: bytes, hash_size: int = 8, highfreq_factor: int = 4
) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.phash``."""
    image = decode_bytes(data)
    options = {"hash_size": hash_size, "highfreq_factor": highfreq_factor}
    return rih.phash(image, **options)
367
+
368
+
369
def phash_simple(
    path: Union[str, Path],
    hash_size: int = 8,
    highfreq_factor: int = 4,
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.phash_simple``."""
    image = decode_file(path)
    options = {"hash_size": hash_size, "highfreq_factor": highfreq_factor}
    return rih.phash_simple(image, **options)
377
+
378
+
379
def phash_simple_bytes(
    data: bytes, hash_size: int = 8, highfreq_factor: int = 4
) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.phash_simple``."""
    image = decode_bytes(data)
    options = {"hash_size": hash_size, "highfreq_factor": highfreq_factor}
    return rih.phash_simple(image, **options)
385
+
386
+
387
def dhash(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.dhash``."""
    image = decode_file(path)
    return rih.dhash(image, hash_size=hash_size)
389
+
390
+
391
def dhash_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.dhash``."""
    image = decode_bytes(data)
    return rih.dhash(image, hash_size=hash_size)
393
+
394
+
395
def dhash_vertical(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.dhash_vertical``."""
    image = decode_file(path)
    return rih.dhash_vertical(image, hash_size=hash_size)
399
+
400
+
401
def dhash_vertical_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.dhash_vertical``."""
    image = decode_bytes(data)
    return rih.dhash_vertical(image, hash_size=hash_size)
403
+
404
+
405
def whash_haar(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return ``rih.whash`` in Haar mode.

    Always passes ``mode="haar"`` and ``remove_max_haar_ll=True``.
    """
    image = decode_file(path)
    return rih.whash(
        image, hash_size=hash_size, mode="haar", remove_max_haar_ll=True
    )
414
+
415
+
416
def whash_haar_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return ``rih.whash`` in Haar mode.

    Always passes ``mode="haar"`` and ``remove_max_haar_ll=True``.
    """
    image = decode_bytes(data)
    return rih.whash(
        image, hash_size=hash_size, mode="haar", remove_max_haar_ll=True
    )
423
+
424
+
425
def whash_db4(path: Union[str, Path], hash_size: int = 8) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.whash_db4``."""
    image = decode_file(path)
    # Deliberately rih.whash_db4 — the port-local snap-applying override —
    # and NOT rih.whash(mode='db4'), which forwards to upstream imagehash
    # without the snap-to-threshold tie-break. See spec/SPEC.md
    # §"Threshold tie-break".
    return rih.whash_db4(image, hash_size=hash_size)
431
+
432
+
433
def whash_db4_bytes(data: bytes, hash_size: int = 8) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.whash_db4``."""
    image = decode_bytes(data)
    return rih.whash_db4(image, hash_size=hash_size)
435
+
436
+
437
def whash_db4_robust(
    path: Union[str, Path], hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.whash_db4_robust``."""
    image = decode_file(path)
    return rih.whash_db4_robust(image, hash_size=hash_size)
441
+
442
+
443
def whash_db4_robust_bytes(
    data: bytes, hash_size: int = 8
) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.whash_db4_robust``."""
    image = decode_bytes(data)
    return rih.whash_db4_robust(image, hash_size=hash_size)
447
+
448
+
449
def colorhash(path: Union[str, Path], binbits: int = 3) -> imagehash.ImageHash:
    """Decode the file at `path` and return its ``rih.colorhash``."""
    image = decode_file(path)
    return rih.colorhash(image, binbits=binbits)
451
+
452
+
453
def colorhash_bytes(data: bytes, binbits: int = 3) -> imagehash.ImageHash:
    """Decode in-memory image `data` and return its ``rih.colorhash``."""
    image = decode_bytes(data)
    return rih.colorhash(image, binbits=binbits)
455
+
456
+
457
def crop_resistant_hash(path: Union[str, Path]) -> imagehash.ImageMultiHash:
    """Decode the file at `path` and return its ``rih.crop_resistant_hash``."""
    image = decode_file(path)
    return rih.crop_resistant_hash(image)
459
+
460
+
461
def crop_resistant_hash_bytes(data: bytes) -> imagehash.ImageMultiHash:
    """Decode in-memory image `data` and return its ``rih.crop_resistant_hash``."""
    image = decode_bytes(data)
    return rih.crop_resistant_hash(image)
@@ -0,0 +1,95 @@
1
+ Metadata-Version: 2.4
2
+ Name: rosetta-squint
3
+ Version: 1.0.0
4
+ Summary: Cross-language byte-exact perceptual image hashing — decode + hash in one call
5
+ Author-email: Will Metcalf <william.metcalf@gmail.com>
6
+ License: BSD-2-Clause
7
+ Project-URL: Homepage, https://github.com/wmetcalf/rosetta-squint
8
+ Project-URL: Repository, https://github.com/wmetcalf/rosetta-squint
9
+ Project-URL: Issues, https://github.com/wmetcalf/rosetta-squint/issues
10
+ Project-URL: Changelog, https://github.com/wmetcalf/rosetta-squint/blob/main/CHANGELOG.md
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: rosetta-squint-hash<2.0,>=1.0.0
14
+ Requires-Dist: Pillow==12.2.*
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest>=7; extra == "test"
17
+
18
+ # rosetta_squint — Python convenience API
19
+
20
+ Point at an image file or pass in raw image bytes; get back the same perceptual hash hex string that every other `rosetta-squint` port produces for the same input.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install -e ../../hash/python # rosetta-squint-hash (wrapper around imagehash)
26
+ pip install -e . # this package
27
+ ```
28
+
29
+ (Not on PyPI yet — both are local.)
30
+
31
+ ## Usage
32
+
33
+ ```python
34
+ import rosetta_squint as rs
35
+
36
+ # Path on disk
37
+ h = rs.phash("photo.jpg", 8)
38
+ print(h) # "c3f8a1b27d0e4f96"
39
+
40
+ # Raw image bytes (from an HTTP response, a database BLOB, a multipart upload)
41
+ with open("photo.jpg", "rb") as f:
42
+ h = rs.phash_bytes(f.read(), 8)
43
+
44
+ # Every algorithm available has both flavors:
45
+ rs.average_hash(path, 8) # rs.average_hash_bytes(bytes, 8)
46
+ rs.phash(path, 8) # rs.phash_bytes(bytes, 8)
47
+ rs.phash_simple(path, 8) # rs.phash_simple_bytes(bytes, 8)
48
+ rs.dhash(path, 8) # rs.dhash_bytes(bytes, 8)
49
+ rs.dhash_vertical(path, 8) # rs.dhash_vertical_bytes(bytes, 8)
50
+ rs.whash_haar(path, 8) # rs.whash_haar_bytes(bytes, 8)
51
+ rs.whash_db4(path, 8) # rs.whash_db4_bytes(bytes, 8)
52
+ rs.whash_db4_robust(path, 8) # rs.whash_db4_robust_bytes(bytes, 8) — cross-port-stable
53
+ rs.colorhash(path, 3) # rs.colorhash_bytes(bytes, 3) — takes binbits
54
+ rs.crop_resistant_hash(path) # rs.crop_resistant_hash_bytes(bytes) — no size, returns ImageMultiHash
55
+
56
+ # Hex round-trips:
57
+ restored = rs.hex_to_hash("c3f8a1b27d0e4f96")
58
+ restored = rs.hex_to_flathash("...", hashsize=3)
59
+ restored = rs.hex_to_multihash("hex1,hex2,hex3")
60
+ ```
61
+
62
+ ## Cross-port equivalence
63
+
64
+ The output of `rs.phash("photo.jpg", 8)` is the same hex string as you'd get from the Rust, Go, Java, JS, or Swift `rosetta-squint` ports given the same byte input. **Verified live for `imagehash.png` at size 8: `ba8c84536bd3c366` across Python, Go, Java, JS, Swift.**
65
+
66
+ ## Decode strategy
67
+
68
+ | Format | Decoder | Why |
69
+ |---|---|---|
70
+ | BMP, PNG, GIF, JPEG, WebP, TIFF | PIL/Pillow (system) | The canonical Python decoders; the goldens used to validate the 5 native ports were generated by PIL itself, so output matches by construction. |
71
+ | HEIC | ctypes wrapper around system `libheif.so.1` | pillow-heif bundles libheif 1.21.2 in its wheel; the 5 native ports link to system libheif 1.17.6. The wrapper avoids the ±1 px divergence. |
72
+
73
+ If you already have a `PIL.Image.Image` (from `PIL.Image.open(...)`, a thumbnailer, etc.), use the `rosetta_squint_hash` lower-level API directly — the squint layer's only job is the decode step:
74
+
75
+ ```python
76
+ import rosetta_squint_hash as rih
77
+ from PIL import Image
78
+ img = Image.open("photo.jpg")
79
+ h = rih.phash(img, hash_size=8)
80
+ ```
81
+
82
+ ## Dependencies
83
+
84
+ - `rosetta_squint_hash` (which re-exports `imagehash==4.3.2` + adds `whash_db4_robust`)
85
+ - `Pillow==12.2.*`
86
+
87
+ Tight pins are intentional. See [`../../hash/python/README.md`](../../hash/python/README.md) under "Version policy" for the upgrade workflow.
88
+
89
+ ## Testing
90
+
91
+ ```bash
92
+ pytest
93
+ ```
94
+
95
+ Tests verify (1) path/bytes parity for every algorithm, (2) chain consistency between `rs.phash(path)` and `imagehash.phash(rs.decode_file(path))`, (3) cross-port byte-exact equality with Go/Java/JS for `imagehash.png`.
@@ -0,0 +1,6 @@
1
+ rosetta_squint/__init__.py,sha256=WS7jtOcUan-SQlotpcN2dGDyZgG58e6-u8wiRiXVg5A,2391
2
+ rosetta_squint/_impl.py,sha256=OI97L1meX-oDcx5uYIz2C7GIPvnOQxKCoeErH5k5vRk,17858
3
+ rosetta_squint-1.0.0.dist-info/METADATA,sha256=zrNzNFx7Z_MCdxrZltPjegom3TjOKJ7rSXzzzxW_EmY,4026
4
+ rosetta_squint-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ rosetta_squint-1.0.0.dist-info/top_level.txt,sha256=esQHc93VdXUXumPzwgdtqIqxhKO80WG_bzYg94LPNt4,15
6
+ rosetta_squint-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ rosetta_squint