phash_native 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +72 -0
- data/ext/phash_native/extconf.rb +9 -0
- data/ext/phash_native/phash_native.c +112 -0
- data/ext/phash_native/stb_image.h +7988 -0
- data/lib/phash_native/version.rb +3 -0
- data/lib/phash_native.rb +6 -0
- data/phash_native.gemspec +26 -0
- metadata +92 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: ec6f598c94538b3e622394dda53bd531269dfee837b969c7a1b7bf90a7a6c49e
|
|
4
|
+
data.tar.gz: 8fb3c0584f523de1ed12db2218fd076e8dcdf2bcf420414d3635a916e0859d46
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 358aedff50d8260fb12d259c5bd91304df4d91066fd7eda31d1fecda082448bc457e0c1ccc13bbde357b7cb27ad0c56df90afae51ec8e771f08934f6123c6dda
|
|
7
|
+
data.tar.gz: 7b78da8bfc5aedff3f18b790ef4de832d3c489661a1a15a0212e5ef5a58a177eb856aebe23e6b83a6b3fb5d3e77ad28e01ca22bb74fa0dd365b294ed16fc15e1
|
data/README.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# phash_native
|
|
2
|
+
|
|
3
|
+
A minimal, native, zero-dependency Ruby extension for computing [perceptual hashes (pHash)](https://www.phash.org/) from image files. Built entirely in C using `stb_image.h`, with no external dependencies outside the standard C library. Fast, simple, and does exactly one thing right.
|
|
4
|
+
|
|
5
|
+
## Why this exists
|
|
6
|
+
|
|
7
|
+
There are existing Ruby gems that attempt to do perceptual hashing (like `phashion`), but:
|
|
8
|
+
|
|
9
|
+
- They're old and abandonware.
|
|
10
|
+
- They rely on broken or outdated C++ libraries.
|
|
11
|
+
- They fail to compile on modern systems.
|
|
12
|
+
- They use external dependencies like FFTW or libjpeg, and end up being fragile and drifting out of compatibility over time.
|
|
13
|
+
|
|
14
|
+
I needed something that worked. Something that could:
|
|
15
|
+
|
|
16
|
+
- Read an image.
|
|
17
|
+
- Compute a stable 64-bit perceptual hash based on DCT.
|
|
18
|
+
- Compare hashes efficiently with Hamming distance.
|
|
19
|
+
- Compile without screwing around with a million packages.
|
|
20
|
+
|
|
21
|
+
So I wrote it.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
gem install phash_native
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Or in your Gemfile:
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
gem "phash_native"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
```ruby
|
|
38
|
+
require "phash_native"
|
|
39
|
+
|
|
40
|
+
hash = PhashNative.compute("some/image.png")
|
|
41
|
+
# => 64-bit integer, e.g. 0x4f393c7331c7e7cc
|
|
42
|
+
|
|
43
|
+
# Compare two images
|
|
44
|
+
a = PhashNative.compute("image1.png")
|
|
45
|
+
b = PhashNative.compute("image2.png")
|
|
46
|
+
PhashNative.hamming(a, b)
|
|
47
|
+
# => integer between 0 and 64
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Interpreting results
|
|
51
|
+
|
|
52
|
+
The closer two hashes are (i.e. the smaller the Hamming distance), the more visually similar the images are. A Hamming distance of 0 means they're identical in structure. Under 10 is usually a strong match. Over 20? Probably different content.
|
|
53
|
+
|
|
54
|
+
## Performance
|
|
55
|
+
|
|
56
|
+
The native code uses:
|
|
57
|
+
- A real 2D DCT on a 32×32 grayscale sample
|
|
58
|
+
- A simple mean threshold over the top-left 8×8 DCT block (DC term excluded)
|
|
59
|
+
- Fast bitwise Hamming comparison (utilizes GCC intrinsics if available)
|
|
60
|
+
|
|
61
|
+
This means it runs fast — fast enough for batch processing thousands of frames without sweating.
|
|
62
|
+
|
|
63
|
+
## Limitations
|
|
64
|
+
|
|
65
|
+
- Only supports images readable by `stb_image.h` (which covers PNG, JPEG, BMP, etc.). But what more could you need?
|
|
66
|
+
- Always converts to grayscale and downscales to 32×32 internally
|
|
67
|
+
- No audio or video support — this is just for still images
|
|
68
|
+
- No SIMD or threading — yet. PRs welcome.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
Yes, you could write your own. But now you don’t have to.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# ext/phash_native/extconf.rb
|
|
2
|
+
require 'mkmf'

# Strip these warning-suppression switches from the inherited CFLAGS before
# the Makefile is generated (GCC will emit warnings otherwise).
%w[
  -Wno-self-assign
  -Wno-parentheses-equality
  -Wno-constant-logical-operand
].each { |flag| $CFLAGS.gsub!(flag, '') }

# Build the extension so that it loads as "phash_native/phash_native".
create_makefile('phash_native/phash_native')
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// phash_native.c
|
|
2
|
+
#include <ruby.h>
|
|
3
|
+
#include <stdint.h>
|
|
4
|
+
#include <stdlib.h>
|
|
5
|
+
#include <math.h>
|
|
6
|
+
#define STB_IMAGE_IMPLEMENTATION
|
|
7
|
+
#include "stb_image.h"
|
|
8
|
+
|
|
9
|
+
#define HASH_SIZE 8
|
|
10
|
+
#define RESIZE_DIM 32
|
|
11
|
+
|
|
12
|
+
static double dct_matrix[RESIZE_DIM][RESIZE_DIM];
|
|
13
|
+
|
|
14
|
+
// Precompute DCT basis functions
|
|
15
|
+
static void init_dct_matrix(void) {
|
|
16
|
+
for (int u = 0; u < RESIZE_DIM; u++) {
|
|
17
|
+
for (int x = 0; x < RESIZE_DIM; x++) {
|
|
18
|
+
double coef = (u == 0) ? sqrt(1.0 / RESIZE_DIM) : sqrt(2.0 / RESIZE_DIM);
|
|
19
|
+
dct_matrix[u][x] = coef * cos(((2 * x + 1) * u * M_PI) / (2.0 * RESIZE_DIM));
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Perform 2D DCT on grayscale matrix
|
|
25
|
+
static void compute_dct(const double in[RESIZE_DIM][RESIZE_DIM], double out[RESIZE_DIM][RESIZE_DIM]) {
|
|
26
|
+
for (int u = 0; u < RESIZE_DIM; u++) {
|
|
27
|
+
for (int v = 0; v < RESIZE_DIM; v++) {
|
|
28
|
+
double sum = 0.0;
|
|
29
|
+
for (int x = 0; x < RESIZE_DIM; x++) {
|
|
30
|
+
for (int y = 0; y < RESIZE_DIM; y++) {
|
|
31
|
+
sum += dct_matrix[u][x] * in[x][y] * dct_matrix[v][y];
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
out[u][v] = sum;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Ruby-callable: compute the perceptual hash of the image file at `filepath`.
//
// filepath - Ruby String naming the image file (Check_Type rejects anything
//            else).
// Returns the hash as a Ruby Integer.
// Raises RuntimeError when stbi_load cannot load the file.
static VALUE compute_phash(VALUE self, VALUE filepath) {
    Check_Type(filepath, T_STRING);
    const char *filename = StringValueCStr(filepath);

    // Ask stb_image for exactly one channel so the buffer is grayscale.
    int width, height, channels;
    unsigned char *pixels = stbi_load(filename, &width, &height, &channels, 1);
    if (pixels == NULL) {
        rb_raise(rb_eRuntimeError, "Failed to load image: %s", filename);
    }

    // Shrink to RESIZE_DIM x RESIZE_DIM by picking one source pixel per cell.
    double gray[RESIZE_DIM][RESIZE_DIM];
    for (int row = 0; row < RESIZE_DIM; row++) {
        const int src_row = row * height / RESIZE_DIM;
        for (int col = 0; col < RESIZE_DIM; col++) {
            const int src_col = col * width / RESIZE_DIM;
            gray[row][col] = (double)pixels[src_row * width + src_col];
        }
    }

    double freq[RESIZE_DIM][RESIZE_DIM];
    compute_dct(gray, freq);

    // Walk the top-left HASH_SIZE x HASH_SIZE block in row-major order.
    // Flat index 0 is the DC term, which is skipped in both passes.
    const int cells = HASH_SIZE * HASH_SIZE;

    double total = 0.0;
    for (int i = 1; i < cells; i++) {
        total += freq[i / HASH_SIZE][i % HASH_SIZE];
    }
    const double avg = total / (HASH_SIZE * HASH_SIZE - 1);

    // One bit per remaining coefficient, earliest coefficient ends up in the
    // most significant used bit: set when the coefficient exceeds the mean.
    uint64_t bits = 0;
    for (int i = 1; i < cells; i++) {
        bits <<= 1;
        if (freq[i / HASH_SIZE][i % HASH_SIZE] > avg) {
            bits |= 1;
        }
    }

    stbi_image_free(pixels);
    return ULL2NUM(bits);
}
|
|
82
|
+
|
|
83
|
+
// Hamming distance calculation: the number of bit positions in which
// a and b differ (0..64).
#if defined(__GNUC__) || defined(__clang__)
// __builtin_popcountll is a compiler builtin, so no header is required. In
// particular <x86intrin.h> must not be included here: it is unavailable on
// non-x86 targets (e.g. ARM) and would break the build there.
static int hamming_distance(uint64_t a, uint64_t b) {
    return __builtin_popcountll(a ^ b);
}
#else
// Portable fallback: clear the lowest set bit until none remain, so the loop
// runs once per differing bit.
static int hamming_distance(uint64_t a, uint64_t b) {
    uint64_t diff = a ^ b;
    int count = 0;
    while (diff) {
        diff &= diff - 1;
        count++;
    }
    return count;
}
#endif
|
|
100
|
+
|
|
101
|
+
// Ruby-callable wrapper around hamming_distance().
//
// a_val, b_val - Ruby numerics converted with NUM2ULL (a_val first).
// Returns the number of differing bits as a Ruby Integer.
static VALUE hamming_distance_rb(VALUE self, VALUE a_val, VALUE b_val) {
    const uint64_t lhs = NUM2ULL(a_val);
    const uint64_t rhs = NUM2ULL(b_val);
    const int distance = hamming_distance(lhs, rhs);
    return INT2NUM(distance);
}
|
|
106
|
+
|
|
107
|
+
void Init_phash_native(void) {
|
|
108
|
+
init_dct_matrix();
|
|
109
|
+
VALUE mPhashNative = rb_define_module("PhashNative");
|
|
110
|
+
rb_define_singleton_method(mPhashNative, "compute", compute_phash, 1);
|
|
111
|
+
rb_define_singleton_method(mPhashNative, "hamming", hamming_distance_rb, 2);
|
|
112
|
+
}
|