phamilie 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,56 @@
1
+ [![Gem Version](https://img.shields.io/gem/v/phamilie.svg?style=flat)](https://rubygems.org/gems/phamilie)
2
+ [![Build Status](https://img.shields.io/travis/toy/phamilie/master.svg?style=flat)](https://travis-ci.org/toy/phamilie)
3
+
4
+ # Phamilie
5
+
6
+ Originally forked from [deepfryed/similie](https://github.com/deepfryed/similie).
7
+
8
+ Phamilie is a simple DCT based image hashing interface that,
9
+
10
+ * computes a fingerprint based on low frequencies of an image.
11
+ * computes hamming distance between 2 fingerprints.
12
+
13
+ ## Example
14
+
15
+ ```ruby
16
+ require 'phamilie'
17
+
18
+ phamilie = Phamilie.new
19
+
20
+ lena1 = 'spec/lena1.png'
21
+ lena2 = 'spec/lena2.png' # lena1.png cropped and scaled
22
+ lena5 = 'spec/lena5.png' # a different image
23
+ lena6 = 'spec/lena6.png' # lena2.png rotated and scaled
24
+
25
+ phamilie.fingerprint(lena1) #=> 36170087496991428
26
+
27
+ phamilie.distance(lena1, lena2) #=> 2
28
+ phamilie.distance(lena1, lena5) #=> 12
29
+
30
+ phamilie.distance(lena1, lena6) #=> 19
31
+ phamilie.distance(lena2, lena6) #=> 19
32
+ phamilie.distance(lena5, lena6) #=> 23
33
+ phamilie.distance_with_rotations(lena1, lena6) #=> 2
34
+ phamilie.distance_with_rotations(lena2, lena6) #=> 0
35
+ phamilie.distance_with_rotations(lena5, lena6) #=> 12
36
+ ```
37
+
38
+ ## Caching
39
+
40
+ By default a Hash is used to cache fingerprints by path. Be careful if the images or the current directory can change while the process is running.
41
+
42
+ As a cache you can use any object that responds to `[]` and `[]=`.
43
+
44
+ If you use a persistent cache, take the file size and mtime, or even a cryptographic hash of the contents, into account.
45
+
46
+ ## Dependencies
47
+
48
+ * ruby 1.9.1+
49
+ * CImg
50
+ * libpng if you need to read png images
51
+ * libjpeg if you need to read jpeg images
52
+ * ImageMagick if you need to read other images
53
+
54
+ ## License
55
+
56
+ GPL — using code from pHash library
@@ -0,0 +1 @@
1
+ /extconf.h
@@ -0,0 +1,12 @@
1
+ require 'mkmf'
2
+ # Link against libpng / libjpeg only when the corresponding header is found.
3
+ if have_header 'png.h'
4
+ $LDFLAGS << ' -lpng'
5
+ end
6
+
7
+ if have_header 'jpeglib.h'
8
+ $LDFLAGS << ' -ljpeg'
9
+ end
10
+
11
+ create_header # writes the header with the HAVE_* results (the C++ source includes "extconf.h")
12
+ create_makefile 'phamilie/fingerprint'
@@ -0,0 +1,239 @@
1
+ /*
2
+ Original similie gem
3
+ (c) Bharanee Rathna 2011
4
+
5
+ CC BY-SA 3.0
6
+ http://creativecommons.org/licenses/by-sa/3.0/
7
+
8
+ Free for every type of use. The author cannot be legally held responsible for
9
+ any damages resulting from the use of this work. All modifications or derivatives
10
+ need to be attributed.
11
+ */
12
+ /*
13
+ Original pHash library (ph_dct_matrix and ph_dct_imagehash functions)
14
+
15
+ pHash, the open source perceptual hash library
16
+ Copyright (C) 2009 Aetilius, Inc.
17
+ All rights reserved.
18
+
19
+ This program is free software: you can redistribute it and/or modify
20
+ it under the terms of the GNU General Public License as published by
21
+ the Free Software Foundation, either version 3 of the License, or
22
+ (at your option) any later version.
23
+
24
+ This program is distributed in the hope that it will be useful,
25
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
26
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
+ GNU General Public License for more details.
28
+
29
+ You should have received a copy of the GNU General Public License
30
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
31
+
32
+ Evan Klinger - eklinger@phash.org
33
+ D Grant Starkweather - dstarkweather@phash.org
34
+ */
35
+ /*
36
+ Reworking, adaptation, fixes
37
+ (c) Ivan Kuchin 2013
38
+ */
39
+
40
+ #include <ruby.h>
41
+ #include <ruby/encoding.h>
42
+
43
+ #include <stdio.h>
44
+ #include <stdlib.h>
45
+ #include <string.h>
46
+ #include <time.h>
47
+ #include <unistd.h>
48
+
49
+ #include "extconf.h"
50
+ #define cimg_display 0
51
+ #define cimg_verbosity 0
52
+
53
+ #ifdef HAVE_PNG_H
54
+ #define cimg_use_png
55
+ #endif
56
+
57
+ #ifdef HAVE_JPEGLIB_H
58
+ #define cimg_use_jpeg
59
+ #endif
60
+
61
+ #include "CImg.h"
62
+ using namespace cimg_library;
63
+
64
+ #define TO_S(v) rb_funcall(v, rb_intern("to_s"), 0)
65
+ #define CSTRING(v) RSTRING_PTR(TO_S(v))
66
+
67
+ #undef SIZET2NUM
68
+ #undef NUM2SIZET
69
+
70
+ #ifdef HAVE_LONG_LONG
71
+ #define SIZET2NUM(x) ULL2NUM(x)
72
+ #define NUM2SIZET(x) NUM2ULL(x)
73
+ #else
74
+ #define SIZET2NUM(x) ULONG2NUM(x)
75
+ #define NUM2SIZET(x) NUM2ULONG(x)
76
+ #endif
77
+
78
+ #define DCT_SIZE 32
79
+
80
+ #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
81
+ #define popcount __builtin_popcountll
82
+ #else
83
+ // http://en.wikipedia.org/wiki/Hamming_weight
84
+
85
+ const uint64_t m1 = 0x5555555555555555; //binary: 0101...
86
+ const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
87
+ const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ...
88
+ const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
89
+ // Fallback used when the GCC builtin is not selected above: returns the number of set bits in x.
90
+ int popcount(uint64_t x) {
91
+ x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
92
+ x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
93
+ x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
94
+ return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
95
+ }
96
+ #endif
97
+
98
+ CImg<float>* ph_dct_matrix(const int N) { // Builds the N x N DCT coefficient matrix; it is allocated with new and the caller must delete it.
99
+ CImg<float> *ptr_matrix = new CImg<float>(N, N, 1, 1, 1 / sqrt((float) N)); // every entry starts at 1/sqrt(N)
100
+ const float c1 = sqrt(2.0 / N);
101
+ for (int x = 0; x < N; x++){
102
+ for (int y = 1; y < N; y++){ // y starts at 1, so row y = 0 keeps the initial 1/sqrt(N)
103
+ *ptr_matrix->data(x, y) = c1 * cos((cimg::PI / 2 / N) * y * (2 * x + 1));
104
+ }
105
+ }
106
+ return ptr_matrix;
107
+ }
108
+
109
+ void small_mono_image(CImg<uint8_t> &img, CImg<float> &small) { // Reduces img to one filtered channel and stores it, resized to DCT_SIZE x DCT_SIZE, in small.
110
+ CImg<float> meanfilter(7, 7, 1, 1, 1); // 7x7 kernel with every entry set to 1
111
+ if (img.spectrum() == 3){ // 3 channels: convert RGB to YCbCr and keep channel 0
112
+ small = img.RGBtoYCbCr().channel(0).get_convolve(meanfilter); // NOTE(review): the non-get_ CImg calls appear to modify img in place - confirm callers do not reuse img
113
+ } else if (img.spectrum() == 4){ // 4 channels: keep channels 0..2 first (presumably dropping alpha - confirm), then as above
114
+ int width = img.width();
115
+ int height = img.height();
116
+ small = img.crop(0, 0, 0, 0, width - 1, height - 1, 0, 2).RGBtoYCbCr().channel(0).get_convolve(meanfilter);
117
+ } else { // any other channel count: channel 0 is filtered directly
118
+ small = img.channel(0).get_convolve(meanfilter);
119
+ }
120
+ small.resize(DCT_SIZE, DCT_SIZE, -100, -100, 2); // NOTE(review): -100 presumably means "keep 100%" for depth/channels and 2 selects the interpolation mode - confirm in the CImg docs
121
+ }
122
+
123
+ uint64_t small_mono_image_fingerprint(CImg<float> &small) { // Computes the 64-bit hash of an already reduced image.
124
+ uint64_t hash;
125
+
126
+ CImg<float> *C = ph_dct_matrix(DCT_SIZE); // heap-allocated; released by the delete below
127
+ CImg<float> Ctransp = C->get_transpose();
128
+ CImg<float> dctImage = (*C) * small * Ctransp; // C * small * C^T
129
+ CImg<float> subsec = dctImage.crop(1, 1, 8, 8).unroll('x'); // 8x8 block from (1,1) to (8,8), flattened to 64 values
130
+ // NOTE(review): if any CImg call above throws, C is not deleted - confirm whether that can happen here
131
+ float median = subsec.median();
132
+ uint64_t one = 0x0000000000000001;
133
+ hash = 0x0000000000000000;
134
+ for (int i = 0; i < 64; i++){ // bit i of the hash is set when value i is above the median
135
+ float current = subsec(i);
136
+ if (current > median)
137
+ hash |= one;
138
+ one = one << 1;
139
+ }
140
+
141
+ delete C;
142
+
143
+ return hash;
144
+ }
145
+
146
+ uint64_t image_fingerprint(CImg<uint8_t> &image) { // Reduces image with small_mono_image and returns the hash of the result.
147
+ CImg<float> small;
148
+
149
+ small_mono_image(image, small);
150
+
151
+ return small_mono_image_fingerprint(small);
152
+ }
153
+
154
+ void image_rotation_fingerprints(CImg<uint8_t> &image, uint64_t* phashs) { // Fills phashs[0..7] (caller provides room for 8 entries) with fingerprints of 8 orientations of image.
155
+ // The image is reduced once; the reduced copy is then mirrored/transposed in place, one step per fingerprint.
156
+
157
+ CImg<float> small;
158
+
159
+ small_mono_image(image, small);
160
+
161
+ phashs[0] = small_mono_image_fingerprint(small); // untransformed
162
+
163
+ small.mirror('x');
164
+ phashs[1] = small_mono_image_fingerprint(small); // mirrored along x
165
+
166
+ small.mirror('y');
167
+ phashs[2] = small_mono_image_fingerprint(small); // mirrored along x and y
168
+
169
+ small.mirror('x');
170
+ phashs[3] = small_mono_image_fingerprint(small); // x mirror undone: mirrored along y only
171
+
172
+ small.transpose();
173
+ phashs[4] = small_mono_image_fingerprint(small); // transpose applied on top of the y mirror
174
+
175
+ small.mirror('x');
176
+ phashs[5] = small_mono_image_fingerprint(small); // the same mirror sequence is repeated on the transposed image
177
+
178
+ small.mirror('y');
179
+ phashs[6] = small_mono_image_fingerprint(small);
180
+
181
+ small.mirror('x');
182
+ phashs[7] = small_mono_image_fingerprint(small);
183
+ }
184
+
185
+ VALUE rb_image_fingerprint_func(VALUE self, VALUE file) { // Phamilie::Fingerprint.fingerprint(file): loads the image at file and returns its fingerprint as a Ruby integer.
186
+ CImg<uint8_t> img;
187
+ try {
188
+ img.load(CSTRING(file)); // file is converted with to_s first, so any object with a suitable to_s is accepted
189
+ } catch (const CImgIOException &){ // caught by const reference: no copy of the exception object, and no unused name
190
+ rb_raise(rb_eArgError, "Invalid image or unsupported format: %s", CSTRING(file)); // NOTE(review): rb_raise does not return; confirm leaving the C++ handler this way is acceptable
191
+ }
192
+
193
+ uint64_t phash = image_fingerprint(img);
194
+
195
+ return SIZET2NUM(phash);
196
+ }
197
+
198
+ VALUE rb_image_rotation_fingerprints_func(VALUE self, VALUE file) { // Phamilie::Fingerprint.rotations(file): returns a Ruby array with the 8 orientation fingerprints of the image at file.
199
+ CImg<uint8_t> img;
200
+ try {
201
+ img.load(CSTRING(file)); // file is converted with to_s first
202
+ } catch (const CImgIOException &){ // caught by const reference: no copy of the exception object, and no unused name
203
+ rb_raise(rb_eArgError, "Invalid image or unsupported format: %s", CSTRING(file)); // NOTE(review): rb_raise does not return; confirm leaving the C++ handler this way is acceptable
204
+ }
205
+
206
+ uint64_t phashs[8] = {};
207
+ image_rotation_fingerprints(img, phashs);
208
+
209
+ VALUE rotations = rb_ary_new();
210
+
211
+ for (int i = 0; i < 8; i++) { // same order as produced by image_rotation_fingerprints; element 0 is the untransformed image
212
+ rb_ary_push(rotations, SIZET2NUM(phashs[i]));
213
+ }
214
+
215
+ return rotations;
216
+ }
217
+
218
+ VALUE rb_fingerprint_distance_func(VALUE self, VALUE fingerprint1, VALUE fingerprint2) { // Phamilie::Fingerprint.distance(a, b): number of differing bits between two fingerprints.
219
+ if (TYPE(fingerprint1) != T_BIGNUM && TYPE(fingerprint1) != T_FIXNUM) // only Ruby integers are accepted; anything else raises ArgumentError
220
+ rb_raise(rb_eArgError, "fingerprint1 needs to be a number");
221
+
222
+ if (TYPE(fingerprint2) != T_BIGNUM && TYPE(fingerprint2) != T_FIXNUM)
223
+ rb_raise(rb_eArgError, "fingerprint2 needs to be a number");
224
+
225
+ int dist = popcount(NUM2SIZET(fingerprint1) ^ NUM2SIZET(fingerprint2)); // XOR leaves a 1 wherever the two values differ
226
+
227
+ return INT2NUM(dist);
228
+ }
229
+
230
+ extern "C" { // C linkage for the extension entry point
231
+ void Init_fingerprint() { // Defines class Phamilie, module Phamilie::Fingerprint and its three singleton methods.
232
+ VALUE cPhamilie = rb_define_class("Phamilie", rb_cObject);
233
+ VALUE mFingerprint = rb_define_module_under(cPhamilie, "Fingerprint");
234
+
235
+ rb_define_singleton_method(mFingerprint, "fingerprint", RUBY_METHOD_FUNC(rb_image_fingerprint_func), 1); // one argument: file
236
+ rb_define_singleton_method(mFingerprint, "rotations", RUBY_METHOD_FUNC(rb_image_rotation_fingerprints_func), 1); // one argument: file
237
+ rb_define_singleton_method(mFingerprint, "distance", RUBY_METHOD_FUNC(rb_fingerprint_distance_func), 2); // two arguments: the fingerprints to compare
238
+ }
239
+ }
@@ -0,0 +1,62 @@
1
+ class Phamilie # Caching front end for Phamilie::Fingerprint: fingerprints and distances are looked up by image path.
2
+ def initialize(cache = {}) # cache: any object with a one-argument [] and a two-argument []=; ArgumentError otherwise
3
+ {:[] => 1, :[]= => 2}.each do |method, desired_arity|
4
+ unless cache.respond_to?(method)
5
+ raise ArgumentError.new("#{cache} does not respond to #{method}")
6
+ end
7
+
8
+ arity = cache.method(method).arity
9
+ unless arity == desired_arity
10
+ raise ArgumentError.new("#{cache} method #{method} arity should be #{desired_arity} instead of #{arity}")
11
+ end
12
+ end
13
+
14
+ @cache = cache
15
+ end
16
+ # Returns the fingerprint for path, computing and caching it when the cache has no entry.
17
+ def fingerprint(path)
18
+ case cached = @cache[path]
19
+ when Array # a rotations array is cached: its first element is returned as the fingerprint
20
+ cached[0]
21
+ when Integer
22
+ cached
23
+ when nil
24
+ @cache[path] = Fingerprint.fingerprint(path)
25
+ else
26
+ raise "Cache for #{path} contains non fingerprint #{cached}"
27
+ end
28
+ end
29
+ # Returns the rotations array for path; a cached plain fingerprint is replaced by the full array.
30
+ def rotations(path)
31
+ case cached = @cache[path]
32
+ when Array
33
+ cached
34
+ when Integer, nil # nothing usable cached yet: compute all rotations and store them
35
+ @cache[path] = Fingerprint.rotations(path)
36
+ else
37
+ raise "Cache for #{path} contains non fingerprint #{cached}"
38
+ end
39
+ end
40
+ # Distance between the plain fingerprints of the two images.
41
+ def distance(path_a, path_b)
42
+ Fingerprint.distance(fingerprint(path_a), fingerprint(path_b))
43
+ end
44
+ # Smallest distance between one image's fingerprint and every rotation fingerprint of the other.
45
+ def distance_with_rotations(path_a, path_b)
46
+ if @cache[path_a].is_a?(Array) || @cache[path_b].is_a?(Integer) # swap so that already cached values are reused where possible
47
+ path_a, path_b = path_b, path_a
48
+ end
49
+
50
+ fingerprint_a = if @cache[path_a].is_a?(Integer)
51
+ fingerprint(path_a)
52
+ else
53
+ rotations(path_a)[0] # no plain fingerprint cached: compute (or reuse) the rotations and take the first
54
+ end
55
+
56
+ rotations(path_b).map do |rotation_b|
57
+ Fingerprint.distance(fingerprint_a, rotation_b)
58
+ end.min
59
+ end
60
+ end
61
+
62
+ require 'phamilie/fingerprint.so'
@@ -0,0 +1,16 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'phamilie'
3
+ s.version = '0.1.0'
4
+ s.authors = ['Bharanee Rathna', 'Ivan Kuchin']
5
+ s.summary = 'compute image fingerprints and similarity'
6
+ s.description = 'phamilie is an image fingerprinting & comparison utility'
7
+ s.homepage = 'http://github.com/toy/phamilie'
8
+ s.license = 'GPL'
9
+ s.files = `git ls-files`.split("\n") # the file lists below come from git, so they are evaluated inside a git checkout
10
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
11
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ s.extensions = `git ls-files -- ext/**/extconf.rb`.split("\n") # build script(s) of the native extension
13
+ s.require_paths = %w[lib]
14
+
15
+ s.add_development_dependency 'rspec'
16
+ end
@@ -0,0 +1,9 @@
1
+ #!/bin/sh
2
+ # Derives rotation1.png .. rotation7.png from rotation0.png, one convert call per flipped/rotated variant.
3
+ convert rotation0.png -flop rotation1.png
4
+ convert rotation0.png -rotate 180 rotation2.png
5
+ convert rotation0.png -flip rotation3.png
6
+ convert rotation0.png -transpose rotation4.png
7
+ convert rotation0.png -rotate 270 rotation5.png
8
+ convert rotation0.png -transverse rotation6.png
9
+ convert rotation0.png -rotate 90 rotation7.png
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,134 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'ext')
3
+ require 'rspec'
4
+ require 'phamilie'
5
+ require 'pathname'
6
+
7
+ describe 'Phamilie fingerprinting' do
8
+ DIR = Pathname(__FILE__).dirname # directory of this spec file; image paths are built relative to it (also used by the describe blocks below)
9
+
10
+ it 'should fingerprint image' do
11
+ expect(Phamilie.new.fingerprint(DIR + 'lena1.png')).not_to be(nil)
12
+ end
13
+
14
+ it 'should barf on invalid path' do
15
+ expect{ Phamilie.new.fingerprint(DIR + 'foo') }.to raise_error(ArgumentError)
16
+ end
17
+
18
+ it 'should barf on non image' do
19
+ expect{ Phamilie.new.fingerprint(__FILE__) }.to raise_error(ArgumentError) # this spec file itself serves as the non-image input
20
+ end
21
+
22
+ LENA1_FINGERPRINT = 11112265815244395537 # expected fingerprint of lena1.png
23
+
24
+ it 'should fingerprint image' do
25
+ fingerprint = Phamilie.new.fingerprint(DIR + 'lena1.png')
26
+ expect(fingerprint).to eq(LENA1_FINGERPRINT)
27
+ end
28
+
29
+ it 'should fingerprint image rotations' do
30
+ rotations = Phamilie.new.rotations(DIR + 'lena1.png')
31
+ expect(rotations[0]).to eq(LENA1_FINGERPRINT) # the first rotation entry equals the plain fingerprint
32
+ end
33
+ end
34
+
35
+ describe 'Phamilie image distance' do
36
+ it 'should work for similar images' do
37
+ phamilie = Phamilie.new
38
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
39
+ images.unshift nil # shift indices so that images[n] is lena<n>.png
40
+
41
+ expect(phamilie.distance(images[1], images[2])).to eq(0)
42
+ expect(phamilie.distance(images[2], images[3])).to eq(26)
43
+ expect(phamilie.distance(images[3], images[4])).to eq(26)
44
+ expect(phamilie.distance(images[1], images[4])).to eq(2)
45
+ expect(phamilie.distance(images[1], images[5])).to eq(32)
46
+ end
47
+ end
48
+
49
+ describe 'Phamilie caching' do
50
+ it 'should use cache' do
51
+ phamilie = Phamilie.new
52
+
53
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
54
+
55
+ images.each do |image|
56
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(image).and_return(image.__id__) # stubbed: each path may be fingerprinted exactly once; its object id stands in for the fingerprint
57
+ end
58
+
59
+ images.permutation(2) do |a, b| # every ordered pair, so each image is requested several times
60
+ phamilie.distance(a, b)
61
+ end
62
+ end
63
+ end
64
+
65
+ describe 'Phamilie image reoriented distance' do
66
+ it 'should work for identical but rotated images' do
67
+ phamilie = Phamilie.new
68
+
69
+ images = (0..7).map{ |n| DIR + 'rotation%d.png' % n }
70
+
71
+ images.permutation(2) do |a, b|
72
+ expect(phamilie.distance(a, b)).not_to eq(0) # plain fingerprints of two different orientations must differ
73
+ expect(phamilie.distance_with_rotations(a, b)).to eq(0) # but some rotation of b must match a exactly
74
+ end
75
+ end
76
+ end
77
+
78
+ describe 'Phamilie caching with rotations' do
79
+ it 'should use cache' do
80
+ phamilie = Phamilie.new
81
+
82
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
83
+
84
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).exactly(images.length).times.and_return(0)
85
+ expect(Phamilie::Fingerprint).to receive(:rotations).exactly(images.length - 1).times.and_return(8.times.to_a)
86
+
87
+ images.permutation(2) do |a, b|
88
+ phamilie.distance(a, b)
89
+ end
90
+
91
+ images.permutation(2) do |a, b|
92
+ phamilie.distance_with_rotations(a, b)
93
+ end
94
+
95
+ images.permutation(2) do |a, b|
96
+ phamilie.distance(a, b)
97
+ end
98
+ end
99
+
100
+ it 'should calculate rotation fingerprints only when required' do
101
+ phamilie = Phamilie.new
102
+
103
+ images = (1..2).map{ |n| DIR + 'lena%d.png' % n }
104
+
105
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(images[0]).and_return(0)
106
+ expect(Phamilie::Fingerprint).to receive(:rotations).once.with(images[0]).and_return(8.times.to_a)
107
+
108
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(images[1]).and_return(0)
109
+ expect(Phamilie::Fingerprint).not_to receive(:rotations).with(images[1])
110
+
111
+ phamilie.distance(images[0], images[1])
112
+ phamilie.distance(images[1], images[0])
113
+ phamilie.distance_with_rotations(images[0], images[1])
114
+ phamilie.distance_with_rotations(images[1], images[0])
115
+ end
116
+
117
+ it 'should calculate rotation fingerprints when it should be more effective' do
118
+ phamilie = Phamilie.new
119
+
120
+ images = (1..2).map{ |n| DIR + 'lena%d.png' % n }
121
+
122
+ expect(Phamilie::Fingerprint).not_to receive(:fingerprint).with(images[0])
123
+ expect(Phamilie::Fingerprint).to receive(:rotations).once.with(images[0]).and_return(8.times.to_a)
124
+
125
+ expect(Phamilie::Fingerprint).not_to receive(:fingerprint).with(images[0])
126
+ expect(Phamilie::Fingerprint).to receive(:rotations).with(images[1]).and_return(8.times.to_a)
127
+
128
+ phamilie.distance_with_rotations(images[0], images[1])
129
+ phamilie.distance_with_rotations(images[1], images[0])
130
+ phamilie.distance(images[0], images[1])
131
+ phamilie.distance(images[1], images[0])
132
+ end
133
+
134
+ end