phamilie 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ [![Gem Version](https://img.shields.io/gem/v/phamilie.svg?style=flat)](https://rubygems.org/gems/phamilie)
2
+ [![Build Status](https://img.shields.io/travis/toy/phamilie/master.svg?style=flat)](https://travis-ci.org/toy/phamilie)
3
+
4
+ # Phamilie
5
+
6
+ Originally forked from [deepfryed/similie](https://github.com/deepfryed/similie).
7
+
8
+ Phamilie is a simple DCT-based image hashing interface that:
9
+
10
+ * computes a fingerprint based on low frequencies of an image.
11
+ * computes hamming distance between 2 fingerprints.
12
+
13
+ ## Example
14
+
15
+ ```ruby
16
+ require 'phamilie'
17
+
18
+ phamilie = Phamilie.new
19
+
20
+ lena1 = 'spec/lena1.png'
21
+ lena2 = 'spec/lena2.png' # lena1.png cropped and scaled
22
+ lena5 = 'spec/lena5.png' # a different image
23
+ lena6 = 'spec/lena6.png' # lena2.png rotated and scaled
24
+
25
+ phamilie.fingerprint(lena1) #=> 36170087496991428
26
+
27
+ phamilie.distance(lena1, lena2) #=> 2
28
+ phamilie.distance(lena1, lena5) #=> 12
29
+
30
+ phamilie.distance(lena1, lena6) #=> 19
31
+ phamilie.distance(lena2, lena6) #=> 19
32
+ phamilie.distance(lena5, lena6) #=> 23
33
+ phamilie.distance_with_rotations(lena1, lena6) #=> 2
34
+ phamilie.distance_with_rotations(lena2, lena6) #=> 0
35
+ phamilie.distance_with_rotations(lena5, lena6) #=> 12
36
+ ```
37
+
38
+ ## Caching
39
+
40
+ By default a Hash is used to cache fingerprints by path. Be careful if the images or the current directory can change while the process is running.
41
+
42
+ As a cache you can use any object that responds to `[]` and `[]=`.
43
+
44
+ If you use a persistent cache, take the file size and mtime, or even a cryptographic hash of the contents, into account.
45
+
46
+ ## Dependencies
47
+
48
+ * ruby 1.9.1+
49
+ * CImg
50
+ * libpng if you need to read png images
51
+ * libjpeg if you need to read jpeg images
52
+ * ImageMagick if you need to read other images
53
+
54
+ ## License
55
+
56
+ GPL — using code from pHash library
@@ -0,0 +1 @@
1
+ /extconf.h
@@ -0,0 +1,12 @@
1
+ require 'mkmf'
2
+ # Optional image libraries: the linker flag is added only when the matching header is found.
3
+ if have_header 'png.h'
4
+ $LDFLAGS << ' -lpng'
5
+ end
6
+
7
+ if have_header 'jpeglib.h'
8
+ $LDFLAGS << ' -ljpeg'
9
+ end
10
+ # extconf.h is included by the extension source, which tests HAVE_PNG_H / HAVE_JPEGLIB_H.
11
+ create_header
12
+ create_makefile 'phamilie/fingerprint'
@@ -0,0 +1,239 @@
1
+ /*
2
+ Original similie gem
3
+ (c) Bharanee Rathna 2011
4
+
5
+ CC BY-SA 3.0
6
+ http://creativecommons.org/licenses/by-sa/3.0/
7
+
8
+ Free for every type of use. The author cannot be legally held responsible for
9
+ any damages resulting from the use of this work. All modifications or derivatives
10
+ need to be attributed.
11
+ */
12
+ /*
13
+ Original pHash library (ph_dct_matrix and ph_dct_imagehash functions)
14
+
15
+ pHash, the open source perceptual hash library
16
+ Copyright (C) 2009 Aetilius, Inc.
17
+ All rights reserved.
18
+
19
+ This program is free software: you can redistribute it and/or modify
20
+ it under the terms of the GNU General Public License as published by
21
+ the Free Software Foundation, either version 3 of the License, or
22
+ (at your option) any later version.
23
+
24
+ This program is distributed in the hope that it will be useful,
25
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
26
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
+ GNU General Public License for more details.
28
+
29
+ You should have received a copy of the GNU General Public License
30
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
31
+
32
+ Evan Klinger - eklinger@phash.org
33
+ D Grant Starkweather - dstarkweather@phash.org
34
+ */
35
+ /*
36
+ Reworking, adaptation, fixes
37
+ (c) Ivan Kuchin 2013
38
+ */
39
+
40
+ #include <ruby.h>
41
+ #include <ruby/encoding.h>
42
+
43
+ #include <stdio.h>
44
+ #include <stdlib.h>
45
+ #include <string.h>
46
+ #include <time.h>
47
+ #include <unistd.h>
48
+
49
+ #include "extconf.h"
50
+ #define cimg_display 0
51
+ #define cimg_verbosity 0
52
+
53
+ #ifdef HAVE_PNG_H
54
+ #define cimg_use_png
55
+ #endif
56
+
57
+ #ifdef HAVE_JPEGLIB_H
58
+ #define cimg_use_jpeg
59
+ #endif
60
+
61
+ #include "CImg.h"
62
+ using namespace cimg_library;
63
+
64
+ #define TO_S(v) rb_funcall(v, rb_intern("to_s"), 0) // calls to_s on the Ruby value v
65
+ #define CSTRING(v) RSTRING_PTR(TO_S(v)) // character pointer of v.to_s
66
+ // Replace any existing SIZET2NUM / NUM2SIZET with the widest unsigned conversions available.
67
+ #undef SIZET2NUM
68
+ #undef NUM2SIZET
69
+
70
+ #ifdef HAVE_LONG_LONG
71
+ #define SIZET2NUM(x) ULL2NUM(x)
72
+ #define NUM2SIZET(x) NUM2ULL(x)
73
+ #else
74
+ #define SIZET2NUM(x) ULONG2NUM(x)
75
+ #define NUM2SIZET(x) NUM2ULONG(x)
76
+ #endif
77
+
78
+ #define DCT_SIZE 32
79
+
80
+ #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
81
+ #define popcount __builtin_popcountll // compilers reporting GCC 3.4 or newer use the builtin
82
+ #else
83
+ // http://en.wikipedia.org/wiki/Hamming_weight
84
+ // Fallback used otherwise: counts the set bits of a 64-bit value with the masks below.
85
+ const uint64_t m1 = 0x5555555555555555; //binary: 0101...
86
+ const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
87
+ const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ...
88
+ const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
89
+
90
+ int popcount(uint64_t x) {
91
+ x -= (x >> 1) & m1; //put count of each 2 bits into those 2 bits
92
+ x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
93
+ x = (x + (x >> 4)) & m4; //put count of each 8 bits into those 8 bits
94
+ return (x * h01)>>56; //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
95
+ }
96
+ #endif
97
+
98
+ CImg<float>* ph_dct_matrix(const int N) { // Builds an N x N DCT coefficient matrix with new; the caller has to delete it.
99
+ CImg<float> *ptr_matrix = new CImg<float>(N, N, 1, 1, 1 / sqrt((float) N)); // every cell starts at 1/sqrt(N)
100
+ const float c1 = sqrt(2.0 / N);
101
+ for (int x = 0; x < N; x++){
102
+ for (int y = 1; y < N; y++){ // y starts at 1, so cells with y == 0 keep the initial 1/sqrt(N)
103
+ *ptr_matrix->data(x, y) = c1 * cos((cimg::PI / 2 / N) * y * (2 * x + 1));
104
+ }
105
+ }
106
+ return ptr_matrix;
107
+ }
108
+
109
+ void small_mono_image(CImg<uint8_t> &img, CImg<float> &small) { // Fills small with a filtered single channel version of img resized to DCT_SIZE x DCT_SIZE.
110
+ CImg<float> meanfilter(7, 7, 1, 1, 1); // 7 x 7 convolution kernel
111
+ if (img.spectrum() == 3){ // three channels: converted with RGBtoYCbCr, channel 0 is kept
112
+ small = img.RGBtoYCbCr().channel(0).get_convolve(meanfilter);
113
+ } else if (img.spectrum() == 4){ // four channels: cropped to channels 0..2 first, then handled like the three channel case
114
+ int width = img.width();
115
+ int height = img.height();
116
+ small = img.crop(0, 0, 0, 0, width - 1, height - 1, 0, 2).RGBtoYCbCr().channel(0).get_convolve(meanfilter);
117
+ } else { // any other channel count: channel 0 is used directly
118
+ small = img.channel(0).get_convolve(meanfilter);
119
+ }
120
+ small.resize(DCT_SIZE, DCT_SIZE, -100, -100, 2); // NOTE(review): the calls without a get_ prefix above presumably modify img itself - confirm against the CImg docs
121
+ }
122
+
123
+ uint64_t small_mono_image_fingerprint(CImg<float> &small) { // Computes the 64-bit fingerprint of an already reduced image.
124
+ uint64_t hash;
125
+ // dctImage = C * small * transpose(C), with C the DCT_SIZE matrix from ph_dct_matrix
126
+ CImg<float> *C = ph_dct_matrix(DCT_SIZE);
127
+ CImg<float> Ctransp = C->get_transpose();
128
+ CImg<float> dctImage = (*C) * small * Ctransp;
129
+ CImg<float> subsec = dctImage.crop(1, 1, 8, 8).unroll('x'); // the 8 x 8 region from (1, 1) to (8, 8), unrolled to 64 values
130
+ // bit i of the hash is set when value i is greater than the median of the 64 values
131
+ float median = subsec.median();
132
+ uint64_t one = 0x0000000000000001;
133
+ hash = 0x0000000000000000;
134
+ for (int i = 0; i < 64; i++){
135
+ float current = subsec(i);
136
+ if (current > median)
137
+ hash |= one;
138
+ one = one << 1;
139
+ }
140
+ // the matrix was allocated with new inside ph_dct_matrix
141
+ delete C;
142
+
143
+ return hash;
144
+ }
145
+
146
+ uint64_t image_fingerprint(CImg<uint8_t> &image) { // Fingerprint of image: reduce it with small_mono_image, then hash the reduced image.
147
+ CImg<float> reduced;
148
+ small_mono_image(image, reduced);
149
+
150
+ const uint64_t fingerprint = small_mono_image_fingerprint(reduced);
151
+ return fingerprint;
152
+ }
153
+
154
+ void image_rotation_fingerprints(CImg<uint8_t> &image, uint64_t* phashs) { // Writes eight fingerprints of image into phashs[0] .. phashs[7]; phashs must have room for 8 values.
155
+ // The reduced image is transformed cumulatively below; each hash describes it after all steps applied so far.
156
+
157
+ CImg<float> small;
158
+
159
+ small_mono_image(image, small);
160
+
161
+ phashs[0] = small_mono_image_fingerprint(small);
162
+
163
+ small.mirror('x');
164
+ phashs[1] = small_mono_image_fingerprint(small);
165
+
166
+ small.mirror('y');
167
+ phashs[2] = small_mono_image_fingerprint(small);
168
+
169
+ small.mirror('x');
170
+ phashs[3] = small_mono_image_fingerprint(small);
171
+
172
+ small.transpose();
173
+ phashs[4] = small_mono_image_fingerprint(small);
174
+
175
+ small.mirror('x');
176
+ phashs[5] = small_mono_image_fingerprint(small);
177
+
178
+ small.mirror('y');
179
+ phashs[6] = small_mono_image_fingerprint(small);
180
+
181
+ small.mirror('x');
182
+ phashs[7] = small_mono_image_fingerprint(small);
183
+ }
184
+
185
+ VALUE rb_image_fingerprint_func(VALUE self, VALUE file) { // Loads the image named by file.to_s and returns its fingerprint as a Ruby Integer; raises ArgumentError when loading fails.
186
+ CImg<uint8_t> img;
187
+ bool loaded = true;
188
+ try {
189
+ img.load(CSTRING(file));
190
+ } catch (const CImgIOException &) { loaded = false; } // caught by reference, only the failure is recorded here
191
+ if (!loaded) // raise after the handler has finished: rb_raise does not return, so it must not run inside the catch block
192
+ rb_raise(rb_eArgError, "Invalid image or unsupported format: %s", CSTRING(file));
193
+ uint64_t phash = image_fingerprint(img);
194
+
195
+ return SIZET2NUM(phash);
196
+ }
197
+
198
+ VALUE rb_image_rotation_fingerprints_func(VALUE self, VALUE file) { // Loads the image named by file.to_s and returns a Ruby Array of its eight fingerprints; raises ArgumentError when loading fails.
199
+ CImg<uint8_t> img;
200
+ bool loaded = true;
201
+ try {
202
+ img.load(CSTRING(file));
203
+ } catch (const CImgIOException &) { loaded = false; } // caught by reference, only the failure is recorded here
204
+ if (!loaded) // raise after the handler has finished: rb_raise does not return, so it must not run inside the catch block
205
+ rb_raise(rb_eArgError, "Invalid image or unsupported format: %s", CSTRING(file));
206
+ uint64_t phashs[8] = {};
207
+ image_rotation_fingerprints(img, phashs);
208
+
209
+ VALUE rotations = rb_ary_new();
210
+
211
+ for (int i = 0; i < 8; i++) {
212
+ rb_ary_push(rotations, SIZET2NUM(phashs[i]));
213
+ }
214
+
215
+ return rotations;
216
+ }
217
+
218
+ VALUE rb_fingerprint_distance_func(VALUE self, VALUE fingerprint1, VALUE fingerprint2) { // Number of differing bits between two fingerprints; both must be Fixnum or Bignum, otherwise ArgumentError is raised.
219
+ const int type1 = TYPE(fingerprint1), type2 = TYPE(fingerprint2);
220
+ if (!(type1 == T_BIGNUM || type1 == T_FIXNUM))
221
+ rb_raise(rb_eArgError, "fingerprint1 needs to be a number");
222
+ if (!(type2 == T_BIGNUM || type2 == T_FIXNUM))
223
+ rb_raise(rb_eArgError, "fingerprint2 needs to be a number");
224
+
225
+ const uint64_t differing = NUM2SIZET(fingerprint1) ^ NUM2SIZET(fingerprint2);
226
+ // every set bit of the XOR marks a position where the fingerprints disagree
227
+ return INT2NUM(popcount(differing));
228
+ }
229
+
230
+ extern "C" { // C linkage for the function below
231
+ void Init_fingerprint() { // Defines Phamilie::Fingerprint with the singleton methods fingerprint, rotations and distance.
232
+ VALUE cPhamilie = rb_define_class("Phamilie", rb_cObject);
233
+ VALUE mFingerprint = rb_define_module_under(cPhamilie, "Fingerprint");
234
+ // the last argument is the number of Ruby arguments each method takes
235
+ rb_define_singleton_method(mFingerprint, "fingerprint", RUBY_METHOD_FUNC(rb_image_fingerprint_func), 1);
236
+ rb_define_singleton_method(mFingerprint, "rotations", RUBY_METHOD_FUNC(rb_image_rotation_fingerprints_func), 1);
237
+ rb_define_singleton_method(mFingerprint, "distance", RUBY_METHOD_FUNC(rb_fingerprint_distance_func), 2);
238
+ }
239
+ }
@@ -0,0 +1,62 @@
1
+ class Phamilie # Computes image fingerprints through Phamilie::Fingerprint and keeps them, keyed by path, in the supplied cache object.
2
+ def initialize(cache = {}) # cache must respond to [] with arity 1 and []= with arity 2, otherwise ArgumentError is raised
3
+ {:[] => 1, :[]= => 2}.each do |method, desired_arity|
4
+ unless cache.respond_to?(method)
5
+ raise ArgumentError.new("#{cache} does not respond to #{method}")
6
+ end
7
+
8
+ arity = cache.method(method).arity
9
+ unless arity == desired_arity
10
+ raise ArgumentError.new("#{cache} method #{method} arity should be #{desired_arity} instead of #{arity}")
11
+ end
12
+ end
13
+
14
+ @cache = cache
15
+ end
16
+ # Returns the fingerprint for path. A cached Array (rotations) yields its element 0, a cached Integer is returned as is, a missing entry is computed and stored; anything else raises.
17
+ def fingerprint(path)
18
+ case cached = @cache[path]
19
+ when Array
20
+ cached[0]
21
+ when Integer
22
+ cached
23
+ when nil
24
+ @cache[path] = Fingerprint.fingerprint(path)
25
+ else
26
+ raise "Cache for #{path} contains non fingerprint #{cached}"
27
+ end
28
+ end
29
+ # Returns the Array of rotation fingerprints for path. A cached Integer or a missing entry is replaced by the freshly computed Array; anything else raises.
30
+ def rotations(path)
31
+ case cached = @cache[path]
32
+ when Array
33
+ cached
34
+ when Integer, nil
35
+ @cache[path] = Fingerprint.rotations(path)
36
+ else
37
+ raise "Cache for #{path} contains non fingerprint #{cached}"
38
+ end
39
+ end
40
+ # Distance between the fingerprints of the two paths, as computed by Fingerprint.distance.
41
+ def distance(path_a, path_b)
42
+ Fingerprint.distance(fingerprint(path_a), fingerprint(path_b))
43
+ end
44
+ # Smallest distance between one fingerprint of the first image and each rotation fingerprint of the second.
45
+ def distance_with_rotations(path_a, path_b)
46
+ if @cache[path_a].is_a?(Array) || @cache[path_b].is_a?(Integer) # swap so that existing cache entries are reused
47
+ path_a, path_b = path_b, path_a
48
+ end
49
+ # use the cached Integer when there is one, otherwise element 0 of the rotations
50
+ fingerprint_a = if @cache[path_a].is_a?(Integer)
51
+ fingerprint(path_a)
52
+ else
53
+ rotations(path_a)[0]
54
+ end
55
+
56
+ rotations(path_b).map do |rotation_b|
57
+ Fingerprint.distance(fingerprint_a, rotation_b)
58
+ end.min
59
+ end
60
+ end
61
+
62
+ require 'phamilie/fingerprint.so'
@@ -0,0 +1,16 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'phamilie'
3
+ s.version = '0.1.0'
4
+ s.authors = ['Bharanee Rathna', 'Ivan Kuchin']
5
+ s.summary = 'compute image fingerprints and similarity'
6
+ s.description = 'phamilie is an image fingerprinting & comparison utility'
7
+ s.homepage = 'http://github.com/toy/phamilie'
8
+ s.license = 'GPL'
9
+ s.files = `git ls-files`.split("\n") # file lists come from git, so the gem has to be built inside a git checkout
10
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
11
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ s.extensions = `git ls-files -- ext/**/extconf.rb`.split("\n")
13
+ s.require_paths = %w[lib]
14
+ # rspec is declared as a development dependency only
15
+ s.add_development_dependency 'rspec'
16
+ end
@@ -0,0 +1,9 @@
1
+ #!/bin/sh
2
+ # Writes rotation1.png .. rotation7.png, each produced from rotation0.png with one convert operation.
3
+ convert rotation0.png -flop rotation1.png
4
+ convert rotation0.png -rotate 180 rotation2.png
5
+ convert rotation0.png -flip rotation3.png
6
+ convert rotation0.png -transpose rotation4.png
7
+ convert rotation0.png -rotate 270 rotation5.png
8
+ convert rotation0.png -transverse rotation6.png
9
+ convert rotation0.png -rotate 90 rotation7.png
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,134 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
2
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'ext')
3
+ require 'rspec'
4
+ require 'phamilie'
5
+ require 'pathname'
6
+
7
+ describe 'Phamilie fingerprinting' do
8
+ DIR = Pathname(__FILE__).dirname
9
+ # DIR is the directory of this spec file; the other describe blocks in this file use it as well.
10
+ it 'should fingerprint image' do
11
+ expect(Phamilie.new.fingerprint(DIR + 'lena1.png')).not_to be(nil)
12
+ end
13
+
14
+ it 'should barf on invalid path' do
15
+ expect{ Phamilie.new.fingerprint(DIR + 'foo') }.to raise_error(ArgumentError)
16
+ end
17
+
18
+ it 'should barf on non image' do
19
+ expect{ Phamilie.new.fingerprint(__FILE__) }.to raise_error(ArgumentError)
20
+ end
21
+ # Expected fingerprint of lena1.png, shared by the two examples below.
22
+ LENA1_FINGERPRINT = 11112265815244395537
23
+
24
+ it 'should fingerprint image' do
25
+ fingerprint = Phamilie.new.fingerprint(DIR + 'lena1.png')
26
+ expect(fingerprint).to eq(LENA1_FINGERPRINT)
27
+ end
28
+
29
+ it 'should fingerprint image rotations' do
30
+ rotations = Phamilie.new.rotations(DIR + 'lena1.png')
31
+ expect(rotations[0]).to eq(LENA1_FINGERPRINT)
32
+ end
33
+ end
34
+
35
+ describe 'Phamilie image distance' do
36
+ it 'should work for similar images' do
37
+ phamilie = Phamilie.new
38
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
39
+ images.unshift nil
40
+ # with nil at index 0, images[n] is lenaN.png
41
+ expect(phamilie.distance(images[1], images[2])).to eq(0)
42
+ expect(phamilie.distance(images[2], images[3])).to eq(26)
43
+ expect(phamilie.distance(images[3], images[4])).to eq(26)
44
+ expect(phamilie.distance(images[1], images[4])).to eq(2)
45
+ expect(phamilie.distance(images[1], images[5])).to eq(32)
46
+ end
47
+ end
48
+
49
+ describe 'Phamilie caching' do
50
+ it 'should use cache' do
51
+ phamilie = Phamilie.new
52
+
53
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
54
+ # Fingerprint.fingerprint is stubbed and must be called exactly once per image
55
+ images.each do |image|
56
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(image).and_return(image.__id__)
57
+ end
58
+ # every ordered pair is compared, so each image is looked up several times
59
+ images.permutation(2) do |a, b|
60
+ phamilie.distance(a, b)
61
+ end
62
+ end
63
+ end
64
+
65
+ describe 'Phamilie image reoriented distance' do
66
+ it 'should work for identical but rotated images' do
67
+ phamilie = Phamilie.new
68
+
69
+ images = (0..7).map{ |n| DIR + 'rotation%d.png' % n }
70
+ # for every ordered pair the plain distance must be non-zero and the rotation-aware distance zero
71
+ images.permutation(2) do |a, b|
72
+ expect(phamilie.distance(a, b)).not_to eq(0)
73
+ expect(phamilie.distance_with_rotations(a, b)).to eq(0)
74
+ end
75
+ end
76
+ end
77
+
78
+ describe 'Phamilie caching with rotations' do
79
+ it 'should use cache' do
80
+ phamilie = Phamilie.new
81
+
82
+ images = (1..5).map{ |n| DIR + 'lena%d.png' % n }
83
+
84
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).exactly(images.length).times.and_return(0)
85
+ expect(Phamilie::Fingerprint).to receive(:rotations).exactly(images.length - 1).times.and_return(8.times.to_a)
86
+
87
+ images.permutation(2) do |a, b|
88
+ phamilie.distance(a, b)
89
+ end
90
+
91
+ images.permutation(2) do |a, b|
92
+ phamilie.distance_with_rotations(a, b)
93
+ end
94
+
95
+ images.permutation(2) do |a, b|
96
+ phamilie.distance(a, b)
97
+ end
98
+ end
99
+
100
+ it 'should calculate rotation fingerprints only when required' do
101
+ phamilie = Phamilie.new
102
+
103
+ images = (1..2).map{ |n| DIR + 'lena%d.png' % n }
104
+
105
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(images[0]).and_return(0)
106
+ expect(Phamilie::Fingerprint).to receive(:rotations).once.with(images[0]).and_return(8.times.to_a)
107
+
108
+ expect(Phamilie::Fingerprint).to receive(:fingerprint).once.with(images[1]).and_return(0)
109
+ expect(Phamilie::Fingerprint).not_to receive(:rotations).with(images[1])
110
+
111
+ phamilie.distance(images[0], images[1])
112
+ phamilie.distance(images[1], images[0])
113
+ phamilie.distance_with_rotations(images[0], images[1])
114
+ phamilie.distance_with_rotations(images[1], images[0])
115
+ end
116
+
117
+ it 'should calculate rotation fingerprints when it should be more effective' do
118
+ phamilie = Phamilie.new
119
+
120
+ images = (1..2).map{ |n| DIR + 'lena%d.png' % n }
121
+
122
+ expect(Phamilie::Fingerprint).not_to receive(:fingerprint).with(images[0])
123
+ expect(Phamilie::Fingerprint).to receive(:rotations).once.with(images[0]).and_return(8.times.to_a)
124
+
125
+ expect(Phamilie::Fingerprint).not_to receive(:fingerprint).with(images[0])
126
+ expect(Phamilie::Fingerprint).to receive(:rotations).with(images[1]).and_return(8.times.to_a)
127
+
128
+ phamilie.distance_with_rotations(images[0], images[1])
129
+ phamilie.distance_with_rotations(images[1], images[0])
130
+ phamilie.distance(images[0], images[1])
131
+ phamilie.distance(images[1], images[0])
132
+ end
133
+
134
+ end