dhash-vips 0.1.0.1 → 0.1.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 465577547953c3039cdea1665f7010ed0af57656
4
- data.tar.gz: 9bb9a40feba3b039d0c0ee12f94907dd9b23fa68
3
+ metadata.gz: e1b98ca69a92ce7e640e4497399a906ec5a3cb53
4
+ data.tar.gz: 2d3dffbbd39df0448114bb1b4f7d59afb729c162
5
5
  SHA512:
6
- metadata.gz: 5936ffe730fb274a719d812dde7dab4113c5002d3c7acaad3c58bb12ed8d1da5744f7afa9d73e728ad7b54ee14f5f1f300bf021d98eab64ed08fefbeeb5034e2
7
- data.tar.gz: 19d272fa20c2f954f111561537328f677fbb32a84836e4d03d605f1086c3694b27647390b9420b36ce4fe56c7ddd8537f247729464202a0ba8a968813670237e
6
+ metadata.gz: '081328a940448d63e396ba141f1d897c5367de1e923410dd67e8de9da8e811e83f035b1094ba2b7e81eb9ed9cf61a9c23cc1f88a19ba3cb822dabd63160f345b'
7
+ data.tar.gz: 59dbcdf54db72f70dc089b1b75c5574e3e06e41ec54ccfd04d270f7fd89d498a71346e62f97cf5c7aa807915f6a88bd542261243dbbdf19d4a72b9b85af9fb9e
data/Gemfile CHANGED
@@ -1,3 +1,11 @@
1
- source 'https://rubygems.org'
2
-
1
+ source "https://rubygems.org"
3
2
  gemspec
3
+
4
+ gem "byebug", "<11.1.0" # for Ruby 2.3
5
+ gem "rake"
6
+ gem "minitest"
7
+ gem "rmagick", "~>2.16"
8
+ gem "phamilie"
9
+ gem "dhash"
10
+ gem "get_process_mem"
11
+ gem "mll"
data/common.rb CHANGED
@@ -9,3 +9,13 @@ def download_and_keep image # returns path
9
9
  end unless File.exist?(path) && Digest::MD5.file(path) == File.basename(image, ".jpg")
10
10
  end
11
11
  end
12
+
13
+ def download_if_needed path
14
+ require "open-uri"
15
+ require "digest"
16
+ FileUtils.mkdir_p File.dirname path
17
+ open("https://storage.googleapis.com/dhash-vips.nakilon.pro/#{File.basename path}") do |link|
18
+ File.open(path, "wb"){ |file| IO.copy_stream link, file }
19
+ end unless File.exist?(path) && Digest::MD5.file(path) == File.basename(path, File.extname(path))
20
+ path
21
+ end
data/dhash-vips.gemspec CHANGED
@@ -1,27 +1,17 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "dhash-vips"
3
- spec.version = "0.1.0.1"
3
+ spec.version = "0.1.1.4"
4
+ spec.summary = "dHash and IDHash perceptual image hashing/fingerprinting"
5
+ # spec.homepage = "https://github.com/nakilon/dhash-vips"
6
+
4
7
  spec.author = "Victor Maslov"
5
8
  spec.email = "nakilon@gmail.com"
6
- spec.summary = "dHash and IDHash powered by Vips"
7
- spec.homepage = "https://github.com/nakilon/dhash-vips"
8
9
  spec.license = "MIT"
9
10
 
10
- spec.test_files = %w{ test.rb }
11
- spec.files = `git ls-files -z`.split("\x0") - spec.test_files - %w{ .gitignore } - Dir.glob("example_ocr/**")
11
+ spec.add_dependency "ruby-vips", "~> 2.0", "!= 2.1.0", "!= 2.1.1"
12
+
12
13
  spec.require_path = "lib"
14
+ spec.test_files = %w{ test.rb }
13
15
  spec.extensions = %w{ extconf.rb }
14
-
15
- spec.add_dependency "ruby-vips", "~>2.0.16"
16
-
17
- spec.add_development_dependency "rake"
18
- spec.add_development_dependency "minitest"
19
- spec.add_development_dependency "get_process_mem"
20
-
21
- spec.add_development_dependency "rmagick", "~>2.16"
22
- spec.add_development_dependency "phamilie"
23
- spec.add_development_dependency "dhash"
24
-
25
- spec.add_development_dependency "mll"
26
- spec.add_development_dependency "byebug"
16
+ spec.files = %w{ extconf.rb Gemfile LICENSE.txt common.rb dhash-vips.gemspec idhash.c lib/dhash-vips-post-install-test.rb lib/dhash-vips.rb }
27
17
  end
data/extconf.rb CHANGED
@@ -1,13 +1,17 @@
1
1
  require "mkmf"
2
2
 
3
3
  File.write "Makefile", dummy_makefile(?.).join
4
- unless Gem::Platform.local.os == "darwin" && Gem::Version.new(RUBY_VERSION) == Gem::Version.new("2.3.8")
5
- else
6
- begin
4
+
5
+ unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.3.8")
6
+ if ENV["RBENV_ROOT"] && ENV["RBENV_VERSION"]
7
7
  # https://github.com/rbenv/rbenv/issues/1199
8
- append_cppflags "-I#{Dir.glob("#{`rbenv root`.chomp}/sources/#{`rbenv version-name`.chomp}/*/").first}"
9
- rescue
8
+ if found = Dir.glob("#{ENV["RBENV_ROOT"]}/sources/#{ENV["RBENV_VERSION"]}/ruby-*/").first
9
+ append_cppflags "-I#{Dir.glob("#{ENV["RBENV_ROOT"]}/sources/#{ENV["RBENV_VERSION"]}/ruby-*/").first}"
10
+ end
10
11
  else
12
+ append_cppflags "-I/ruby/"
13
+ end
14
+ append_cppflags "-DRUBY_EXPORT" unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
11
15
  create_makefile "idhash"
12
16
  # Why this hack?
13
17
  # 1. Because I want to use Ruby and ./idhash.bundle for tests, not C.
@@ -17,14 +21,14 @@ else
17
21
  test: all
18
22
  \t$(RUBY) -r./lib/dhash-vips.rb ./lib/dhash-vips-post-install-test.rb
19
23
  HEREDOC
20
- end
21
24
  end
22
25
 
23
- # Cases to check:
24
- # 0. all is ok
25
- # `rm -rf idhash.o idhash.bundle pkg && bundle exec rake install` # w/o ext # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 32]
26
- # `rm -f idhash.o idhash.bundle Makefile && ruby extconf.rb && make` # with ext # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 40]
27
- # `bundle exec rake -rdhash-vips -e "p DHashVips::IDHash.method(:distance3).source_location"`
26
+ # How to test:
27
+ # $ ruby extconf.rb && make clean && make
28
+ # $ bundle exec rake -rdhash-vips -e "p DHashVips::IDHash.method(:distance3).source_location"
29
+ # # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 52] # or 42 with Ruby<2.4
30
+ # # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 32] # if LoadError
31
+ # Other cases to check:
28
32
  # 1. not macOS && rbenv
29
33
  # 2. fail during append_cppflags
30
34
  # 3. failed compilation
@@ -20,8 +20,20 @@ ss = s.repeated_permutation(4).map do |s1, s2, s3, s4|
20
20
  end
21
21
  end
22
22
  fail unless :distance3 == DHashVips::IDHash.method(:distance3).original_name
23
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
24
+ check = lambda do |s1, s2|
25
+ s1.is_a?(Bignum) && s2.is_a?(Bignum)
26
+ end
27
+ else
28
+ require "rbconfig/sizeof"
29
+ check = lambda do |s1, s2|
30
+ # https://github.com/ruby/ruby/commit/de2f7416d2deb4166d78638a41037cb550d64484#diff-16b196bc6bfe8fba63951420f843cfb4R10
31
+ _FIXNUM_MAX = (1 << (8 * RbConfig::SIZEOF["long"] - 2)) - 1
32
+ s1 > _FIXNUM_MAX && s2 > _FIXNUM_MAX
33
+ end
34
+ end
23
35
  ss.product ss do |s1, s2|
24
- next unless s1.is_a?(Bignum) && s2.is_a?(Bignum)
36
+ next unless check.call s1, s2
25
37
  unless f[s1, s2] == DHashVips::IDHash.distance3_c(s1, s2)
26
38
  p [s1, s2]
27
39
  p [s1.to_s(16).rjust(64,?0)].pack("H*").unpack("N*").map{ |_| _.to_s(2).rjust(32, ?0) }
data/lib/dhash-vips.rb CHANGED
@@ -33,15 +33,29 @@ module DHashVips
33
33
  ((a ^ b) & (a | b) >> 128).to_s(2).count "1"
34
34
  end
35
35
  begin
36
- require_relative "../idhash.bundle"
36
+ require_relative "../idhash.#{Gem::Platform.local.os == "darwin" ? "bundle" : "o"}"
37
37
  rescue LoadError
38
38
  alias_method :distance3, :distance3_ruby
39
39
  else
40
- def distance3 a, b
41
- if a.is_a?(Bignum) && b.is_a?(Bignum)
42
- distance3_c a, b
43
- else
44
- distance3_ruby a, b
40
+ # we can't just do `defined? Bignum` because it's defined but deprecated (some internal CONST_DEPRECATED flag)
41
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
42
+ def distance3 a, b
43
+ if a.is_a?(Bignum) && b.is_a?(Bignum)
44
+ distance3_c a, b
45
+ else
46
+ distance3_ruby a, b
47
+ end
48
+ end
49
+ else
50
+ # https://github.com/ruby/ruby/commit/de2f7416d2deb4166d78638a41037cb550d64484#diff-16b196bc6bfe8fba63951420f843cfb4R10
51
+ require "rbconfig/sizeof"
52
+ FIXNUM_MAX = (1 << (8 * RbConfig::SIZEOF["long"] - 2)) - 1
53
+ def distance3 a, b
54
+ if a > FIXNUM_MAX && b > FIXNUM_MAX
55
+ distance3_c a, b
56
+ else
57
+ distance3_ruby a, b
58
+ end
45
59
  end
46
60
  end
47
61
  end
data/test.rb CHANGED
@@ -6,6 +6,7 @@ require "dhash-vips"
6
6
 
7
7
  [
8
8
  [DHashVips::DHash, :hamming, :calculate, 2, 23, 18, 50, 4],
9
+ # vips-8.9.1-Tue Jan 28 13:05:46 UTC 2020
9
10
  # [[0, 14, 26, 27, 31, 27, 32, 28, 43, 43, 34, 37, 37, 34, 35, 42],
10
11
  # [14, 0, 28, 25, 39, 35, 32, 32, 43, 43, 38, 41, 41, 38, 37, 50],
11
12
  # [26, 28, 0, 13, 35, 41, 28, 30, 41, 41, 36, 33, 35, 32, 27, 36],
@@ -23,22 +24,23 @@ require "dhash-vips"
23
24
  # [35, 37, 27, 26, 40, 34, 23, 27, 28, 28, 29, 30, 26, 21, 0, 23],
24
25
  # [42, 50, 36, 33, 31, 27, 34, 34, 27, 27, 18, 19, 23, 26, 23, 0]]
25
26
  [DHashVips::IDHash, :distance, :fingerprint, 6, 22, 23, 65, 0],
26
- # [[0, 17, 32, 35, 57, 45, 51, 50, 48, 47, 54, 48, 60, 50, 47, 56],
27
- # [17, 0, 30, 35, 58, 46, 54, 55, 47, 51, 57, 49, 62, 52, 52, 60],
27
+ # vips-8.9.1-Tue Jan 28 13:05:46 UTC 2020
28
+ # [[0, 16, 32, 35, 57, 45, 51, 50, 48, 47, 54, 48, 60, 50, 47, 56],
29
+ # [16, 0, 30, 34, 58, 47, 55, 56, 47, 50, 57, 49, 62, 52, 52, 61],
28
30
  # [32, 30, 0, 9, 47, 54, 45, 41, 65, 62, 42, 37, 51, 44, 49, 49],
29
- # [35, 35, 9, 0, 54, 64, 42, 40, 57, 56, 48, 39, 50, 40, 41, 51],
31
+ # [35, 34, 9, 0, 54, 64, 42, 40, 57, 56, 48, 39, 50, 40, 41, 51],
30
32
  # [57, 58, 47, 54, 0, 22, 43, 45, 64, 61, 48, 47, 35, 43, 47, 48],
31
- # [45, 46, 54, 64, 22, 0, 53, 54, 55, 54, 40, 46, 39, 42, 43, 42],
32
- # [51, 54, 45, 42, 43, 53, 0, 6, 33, 35, 52, 43, 46, 45, 44, 47],
33
- # [50, 55, 41, 40, 45, 54, 6, 0, 38, 41, 53, 50, 48, 45, 41, 42],
33
+ # [45, 47, 54, 64, 22, 0, 53, 54, 55, 54, 40, 46, 39, 42, 43, 42],
34
+ # [51, 55, 45, 42, 43, 53, 0, 6, 33, 35, 52, 43, 46, 45, 44, 47],
35
+ # [50, 56, 41, 40, 45, 54, 6, 0, 38, 41, 53, 50, 48, 45, 41, 42],
34
36
  # [48, 47, 65, 57, 64, 55, 33, 38, 0, 9, 51, 53, 47, 47, 41, 46],
35
- # [47, 51, 62, 56, 61, 54, 35, 41, 9, 0, 51, 57, 50, 49, 44, 43],
37
+ # [47, 50, 62, 56, 61, 54, 35, 41, 9, 0, 51, 57, 50, 49, 44, 43],
36
38
  # [54, 57, 42, 48, 48, 40, 52, 53, 51, 51, 0, 10, 33, 36, 38, 25],
37
39
  # [48, 49, 37, 39, 47, 46, 43, 50, 53, 57, 10, 0, 27, 30, 37, 27],
38
40
  # [60, 62, 51, 50, 35, 39, 46, 48, 47, 50, 33, 27, 0, 20, 23, 28],
39
41
  # [50, 52, 44, 40, 43, 42, 45, 45, 47, 49, 36, 30, 20, 0, 35, 39],
40
42
  # [47, 52, 49, 41, 47, 43, 44, 41, 41, 44, 38, 37, 23, 35, 0, 19],
41
- # [56, 60, 49, 51, 48, 42, 47, 42, 46, 43, 25, 27, 28, 39, 19, 0]]
43
+ # [56, 61, 49, 51, 48, 42, 47, 42, 46, 43, 25, 27, 28, 39, 19, 0]]
42
44
  ].each do |lib, dm, calc, min_similar, max_similar, min_not_similar, max_not_similar, bw_exceptional|
43
45
 
44
46
  describe lib do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dhash-vips
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.1
4
+ version: 0.1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-09 00:00:00.000000000 Z
11
+ date: 2022-02-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-vips
@@ -16,126 +16,26 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 2.0.16
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 2.0.16
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: minitest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: get_process_mem
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
19
+ version: '2.0'
20
+ - - "!="
67
21
  - !ruby/object:Gem::Version
68
- version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: rmagick
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
22
+ version: 2.1.0
23
+ - - "!="
74
24
  - !ruby/object:Gem::Version
75
- version: '2.16'
76
- type: :development
25
+ version: 2.1.1
26
+ type: :runtime
77
27
  prerelease: false
78
28
  version_requirements: !ruby/object:Gem::Requirement
79
29
  requirements:
80
30
  - - "~>"
81
31
  - !ruby/object:Gem::Version
82
- version: '2.16'
83
- - !ruby/object:Gem::Dependency
84
- name: phamilie
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: dhash
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: mll
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
125
- - !ruby/object:Gem::Dependency
126
- name: byebug
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - ">="
32
+ version: '2.0'
33
+ - - "!="
130
34
  - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ">="
35
+ version: 2.1.0
36
+ - - "!="
137
37
  - !ruby/object:Gem::Version
138
- version: '0'
38
+ version: 2.1.1
139
39
  description:
140
40
  email: nakilon@gmail.com
141
41
  executables: []
@@ -145,8 +45,6 @@ extra_rdoc_files: []
145
45
  files:
146
46
  - Gemfile
147
47
  - LICENSE.txt
148
- - README.md
149
- - Rakefile
150
48
  - common.rb
151
49
  - dhash-vips.gemspec
152
50
  - extconf.rb
@@ -154,7 +52,7 @@ files:
154
52
  - lib/dhash-vips-post-install-test.rb
155
53
  - lib/dhash-vips.rb
156
54
  - test.rb
157
- homepage: https://github.com/nakilon/dhash-vips
55
+ homepage:
158
56
  licenses:
159
57
  - MIT
160
58
  metadata: {}
@@ -177,6 +75,6 @@ rubyforge_project:
177
75
  rubygems_version: 2.5.2.3
178
76
  signing_key:
179
77
  specification_version: 4
180
- summary: dHash and IDHash powered by Vips
78
+ summary: dHash and IDHash perceptual image hashing/fingerprinting
181
79
  test_files:
182
80
  - test.rb
data/README.md DELETED
@@ -1,211 +0,0 @@
1
- [![Gem Version](https://badge.fury.io/rb/dhash-vips.svg)](http://badge.fury.io/rb/dhash-vips)
2
-
3
- # dHash and IDHash gem powered by ruby-vips
4
-
5
- The **dHash** is the algorithm of image fingerprinting that can be used to measure the similarity of two images.
6
- The **IDHash** is the new algorithm that has some improvements over dHash -- I'll describe it further.
7
-
8
- You can read about the dHash and perceptual hashing in the article ["Kind of Like That" at "The Hacker Factor Blog"](http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html) (21 January 2013). The idea is that you resize the original image to 8x9 and then convert it to an 8x8 array of bits -- each tells if the corresponding segment of the image is brighter or darker than the one on the right (or left). Then you apply the [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) to such arrays to measure how much they differ.
9
-
10
- There were several Ruby implementations on Github already but they all depended on ImageMagick. My implementation takes an advantage of speed of the libvips (the `ruby-vips` gem) -- it fingerprints images much faster:
11
-
12
- load the image and calculate the fingerprint:
13
- user system total real
14
- Dhash 6.191731 0.230885 6.422616 ( 6.428763)
15
- DHashVips::DHash 0.858045 0.144820 1.002865 ( 0.924308)
16
-
17
- `Dhash` here is [another gem](https://github.com/maccman/dhash) that I used earlier in my projects before I decided to make this one.
18
- Unfortunately both gems made slightly different fingerprints for two image files that are supposed to have the same fingerprint because from the human point of view they are the same (photo by Jordan Voth):
19
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/dhash_issue_example.png)
20
- The distance here appeared to be equal to 5. This is why I've decided to improve the algorithm and this is how the "IDHash" appeared.
21
-
22
- ## IDHash (the Important Difference Hash)
23
-
24
- The main improvement over the dHash is what makes it insensitive to the resizing algorithm, color scheme and effectively made the pair of images above to have a distance of 0.
25
-
26
- * The "Importance" is an array of extra 64 bits that tells the comparing function which half of 64 bits is important (when the difference between neighbors was significant enough) and which is not. So not every bit in a fingerprint is being compared but only half of them.
27
- * It subtracts not only horizontally but also vertically -- that adds 128 more bits.
28
- * Instead of resizing to 8x9 it resizes to 8x8 and puts the image on a torus so it subtracts the very left column from the very right one and the top from the bottom.
29
-
30
- You could see in fingerprint calculation benchmark earlier that these improvements didn't make it slower than dHash because most of the time is spent on image resizing. Distance measurement is what became slower.
31
-
32
- ### Example
33
-
34
- Here are two photos (by Brian Lauer):
35
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/idhash_example_in.png)
36
- and visualization of IDHash (`rake compare_images -- image1.jpg image2.jpg`):
37
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/idhash_example_out.png)
38
-
39
- Here in each of 64 cells, there are two circles that color the difference between that cell and the neighbor one. If the difference is low the Importance bit is set to zero and the circle is invisible. So there are 128 pairs of corresponding circles and when you take one, if at least one circle is visible and is of different color the line is to be drawn. Here you see 15 lines and so the distance between fingerprints will be equal to 15 (that is pretty low and can be interpreted as "images look similar"). Also, you see here that floor on this photo matters -- classic dHash won't see that it's darker than wall because it's comparing only horizontal neighbors and if one photo had no floor the distance function won't notice that. Also, it sees the Important difference between the very right and left columns because the wall has a slow but visible gradient.
40
-
41
- ### Remaining problems
42
-
43
- * Neither dHash nor IDHash can automatically detect very shifted crops and rotated images but you can make a wrapper that would call the comparison function iteratively.
44
- * These algorithms are color blind because of converting an image to grayscale. If you take a photo of something in your yard the sun will create lights and shadows, but if you compare photos of something green painted on a blue wall there is a possibility the machine would see nothing painted at all. The `dhash` gem had such image in specs and that made them pretty useless (this was supposed to be a face):
45
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/colorblind.png)
46
- * If you have a pile of 1000000 images comparing them with each other would take a month or two. To improve the process in case of dHash that uses Hamming distance you may want to read these threads on Stackexchange network:
47
- * [How to find the closest pairs of a string of binary bins in Ruby without O^2 issues?](https://stackoverflow.com/q/8734034/322020)
48
- * [Find all pairs of values that are close under Hamming distance](https://cstheory.stackexchange.com/q/18516/27420)
49
- * [Finding the closest pair between two sets of points on the hypercube](https://cstheory.stackexchange.com/q/16322/27420)
50
- * [Would PCA work for boolean data types?](https://stats.stackexchange.com/q/159705/1125)
51
- * [Using pHash to search agaist a huge image database, what is the best approach?](https://stackoverflow.com/q/18257641/322020)
52
-
53
- ## Installation
54
-
55
- brew install vips
56
-
57
- If you have troubles, see https://jcupitt.github.io/libvips/install.html
58
- Then:
59
-
60
- gem install dhash-vips
61
-
62
- If you have troubles with the `gem ruby-vips` dependency, see https://github.com/libvips/ruby-vips
63
-
64
- ## Usage
65
-
66
- ### dHash:
67
-
68
- ```ruby
69
- require "dhash-vips"
70
-
71
- hash1 = DHashVips::DHash.calculate "photo1.jpg"
72
- hash2 = DHashVips::DHash.calculate "photo2.jpg"
73
-
74
- distance = DHashVips::DHash.hamming hash1, hash2
75
- if distance < 10
76
- puts "Images are very similar"
77
- elsif distance < 20
78
- puts "Images are slightly similar"
79
- else
80
- puts "Images are different"
81
- end
82
- ```
83
-
84
- ### IDHash:
85
-
86
- ```ruby
87
- require "dhash-vips"
88
-
89
- hash1 = DHashVips::IDHash.fingerprint "photo1.jpg"
90
- hash2 = DHashVips::IDHash.fingerprint "photo2.jpg"
91
-
92
- distance = DHashVips::IDHash.distance hash1, hash2
93
- if distance < 15
94
- puts "Images are very similar"
95
- elsif distance < 25
96
- puts "Images are slightly similar"
97
- else
98
- puts "Images are different"
99
- end
100
- ```
101
-
102
- ### Notes and benchmarks
103
-
104
- * The above `15` and `25` constants are found empirically and just work well enough for 8-byte hashes. To find these thresholds we can run a rake task with hardcoded test cases (pairs of photos from the same photosession are not the same but are considered to be 'similar' enough for the purpose of this benchmark):
105
-
106
- $ rake compare_quality
107
-
108
- Dhash Phamilie DHashVips::DHash DHashVips::IDHash DHashVips::IDHash(4)
109
- The same image: 0..0 0..0 0..0 0..0 0..0
110
- 'Jordan Voth case': 4 2 4 0 0
111
- Similar images: 1..17 14..34 2..23 6..22 53..166
112
- Different images: 9..57 22..42 9..50 18..65 120..233
113
- 1/FMI^2 = 1.25 4.0 1.556 1.25 1.306
114
- FP, FN = [2, 0] [0, 6] [1, 2] [2, 0] [1, 1]
115
-
116
- The `FMI` line here is the "quality of algorithm", i.e. the best achievable function from the ["Fowlkes–Mallows index"](https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index) value if you take the "similar" and "different" test pairs and try to draw the threshold line. A smaller number is better. Here I've added the [`phamilie` gem](https://github.com/toy/phamilie) that is DCT based (not a kind of dhash). The last line shows the number of false positives (`FP`) and false negatives (`FN`) in case of the best achieved FMI.
117
-
118
- * Methods were renamed from `#calculate` to `#fingerprint` and from `#hamming` to `#distance`.
119
- * The `DHash#calculate` accepts `hash_size` optional parameter that is 8 by default. The `IDHash#fingerprint`'s optional parameter is called `power` and works in a bit different way: 3 means 8 and 4 means 16 -- other sizes are not supported because they don't seem to be useful (higher fingerprint resolution makes it vulnerable to image shifts and croppings, also `#distance` becomes much slower). Because IDHash's fingerprint is more complex than DHash's one it's not that straightforward to compare them so under the hood the `#distance` method has to check the size of the fingerprint. If you are sure that fingerprints were made with power=3 then to skip the check you may use the `#distance3` method directly.
120
- * The `#distance3` method will use Ruby C extension that is around 15 times faster than pure Ruby implementation -- native extension is currently hardcoded to be compiled only if it's macOS and rbenv Ruby 2.3.8 installed with `-k` flag but if you know how to make the gem gracefully fallback to native Ruby if `make` fails let me know or make a pull request. So the full benchmark:
121
-
122
- * Ruby 2.0.0
123
-
124
- $ bundle exec rake compare_speed
125
-
126
- load the image and calculate the fingerprint:
127
- user system total real
128
- Dhash 12.400000 0.820000 13.220000 ( 13.329952)
129
- DHashVips::DHash 1.330000 0.230000 1.560000 ( 1.509826)
130
- DHashVips::IDHash 1.060000 0.090000 1.150000 ( 1.100332)
131
- DHashVips::IDHash 4 1.030000 0.080000 1.110000 ( 1.089148)
132
-
133
- measure the distance (1000 times):
134
- user system total real
135
- Dhash hamming 3.140000 0.020000 3.160000 ( 3.179392)
136
- DHashVips::DHash hamming 3.040000 0.020000 3.060000 ( 3.095190)
137
- DHashVips::IDHash distance 8.170000 0.040000 8.210000 ( 8.279950)
138
- DHashVips::IDHash distance3 6.720000 0.030000 6.750000 ( 6.790900)
139
- DHashVips::IDHash distance 4 24.430000 0.130000 24.560000 ( 24.652625)
140
-
141
- * Ruby 2.3.3 seems to have some bit arithmetics improvement compared to 2.0:
142
-
143
- load the image and calculate the fingerprint:
144
- user system total real
145
- Dhash 13.110000 0.950000 14.060000 ( 14.537057)
146
- DHashVips::DHash 1.480000 0.310000 1.790000 ( 1.808787)
147
- DHashVips::IDHash 1.080000 0.100000 1.180000 ( 1.156446)
148
- DHashVips::IDHash 4 1.030000 0.090000 1.120000 ( 1.076117)
149
-
150
- measure the distance (1000 times):
151
- user system total real
152
- Dhash hamming 1.770000 0.010000 1.780000 ( 1.815612)
153
- DHashVips::DHash hamming 1.810000 0.010000 1.820000 ( 1.875666)
154
- DHashVips::IDHash distance 4.250000 0.020000 4.270000 ( 4.350071)
155
- DHashVips::IDHash distance3 3.430000 0.020000 3.450000 ( 3.499031)
156
- DHashVips::IDHash distance 4 8.210000 0.110000 8.320000 ( 8.510735)
157
-
158
- * Ruby 2.3.8p459 (2.4.6, 2.5.5 and 2.6.3 are all similar) with newer CPU (`sysctl -n machdep.cpu.brand_string #=> Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz`):
159
-
160
- load the image and calculate the fingerprint:
161
- user system total real
162
- Dhash 6.191731 0.230885 6.422616 ( 6.428763)
163
- Phamilie 5.361751 0.037524 5.399275 ( 5.402553)
164
- DHashVips::DHash 0.858045 0.144820 1.002865 ( 0.924308)
165
- DHashVips::IDHash 0.769975 0.071087 0.841062 ( 0.790470)
166
- DHashVips::IDHash 4 0.805311 0.077918 0.883229 ( 0.825897)
167
-
168
- measure the distance (2000 times):
169
- user system total real
170
- Dhash hamming 1.810000 0.000000 1.810000 ( 1.824719)
171
- Phamilie distance 1.000000 0.010000 1.010000 ( 1.006127)
172
- DHashVips::DHash hamming 1.810000 0.000000 1.810000 ( 1.817415)
173
- DHashVips::IDHash distance 1.400000 0.000000 1.400000 ( 1.401333)
174
- DHashVips::IDHash distance3_ruby 3.320000 0.010000 3.330000 ( 3.337920)
175
- DHashVips::IDHash distance3_c 0.210000 0.000000 0.210000 ( 0.212864)
176
- DHashVips::IDHash distance 4 8.300000 0.120000 8.420000 ( 8.499735)
177
-
178
- * Also note that to make `#distance` able to assume the fingerprint resolution from the size of Integer that represents it, the change in its structure was needed (left half of bits was swapped with right one), so fingerprints between versions 0.0.4 and 0.0.5 became incompatible, but you probably can convert them manually. Otherwise if we put the version or structure information inside fingerprint it would become slow to (de)serialize and store.
179
-
180
- ## Development
181
-
182
- * OS X El Capitan and rbenv may cause environment issues that would make you do things like:
183
-
184
- $ ./ruby `rbenv which rake` compare_matrixes
185
-
186
- instead of just
187
-
188
- $ rake compare_matrixes
189
-
190
- For more information on that: https://github.com/jcupitt/ruby-vips/issues/141
191
-
192
- * On macOS, when you do `bundle install` it may fail to install `rmagick` gem (`dhash` gem dependency) saying:
193
-
194
- ERROR: Can't install RMagick 4.0.0. Can't find magick/MagickCore.h.
195
-
196
- To resolve this do:
197
-
198
- $ brew install imagemagick@6
199
- $ LDFLAGS="-L/usr/local/opt/imagemagick@6/lib" CPPFLAGS="-I/usr/local/opt/imagemagick@6/include" bundle install
200
-
201
- * If you get `No package 'MagickCore' found` try:
202
-
203
- $ PKG_CONFIG_PATH="/usr/local/Cellar/imagemagick@6/6.9.10-74/lib/pkgconfig" bundle install
204
-
205
- * You might need to prepend `bundle exec` to all the `rake` commands.
206
-
207
- * Execute the `rake compare_quality` at least once before executing other rake tasks because it's currently the only one that downloads the test images.
208
-
209
- ## Credits
210
-
211
- [John Cupitt](https://github.com/jcupitt) (libvips and ruby-vips maintainer) helped me a lot.
data/Rakefile DELETED
@@ -1,273 +0,0 @@
1
- STDOUT.sync = true
2
- require "pp"
3
-
4
- require "bundler/gem_tasks" # to push to rubygems
5
-
6
- visualize_hash = lambda do |hash|
7
- puts hash.to_s(2).rjust(64, ?0).gsub(/(?<=.)/, '\0 ').scan(/.{16}/)
8
- end
9
-
10
- desc "Compare how Vips and ImageMagick resize images to 9x8"
11
- task :compare_pixelation do |_|
12
- require_relative "lib/dhash-vips"
13
- require "dhash"
14
-
15
- ARGV.drop(1).each do |arg|
16
- FileUtils.mkdir_p "compare_pixelation/#{File.dirname arg}"
17
-
18
- puts filename = "compare_pixelation/#{arg}.dhash-vips.png"
19
- DHashVips::DHash.pixelate(arg, 8).
20
- colourspace(:srgb). # otherwise we may get `Vips::Error` `RGB color space not permitted on grayscale PNG` when the image was already bw
21
- write_to_file filename
22
- visualize_hash.call DHashVips::DHash.calculate arg
23
-
24
- puts filename = "compare_pixelation/#{arg}.dhash.png"
25
- Magick::Image.read(arg).first.quantize(256, Magick::Rec601LumaColorspace, Magick::NoDitherMethod, 8).resize!(9, 8).
26
- write filename
27
- visualize_hash.call Dhash.calculate arg
28
- end
29
- end
30
-
31
- desc "Compare how Vips resizes image to 9x8 with different kernels"
32
- task :compare_kernels do |_|
33
- require_relative "lib/dhash-vips"
34
- # require "dhash"
35
-
36
- %i{ nearest linear cubic lanczos2 lanczos3 }.each do |kernel|
37
- hashes = ARGV.drop(1).map do |arg|
38
- puts arg
39
- DHashVips::DHash.calculate(arg, 8, kernel).tap &visualize_hash
40
- end
41
- puts "kernel: #{kernel}, distance: #{DHashVips::DHash.hamming *hashes}"
42
- end
43
- end
44
-
45
- desc "Compare the quality of Dhash, Phamilie, DHashVips::DHash, DHashVips::IDHash"
46
- # in this test we want to know not whether the photos are the same but rather whether they are from the same photo session
47
- task :compare_quality do
48
- require "dhash"
49
- require "phamilie"
50
- phamilie = Phamilie.new
51
- require_relative "lib/dhash-vips"
52
- require "mll"
53
-
54
- puts MLL::grid.call( [
55
- ["", "The same image:", "'Jordan Voth case':", "Similar images:", "Different images:", "1/FMI^2 =", "FP, FN ="],
56
- *[
57
- [Dhash, :calculate, :hamming],
58
- [phamilie, :fingerprint, :distance, nil, 0],
59
- [DHashVips::DHash, :calculate, :hamming],
60
- [DHashVips::IDHash, :fingerprint, :distance],
61
- [DHashVips::IDHash, :fingerprint, :distance, 4],
62
- ].map do |m, calc, dm, power, ii|
63
- require_relative "common"
64
- hashes = %w{
65
- 71662d4d4029a3b41d47d5baf681ab9a.jpg ad8a37f872956666c3077a3e9e737984.jpg
66
-
67
- 1b1d4bde376084011d027bba1c047a4b.jpg 6d97739b4a08f965dc9239dd24382e96.jpg
68
-
69
- 1d468d064d2e26b5b5de9a0241ef2d4b.jpg 92d90b8977f813af803c78107e7f698e.jpg
70
- 309666c7b45ecbf8f13e85a0bd6b0a4c.jpg 3f9f3db06db20d1d9f8188cd753f6ef4.jpg
71
- 679634ff89a31279a39f03e278bc9a01.jpg df0a3b93e9412536ee8a11255f974141.jpg
72
- 54192a3f65bd03163b04849e1577a40b.jpg 6d32f57459e5b79b5deca2a361eb8c6e.jpg
73
- 4b62e0eef58bfbc8d0d2fbf2b9d05483.jpg b8eb0ca91855b657f12fb3d627d45c53.jpg
74
- 21cd9a6986d98976b6b4655e1de7baf4.jpg 9b158c0d4953d47171a22ed84917f812.jpg
75
- 9c2c240ec02356472fb532f404d28dde.jpg fc762fa286489d8afc80adc8cdcb125e.jpg
76
- 7a833d873f8d49f12882e86af1cc6b79.jpg ac033cf01a3941dd1baa876082938bc9.jpg
77
- }.map(&method(:download_and_keep)).map{ |filename| [filename, m.public_send(calc, filename, *power)] }
78
- table = MLL::table[m.method(dm), [hashes.map{|_|_[ii||1]}], [hashes.map{|_|_[ii||1]}]]
79
- report = Struct.new(:same, :bw, :sim, :not_sim).new [], [], [], []
80
- hashes.size.times.to_a.repeated_combination(2) do |i, j|
81
- report[
82
- case
83
- when i == j ; :same
84
- when [i, j] == [0, 1] ; :bw
85
- when i > 3 && i + 1 == j && i % 2 == 0 ; :sim
86
- else ; :not_sim
87
- end
88
- ].push table[i][j]
89
- end
90
- min, max = [*report.sim, *report.not_sim].minmax
91
- fmi, fp, fn = (min..max+1).map do |b|
92
- fp = report.not_sim.count{ |_| _ < b }
93
- tp = report.sim.count{ |_| _ < b }
94
- fn = report.sim.count{ |_| _ >= b }
95
- [((tp + fp) * (tp + fn)).fdiv(tp * tp), fp, fn]
96
- end.reject{ |_,| _.nan? }.min_by(&:first)
97
- [
98
- "#{m.is_a?(Module) ? m : m.class}#{"(#{power})" if power}",
99
- report.same. minmax.join(".."),
100
- report.bw[0],
101
- report.sim. minmax.join(".."),
102
- report.not_sim.minmax.join(".."),
103
- fmi.round(3),
104
- [fp, fn]
105
- ]
106
- end,
107
- ].transpose, spacings: [1.5, 0], alignment: :right )
108
- end
109
-
110
- # ruby -c Rakefile && rm -f ab.png && rake compare_images -- fc762fa286489d8afc80adc8cdcb125e.jpg 9c2c240ec02356472fb532f404d28dde.jpg 2>/dev/null && ql ab.png
111
- # rm -f ab.png && ./ruby `rbenv which rake` compare_images -- 6d97739b4a08f965dc9239dd24382e96.jpg 1b1d4bde376084011d027bba1c047a4b.jpg 2>/dev/null && ql ab.png
112
- desc "Visualizes the IDHash difference measurement between two images"
113
- task :compare_images do |_|
114
- abort "there should be two image filenames passed as arguments (and optionally the `power`)" unless (3..4) === ARGV.size
115
- abort "the optional argument should be either 3 or 4" unless [3, 4].include?(power = (ARGV[3] || 3).to_i)
116
- task ARGV.last do ; end
117
- require_relative "lib/dhash-vips"
118
- ha, hb = ARGV[1, 2].map{ |filename| DHashVips::IDHash.fingerprint(filename, power) }
119
- puts "distance: #{DHashVips::IDHash.distance ha, hb}"
120
- size = 2 ** power
121
- shift = 2 * size * size
122
- ai = ha >> shift
123
- ad = ha - (ai << shift)
124
- bi = hb >> shift
125
- bd = hb - (bi << shift)
126
-
127
- a, b = ARGV[1, 2].map do |filename|
128
- image = Vips::Image.new_from_file filename
129
- image = image.resize(size.fdiv(image.width), vscale: size.fdiv(image.height)).colourspace("b-w").
130
- resize(100, vscale: 100, kernel: :nearest).colourspace("srgb")
131
- end
132
- fail unless a.width == b.width && a.height == b.height
133
-
134
- _127 = shift - 1
135
- _63 = size * size - 1
136
- n = 0
137
- width = a.width
138
- height = a.height
139
-
140
- Vips::Operation.class_eval do
141
- old_initialize = instance_method :initialize
142
- define_method :initialize do |value|
143
- old_initialize.bind(self).(value).tap do
144
- self.instance_variable_set "@operation_name", value
145
- end
146
- end
147
- old_set = instance_method :set
148
- define_method :set do |*args|
149
- args[1].instance_variable_set "@operation_name", self.instance_variable_get("@operation_name") if args.first == "image"
150
- old_set.bind(self).(*args)
151
- end
152
- end
153
- Vips::Image.class_eval do
154
- def copy
155
- return self if caller.first.end_with?("/gems/ruby-vips-2.0.9/lib/vips/operation.rb:148:in `set'") &&
156
- %w{ draw_line draw_circle }.include?(instance_variable_get "@operation_name")
157
- method_missing :copy
158
- end
159
- end
160
-
161
- require "get_process_mem"
162
- a, b = [[a, ad, ai], [b, bd, bi]].map do |image, xd, xi|
163
- _127.downto(0).each_with_index do |i, ii|
164
- mem = GetProcessMem.new(Process.pid).mb
165
- abort ">1000mb of memory consumed" if 1000 < mem
166
- if i > _63
167
- y, x = (_127 - i).divmod size
168
- else
169
- x, y = (_63 - i).divmod size
170
- end
171
- x = (width * (x + 0.5) / size).round
172
- y = (height * (y + 0.5) / size).round
173
- if i > _63
174
- (x-2..x+2).map do |x| [
175
- [x, y , x, (y + height / size / 2 - 1) % height],
176
- [x, (y + height / size / 2 + 1) % height, x, (y + height / size ) % height],
177
- ] end
178
- else
179
- (y-2..y+2).map do |y| [
180
- [ x , y, (x + width / size / 2 - 1) % width, y],
181
- [(x + width / size / 2 + 1) % width, y, (x + width / size ) % width, y],
182
- ] end
183
- end.each do |coords1, coords2|
184
- n += 1
185
- image = image.draw_line (1 - xd[i]) * 255, *coords1
186
- image = image.draw_line xd[i] * 255, *coords2
187
- end if ai[i] + bi[i] > 0 && ad[i] != bd[i]
188
- cx, cy = if i > _63
189
- [x, y + 30]
190
- else
191
- [x + 30, y]
192
- end
193
- image = image.draw_circle xd[i] * 255, cx, cy, 11, fill: true if xi[i] > 0
194
- image = image.draw_circle (1 - xd[i]) * 255, cx, cy, 10, fill: true if xi[i] > 0
195
- end
196
- image
197
- end
198
- puts "distance: #{n / 10}"
199
- puts "(above should be equal if raketask works correcly)"
200
-
201
- a.join(b, :horizontal, shim: 15).write_to_file "ab.png"
202
- end
203
-
204
- # ./ruby `rbenv which rake` compare_speed
205
- desc "Benchmarks Dhash, DHashVips::DHash and DHashVips::IDHash"
206
- task :compare_speed do
207
- require "dhash"
208
- require "phamilie"
209
- phamilie = Phamilie.new
210
- require_relative "lib/dhash-vips"
211
-
212
- filenames = %w{
213
- 71662d4d4029a3b41d47d5baf681ab9a.jpg
214
- ad8a37f872956666c3077a3e9e737984.jpg
215
- 1d468d064d2e26b5b5de9a0241ef2d4b.jpg
216
- 92d90b8977f813af803c78107e7f698e.jpg
217
- 309666c7b45ecbf8f13e85a0bd6b0a4c.jpg
218
- 3f9f3db06db20d1d9f8188cd753f6ef4.jpg
219
- df0a3b93e9412536ee8a11255f974141.jpg
220
- 679634ff89a31279a39f03e278bc9a01.jpg
221
- }.flat_map do |filename|
222
- image = Vips::Image.new_from_file "images/#{filename}"
223
- [0, 1, 2, 3].map do |a|
224
- "benchmark/#{a}_#{filename}".tap do |filename|
225
- next if File.exist? filename
226
- FileUtils.mkdir_p "benchmark"
227
- image.rot(a).write_to_file filename
228
- end
229
- end
230
- end
231
-
232
- require "benchmark"
233
- puts "load the image and calculate the fingerprint:"
234
- hashes = []
235
- Benchmark.bm 19 do |bm|
236
- [
237
- [Dhash, :calculate],
238
- [phamilie, :fingerprint],
239
- [DHashVips::DHash, :calculate],
240
- [DHashVips::IDHash, :fingerprint],
241
- [DHashVips::IDHash, :fingerprint, 4],
242
- ].each do |m, calc, power|
243
- bm.report "#{m.is_a?(Module) ? m : m.class} #{power}" do
244
- hashes.push filenames.map{ |filename| m.send calc, filename, *power }
245
- end
246
- end
247
- end
248
-
249
- # for `distance`, `distance3_ruby` and `distance3_c` we use the same hashes
250
- hashes[-1, 1] = hashes[-2, 2]
251
- hashes[-1, 1] = hashes[-2, 2]
252
-
253
- puts "\nmeasure the distance (2000 times):"
254
- Benchmark.bm 32 do |bm|
255
- [
256
- [Dhash, :hamming],
257
- [phamilie, :distance, nil, 1],
258
- [DHashVips::DHash, :hamming],
259
- [DHashVips::IDHash, :distance],
260
- [DHashVips::IDHash, :distance3_ruby],
261
- [DHashVips::IDHash, :distance3_c],
262
- [DHashVips::IDHash, :distance, 4],
263
- ].zip(hashes) do |(m, dm, power, ii), hs|
264
- bm.report "#{m.is_a?(Module) ? m : m.class} #{dm} #{power}" do
265
- _ = [hs, filenames][ii || 0]
266
- _.product _ do |h1, h2|
267
- 2000.times{ m.public_send dm, h1, h2 }
268
- end
269
- end
270
- end
271
- end
272
-
273
- end