dhash-vips 0.1.1.0 → 0.1.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (9) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -2
  3. data/common.rb +10 -0
  4. data/dhash-vips.gemspec +7 -22
  5. data/extconf.rb +23 -21
  6. data/lib/dhash-vips.rb +1 -1
  7. metadata +14 -116
  8. data/README.md +0 -196
  9. data/Rakefile +0 -364
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e462c5c41c5f2c5916223c083f22d16ae37a062b
4
- data.tar.gz: 3261d15852334f30b59bd53cbfdd03e4a6db8758
3
+ metadata.gz: fff14578bcad5c73887655e363e79dddc3acefe7
4
+ data.tar.gz: f4633556e075270a05b456e80b367b9be0bf8a6a
5
5
  SHA512:
6
- metadata.gz: 80c90dd232d5c9ea5ccfd65d715c48bafb56ca94aac70e45ced117ad27e748ae81030010f4eb62f963b86f0854d1a9c6d3f4174de730cb8f763fd7dcfcf60f89
7
- data.tar.gz: bbe1bee6c35949f68fe886512668d3ea5215daa2207f49fe3f2db51b5ea3c1829d2fb7c69a4d3b05a05afab6df7d243ba37d10d6de39aae1088b404189d43326
6
+ metadata.gz: d3cea61fb131a2737b9e1cb3bcd671ea1e017c8fd2d098e045c6f4cd3463e8621a239ffb7432dacc0196ced7f5060ff1fec8aaf9366d55adc758605df5887966
7
+ data.tar.gz: 316e06f339504f6eddbce864c2585e55428689444da97c9afdb0e1faa0fe976404412240465944a40411bf7ab624d4ddcb187563fb7d32f4f9b0c7dc2ae4dbec
data/Gemfile CHANGED
@@ -1,3 +1,11 @@
1
- source 'https://rubygems.org'
2
-
1
+ source "https://rubygems.org"
3
2
  gemspec
3
+
4
+ gem "byebug", "<11.1.0" # for Ruby 2.3
5
+ gem "rake"
6
+ gem "minitest"
7
+ gem "rmagick", "~>2.16"
8
+ gem "phamilie"
9
+ gem "dhash"
10
+ gem "get_process_mem"
11
+ gem "mll"
data/common.rb CHANGED
@@ -9,3 +9,13 @@ def download_and_keep image # returns path
9
9
  end unless File.exist?(path) && Digest::MD5.file(path) == File.basename(image, ".jpg")
10
10
  end
11
11
  end
12
+
13
+ def download_if_needed path
14
+ require "open-uri"
15
+ require "digest"
16
+ FileUtils.mkdir_p File.dirname path
17
+ open("https://storage.googleapis.com/dhash-vips.nakilon.pro/#{File.basename path}") do |link|
18
+ File.open(path, "wb"){ |file| IO.copy_stream link, file }
19
+ end unless File.exist?(path) && Digest::MD5.file(path) == File.basename(path, File.extname(path))
20
+ path
21
+ end
data/dhash-vips.gemspec CHANGED
@@ -1,32 +1,17 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "dhash-vips"
3
- spec.version = "0.1.1.0"
3
+ spec.version = "0.1.1.5"
4
+ spec.summary = "dHash and IDHash perceptual image hashing/fingerprinting"
5
+ # spec.homepage = "https://github.com/nakilon/dhash-vips"
6
+
4
7
  spec.author = "Victor Maslov"
5
8
  spec.email = "nakilon@gmail.com"
6
- spec.summary = "dHash and IDHash perceptual image hashing/fingerprinting"
7
- spec.homepage = "https://github.com/nakilon/dhash-vips"
8
9
  spec.license = "MIT"
9
10
 
11
+ spec.add_dependency "ruby-vips", "~> 2.0", "!= 2.1.0", "!= 2.1.1"
12
+
10
13
  spec.require_path = "lib"
11
14
  spec.test_files = %w{ test.rb }
12
15
  spec.extensions = %w{ extconf.rb }
13
- spec.files = `git ls-files -z`.split("\x0") -
14
- spec.test_files -
15
- %w{ .gitignore Dockerfile } -
16
- Dir.glob("example_*/**/*") -
17
- Dir.glob(".github/**/*")
18
-
19
- spec.add_dependency "ruby-vips", "~>2.0.16"
20
-
21
- spec.add_development_dependency "rake"
22
- spec.add_development_dependency "minitest"
23
-
24
- spec.add_development_dependency "rmagick", "~>2.16"
25
- spec.add_development_dependency "phamilie"
26
- spec.add_development_dependency "dhash"
27
-
28
- spec.add_development_dependency "get_process_mem"
29
-
30
- spec.add_development_dependency "mll"
31
- spec.add_development_dependency "byebug"
16
+ spec.files = %w{ extconf.rb Gemfile LICENSE.txt common.rb dhash-vips.gemspec idhash.c lib/dhash-vips-post-install-test.rb lib/dhash-vips.rb }
32
17
  end
data/extconf.rb CHANGED
@@ -1,17 +1,15 @@
1
1
  require "mkmf"
2
2
 
3
3
  File.write "Makefile", dummy_makefile(?.).join
4
- unless Gem::Platform.local.os == "darwin" && ENV["RBENV_ROOT"] && ENV["RBENV_VERSION"]
5
- else
6
- if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.3.8") ||
7
- Gem::Version.new(RUBY_VERSION) > Gem::Version.new("2.4.9")
8
- else
9
- if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
10
- else
11
- append_cppflags "-DRUBY_EXPORT"
12
- end
13
- # https://github.com/rbenv/rbenv/issues/1199
14
- append_cppflags "-I#{Dir.glob("#{ENV["RBENV_ROOT"]}/sources/#{ENV["RBENV_VERSION"]}/ruby-*/").first}"
4
+
5
+ unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.3.8")
6
+ if ruby_source_dir = if File.directory? "/ruby"
7
+ "-I/ruby" # for Github Actions: docker (currently disabled) and benchmark
8
+ elsif ENV["RBENV_ROOT"] && ENV["RBENV_VERSION"] && File.exist?(t = "#{ENV["RBENV_ROOT"]}/sources/#{ENV["RBENV_VERSION"]}/ruby-#{ENV["RBENV_VERSION"]}/bignum.c") # https://github.com/rbenv/rbenv/issues/1199
9
+ "-I#{File.dirname t}"
10
+ end
11
+ append_cppflags ruby_source_dir
12
+ append_cppflags "-DRUBY_EXPORT" unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
15
13
  create_makefile "idhash"
16
14
  # Why this hack?
17
15
  # 1. Because I want to use Ruby and ./idhash.bundle for tests, not C.
@@ -24,13 +22,17 @@ else
24
22
  end
25
23
  end
26
24
 
27
- # Cases to check:
28
- # 0. everything is ok
29
- # `rm -rf idhash.o idhash.bundle idhash.so pkg && bundle exec rake install`
30
- # `bundle exec rake -rdhash-vips -e "p DHashVips::IDHash.method(:distance3).source_location"` # => # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 32] # currently falsely says that gem install failed idk why
31
- # `rm -f idhash.o idhash.bundle idhash.so Makefile && ruby extconf.rb && make`
32
- # `bundle exec rake -rdhash-vips -e "p DHashVips::IDHash.method(:distance3).source_location"` # => # ["/Users/nakilon/_/dhash-vips/lib/dhash-vips.rb", 53]
33
- # 1. not macOS && rbenv
34
- # 2. fail during append_cppflags
35
- # 3. failed compilation
36
- # 4. failed tests
25
+ __END__
26
+
27
+ to test: $ rake clean && rake install
28
+
29
+ $ ruby extconf.rb && make clean && make
30
+ $ ruby -rdhash-vips -e "p DHashVips::IDHash.method(:distance3).source_location"
31
+ # [".../dhash-vips.rb", 32] # if LoadError
32
+ # [".../dhash-vips.rb", 52] # if native (or 42 with Ruby<2.4)
33
+
34
+ Other cases to check:
35
+ 1. not macOS && rbenv
36
+ 2. fail during append_cppflags
37
+ 3. failed compilation
38
+ 4. failed tests
data/lib/dhash-vips.rb CHANGED
@@ -33,7 +33,7 @@ module DHashVips
33
33
  ((a ^ b) & (a | b) >> 128).to_s(2).count "1"
34
34
  end
35
35
  begin
36
- require_relative "../idhash.bundle"
36
+ require_relative "../idhash.#{Gem::Platform.local.os == "darwin" ? "bundle" : "o"}"
37
37
  rescue LoadError
38
38
  alias_method :distance3, :distance3_ruby
39
39
  else
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dhash-vips
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1.0
4
+ version: 0.1.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-26 00:00:00.000000000 Z
11
+ date: 2022-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-vips
@@ -16,126 +16,26 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 2.0.16
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 2.0.16
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: minitest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
19
+ version: '2.0'
20
+ - - "!="
53
21
  - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: rmagick
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
22
+ version: 2.1.0
23
+ - - "!="
60
24
  - !ruby/object:Gem::Version
61
- version: '2.16'
62
- type: :development
25
+ version: 2.1.1
26
+ type: :runtime
63
27
  prerelease: false
64
28
  version_requirements: !ruby/object:Gem::Requirement
65
29
  requirements:
66
30
  - - "~>"
67
31
  - !ruby/object:Gem::Version
68
- version: '2.16'
69
- - !ruby/object:Gem::Dependency
70
- name: phamilie
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: dhash
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: get_process_mem
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: mll
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
125
- - !ruby/object:Gem::Dependency
126
- name: byebug
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - ">="
32
+ version: '2.0'
33
+ - - "!="
130
34
  - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ">="
35
+ version: 2.1.0
36
+ - - "!="
137
37
  - !ruby/object:Gem::Version
138
- version: '0'
38
+ version: 2.1.1
139
39
  description:
140
40
  email: nakilon@gmail.com
141
41
  executables: []
@@ -145,8 +45,6 @@ extra_rdoc_files: []
145
45
  files:
146
46
  - Gemfile
147
47
  - LICENSE.txt
148
- - README.md
149
- - Rakefile
150
48
  - common.rb
151
49
  - dhash-vips.gemspec
152
50
  - extconf.rb
@@ -154,7 +52,7 @@ files:
154
52
  - lib/dhash-vips-post-install-test.rb
155
53
  - lib/dhash-vips.rb
156
54
  - test.rb
157
- homepage: https://github.com/nakilon/dhash-vips
55
+ homepage:
158
56
  licenses:
159
57
  - MIT
160
58
  metadata: {}
data/README.md DELETED
@@ -1,196 +0,0 @@
1
- [![Gem Version](https://badge.fury.io/rb/dhash-vips.svg)](http://badge.fury.io/rb/dhash-vips) [![Docker Image](https://github.com/nakilon/dhash-vips/workflows/Docker%20Image/badge.svg)](https://hub.docker.com/repository/docker/nakilonishe/dhash-vips/general)
2
-
3
- # dHash and IDHash gem powered by ruby-vips
4
-
5
- The **dHash** is the algorithm of image fingerprinting that can be used to measure the similarity of two images.
6
- The **IDHash** is the new algorithm that has some improvements over dHash -- I'll describe it further.
7
-
8
- You can read about the dHash and perceptual hashing in the article ["Kind of Like That" at "The Hacker Factor Blog"](http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html) (21 January 2013). The idea is that you resize the original image to 8x9 and then convert it to 8x8 array of bits -- each tells if the corresponding segment of the image is brighter or darker than the one on the right (or left). Then you apply the [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) to such arrays to measure how much they are different.
9
-
10
- There were several Ruby implementations on Github already but they all depended on ImageMagick. My implementation takes an advantage of speed of the libvips (the `ruby-vips` gem) -- it fingerprints images much faster. For even more speed the fingerprint comparison function is made as native C extension.
11
-
12
- ## IDHash (the Important Difference Hash)
13
-
14
- The main improvement over the dHash is what makes it insensitive to the resizing algorithm and possible errors due to color scheme conversion.
15
-
16
- * The "Importance" is an array of extra 64 bits that tells the comparing function which half of 64 bits is important (when the difference between neighbors was significant enough) and which is not. So not every bit in a fingerprint is being compared but only half of them.
17
- * It subtracts not only horizontally but also vertically -- that adds 128 more bits.
18
- * Instead of resizing to 8x9 it resizes to 8x8 and puts the image on a torus so it subtracts the very left column from the very right one and the top from the bottom.
19
-
20
- ### Example
21
-
22
- Here are two photos (by Brian Lauer):
23
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/idhash_example_in.png)
24
- and visualization of IDHash (`rake compare_images -- image1.jpg image2.jpg`):
25
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/idhash_example_out.png)
26
-
27
- Here in each of 64 cells, there are two circles that color the difference between that cell and the neighbor one. If the difference is low the Importance bit is set to zero and the circle is invisible. So there are 128 pairs of corresponding circles and when you take one, if at least one circle is visible and is of different color the line is to be drawn. Here you see 15 lines and so the distance between fingerprints will be equal to 15 (that is pretty low and can be interpreted as "images look similar"). Also, you see here that floor on this photo matters -- classic dHash won't see that it's darker than wall because it's comparing only horizontal neighbors and if one photo had no floor the distance function won't notice that. Also, it sees the Important difference between the very right and left columns because the wall has a slow but visible gradient.
28
-
29
- ### Remaining problems
30
-
31
- * Neither dHash nor IDHash can automatically detect very shifted crops and rotated images but you can make a wrapper that would call the comparison function iteratively.
32
- * These algorithms are color blind because of converting an image to grayscale. If you take a photo of something in your yard the sun will create lights and shadows, but if you compare photos of something green painted on a blue wall there is a possibility the machine would see nothing painted at all. The `dhash` gem had such image in specs and that made them pretty useless (this was supposed to be a face):
33
- ![](https://storage.googleapis.com/dhash-vips.nakilon.pro/colorblind.png)
34
- * If you have a pile of 1000000 images comparing them with each other would take a month or two. To improve the process in case of dHash that uses Hamming distance you may want to read these threads on Stackexchange network:
35
- * [How to find the closest pairs of a string of binary bins in Ruby without O^2 issues?](https://stackoverflow.com/q/8734034/322020)
36
- * [Find all pairs of values that are close under Hamming distance](https://cstheory.stackexchange.com/q/18516/27420)
37
- * [Finding the closest pair between two sets of points on the hypercube](https://cstheory.stackexchange.com/q/16322/27420)
38
- * [Would PCA work for boolean data types?](https://stats.stackexchange.com/q/159705/1125)
39
- * [Using pHash to search agaist a huge image database, what is the best approach?](https://stackoverflow.com/q/18257641/322020)
40
- * [How do I speed up this BIT_COUNT query for hamming distance?](https://stackoverflow.com/q/35065675/322020)
41
- * [Hamming distance on binary strings in SQL](https://stackoverflow.com/q/4777070/322020)
42
-
43
- ## Installation
44
-
45
- brew install vips
46
-
47
- If you have troubles, see https://jcupitt.github.io/libvips/install.html
48
- Then:
49
-
50
- gem install dhash-vips
51
-
52
- If you have troubles with the `gem ruby-vips` dependency, see https://github.com/libvips/ruby-vips
53
-
54
- ## Usage
55
-
56
- ### dHash:
57
-
58
- ```ruby
59
- require "dhash-vips"
60
-
61
- hash1 = DHashVips::DHash.calculate "photo1.jpg"
62
- hash2 = DHashVips::DHash.calculate "photo2.jpg"
63
-
64
- distance = DHashVips::DHash.hamming hash1, hash2
65
- if distance < 10
66
- puts "Images are very similar"
67
- elsif distance < 20
68
- puts "Images are slightly similar"
69
- else
70
- puts "Images are different"
71
- end
72
- ```
73
-
74
- ### IDHash:
75
-
76
- ```ruby
77
- require "dhash-vips"
78
-
79
- hash1 = DHashVips::IDHash.fingerprint "photo1.jpg"
80
- hash2 = DHashVips::IDHash.fingerprint "photo2.jpg"
81
-
82
- distance = DHashVips::IDHash.distance hash1, hash2
83
- if distance < 15
84
- puts "Images are very similar"
85
- elsif distance < 25
86
- puts "Images are slightly similar"
87
- else
88
- puts "Images are different"
89
- end
90
- ```
91
-
92
- ### Notes and benchmarks
93
-
94
- * The above `15` and `25` constants were found empirically and just work well enough for 8-byte hashes. To find these thresholds we can run a rake task with hardcoded test cases (pairs of photos from the same photosession are not the same but are considered to be 'similar' enough for the purpose of this benchmark):
95
-
96
- $ rake compare_quality
97
-
98
- Dhash Phamilie DHashVips::DHash DHashVips::IDHash DHashVips::IDHash(4)
99
- The same image: 0..0 0..0 0..0 0..0 0..0
100
- 'Jordan Voth case': 2 2 4 0 0
101
- Similar images: 1..15 14..34 2..23 6..22 53..166
102
- Different images: 10..54 22..42 10..50 17..65 120..233
103
- 1/FMI^2 = 1.375 4.0 1.556 1.25 1.306
104
- FP, FN = [3, 0] [0, 6] [1, 2] [2, 0] [1, 1]
105
-
106
- The `FMI` line here is the "quality of algorithm", i.e. the best achievable function from the ["Fowlkes–Mallows index"](https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index) value if you take the "similar" and "different" test pairs and try to draw the threshold line. Smaller number is better. The last line shows number of false positives (`FP`) and false negatives (`FN`) in case of the best achieved FMI. Here I've added the [`phamilie` gem](https://github.com/toy/phamilie) that is DCT based (not a kind of dhash).
107
-
108
- * Methods were renamed from `#calculate` to `#fingerprint` and from `#hamming` to `#distance`.
109
- * The `DHash#calculate` accepts `hash_size` optional parameter that is 8 by default. The `IDHash#fingerprint`'s optional parameter is called `power` and works in a bit different way: 3 means 8 and 4 means 16 -- other sizes are not supported because they don't seem to be useful (higher fingerprint resolution makes it vulnerable to image shifts and croppings, also `#distance` becomes much slower). Because IDHash's fingerprint is more complex than DHash's one it's not that straightforward to compare them so under the hood the `#distance` method has to check the size of the fingerprint. If you are sure that fingerprints were made with power=3 then to skip the check you may use the `#distance3` method directly.
110
- * The `#distance3` method will try to compile and use the Ruby C extension that is around 15 times faster than pure Ruby implementation -- native extension currently works on macOS rbenv Ruby from 2.3.8 to 2.4.9 installed with rbenv `-k` flag. So the full benchmark:
111
-
112
- * Ruby 2.3.8p459:
113
-
114
- load the image and calculate the fingerprint:
115
- user system total real
116
- Dhash 6.191731 0.230885 6.422616 ( 6.428763)
117
- Phamilie 5.361751 0.037524 5.399275 ( 5.402553)
118
- DHashVips::DHash 0.858045 0.144820 1.002865 ( 0.924308)
119
- DHashVips::IDHash 0.769975 0.071087 0.841062 ( 0.790470)
120
- DHashVips::IDHash 4 0.805311 0.077918 0.883229 ( 0.825897)
121
-
122
- measure the distance (32*32*2000 times):
123
- user system total real
124
- Dhash hamming 1.810000 0.000000 1.810000 ( 1.824719)
125
- Phamilie distance 1.000000 0.010000 1.010000 ( 1.006127)
126
- DHashVips::DHash hamming 1.810000 0.000000 1.810000 ( 1.817415)
127
- DHashVips::IDHash distance 1.400000 0.000000 1.400000 ( 1.401333)
128
- DHashVips::IDHash distance3_ruby 3.320000 0.010000 3.330000 ( 3.337920)
129
- DHashVips::IDHash distance3_c 0.210000 0.000000 0.210000 ( 0.212864)
130
- DHashVips::IDHash distance 4 8.300000 0.120000 8.420000 ( 8.499735)
131
-
132
- * There is now a benchmark that runs both speed and quality tests summing results to a single table where lower numbers are better:
133
-
134
- ruby 2.3.8p459 (2018-10-18 revision 65136) [x86_64-darwin18]
135
- vips-8.9.2-Tue Apr 21 09:26:11 UTC 2020
136
- Version: ImageMagick 6.9.11-24 Q16 x86_64 2020-07-18
137
- Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz
138
-
139
- Fingerprint Compare 1/FMI^2
140
- Phamilie 3.943 0.630 4.000
141
- Dhash 4.969 1.097 1.375
142
- DHash 0.434 1.089 1.556
143
- IDHash 0.396 0.126 1.250
144
-
145
- * Also note that to make `#distance` able to assume the fingerprint resolution from the size of Integer that represents it, the change in its structure was needed (left half of bits was swapped with right one), so fingerprints between versions 0.0.4 and 0.0.5 became incompatible, but you probably can convert them manually. Otherwise if we put the version or structure information inside fingerprint it would become slow to (de)serialize and store.
146
-
147
- ## Development notes
148
-
149
- $ ruby test.rb
150
-
151
- * You might need to prepend `bundle exec` to all the `rake` commands.
152
-
153
- * You get this:
154
-
155
- Can't install RMagick 2.16.0. Can't find MagickWand.h.
156
-
157
- because Imagemagick sucks but we need it to benchmark alternative gems, so:
158
-
159
- $ brew install imagemagick@6
160
- $ brew unlink imagemagick@7
161
- $ brew link imagemagick@6 --force
162
-
163
- * OS X El Capitan and rbenv may cause environment issues that would make you do things like:
164
-
165
- $ ./ruby `rbenv which rake` compare_matrixes
166
-
167
- instead of just
168
-
169
- $ rake compare_matrixes
170
-
171
- For more information on that: https://github.com/jcupitt/ruby-vips/issues/141
172
-
173
- * On macOS, when you do `bundle install` it may fail to install `rmagick` gem (`dhash` gem dependency) saying:
174
-
175
- ERROR: Can't install RMagick 4.0.0. Can't find magick/MagickCore.h.
176
-
177
- To resolve this do:
178
-
179
- $ brew install imagemagick@6
180
- $ LDFLAGS="-L/usr/local/opt/imagemagick@6/lib" CPPFLAGS="-I/usr/local/opt/imagemagick@6/include" bundle install
181
-
182
- * If you get `No package 'MagickCore' found` try:
183
-
184
- $ PKG_CONFIG_PATH="/usr/local/Cellar/imagemagick@6/6.9.10-74/lib/pkgconfig" bundle install
185
-
186
- * Execute the `rake compare_quality` at least once before executing other rake tasks because it's currently the only one that downloads the test images.
187
-
188
- * The tag `v0.0.0.4` is not semver and not real gem version -- it's only for Github Actions testing purposes.
189
-
190
- * To quickly find out what does the dhash-vips Docker image include: `docker run --rm <image_name> sh -c "cat /etc/alpine-release; ruby -v; vips -v; gem list dhash-vips"` (TODO: write in this README about the existing Docker image).
191
-
192
- * Phamilie works with filenames instead of fingerprints and caches them but not distances.
193
-
194
- ## Credits
195
-
196
- [John Cupitt](https://github.com/jcupitt) (libvips and ruby-vips maintainer) helped me a lot.
data/Rakefile DELETED
@@ -1,364 +0,0 @@
1
- begin
2
- # for `rake release`
3
- require "bundler/gem_tasks"
4
- rescue LoadError
5
- puts "consider to `gem install bundler` to be able to `rake release`"
6
- end
7
-
8
- require "pp"
9
-
10
- visualize_hash = lambda do |hash|
11
- puts hash.to_s(2).rjust(64, ?0).gsub(/(?<=.)/, '\0 ').scan(/.{16}/)
12
- end
13
-
14
- desc "Compare how Vips and ImageMagick resize images to 9x8"
15
- task :compare_pixelation do |_|
16
- require_relative "lib/dhash-vips"
17
- require "dhash"
18
-
19
- ARGV.drop(1).each do |arg|
20
- FileUtils.mkdir_p "compare_pixelation/#{File.dirname arg}"
21
-
22
- puts filename = "compare_pixelation/#{arg}.dhash-vips.png"
23
- DHashVips::DHash.pixelate(arg, 8).
24
- colourspace(:srgb). # otherwise we may get `Vips::Error` `RGB color space not permitted on grayscale PNG` when the image was already bw
25
- write_to_file filename
26
- visualize_hash.call DHashVips::DHash.calculate arg
27
-
28
- puts filename = "compare_pixelation/#{arg}.dhash.png"
29
- Magick::Image.read(arg).first.quantize(256, Magick::Rec601LumaColorspace, Magick::NoDitherMethod, 8).resize!(9, 8).
30
- write filename
31
- visualize_hash.call Dhash.calculate arg
32
- end
33
- end
34
-
35
- desc "Compare how Vips resizes image to 9x8 with different kernels"
36
- task :compare_kernels do |_|
37
- require_relative "lib/dhash-vips"
38
- # require "dhash"
39
-
40
- %i{ nearest linear cubic lanczos2 lanczos3 }.each do |kernel|
41
- hashes = ARGV.drop(1).map do |arg|
42
- puts arg
43
- DHashVips::DHash.calculate(arg, 8, kernel).tap &visualize_hash
44
- end
45
- puts "kernel: #{kernel}, distance: #{DHashVips::DHash.hamming *hashes}"
46
- end
47
- end
48
-
49
- desc "Compare the quality of Dhash, Phamilie, DHashVips::DHash, DHashVips::IDHash"
50
- # in this test we want to know not that photos are the same but rather that they are from the same photosession
51
- task :compare_quality do
52
- require "dhash"
53
- require "phamilie"
54
- phamilie = Phamilie.new
55
- require_relative "lib/dhash-vips"
56
- require "mll"
57
-
58
- puts MLL::grid.call( [
59
- ["", "The same image:", "'Jordan Voth case':", "Similar images:", "Different images:", "1/FMI^2 =", "FP, FN ="],
60
- *[
61
- [Dhash, :calculate, :hamming],
62
- [phamilie, :fingerprint, :distance, nil, 0],
63
- [DHashVips::DHash, :calculate, :hamming],
64
- [DHashVips::IDHash, :fingerprint, :distance],
65
- [DHashVips::IDHash, :fingerprint, :distance, 4],
66
- ].map do |m, calc, dm, power, ii|
67
- require_relative "common"
68
- hashes = %w{
69
- 71662d4d4029a3b41d47d5baf681ab9a.jpg ad8a37f872956666c3077a3e9e737984.jpg
70
-
71
- 1b1d4bde376084011d027bba1c047a4b.jpg 6d97739b4a08f965dc9239dd24382e96.jpg
72
-
73
- 1d468d064d2e26b5b5de9a0241ef2d4b.jpg 92d90b8977f813af803c78107e7f698e.jpg
74
- 309666c7b45ecbf8f13e85a0bd6b0a4c.jpg 3f9f3db06db20d1d9f8188cd753f6ef4.jpg
75
- 679634ff89a31279a39f03e278bc9a01.jpg df0a3b93e9412536ee8a11255f974141.jpg
76
- 54192a3f65bd03163b04849e1577a40b.jpg 6d32f57459e5b79b5deca2a361eb8c6e.jpg
77
- 4b62e0eef58bfbc8d0d2fbf2b9d05483.jpg b8eb0ca91855b657f12fb3d627d45c53.jpg
78
- 21cd9a6986d98976b6b4655e1de7baf4.jpg 9b158c0d4953d47171a22ed84917f812.jpg
79
- 9c2c240ec02356472fb532f404d28dde.jpg fc762fa286489d8afc80adc8cdcb125e.jpg
80
- 7a833d873f8d49f12882e86af1cc6b79.jpg ac033cf01a3941dd1baa876082938bc9.jpg
81
- }.map(&method(:download_and_keep)).map{ |filename| [filename, m.public_send(calc, filename, *power)] }
82
- table = MLL::table[m.method(dm), [hashes.map{|_|_[ii||1]}], [hashes.map{|_|_[ii||1]}]]
83
- report = Struct.new(:same, :bw, :sim, :not_sim).new [], [], [], []
84
- hashes.size.times.to_a.repeated_combination(2) do |i, j|
85
- report[
86
- case
87
- when i == j ; :same
88
- when [i, j] == [0, 1] ; :bw
89
- when i > 3 && i + 1 == j && i % 2 == 0 ; :sim
90
- else ; :not_sim
91
- end
92
- ].push table[i][j]
93
- end
94
- min, max = [*report.sim, *report.not_sim].minmax
95
- fmi, fp, fn = (min..max+1).map do |b|
96
- fp = report.not_sim.count{ |_| _ < b }
97
- tp = report.sim.count{ |_| _ < b }
98
- fn = report.sim.count{ |_| _ >= b }
99
- [((tp + fp) * (tp + fn)).fdiv(tp * tp), fp, fn]
100
- end.reject{ |_,| _.nan? }.min_by(&:first)
101
- [
102
- "#{m.is_a?(Module) ? m : m.class}#{"(#{power})" if power}",
103
- report.same. minmax.join(".."),
104
- report.bw[0],
105
- report.sim. minmax.join(".."),
106
- report.not_sim.minmax.join(".."),
107
- fmi.round(3),
108
- [fp, fn]
109
- ]
110
- end,
111
- ].transpose, spacings: [1.5, 0], alignment: :right )
112
- end
113
-
114
- # ruby -c Rakefile && rm -f ab.png && rake compare_images -- fc762fa286489d8afc80adc8cdcb125e.jpg 9c2c240ec02356472fb532f404d28dde.jpg 2>/dev/null && ql ab.png
115
- # rm -f ab.png && ./ruby `rbenv which rake` compare_images -- 6d97739b4a08f965dc9239dd24382e96.jpg 1b1d4bde376084011d027bba1c047a4b.jpg 2>/dev/null && ql ab.png
116
- desc "Visualizes the IDHash difference measurement between two images"
117
- task :compare_images do |_|
118
- abort "there should be two image filenames passed as arguments (and optionally the `power`)" unless (3..4) === ARGV.size
119
- abort "the optional argument should be either 3 or 4" unless [3, 4].include?(power = (ARGV[3] || 3).to_i)
120
- task ARGV.last do ; end
121
- require_relative "lib/dhash-vips"
122
- ha, hb = ARGV[1, 2].map{ |filename| DHashVips::IDHash.fingerprint(filename, power) }
123
- puts "distance: #{DHashVips::IDHash.distance ha, hb}"
124
- size = 2 ** power
125
- shift = 2 * size * size
126
- ai = ha >> shift
127
- ad = ha - (ai << shift)
128
- bi = hb >> shift
129
- bd = hb - (bi << shift)
130
-
131
- a, b = ARGV[1, 2].map do |filename|
132
- image = Vips::Image.new_from_file filename
133
- image = image.resize(size.fdiv(image.width), vscale: size.fdiv(image.height)).colourspace("b-w").
134
- resize(100, vscale: 100, kernel: :nearest).colourspace("srgb")
135
- end
136
- fail unless a.width == b.width && a.height == b.height
137
-
138
- _127 = shift - 1
139
- _63 = size * size - 1
140
- n = 0
141
- width = a.width
142
- height = a.height
143
-
144
- Vips::Operation.class_eval do
145
- old_initialize = instance_method :initialize
146
- define_method :initialize do |value|
147
- old_initialize.bind(self).(value).tap do
148
- self.instance_variable_set "@operation_name", value
149
- end
150
- end
151
- old_set = instance_method :set
152
- define_method :set do |*args|
153
- args[1].instance_variable_set "@operation_name", self.instance_variable_get("@operation_name") if args.first == "image"
154
- old_set.bind(self).(*args)
155
- end
156
- end
157
- Vips::Image.class_eval do
158
- def copy
159
- return self if caller.first.end_with?("/gems/ruby-vips-2.0.9/lib/vips/operation.rb:148:in `set'") &&
160
- %w{ draw_line draw_circle }.include?(instance_variable_get "@operation_name")
161
- method_missing :copy
162
- end
163
- end
164
-
165
- require "get_process_mem"
166
- a, b = [[a, ad, ai], [b, bd, bi]].map do |image, xd, xi|
167
- _127.downto(0).each_with_index do |i, ii|
168
- mem = GetProcessMem.new(Process.pid).mb
169
- abort ">1000mb of memory consumed" if 1000 < mem
170
- if i > _63
171
- y, x = (_127 - i).divmod size
172
- else
173
- x, y = (_63 - i).divmod size
174
- end
175
- x = (width * (x + 0.5) / size).round
176
- y = (height * (y + 0.5) / size).round
177
- if i > _63
178
- (x-2..x+2).map do |x| [
179
- [x, y , x, (y + height / size / 2 - 1) % height],
180
- [x, (y + height / size / 2 + 1) % height, x, (y + height / size ) % height],
181
- ] end
182
- else
183
- (y-2..y+2).map do |y| [
184
- [ x , y, (x + width / size / 2 - 1) % width, y],
185
- [(x + width / size / 2 + 1) % width, y, (x + width / size ) % width, y],
186
- ] end
187
- end.each do |coords1, coords2|
188
- n += 1
189
- image = image.draw_line (1 - xd[i]) * 255, *coords1
190
- image = image.draw_line xd[i] * 255, *coords2
191
- end if ai[i] + bi[i] > 0 && ad[i] != bd[i]
192
- cx, cy = if i > _63
193
- [x, y + 30]
194
- else
195
- [x + 30, y]
196
- end
197
- image = image.draw_circle xd[i] * 255, cx, cy, 11, fill: true if xi[i] > 0
198
- image = image.draw_circle (1 - xd[i]) * 255, cx, cy, 10, fill: true if xi[i] > 0
199
- end
200
- image
201
- end
202
- puts "distance: #{n / 10}"
203
- puts "(above should be equal if raketask works correcly)"
204
-
205
- a.join(b, :horizontal, shim: 15).write_to_file "ab.png"
206
- end
207
-
208
- desc "Benchmark speed of Dhash, DHashVips::DHash, DHashVips::IDHash and Phamilie"
209
- task :compare_speed do
210
- require "dhash"
211
- require "phamilie"
212
- phamilie = Phamilie.new
213
- require_relative "lib/dhash-vips"
214
-
215
- filenames = %w{
216
- 71662d4d4029a3b41d47d5baf681ab9a.jpg
217
- ad8a37f872956666c3077a3e9e737984.jpg
218
- 1d468d064d2e26b5b5de9a0241ef2d4b.jpg
219
- 92d90b8977f813af803c78107e7f698e.jpg
220
- 309666c7b45ecbf8f13e85a0bd6b0a4c.jpg
221
- 3f9f3db06db20d1d9f8188cd753f6ef4.jpg
222
- df0a3b93e9412536ee8a11255f974141.jpg
223
- 679634ff89a31279a39f03e278bc9a01.jpg
224
- }.flat_map do |filename|
225
- image = Vips::Image.new_from_file "images/#{filename}"
226
- [0, 1, 2, 3].map do |a|
227
- "benchmark/#{a}_#{filename}".tap do |filename|
228
- next if File.exist? filename
229
- FileUtils.mkdir_p "benchmark"
230
- image.rot(a).write_to_file filename
231
- end
232
- end
233
- end
234
-
235
- require "benchmark"
236
- puts "load the image and calculate the fingerprint:"
237
- hashes = []
238
- Benchmark.bm 19 do |bm|
239
- [
240
- [Dhash, :calculate],
241
- [phamilie, :fingerprint],
242
- [DHashVips::DHash, :calculate],
243
- [DHashVips::IDHash, :fingerprint],
244
- [DHashVips::IDHash, :fingerprint, 4],
245
- ].each do |m, calc, power|
246
- bm.report "#{m.is_a?(Module) ? m : m.class} #{power}" do
247
- hashes.push filenames.map{ |filename| m.send calc, filename, *power }
248
- end
249
- end
250
- end
251
-
252
- # for `distance`, `distance3_ruby` and `distance3_c` we use the same hashes
253
- # this array manipulation converts [1, 2, 3, 4, 5] into [1, 2, 3, 4, 4, 4, 5]
254
- hashes[-1, 1] = hashes[-2, 2]
255
- hashes[-1, 1] = hashes[-2, 2]
256
-
257
- puts "\nmeasure the distance (32*32*2000 times):"
258
- Benchmark.bm 32 do |bm|
259
- [
260
- [Dhash, :hamming],
261
- [phamilie, :distance, nil, 1],
262
- [DHashVips::DHash, :hamming],
263
- [DHashVips::IDHash, :distance],
264
- [DHashVips::IDHash, :distance3_ruby],
265
- [DHashVips::IDHash, :distance3_c],
266
- [DHashVips::IDHash, :distance, 4],
267
- ].zip(hashes) do |(m, dm, power, ii), hs|
268
- bm.report "#{m.is_a?(Module) ? m : m.class} #{dm} #{power}" do
269
- _ = [hs, filenames][ii || 0]
270
- _.product _ do |h1, h2|
271
- 2000.times{ m.public_send dm, h1, h2 }
272
- end
273
- end
274
- end
275
- end
276
-
277
- end
278
-
279
- desc "Benchmarks everything about Dhash, DHashVips::DHash, DHashVips::IDHash and Phamilie"
280
- task :benchmark do
281
- abort "provide a folder with images grouped by similarity" unless 2 === ARGV.size
282
- abort "invalid folder provided" unless Dir.exist?(dir = ARGV.last)
283
-
284
- require "dhash"
285
- require "phamilie"
286
- phamilie = Phamilie.new
287
- require_relative "lib/dhash-vips"
288
-
289
- filenames = Dir.glob("#{dir}/*").map{ |_| Dir.glob "#{_}/*" }
290
- puts "image groups sizes: #{filenames.map(&:size)}"
291
- require "benchmark"
292
-
293
- puts "step 1 / 3 (fingerprinting)"
294
- hashes = []
295
- bm1 = [
296
- [phamilie, :fingerprint],
297
- [Dhash, :calculate],
298
- [DHashVips::DHash, :calculate],
299
- [DHashVips::IDHash, :fingerprint],
300
- ].map do |m, calc, power|
301
- Benchmark.realtime do
302
- hashes.push filenames.flatten.map{ |filename| m.send calc, filename, *power }
303
- end
304
- end
305
-
306
- puts "step 2 / 3 (comparing fingerprints)"
307
- combs = filenames.flatten.size ** 2
308
- n = 10_000_000_000_000 / Dir.glob("#{dir}/*/*").map(&File.method(:size)).inject(:+) / combs
309
- bm2 = [
310
- [phamilie, :distance, nil, filenames.flatten],
311
- [Dhash, :hamming],
312
- [DHashVips::DHash, :hamming],
313
- [DHashVips::IDHash, :distance3_c],
314
- ].zip(hashes).map do |(m, dm, power, ii), hs|
315
- Benchmark.realtime do
316
- _ = ii || hs
317
- _.product _ do |h1, h2|
318
- n.times{ m.public_send dm, h1, h2 }
319
- end
320
- end
321
- end
322
-
323
- puts "step 3 / 3 (looking for the best threshold)"
324
- bm3 = [
325
- [phamilie, :fingerprint, :distance, nil, 0],
326
- [Dhash, :calculate, :hamming],
327
- [DHashVips::DHash, :calculate, :hamming],
328
- [DHashVips::IDHash, :fingerprint, :distance],
329
- ].map do |m, calc, dm, power, ii|
330
- require_relative "common"
331
- hashes = Dir.glob("#{dir}/*").flat_map{ |_| Dir.glob "#{_}/*" }.map{ |filename| [filename, m.public_send(calc, filename, *power)] }
332
- report = Struct.new(:same, :sim, :not_sim).new [], [], []
333
- hashes.size.times.to_a.repeated_combination(2) do |i, j|
334
- report[
335
- case
336
- when i == j ; :same
337
- when File.split(File.split(hashes[i][0]).first).last ==
338
- File.split(File.split(hashes[j][0]).first).last && i < j ; :sim
339
- else ; :not_sim
340
- end
341
- ].push m.method(dm).call hashes[i][ii||1], hashes[j][ii||1]
342
- end
343
- min, max = [*report.sim, *report.not_sim].minmax
344
- fmi, fp, fn = (min..max+1).map do |b|
345
- fp = report.not_sim.count{ |_| _ < b }
346
- tp = report.sim.count{ |_| _ < b }
347
- fn = report.sim.count{ |_| _ >= b }
348
- [((tp + fp) * (tp + fn)).fdiv(tp * tp), fp, fn]
349
- end.reject{ |_,| _.nan? }.min_by(&:first)
350
- fmi
351
- end
352
-
353
- puts RUBY_DESCRIPTION
354
- system "vips -v"
355
- system "identify -version | /usr/bin/head -1"
356
- system "sysctl -n machdep.cpu.brand_string"
357
- require "mll"
358
- puts MLL::grid.call %w{ \ Fingerprint Compare 1/FMI^2 }.zip(*[
359
- %w{ Phamilie Dhash DHash IDHash },
360
- *[bm1, bm2].map{ |bm| bm.map{ |_| "%.3f" % _ } },
361
- bm3.map{ |_| "%.3f" % _ }
362
- ].transpose).transpose, spacings: [1.5, 0], alignment: :right
363
- puts "(lower numbers are better)"
364
- end