phash-rb 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +24 -1
- data/exe/phash +54 -4
- data/lib/phash/image.rb +1 -1
- data/lib/phash/version.rb +1 -1
- data/lib/phash.rb +29 -8
- metadata +7 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 25abc7a966b034a17a10113432e8135471c109f8b688c4797c5c2fbe676e0e57
|
|
4
|
+
data.tar.gz: 4b5220f02a144e1173d1acda79bd92b72885cc5c9c8f81d426f51a6587b9c9de
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 127987f334068eabbf5e92ee67a6b4560338797c59246a55bca2d92193a1776330cd62dda2c8b604ea57aa08dbafb71bd831bae5fde408fb741c350eb2798651
|
|
7
|
+
data.tar.gz: c4a2b91bc23f7ea34eafb4d02e2095ac7153f088f6b0be8ff381016b17e22f940ed9b65b39f831a09d919ce5a19d0cebd429f2a80991f4190505d5e8ed966654
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
### 0.3.0
|
|
6
|
+
|
|
7
|
+
- Optimize fingerprint calculation for large images (~24x faster for images >500x500)
|
|
8
|
+
- Cache DCT matrix for faster batch processing
|
|
9
|
+
- Lower required Ruby version from 3.0 to 2.6
|
|
10
|
+
- Improve CLI: support multiple files, `--compare`, `--help`, `--version`
|
|
11
|
+
- Add `Phash.distance(fp1, fp2)` class method for computing Hamming distance
|
|
12
|
+
|
|
5
13
|
### 0.2.0
|
|
6
14
|
|
|
7
15
|
Allow passing Vips::Image directly to fingerprint function
|
data/README.md
CHANGED
|
@@ -8,7 +8,7 @@ Phashion replacement without native extension (however it currently relies on li
|
|
|
8
8
|
## Requirements
|
|
9
9
|
|
|
10
10
|
- libvips (see requirements for [ruby-vips](https://github.com/libvips/ruby-vips))
|
|
11
|
-
- Ruby
|
|
11
|
+
- Ruby 2.6.0 or later
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
14
14
|
|
|
@@ -52,11 +52,34 @@ require 'phash'
|
|
|
52
52
|
Phash::Image.new(filename1).fingerprint # 3714852948054213970
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
+
You can also compute the Hamming distance between two fingerprints directly:
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
fp1 = Phash.fingerprint(filename1)
|
|
59
|
+
fp2 = Phash.fingerprint(filename2)
|
|
60
|
+
Phash.distance(fp1, fp2) # 30
|
|
61
|
+
```
|
|
62
|
+
|
|
55
63
|
Fingerprint is also available in a command `phash`:
|
|
56
64
|
|
|
57
65
|
```bash
|
|
58
66
|
$ phash test/fixtures/test.jpg
|
|
59
67
|
3714852948054213970
|
|
68
|
+
|
|
69
|
+
$ phash image1.jpg image2.jpg image3.jpg
|
|
70
|
+
3714852948054213970
|
|
71
|
+
5378904591010983442
|
|
72
|
+
1234567890123456789
|
|
73
|
+
|
|
74
|
+
$ phash --compare image1.jpg image2.jpg
|
|
75
|
+
image1.jpg: 3714852948054213970
|
|
76
|
+
image2.jpg: 5378904591010983442
|
|
77
|
+
Hamming distance: 30
|
|
78
|
+
Result: different images
|
|
79
|
+
|
|
80
|
+
$ phash --help
|
|
81
|
+
Usage: phash [options] <file> [file...]
|
|
82
|
+
...
|
|
60
83
|
```
|
|
61
84
|
|
|
62
85
|
Additionally you can pass `Vips::Image` directly to fingerprint function:
|
data/exe/phash
CHANGED
|
@@ -2,12 +2,62 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../lib/phash"
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
# Writes the CLI usage summary (usage line, commands, options) to standard output.
def print_help
  usage_lines = [
    "Usage: phash [options] <file> [file...]",
    "",
    "Commands:",
    " phash <file> Print fingerprint for file",
    " phash <file1> <file2> ... Print fingerprints for multiple files",
    " phash --compare <file1> <file2> Compare two images and print distance",
    "",
    "Options:",
    " -h, --help Show this help message",
    " -v, --version Show version",
    " -c, --compare Compare two images"
  ]

  # Kernel#puts emits each array element on its own line; "" yields a blank line.
  puts(usage_lines)
end
|
|
18
|
+
|
|
19
|
+
# Writes the gem name followed by Phash::VERSION to standard output.
def print_version
  banner = "phash-rb #{Phash::VERSION}"
  puts banner
end
|
|
6
22
|
|
|
7
|
-
|
|
23
|
+
# Prints the fingerprint of +filename+ to standard output.
#
# If computing the fingerprint raises a StandardError, a message naming the
# file is written to stderr and the process exits with status 1.
def compute_fingerprint(filename)
  begin
    fingerprint = Phash.fingerprint(filename)
    puts fingerprint
  rescue StandardError => error
    warn "Failed to compute pHash for #{filename}: #{error.message}"
    exit 1
  end
end
|
|
29
|
+
|
|
30
|
+
# Fingerprints two image files, then prints both fingerprints, their Hamming
# distance, and a verdict line (distance below 10 is reported as likely
# duplicates, anything else as different images).
#
# Any StandardError raised along the way is reported on stderr and the process
# exits with status 1.
def compare_images(file1, file2)
  begin
    first_fp, second_fp = [file1, file2].map { |path| Phash.fingerprint(path) }
    hamming = Phash.distance(first_fp, second_fp)

    verdict =
      if hamming < 10
        "Result: likely duplicates"
      else
        "Result: different images"
      end

    puts "#{file1}: #{first_fp}"
    puts "#{file2}: #{second_fp}"
    puts "Hamming distance: #{hamming}"
    puts verdict
  rescue StandardError => error
    warn "Failed to compare images: #{error.message}"
    exit 1
  end
end
|
|
43
|
+
|
|
44
|
+
# Command-line dispatch, checked in this order:
#   1. no arguments, or -h/--help anywhere -> print help, exit 0
#   2. -v/--version anywhere               -> print version, exit 0
#   3. -c/--compare anywhere               -> compare the two remaining arguments
#   4. otherwise                           -> print a fingerprint per argument
if ARGV.empty? || !(ARGV & %w[-h --help]).empty?
  print_help
  exit 0
end

unless (ARGV & %w[-v --version]).empty?
  print_version
  exit 0
end

compare_flags = %w[-c --compare]

if (ARGV & compare_flags).empty?
  ARGV.each { |file| compute_fingerprint(file) }
else
  # Everything that is not the compare flag itself is treated as a file name.
  files = ARGV - compare_flags
  unless files.size == 2
    warn "Error: --compare requires exactly 2 files"
    exit 1
  end
  compare_images(files[0], files[1])
end
|
data/lib/phash/image.rb
CHANGED
data/lib/phash/version.rb
CHANGED
data/lib/phash.rb
CHANGED
|
@@ -10,6 +10,10 @@ module Phash
|
|
|
10
10
|
CIMG_V = CIMG_PI / 2 / 32
|
|
11
11
|
CIMG_SCALE = 2**8 + 1
|
|
12
12
|
|
|
13
|
+
# Computes the Hamming distance (number of differing bits) between two
# integer fingerprints.
#
# @param fingerprint1 [Integer] first fingerprint
# @param fingerprint2 [Integer] second fingerprint
# @return [Integer] count of bit positions in which the fingerprints differ
#
# NOTE(review): negative inputs are interpreted as 64-bit two's-complement
# values; this assumes fingerprints are 64 bits wide — confirm against
# Phash.fingerprint.
def self.distance(fingerprint1, fingerprint2)
  diff = fingerprint1 ^ fingerprint2

  # Ruby integers are arbitrary precision: a negative XOR (e.g. when one
  # fingerprint was read back from a signed 64-bit column) would be rendered
  # by Integer#to_s(2) as "-" plus the magnitude, giving a wrong bit count.
  # Masking maps it onto its unsigned 64-bit representation first.
  diff &= 0xFFFF_FFFF_FFFF_FFFF if diff.negative?

  diff.to_s(2).count("1")
end
|
|
16
|
+
|
|
13
17
|
def self.fingerprint(path_or_img)
|
|
14
18
|
img = path_or_img.is_a?(Vips::Image) ? path_or_img : Vips::Image.new_from_file(path_or_img)
|
|
15
19
|
#Y = (66*R + 129*G + 25*B + 128)/256 + 16
|
|
@@ -35,19 +39,36 @@ module Phash
|
|
|
35
39
|
private
|
|
36
40
|
|
|
37
41
|
# Returns the 32x32 coefficient matrix used for the DCT step.
#
# The matrix is built on the first call and memoized in @dct_matrix, so later
# calls reuse the same Matrix instance. Every entry is rounded to 6 decimals.
def self.ph_dct_matrix
  @dct_matrix ||= begin
    first_row_value = 1 / Math.sqrt(32)
    row_scale = Math.sqrt(2.0 / 32)

    Matrix.build(32, 32) do |row, col|
      coefficient =
        if row < 1
          # Row 0 holds a constant value in every column.
          first_row_value
        else
          row_scale * Math.cos(CIMG_V * row * (2 * col + 1))
        end
      coefficient.round(6)
    end
  end
end
|
|
45
51
|
|
|
52
|
+
# Tells whether +img+ has more than 250,000 pixels (width * height).
#
# @param img [#width, #height] object exposing integer pixel dimensions
# @return [Boolean] true when the pixel count is strictly greater than 250_000
def self.large_image?(img)
  pixel_count = img.width * img.height
  pixel_count > 250_000
end
|
|
55
|
+
|
|
46
56
|
# Builds a 32x32 Matrix by sampling +img+ on an evenly spaced grid.
#
# For grid cell (y, x) the pixel at column (x * width / 32.0).to_i and row
# (y * height / 32.0).to_i is read, and the first band of that pixel is
# divided by CIMG_SCALE.
#
# Images for which large_image? is true are read one pixel at a time with
# getpoint, avoiding a copy of the whole image into Ruby arrays; smaller
# images are converted once with to_a and indexed directly.
def self.sample(img)
  col_step = img.width / 32.0
  row_step = img.height / 32.0

  # Choose the pixel reader once, up front; both variants take (px, py).
  first_band_at =
    if large_image?(img)
      ->(px, py) { img.getpoint(px, py)[0] }
    else
      rows = img.to_a
      ->(px, py) { rows[py][px][0] }
    end

  Matrix.build(32, 32) do |y, x|
    first_band_at.call((x * col_step).to_i, (y * row_step).to_i) / CIMG_SCALE
  end
end
|
|
53
74
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: phash-rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tomasz Ratajczak
|
|
8
8
|
- Krzysztof Hasiński
|
|
9
|
+
autorequire:
|
|
9
10
|
bindir: exe
|
|
10
11
|
cert_chain: []
|
|
11
|
-
date:
|
|
12
|
+
date: 2025-12-12 00:00:00.000000000 Z
|
|
12
13
|
dependencies:
|
|
13
14
|
- !ruby/object:Gem::Dependency
|
|
14
15
|
name: ruby-vips
|
|
@@ -63,6 +64,7 @@ metadata:
|
|
|
63
64
|
homepage_uri: http://github.com/khasinski/phash-rb
|
|
64
65
|
source_code_uri: http://github.com/khasinski/phash-rb
|
|
65
66
|
changelog_uri: http://github.com/khasinski/phash-rb
|
|
67
|
+
post_install_message:
|
|
66
68
|
rdoc_options: []
|
|
67
69
|
require_paths:
|
|
68
70
|
- lib
|
|
@@ -70,14 +72,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
70
72
|
requirements:
|
|
71
73
|
- - ">="
|
|
72
74
|
- !ruby/object:Gem::Version
|
|
73
|
-
version:
|
|
75
|
+
version: 2.6.0
|
|
74
76
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
77
|
requirements:
|
|
76
78
|
- - ">="
|
|
77
79
|
- !ruby/object:Gem::Version
|
|
78
80
|
version: '0'
|
|
79
81
|
requirements: []
|
|
80
|
-
rubygems_version: 3.
|
|
82
|
+
rubygems_version: 3.0.3.1
|
|
83
|
+
signing_key:
|
|
81
84
|
specification_version: 4
|
|
82
85
|
summary: Ruby implementation of pHash library
|
|
83
86
|
test_files: []
|