phash-rb 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 995d2669bfb1ed93482da8a528db5da15a4a01095ab4b04798a7a8bd2ef2d207
4
- data.tar.gz: 6abdafea8b495b031378b22b36bba58a1b69bcef263fa1f8981fd2dd3c774828
3
+ metadata.gz: cb8320eac1ac54f8b57ce98e88cefc376c7921e57e3874e40e4bd046544077ad
4
+ data.tar.gz: 62a21503a180cf5c42f5681d661f059032fe37e9947a5037d6dcb53727bb1a8a
5
5
  SHA512:
6
- metadata.gz: 7416ab673a2218b225a138e4934d7ca117af2617256d8ffeb8919ad54a23b18025b81795d1f1f3e592532f823bf30db3cca127acc22abd6fb0c4dd07f7ed9552
7
- data.tar.gz: 47830995008c37150316b48c08504747ece664d652fae7116752d1ec996fd53f444b612c70ff3a447e97e2e36a3946c62edc6d87eafc8564bf9b4636f2f878e4
6
+ metadata.gz: 923e41cf1d048b71ca224f47c6809315ce071348e85f916f508c4f1eca9b8a31c2508c9f8a47fd916cb3729b2d0a86010c0f69c67930f874338785ac8bc3ebc0
7
+ data.tar.gz: af270246aaafcd1671a40206fd7d93b7e154a550ad85720769a517227d31ae4e42c4e92f4e845126dfdebcc7cdd23e434db42f10369c3802e977b3efc46bbe65
data/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ### 0.3.1
6
+
7
+ - Fix fingerprinting for 4-band images (RGBA with alpha channel) on vips 8.15.1+
8
+
9
+ ### 0.3.0
10
+
11
+ - Optimize fingerprint calculation for large images (~24x faster for images >500x500)
12
+ - Cache DCT matrix for faster batch processing
13
+ - Lower required Ruby version from 3.0 to 2.6
14
+ - Improve CLI: support multiple files, `--compare`, `--help`, `--version`
15
+ - Add `Phash.distance(fp1, fp2)` class method for computing Hamming distance
16
+
5
17
  ### 0.2.0
6
18
 
7
19
  Allow passing Vips::Image directly to fingerprint function
data/README.md CHANGED
@@ -8,7 +8,7 @@ Phashion replacement without native extension (however it currently relies on li
8
8
  ## Requirements
9
9
 
10
10
  - libvips (see requirements for [ruby-vips](https://github.com/libvips/ruby-vips))
11
- - Ruby 3.0.0 or later
11
+ - Ruby 2.6.0 or later
12
12
 
13
13
  ## Installation
14
14
 
@@ -52,11 +52,34 @@ require 'phash'
52
52
  Phash::Image.new(filename1).fingerprint # 3714852948054213970
53
53
  ```
54
54
 
55
+ You can also compute the Hamming distance between two fingerprints directly:
56
+
57
+ ```ruby
58
+ fp1 = Phash.fingerprint(filename1)
59
+ fp2 = Phash.fingerprint(filename2)
60
+ Phash.distance(fp1, fp2) # 30
61
+ ```
62
+
55
63
  Fingerprint is also available in a command `phash`:
56
64
 
57
65
  ```bash
58
66
  $ phash test/fixtures/test.jpg
59
67
  3714852948054213970
68
+
69
+ $ phash image1.jpg image2.jpg image3.jpg
70
+ 3714852948054213970
71
+ 5378904591010983442
72
+ 1234567890123456789
73
+
74
+ $ phash --compare image1.jpg image2.jpg
75
+ image1.jpg: 3714852948054213970
76
+ image2.jpg: 5378904591010983442
77
+ Hamming distance: 30
78
+ Result: different images
79
+
80
+ $ phash --help
81
+ Usage: phash [options] <file> [file...]
82
+ ...
60
83
  ```
61
84
 
62
85
  Additionally you can pass `Vips::Image` directly to fingerprint function:
data/exe/phash CHANGED
@@ -2,12 +2,62 @@
2
2
 
3
3
  require_relative "../lib/phash"
4
4
 
5
- filename = ARGV[0]
5
+ def print_help
6
+ puts "Usage: phash [options] <file> [file...]"
7
+ puts ""
8
+ puts "Commands:"
9
+ puts " phash <file> Print fingerprint for file"
10
+ puts " phash <file1> <file2> ... Print fingerprints for multiple files"
11
+ puts " phash --compare <file1> <file2> Compare two images and print distance"
12
+ puts ""
13
+ puts "Options:"
14
+ puts " -h, --help Show this help message"
15
+ puts " -v, --version Show version"
16
+ puts " -c, --compare Compare two images"
17
+ end
18
+
19
+ def print_version
20
+ puts "phash-rb #{Phash::VERSION}"
21
+ end
6
22
 
7
- begin
23
+ def compute_fingerprint(filename)
8
24
  puts Phash.fingerprint(filename)
9
25
  rescue => e
10
- puts "Failed to compute pHash for #{filename}."
11
- puts e
26
+ warn "Failed to compute pHash for #{filename}: #{e.message}"
27
+ exit 1
28
+ end
29
+
30
+ def compare_images(file1, file2)
31
+ fp1 = Phash.fingerprint(file1)
32
+ fp2 = Phash.fingerprint(file2)
33
+ distance = Phash.distance(fp1, fp2)
34
+
35
+ puts "#{file1}: #{fp1}"
36
+ puts "#{file2}: #{fp2}"
37
+ puts "Hamming distance: #{distance}"
38
+ puts distance < 10 ? "Result: likely duplicates" : "Result: different images"
39
+ rescue => e
40
+ warn "Failed to compare images: #{e.message}"
12
41
  exit 1
13
42
  end
43
+
44
+ if ARGV.empty? || ARGV.include?("-h") || ARGV.include?("--help")
45
+ print_help
46
+ exit 0
47
+ end
48
+
49
+ if ARGV.include?("-v") || ARGV.include?("--version")
50
+ print_version
51
+ exit 0
52
+ end
53
+
54
+ if ARGV.include?("-c") || ARGV.include?("--compare")
55
+ args = ARGV.reject { |a| a == "-c" || a == "--compare" }
56
+ if args.size != 2
57
+ warn "Error: --compare requires exactly 2 files"
58
+ exit 1
59
+ end
60
+ compare_images(args[0], args[1])
61
+ else
62
+ ARGV.each { |file| compute_fingerprint(file) }
63
+ end
data/lib/phash/image.rb CHANGED
@@ -13,7 +13,7 @@ module Phash
13
13
  end
14
14
 
15
15
  def distance_from(other_image)
16
- (fingerprint ^ other_image.fingerprint).to_s(2).count('1')
16
+ Phash.distance(fingerprint, other_image.fingerprint)
17
17
  end
18
18
  end
19
19
  end
data/lib/phash/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Phash
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.1"
5
5
  end
data/lib/phash.rb CHANGED
@@ -10,8 +10,14 @@ module Phash
10
10
  CIMG_V = CIMG_PI / 2 / 32
11
11
  CIMG_SCALE = 2**8 + 1
12
12
 
13
+ def self.distance(fingerprint1, fingerprint2)
14
+ (fingerprint1 ^ fingerprint2).to_s(2).count('1')
15
+ end
16
+
13
17
  def self.fingerprint(path_or_img)
14
18
  img = path_or_img.is_a?(Vips::Image) ? path_or_img : Vips::Image.new_from_file(path_or_img)
19
+ # Strip alpha channel if present (4-band images)
20
+ img = img[0..2] if img.bands == 4
15
21
  #Y = (66*R + 129*G + 25*B + 128)/256 + 16
16
22
  img = img * [66.0 / 256, 129.0 / 256, 25.0 / 256]
17
23
  r, g, b = img.bandsplit
@@ -35,19 +41,36 @@ module Phash
35
41
  private
36
42
 
37
43
  def self.ph_dct_matrix
38
- v = 1 / Math.sqrt(32)
39
- c1 = Math.sqrt(2.0 / 32)
44
+ @dct_matrix ||= begin
45
+ v = 1 / Math.sqrt(32)
46
+ c1 = Math.sqrt(2.0 / 32)
40
47
 
41
- Matrix.build(32, 32) do |y, x|
42
- (y < 1 ? v : c1 * Math.cos(CIMG_V * y * (2*x + 1))).round(6)
48
+ Matrix.build(32, 32) do |y, x|
49
+ (y < 1 ? v : c1 * Math.cos(CIMG_V * y * (2*x + 1))).round(6)
50
+ end
43
51
  end
44
52
  end
45
53
 
54
+ def self.large_image?(img)
55
+ img.width * img.height > 250_000
56
+ end
57
+
46
58
  def self.sample(img)
47
- w, h = img.width / 32.0, img.height / 32.0
48
- src = img.to_a
49
- Matrix.build(32, 32) do |y, x|
50
- src[y*h][x*w][0] / CIMG_SCALE
59
+ w, h = img.width, img.height
60
+ w_step = w / 32.0
61
+ h_step = h / 32.0
62
+
63
+ if large_image?(img)
64
+ # Large images: use getpoint to avoid copying entire image to Ruby array
65
+ Matrix.build(32, 32) do |y, x|
66
+ img.getpoint((x * w_step).to_i, (y * h_step).to_i)[0] / CIMG_SCALE
67
+ end
68
+ else
69
+ # Small images: to_a is faster than 1024 getpoint calls
70
+ src = img.to_a
71
+ Matrix.build(32, 32) do |y, x|
72
+ src[(y * h_step).to_i][(x * w_step).to_i][0] / CIMG_SCALE
73
+ end
51
74
  end
52
75
  end
53
76
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: phash-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomasz Ratajczak
@@ -70,14 +70,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="
72
72
  - !ruby/object:Gem::Version
73
- version: 3.0.0
73
+ version: 2.6.0
74
74
  required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  requirements:
76
76
  - - ">="
77
77
  - !ruby/object:Gem::Version
78
78
  version: '0'
79
79
  requirements: []
80
- rubygems_version: 3.6.7
80
+ rubygems_version: 4.0.2
81
81
  specification_version: 4
82
82
  summary: Ruby implementation of pHash library
83
83
  test_files: []