phashion 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES.md ADDED
@@ -0,0 +1,16 @@
1
+ History
2
+ =========
3
+
4
+ 1.0.1
5
+ -------
6
+
7
+ * Remove RubyInline, use standard Ruby extension infrastructure.
8
+ * Update duplicate threshold constant based on wider image testing.
9
+ * Make duplicate threshold variable so users can tune it based on their dataset.
10
+ * Add Phashion::Image#fingerprint method which exposes an Image's 64-bit hash.
11
+
12
+
13
+ 1.0.0
14
+ -------
15
+
16
+ Initial release.
data/README.md CHANGED
@@ -9,7 +9,7 @@ Installation
9
9
 
10
10
  First you need to install pHash. pHash requires three libraries: CImg, ffmpeg and libjpeg. My system already came with libjpeg on it so I didn't have to do anything for it. YMMV.
11
11
 
12
- Install CImg.h by downloading the latest version from cimg.sf.net and placing the CImg.h header file in /usr/local/include.
12
+ Install CImg.h by downloading the latest version from http://cimg.sf.net and placing the CImg.h header file in /usr/local/include.
13
13
 
14
14
  If you are working with audio or video, you will need to install ffmpeg:
15
15
 
data/Rakefile CHANGED
@@ -10,8 +10,8 @@ begin
10
10
  gem.email = "mperham@gmail.com"
11
11
  gem.homepage = "http://github.com/mperham/phashion"
12
12
  gem.authors = ["Mike Perham"]
13
- gem.add_dependency 'RubyInline'
14
- gem.version = '1.0.0'
13
+ gem.add_development_dependency 'rake-compiler', '>= 0.7.0'
14
+ gem.version = '1.0.1'
15
15
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
16
  end
17
17
  Jeweler::GemcutterTasks.new
@@ -50,3 +50,9 @@ Rake::RDocTask.new do |rdoc|
50
50
  rdoc.rdoc_files.include('README*')
51
51
  rdoc.rdoc_files.include('lib/**/*.rb')
52
52
  end
53
+
54
+
55
+ gem 'rake-compiler', '>= 0.7.0'
56
+ require "rake/extensiontask"
57
+
58
+ Rake::ExtensionTask.new("phashion_ext")
data/TODO.md ADDED
@@ -0,0 +1,6 @@
1
+ TODO
2
+ =======
3
+
4
+ - Add video and audio APIs (someone else will have to do this; I have no need for it).
5
+ - Add broader support for the entire pHash API, including other types of hashes. Currently
6
+ the image support hardcodes use of the DCT hash.
@@ -0,0 +1,13 @@
1
+ require 'mkmf'
2
+
3
+ $CFLAGS << " -x c++ #{ENV["CFLAGS"]}"
4
+ $LIBS << " -lpHash #{ENV["LIBS"]}"
5
+
6
+ # TODO: locate the pHash library via dir_config/have_library (draft below) instead of hard-coding -lpHash above
7
+ # dir_config 'pHash'
8
+ # if !have_library('pHash', 'ph_dct_imagehash')
9
+ # puts "Unable to find pHash library, please use 'gem install phashion -- --with-pHash-dir=/phash/install/root'"
10
+ # exit 1
11
+ # end
12
+
13
+ create_makefile('phashion_ext')
@@ -0,0 +1,35 @@
1
+ #include "ruby.h"
2
+ #include "pHash.h"
3
+
4
+ static VALUE image_hash_for(VALUE self, VALUE _filename) {
5
+ char * filename = StringValuePtr(_filename);
6
+ ulong64 hash;
7
+ if (-1 == ph_dct_imagehash(filename, hash)) {
8
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
9
+ }
10
+ return ULL2NUM(hash);
11
+ }
12
+
13
+
14
+ static VALUE hamming_distance(VALUE self, VALUE a, VALUE b) {
15
+ int result = 0;
16
+ result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
17
+ if (-1 == result) {
18
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
19
+ }
20
+ return INT2NUM(result);
21
+ }
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+ void Init_phashion_ext() {
27
+ VALUE c = rb_cObject;
28
+ c = rb_const_get(c, rb_intern("Phashion"));
29
+
30
+ rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
31
+ rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
32
+ }
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
data/lib/phashion.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'rubygems'
2
- require 'inline'
3
2
 
4
3
  ##
5
4
  # Provides a clean and simple API to detect duplicate image files using
@@ -9,11 +8,13 @@ require 'inline'
9
8
  # int ph_dct_imagehash(const char *file, ulong64 &hash);
10
9
  # int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
11
10
 
12
- class Phashion
13
- VERSION = '1.0.0'
11
+ module Phashion
12
+ VERSION = '1.0.1'
14
13
 
15
14
  class Image
16
- DUPE_THRESHOLD = 26
15
+ SETTINGS = {
16
+ :dupe_threshold => 15
17
+ }
17
18
 
18
19
  attr_reader :filename
19
20
  def initialize(filename)
@@ -21,83 +22,14 @@ class Phashion
21
22
  end
22
23
 
23
24
  def duplicate?(other)
24
- Phashion.hamming_distance(hash_code, other.send(:hash_code)) < DUPE_THRESHOLD
25
+ Phashion.hamming_distance(fingerprint, other.fingerprint) < SETTINGS[:dupe_threshold]
25
26
  end
26
-
27
- private
28
27
 
29
- def hash_code
28
+ def fingerprint
30
29
  @hash ||= Phashion.image_hash_for(@filename)
31
30
  end
32
31
  end
33
32
 
34
- def self.image_hash_for(filename)
35
- end
36
-
37
- def self.hamming_distance(hashA, hashB)
38
- end
39
-
40
- inline do |builder|
41
- if test ?d, "/opt/local" then
42
- builder.add_compile_flags "-I/opt/local/include"
43
- builder.add_link_flags "-L/opt/local/lib"
44
- end
45
-
46
- builder.add_compile_flags '-x c++', '-lstdc++'
47
- builder.add_link_flags "-lpHash"
48
- builder.include '"pHash.h"'
49
-
50
- builder.c_singleton <<-"END"
51
- VALUE image_hash_for(const char *filename) {
52
- ulong64 hash;
53
- if (-1 == ph_dct_imagehash(filename, hash)) {
54
- rb_raise(rb_eRuntimeError, "Unknown pHash error");
55
- }
56
- return ULL2NUM(hash);
57
- }
58
- END
59
-
60
- builder.c_singleton <<-"END"
61
- VALUE hamming_distance(VALUE a, VALUE b) {
62
- int result = 0;
63
- result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
64
- if (-1 == result) {
65
- rb_raise(rb_eRuntimeError, "Unknown pHash error");
66
- }
67
- return INT2NUM(result);
68
- }
69
- END
70
-
71
- end
72
33
  end
73
34
 
74
- if __FILE__ == $0
75
-
76
- def memory
77
- `ps -o vsz,rss -p #{$$}`.strip
78
- end
79
-
80
- def assert_duplicate(a, b)
81
- raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
82
- end
83
-
84
- def assert_not_duplicate(a, b)
85
- raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
86
- end
87
-
88
- FILES = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
89
-
90
- images = FILES.map {|f| PHash::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
91
- # GC.start
92
- # puts memory
93
- assert_duplicate images[0], images[1]
94
- assert_duplicate images[1], images[2]
95
- assert_duplicate images[0], images[2]
96
-
97
- assert_not_duplicate images[0], images[3]
98
- assert_not_duplicate images[1], images[3]
99
- assert_not_duplicate images[2], images[3]
100
- # GC.start
101
- # puts memory
102
-
103
- end
35
+ require 'phashion_ext'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 0
9
- version: 1.0.0
8
+ - 1
9
+ version: 1.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Mike Perham
@@ -18,7 +18,7 @@ date: 2010-05-20 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: RubyInline
21
+ name: rake-compiler
22
22
  prerelease: false
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
24
  requirements:
@@ -26,24 +26,30 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  segments:
28
28
  - 0
29
- version: "0"
30
- type: :runtime
29
+ - 7
30
+ - 0
31
+ version: 0.7.0
32
+ type: :development
31
33
  version_requirements: *id001
32
34
  description: Simple wrapper around the pHash library
33
35
  email: mperham@gmail.com
34
36
  executables: []
35
37
 
36
- extensions: []
37
-
38
+ extensions:
39
+ - ext/phashion_ext/extconf.rb
38
40
  extra_rdoc_files:
39
41
  - LICENSE
40
42
  - README.md
41
43
  files:
42
44
  - .document
43
45
  - .gitignore
46
+ - CHANGES.md
44
47
  - LICENSE
45
48
  - README.md
46
49
  - Rakefile
50
+ - TODO.md
51
+ - ext/phashion_ext/extconf.rb
52
+ - ext/phashion_ext/phashion_ext.c
47
53
  - lib/phashion.rb
48
54
  - test/86x86-0a1e.jpeg
49
55
  - test/86x86-83d6.jpeg