phashion 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES.md ADDED
@@ -0,0 +1,16 @@
1
+ History
2
+ =========
3
+
4
+ 1.0.1
5
+ -------
6
+
7
+ * Remove RubyInline, use standard Ruby extension infrastructure.
8
+ * Lower the default duplicate threshold from 26 to 15 based on wider image testing.
9
+ * Make the duplicate threshold configurable via Phashion::Image::SETTINGS[:dupe_threshold] so users can tune it for their dataset.
10
+ * Add Phashion::Image#fingerprint method which exposes an Image's 64-bit hash.
11
+
12
+
13
+ 1.0.0
14
+ -------
15
+
16
+ Initial release.
data/README.md CHANGED
@@ -9,7 +9,7 @@ Installation
9
9
 
10
10
  First you need to install pHash. pHash requires three libraries: CImg, ffmpeg and libjpeg. My system already came with libjpeg on it so I didn't have to do anything for it. YMMV.
11
11
 
12
- Install CImg.h by downloading the latest version from cimg.sf.net and placing the CImg.h header file in /usr/local/include.
12
+ Install CImg.h by downloading the latest version from http://cimg.sf.net and placing the CImg.h header file in /usr/local/include.
13
13
 
14
14
  If you are working with audio or video, you will need to install ffmpeg:
15
15
 
data/Rakefile CHANGED
@@ -10,8 +10,8 @@ begin
10
10
  gem.email = "mperham@gmail.com"
11
11
  gem.homepage = "http://github.com/mperham/phashion"
12
12
  gem.authors = ["Mike Perham"]
13
- gem.add_dependency 'RubyInline'
14
- gem.version = '1.0.0'
13
+ gem.add_development_dependency 'rake-compiler', '>= 0.7.0'
14
+ gem.version = '1.0.1'
15
15
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
16
  end
17
17
  Jeweler::GemcutterTasks.new
@@ -50,3 +50,9 @@ Rake::RDocTask.new do |rdoc|
50
50
  rdoc.rdoc_files.include('README*')
51
51
  rdoc.rdoc_files.include('lib/**/*.rb')
52
52
  end
53
+
54
+
55
+ gem 'rake-compiler', '>= 0.7.0'
56
+ require "rake/extensiontask"
57
+
58
+ Rake::ExtensionTask.new("phashion_ext")
data/TODO.md ADDED
@@ -0,0 +1,6 @@
1
+ TODO
2
+ =======
3
+
4
+ - Add video and audio APIs (someone else will have to do this, I have no need for it).
5
+ - Add broader support for the entire pHash API, including other types of hashes. Currently
6
+ the image support hardcodes use of the DCT hash.
@@ -0,0 +1,13 @@
1
+ require 'mkmf'
2
+
3
+ $CFLAGS << " -x c++ #{ENV["CFLAGS"]}"
4
+ $LIBS << " -lpHash #{ENV["LIBS"]}"
5
+
6
+ # TODO: detect the pHash library with dir_config/have_library (sketched below) instead of hard-coding -lpHash
7
+ # dir_config 'pHash'
8
+ # if !have_library('pHash', 'ph_dct_imagehash')
9
+ # puts "Unable to find pHash library, please use 'gem install phashion -- --with-pHash-dir=/phash/install/root'"
10
+ # exit 1
11
+ # end
12
+
13
+ create_makefile('phashion_ext')
@@ -0,0 +1,35 @@
1
+ #include "ruby.h"
2
+ #include "pHash.h"
3
+
4
+ static VALUE image_hash_for(VALUE self, VALUE _filename) {
5
+ char * filename = StringValuePtr(_filename);
6
+ ulong64 hash;
7
+ if (-1 == ph_dct_imagehash(filename, hash)) {
8
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
9
+ }
10
+ return ULL2NUM(hash);
11
+ }
12
+
13
+
14
+ static VALUE hamming_distance(VALUE self, VALUE a, VALUE b) {
15
+ int result = 0;
16
+ result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
17
+ if (-1 == result) {
18
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
19
+ }
20
+ return INT2NUM(result);
21
+ }
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+ void Init_phashion_ext() {
27
+ VALUE c = rb_cObject;
28
+ c = rb_const_get(c, rb_intern("Phashion"));
29
+
30
+ rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
31
+ rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
32
+ }
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
data/lib/phashion.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  require 'rubygems'
2
- require 'inline'
3
2
 
4
3
  ##
5
4
  # Provides a clean and simple API to detect duplicate image files using
@@ -9,11 +8,13 @@ require 'inline'
9
8
  # int ph_dct_imagehash(const char *file, ulong64 &hash);
10
9
  # int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
11
10
 
12
- class Phashion
13
- VERSION = '1.0.0'
11
+ module Phashion
12
+ VERSION = '1.0.1'
14
13
 
15
14
  class Image
16
- DUPE_THRESHOLD = 26
15
+ SETTINGS = {
16
+ :dupe_threshold => 15
17
+ }
17
18
 
18
19
  attr_reader :filename
19
20
  def initialize(filename)
@@ -21,83 +22,14 @@ class Phashion
21
22
  end
22
23
 
23
24
  def duplicate?(other)
24
- Phashion.hamming_distance(hash_code, other.send(:hash_code)) < DUPE_THRESHOLD
25
+ Phashion.hamming_distance(fingerprint, other.fingerprint) < SETTINGS[:dupe_threshold]
25
26
  end
26
-
27
- private
28
27
 
29
- def hash_code
28
+ def fingerprint
30
29
  @hash ||= Phashion.image_hash_for(@filename)
31
30
  end
32
31
  end
33
32
 
34
- def self.image_hash_for(filename)
35
- end
36
-
37
- def self.hamming_distance(hashA, hashB)
38
- end
39
-
40
- inline do |builder|
41
- if test ?d, "/opt/local" then
42
- builder.add_compile_flags "-I/opt/local/include"
43
- builder.add_link_flags "-L/opt/local/lib"
44
- end
45
-
46
- builder.add_compile_flags '-x c++', '-lstdc++'
47
- builder.add_link_flags "-lpHash"
48
- builder.include '"pHash.h"'
49
-
50
- builder.c_singleton <<-"END"
51
- VALUE image_hash_for(const char *filename) {
52
- ulong64 hash;
53
- if (-1 == ph_dct_imagehash(filename, hash)) {
54
- rb_raise(rb_eRuntimeError, "Unknown pHash error");
55
- }
56
- return ULL2NUM(hash);
57
- }
58
- END
59
-
60
- builder.c_singleton <<-"END"
61
- VALUE hamming_distance(VALUE a, VALUE b) {
62
- int result = 0;
63
- result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
64
- if (-1 == result) {
65
- rb_raise(rb_eRuntimeError, "Unknown pHash error");
66
- }
67
- return INT2NUM(result);
68
- }
69
- END
70
-
71
- end
72
33
  end
73
34
 
74
- if __FILE__ == $0
75
-
76
- def memory
77
- `ps -o vsz,rss -p #{$$}`.strip
78
- end
79
-
80
- def assert_duplicate(a, b)
81
- raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
82
- end
83
-
84
- def assert_not_duplicate(a, b)
85
- raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
86
- end
87
-
88
- FILES = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
89
-
90
- images = FILES.map {|f| PHash::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
91
- # GC.start
92
- # puts memory
93
- assert_duplicate images[0], images[1]
94
- assert_duplicate images[1], images[2]
95
- assert_duplicate images[0], images[2]
96
-
97
- assert_not_duplicate images[0], images[3]
98
- assert_not_duplicate images[1], images[3]
99
- assert_not_duplicate images[2], images[3]
100
- # GC.start
101
- # puts memory
102
-
103
- end
35
+ require 'phashion_ext'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 0
9
- version: 1.0.0
8
+ - 1
9
+ version: 1.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Mike Perham
@@ -18,7 +18,7 @@ date: 2010-05-20 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: RubyInline
21
+ name: rake-compiler
22
22
  prerelease: false
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
24
  requirements:
@@ -26,24 +26,30 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  segments:
28
28
  - 0
29
- version: "0"
30
- type: :runtime
29
+ - 7
30
+ - 0
31
+ version: 0.7.0
32
+ type: :development
31
33
  version_requirements: *id001
32
34
  description: Simple wrapper around the pHash library
33
35
  email: mperham@gmail.com
34
36
  executables: []
35
37
 
36
- extensions: []
37
-
38
+ extensions:
39
+ - ext/phashion_ext/extconf.rb
38
40
  extra_rdoc_files:
39
41
  - LICENSE
40
42
  - README.md
41
43
  files:
42
44
  - .document
43
45
  - .gitignore
46
+ - CHANGES.md
44
47
  - LICENSE
45
48
  - README.md
46
49
  - Rakefile
50
+ - TODO.md
51
+ - ext/phashion_ext/extconf.rb
52
+ - ext/phashion_ext/phashion_ext.c
47
53
  - lib/phashion.rb
48
54
  - test/86x86-0a1e.jpeg
49
55
  - test/86x86-83d6.jpeg