phashion 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Mike Perham
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,64 @@
1
+ Phashion
2
+ ===========
3
+
4
+ Phashion is a Ruby wrapper around the pHash library, "perceptual hash", which detects duplicate
5
+ and near-duplicate multimedia files (images, audio, video). The wrapper currently supports only images.
6
+
7
+ Installation
8
+ -------------
9
+
10
+ First you need to install pHash. pHash requires three libraries: CImg, ffmpeg and libjpeg. My system already came with libjpeg on it so I didn't have to do anything for it. YMMV.
11
+
12
+ Install CImg.h by downloading the latest version from cimg.sf.net and placing the CImg.h header file in /usr/local/include.
13
+
14
+ If you are working with audio or video, you will need to install ffmpeg:
15
+
16
+ port install ffmpeg (OR)
17
+ brew install ffmpeg
18
+
19
+ Alternatively you can configure pHash to not support audio/video:
20
+
21
+ ./configure --disable-audio-hash --disable-video-hash
22
+
23
+ Download and unpack the latest pHash tarball from http://phash.org/download/. With pHash 0.9.0 there are several issues on OS X: I had to disable audio and video support to avoid compilation errors, and modify `ph_num_threads` in pHash.cpp to avoid Linux-specific code:
24
+
25
+ ./configure --disable-audio-hash --disable-video-hash
26
+
27
+ int ph_num_threads()
28
+ {
29
+ int numCPU = 2;
30
+ return numCPU;
31
+ }
32
+
33
+ Finally, run `make && make install` to install the pHash binaries.
34
+
35
+ Now you can install this gem:
36
+
37
+ gem install phashion
38
+
39
+ Usage
40
+ ---------
41
+
42
+ require 'phashion'
43
+ img1 = Phashion::Image.new(filename1)
44
+ img2 = Phashion::Image.new(filename2)
45
+ img1.duplicate?(img2)
46
+ --> true
47
+
48
+ Left to the reader: add equality semantics so that duplicate images placed in a Ruby set are automatically removed:
49
+
50
+ set = Set.new
51
+ set << img1
52
+ set << img2
53
+ set.size
54
+ --> 1
55
+
56
+ Author
57
+ ----------
58
+
59
+ Mike Perham, http://mikeperham.com, http://twitter.com/mperham, mperham AT gmail.com
60
+
61
+ Copyright
62
+ ----------
63
+
64
+ Copyright (c) 2010 Mike Perham. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# Gem packaging tasks. Jeweler is optional: when it cannot be loaded we only
# print a hint, so the remaining tasks in this file stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "phashion"
    gem.summary = %Q{Simple wrapper around the pHash library}
    gem.description = gem.summary
    gem.email = "mperham@gmail.com"
    gem.homepage = "http://github.com/mperham/phashion"
    gem.authors = ["Mike Perham"]
    gem.add_dependency 'RubyInline'
    gem.version = '1.0.0'
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is not installed, a stand-in :rcov task explains
# how to get it instead of failing with a LoadError.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# check_dependencies is not defined in this file (presumably it is supplied by
# Jeweler). Only depend on it when it actually exists, so `rake test` still
# works when Jeweler could not be loaded above.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Current Rake no longer ships rake/rdoctask; the task class
# lives in rdoc/task instead. Try the current location first and fall back to
# the legacy one for old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "phashion"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/lib/phashion.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'rubygems'
2
+ require 'inline'
3
+
4
+ ##
5
+ # Provides a clean and simple API to detect duplicate image files using
6
+ # the pHash library under the covers.
7
+ #
8
+ # The C API:
9
+ # int ph_dct_imagehash(const char *file, ulong64 &hash);
10
+ # int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
11
+
12
class Phashion
  VERSION = '1.0.0'

  # Represents one image file and answers whether another image is a
  # perceptual duplicate of it.
  class Image
    # Default cut-off: two images whose hashes have a Hamming distance
    # strictly below this value are reported as duplicates.
    DUPE_THRESHOLD = 26

    # The file name exactly as it was passed to the constructor.
    attr_reader :filename

    # filename:: path of the image file. The file is not touched here; its
    #            hash is computed the first time it is needed.
    def initialize(filename)
      @filename = filename
    end

    # Returns true when the Hamming distance between this image's hash and
    # +other+'s hash is strictly less than +threshold+.
    #
    # other::     another Image (any object with a +hash_code+ method works)
    # threshold:: exclusive upper bound on the distance; defaults to
    #             DUPE_THRESHOLD, so existing one-argument callers are
    #             unaffected
    #
    # Raises RuntimeError when the underlying pHash call reports an error.
    def duplicate?(other, threshold = DUPE_THRESHOLD)
      # hash_code is private, hence the send when reading it from +other+.
      Phashion.hamming_distance(hash_code, other.send(:hash_code)) < threshold
    end

    private

    # The image hash for @filename, computed on first use and then cached
    # in this object.
    def hash_code
      @hash ||= Phashion.image_hash_for(@filename)
    end
  end

  # Empty Ruby stub; the inline block below defines a C singleton function of
  # the same name that computes the hash via ph_dct_imagehash.
  def self.image_hash_for(filename)
  end

  # Empty Ruby stub; the inline block below defines a C singleton function of
  # the same name that calls ph_hamming_distance.
  def self.hamming_distance(hashA, hashB)
  end

  inline do |builder|
    # Add /opt/local to the header and library search paths when that
    # directory exists on this machine.
    if File.directory?("/opt/local")
      builder.add_compile_flags "-I/opt/local/include"
      builder.add_link_flags "-L/opt/local/lib"
    end

    # The pHash API takes a C++ reference argument, so the generated source
    # has to be compiled as C++.
    builder.add_compile_flags '-x c++'
    # Libraries are link-time options: keep -lstdc++ next to -lpHash in the
    # link flags rather than among the compile flags.
    builder.add_link_flags "-lpHash", "-lstdc++"
    builder.include '"pHash.h"'

    # Phashion.image_hash_for(filename): returns the 64-bit hash as a Ruby
    # Integer, or raises RuntimeError when ph_dct_imagehash returns -1.
    builder.c_singleton <<-"END"
      VALUE image_hash_for(const char *filename) {
        ulong64 hash;
        if (-1 == ph_dct_imagehash(filename, hash)) {
          rb_raise(rb_eRuntimeError, "Unknown pHash error");
        }
        return ULL2NUM(hash);
      }
    END

    # Phashion.hamming_distance(a, b): returns the distance between two
    # hashes as a Ruby Integer, or raises RuntimeError on a -1 result.
    builder.c_singleton <<-"END"
      VALUE hamming_distance(VALUE a, VALUE b) {
        int result = 0;
        result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
        if (-1 == result) {
          rb_raise(rb_eRuntimeError, "Unknown pHash error");
        }
        return INT2NUM(result);
      }
    END

  end
end
73
+
74
# Ad-hoc self-check: runs only when this file is executed directly
# (ruby lib/phashion.rb), never when it is required as a library.
if __FILE__ == $0

  # Returns the stripped output of `ps` showing the vsz and rss columns for
  # the current process. Only used by the commented-out memory probes below.
  def memory
    `ps -o vsz,rss -p #{$$}`.strip
  end

  # Raises ArgumentError unless +a+ reports +b+ as a duplicate.
  def assert_duplicate(a, b)
    raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
  end

  # Raises ArgumentError if +a+ reports +b+ as a duplicate.
  def assert_not_duplicate(a, b)
    raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
  end

  # Fixture images living in ../test relative to this file. The first three
  # are expected to match each other; avatar.jpg is expected to match none.
  FILES = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)

  # The class is Phashion::Image; the previous PHash::Image constant does not
  # exist and made this script fail with NameError before checking anything.
  images = FILES.map { |f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}") }
  # GC.start
  # puts memory
  assert_duplicate images[0], images[1]
  assert_duplicate images[1], images[2]
  assert_duplicate images[0], images[2]

  assert_not_duplicate images[0], images[3]
  assert_not_duplicate images[1], images[3]
  assert_not_duplicate images[2], images[3]
  # GC.start
  # puts memory

end
Binary file
Binary file
Binary file
data/test/avatar.jpg ADDED
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'phashion'
4
+
5
+ class Test::Unit::TestCase
6
+ end
@@ -0,0 +1,30 @@
1
+ require 'helper'
2
+
3
# Exercises Phashion::Image#duplicate? against the JPEG fixtures that sit
# next to this file.
class TestPhashion < Test::Unit::TestCase

  # The three 86x86 fixtures must all be reported as duplicates of each other.
  def test_duplicate_detection
    images = load_images(%w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg))
    assert_duplicate images[0], images[1]
    assert_duplicate images[1], images[2]
    assert_duplicate images[0], images[2]
  end

  # avatar.jpg must not be reported as a duplicate of any 86x86 fixture.
  def test_not_duplicate
    images = load_images(%w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg))
    assert_not_duplicate images[0], images[3]
    assert_not_duplicate images[1], images[3]
    assert_not_duplicate images[2], images[3]
  end

  private

  # Builds a Phashion::Image for each fixture name, resolved relative to the
  # directory containing this test file.
  def load_images(names)
    names.map { |name| Phashion::Image.new(File.join(File.dirname(__FILE__), name)) }
  end

  # Uses Test::Unit's assert (instead of raising ArgumentError) so that a
  # mismatch is reported as a test failure and counted as an assertion.
  def assert_duplicate(a, b)
    assert a.duplicate?(b), "#{a.filename} not dupe of #{b.filename}"
  end

  # Counterpart of assert_duplicate: fails when +a+ reports +b+ as a duplicate.
  def assert_not_duplicate(a, b)
    assert !a.duplicate?(b), "#{a.filename} dupe of #{b.filename}"
  end
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: phashion
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Mike Perham
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-20 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: RubyInline
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ version_requirements: *id001
32
+ description: Simple wrapper around the pHash library
33
+ email: mperham@gmail.com
34
+ executables: []
35
+
36
+ extensions: []
37
+
38
+ extra_rdoc_files:
39
+ - LICENSE
40
+ - README.md
41
+ files:
42
+ - .document
43
+ - .gitignore
44
+ - LICENSE
45
+ - README.md
46
+ - Rakefile
47
+ - lib/phashion.rb
48
+ - test/86x86-0a1e.jpeg
49
+ - test/86x86-83d6.jpeg
50
+ - test/86x86-a855.jpeg
51
+ - test/avatar.jpg
52
+ - test/helper.rb
53
+ - test/test_phashion.rb
54
+ has_rdoc: true
55
+ homepage: http://github.com/mperham/phashion
56
+ licenses: []
57
+
58
+ post_install_message:
59
+ rdoc_options:
60
+ - --charset=UTF-8
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ requirements: []
78
+
79
+ rubyforge_project:
80
+ rubygems_version: 1.3.6
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Simple wrapper around the pHash library
84
+ test_files:
85
+ - test/helper.rb
86
+ - test/test_phashion.rb