phashion 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.md +64 -0
- data/Rakefile +52 -0
- data/lib/phashion.rb +103 -0
- data/test/86x86-0a1e.jpeg +0 -0
- data/test/86x86-83d6.jpeg +0 -0
- data/test/86x86-a855.jpeg +0 -0
- data/test/avatar.jpg +0 -0
- data/test/helper.rb +6 -0
- data/test/test_phashion.rb +30 -0
- metadata +86 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Mike Perham
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
Phashion
|
2
|
+
===========
|
3
|
+
|
4
|
+
Phashion is a Ruby wrapper around the pHash library, "perceptual hash", which detects duplicate
|
5
|
+
and near-duplicate multimedia files (images, audio, video). The wrapper currently supports only images.
|
6
|
+
|
7
|
+
Installation
|
8
|
+
-------------
|
9
|
+
|
10
|
+
First you need to install pHash. pHash requires three libraries: CImg, ffmpeg and libjpeg. My system already came with libjpeg on it so I didn't have to do anything for it. YMMV.
|
11
|
+
|
12
|
+
Install CImg.h by downloading the latest version from cimg.sf.net and placing the CImg.h header file in /usr/local/include.
|
13
|
+
|
14
|
+
If you are working with audio or video, you will need to install ffmpeg:
|
15
|
+
|
16
|
+
port install ffmpeg (OR)
|
17
|
+
brew install ffmpeg
|
18
|
+
|
19
|
+
Alternatively you can configure pHash to not support audio/video:
|
20
|
+
|
21
|
+
./configure --disable-audio-hash --disable-video-hash
|
22
|
+
|
23
|
+
Download and install the latest pHash tarball from http://phash.org/download/. With pHash 0.9.0 there are several issues on OS X: I had to disable audio and video support to avoid compilation errors, and modify `ph_num_threads` in pHash.cpp to remove Linux-specific code:
|
24
|
+
|
25
|
+
./configure --disable-audio-hash --disable-video-hash
|
26
|
+
|
27
|
+
int ph_num_threads()
|
28
|
+
{
|
29
|
+
int numCPU = 2;
|
30
|
+
return numCPU;
|
31
|
+
}
|
32
|
+
|
33
|
+
Finally, run `make && make install` to install the pHash binaries.
|
34
|
+
|
35
|
+
Now you can install this gem:
|
36
|
+
|
37
|
+
gem install phashion
|
38
|
+
|
39
|
+
Usage
|
40
|
+
---------
|
41
|
+
|
42
|
+
require 'phashion'
|
43
|
+
img1 = Phashion::Image.new(filename1)
|
44
|
+
img2 = Phashion::Image.new(filename2)
|
45
|
+
img1.duplicate?(img2)
|
46
|
+
--> true
|
47
|
+
|
48
|
+
Left to the reader: add equality semantics so that duplicate images placed in a Ruby set are automatically removed:
|
49
|
+
|
50
|
+
set = Set.new
|
51
|
+
set << img1
|
52
|
+
set << img2
|
53
|
+
set.size
|
54
|
+
--> 1
|
55
|
+
|
56
|
+
Author
|
57
|
+
==========
|
58
|
+
|
59
|
+
Mike Perham, http://mikeperham.com, http://twitter.com/mperham, mperham AT gmail.com
|
60
|
+
|
61
|
+
Copyright
|
62
|
+
----------
|
63
|
+
|
64
|
+
Copyright (c) 2010 Mike Perham. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "phashion"
|
8
|
+
gem.summary = %Q{Simple wrapper around the pHash library}
|
9
|
+
gem.description = gem.summary
|
10
|
+
gem.email = "mperham@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/mperham/phashion"
|
12
|
+
gem.authors = ["Mike Perham"]
|
13
|
+
gem.add_dependency 'RubyInline'
|
14
|
+
gem.version = '1.0.0'
|
15
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
|
+
end
|
17
|
+
Jeweler::GemcutterTasks.new
|
18
|
+
rescue LoadError
|
19
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
20
|
+
end
|
21
|
+
|
22
|
+
require 'rake/testtask'
|
23
|
+
Rake::TestTask.new(:test) do |test|
|
24
|
+
test.libs << 'lib' << 'test'
|
25
|
+
test.pattern = 'test/**/test_*.rb'
|
26
|
+
test.verbose = true
|
27
|
+
end
|
28
|
+
|
29
|
+
begin
|
30
|
+
require 'rcov/rcovtask'
|
31
|
+
Rcov::RcovTask.new do |test|
|
32
|
+
test.libs << 'test'
|
33
|
+
test.pattern = 'test/**/test_*.rb'
|
34
|
+
test.verbose = true
|
35
|
+
end
|
36
|
+
rescue LoadError
|
37
|
+
task :rcov do
|
38
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# check_dependencies is defined by Jeweler::Tasks.new. Only depend on it when
# Jeweler actually loaded, so `rake test` still runs after the LoadError rescue.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)
|
43
|
+
|
44
|
+
task :default => :test
|
45
|
+
|
46
|
+
require 'rake/rdoctask'
|
47
|
+
Rake::RDocTask.new do |rdoc|
|
48
|
+
rdoc.rdoc_dir = 'rdoc'
|
49
|
+
rdoc.title = "phashion"
|
50
|
+
rdoc.rdoc_files.include('README*')
|
51
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
52
|
+
end
|
data/lib/phashion.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'inline'
|
3
|
+
|
4
|
+
##
|
5
|
+
# Provides a clean and simple API to detect duplicate image files using
|
6
|
+
# the pHash library under the covers.
|
7
|
+
#
|
8
|
+
# The C API:
|
9
|
+
# int ph_dct_imagehash(const char *file, ulong64 &hash);
|
10
|
+
# int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
|
11
|
+
|
12
|
+
class Phashion
|
13
|
+
VERSION = '1.0.0'
|
14
|
+
|
15
|
+
class Image
|
16
|
+
DUPE_THRESHOLD = 26
|
17
|
+
|
18
|
+
attr_reader :filename
|
19
|
+
def initialize(filename)
|
20
|
+
@filename = filename
|
21
|
+
end
|
22
|
+
|
23
|
+
def duplicate?(other)
|
24
|
+
Phashion.hamming_distance(hash_code, other.send(:hash_code)) < DUPE_THRESHOLD
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def hash_code
|
30
|
+
@hash ||= Phashion.image_hash_for(@filename)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.image_hash_for(filename)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.hamming_distance(hashA, hashB)
|
38
|
+
end
|
39
|
+
|
40
|
+
inline do |builder|
|
41
|
+
if test ?d, "/opt/local" then
|
42
|
+
builder.add_compile_flags "-I/opt/local/include"
|
43
|
+
builder.add_link_flags "-L/opt/local/lib"
|
44
|
+
end
|
45
|
+
|
46
|
+
builder.add_compile_flags '-x c++', '-lstdc++'
|
47
|
+
builder.add_link_flags "-lpHash"
|
48
|
+
builder.include '"pHash.h"'
|
49
|
+
|
50
|
+
builder.c_singleton <<-"END"
|
51
|
+
VALUE image_hash_for(const char *filename) {
|
52
|
+
ulong64 hash;
|
53
|
+
if (-1 == ph_dct_imagehash(filename, hash)) {
|
54
|
+
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
55
|
+
}
|
56
|
+
return ULL2NUM(hash);
|
57
|
+
}
|
58
|
+
END
|
59
|
+
|
60
|
+
builder.c_singleton <<-"END"
|
61
|
+
VALUE hamming_distance(VALUE a, VALUE b) {
|
62
|
+
int result = 0;
|
63
|
+
result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
|
64
|
+
if (-1 == result) {
|
65
|
+
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
66
|
+
}
|
67
|
+
return INT2NUM(result);
|
68
|
+
}
|
69
|
+
END
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
if __FILE__ == $0
|
75
|
+
|
76
|
+
def memory
|
77
|
+
`ps -o vsz,rss -p #{$$}`.strip
|
78
|
+
end
|
79
|
+
|
80
|
+
def assert_duplicate(a, b)
|
81
|
+
raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
|
82
|
+
end
|
83
|
+
|
84
|
+
def assert_not_duplicate(a, b)
|
85
|
+
raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
|
86
|
+
end
|
87
|
+
|
88
|
+
FILES = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
|
89
|
+
|
90
|
+
# Wrap each fixture under ../test (relative to this file) in a Phashion::Image.
images = FILES.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
|
91
|
+
# GC.start
|
92
|
+
# puts memory
|
93
|
+
assert_duplicate images[0], images[1]
|
94
|
+
assert_duplicate images[1], images[2]
|
95
|
+
assert_duplicate images[0], images[2]
|
96
|
+
|
97
|
+
assert_not_duplicate images[0], images[3]
|
98
|
+
assert_not_duplicate images[1], images[3]
|
99
|
+
assert_not_duplicate images[2], images[3]
|
100
|
+
# GC.start
|
101
|
+
# puts memory
|
102
|
+
|
103
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
data/test/avatar.jpg
ADDED
Binary file
|
data/test/test_phashion.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestPhashion < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def test_duplicate_detection
|
6
|
+
files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg)
|
7
|
+
images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
|
8
|
+
assert_duplicate images[0], images[1]
|
9
|
+
assert_duplicate images[1], images[2]
|
10
|
+
assert_duplicate images[0], images[2]
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_not_duplicate
|
14
|
+
files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
|
15
|
+
images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
|
16
|
+
assert_not_duplicate images[0], images[3]
|
17
|
+
assert_not_duplicate images[1], images[3]
|
18
|
+
assert_not_duplicate images[2], images[3]
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def assert_duplicate(a, b)
|
24
|
+
raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
|
25
|
+
end
|
26
|
+
|
27
|
+
def assert_not_duplicate(a, b)
|
28
|
+
raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
|
29
|
+
end
|
30
|
+
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: phashion
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Mike Perham
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-05-20 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: RubyInline
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :runtime
|
31
|
+
version_requirements: *id001
|
32
|
+
description: Simple wrapper around the pHash library
|
33
|
+
email: mperham@gmail.com
|
34
|
+
executables: []
|
35
|
+
|
36
|
+
extensions: []
|
37
|
+
|
38
|
+
extra_rdoc_files:
|
39
|
+
- LICENSE
|
40
|
+
- README.md
|
41
|
+
files:
|
42
|
+
- .document
|
43
|
+
- .gitignore
|
44
|
+
- LICENSE
|
45
|
+
- README.md
|
46
|
+
- Rakefile
|
47
|
+
- lib/phashion.rb
|
48
|
+
- test/86x86-0a1e.jpeg
|
49
|
+
- test/86x86-83d6.jpeg
|
50
|
+
- test/86x86-a855.jpeg
|
51
|
+
- test/avatar.jpg
|
52
|
+
- test/helper.rb
|
53
|
+
- test/test_phashion.rb
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: http://github.com/mperham/phashion
|
56
|
+
licenses: []
|
57
|
+
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options:
|
60
|
+
- --charset=UTF-8
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
requirements: []
|
78
|
+
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 1.3.6
|
81
|
+
signing_key:
|
82
|
+
specification_version: 3
|
83
|
+
summary: Simple wrapper around the pHash library
|
84
|
+
test_files:
|
85
|
+
- test/helper.rb
|
86
|
+
- test/test_phashion.rb
|