ign-phashion 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.md
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ .idea
21
+
22
+ ## PROJECT::SPECIFIC
data/CHANGES.md ADDED
@@ -0,0 +1,27 @@
1
+ History
2
+ =========
3
+
4
+ 1.0.3
5
+ -------
6
+
7
+ * Only add libjpeg to the list of linked libraries when building on Ubuntu.
8
+
9
+ 1.0.2
10
+ -------
11
+
12
+ * Make installation much easier by distributing and building locally all the native dependencies.
13
+ This includes pHash 0.9.0 and CImg 1.3.4.
14
+
15
+ 1.0.1
16
+ -------
17
+
18
+ * Remove RubyInline, use standard Ruby extension infrastructure.
19
+ * Update duplicate threshold constant based on wider image testing.
20
+ * Make duplicate threshold variable so users can tune it based on their dataset.
21
+ * Add Phashion::Image#fingerprint method which exposes an Image's 64-bit hash.
22
+
23
+
24
+ 1.0.0
25
+ -------
26
+
27
+ Initial release.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Mike Perham
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ Phashion
2
+ ===========
3
+
4
+ Phashion is a Ruby wrapper around the pHash library, "perceptual hash", which detects duplicate
5
+ and near duplicate multimedia files (images, audio, video). The wrapper currently only supports images.
6
+
7
+ [See an overview of Phashion on my blog](http://www.mikeperham.com/2010/05/21/detecting-duplicate-images-with-phashion/).
8
+
9
+ Installation
10
+ -------------
11
+
12
+ You install it just like any other Ruby gem:
13
+
14
+ gem install ign-phashion
15
+
16
+ Phashion is somewhat involved to install as it has a few dependencies. I've wrapped up those
17
+ dependencies into a custom tarball that is built locally just for this gem so you don't have to
18
+ do anything special. See the code in `ext/phashion_ext` for more details.
19
+
20
+ Because of this complexity, it is possible the gem install will fail on your platform. I've tested
21
+ it on Mac OSX 10.6 and Ubuntu 8.04 but please contact me if you have installation problems.
22
+
23
+ Usage
24
+ ---------
25
+
26
+ require 'phashion'
27
+ img1 = Phashion::Image.new(filename1)
28
+ img2 = Phashion::Image.new(filename2)
29
+ img1.duplicate?(img2)
30
+ --> true
31
+
32
+ Author
33
+ ==========
34
+
35
+ Mike Perham, http://mikeperham.com, http://twitter.com/mperham, mperham AT gmail.com
36
+
37
+ Copyright
38
+ ----------
39
+
40
+ Copyright (c) 2010 Mike Perham. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin # Gem packaging/release tasks via Jeweler; if it cannot be loaded only a hint is printed
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "ign-phashion"
8
+ gem.summary = %Q{Simple wrapper around the pHash library}
9
+ gem.description = gem.summary
10
+ gem.email = "mperham@gmail.com"
11
+ gem.homepage = "http://github.com/ign/phashion"
12
+ gem.authors = ["Mike Perham", "Bennett", "Thomas Nguyen"]
13
+ gem.add_development_dependency 'rake-compiler', '>= 0.7.0'
14
+ gem.version = '1.0.3' # lib/phashion.rb defines its own Phashion::VERSION constant; keep the two in sync
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test| # `rake test` runs every test/**/test_*.rb with lib and test on the load path
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/test_*.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin # Coverage task; without rcov a stub :rcov task is defined that aborts with install instructions
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/test_*.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+ task :test => :check_dependencies # NOTE(review): check_dependencies is not defined in this file; presumably supplied by Jeweler::Tasks - confirm
43
+
44
+ task :default => :test
45
+
46
+ require 'rake/rdoctask'
47
+ Rake::RDocTask.new do |rdoc| # API docs are generated into ./rdoc from README* and lib/**/*.rb
48
+ rdoc.rdoc_dir = 'rdoc'
49
+ rdoc.title = "phashion"
50
+ rdoc.rdoc_files.include('README*')
51
+ rdoc.rdoc_files.include('lib/**/*.rb')
52
+ end
53
+
54
+
55
+ gem 'rake-compiler', '>= 0.7.0' # activate rake-compiler before requiring its extension task
56
+ require "rake/extensiontask"
57
+
58
+ Rake::ExtensionTask.new("phashion_ext") # build tasks for the native extension named phashion_ext
data/TODO.md ADDED
@@ -0,0 +1,6 @@
1
+ TODO
2
+ =======
3
+
4
+ - Add video and audio APIs (someone else will have to do this, I have no need for it).
5
+ - Add broader support for the entire pHash API, including other types of hashes. Currently
6
+ the image support hardcodes use of the DCT hash.
@@ -0,0 +1,52 @@
1
+ require 'mkmf'
2
+
3
+ HERE = File.expand_path(File.dirname(__FILE__))
4
+ BUNDLE = Dir.glob("#{HERE}/pHash-*.tar.gz").first
5
+ BUNDLE_PATH = BUNDLE.gsub(".tar.gz", "")
6
+ $CFLAGS = " -x c++ #{ENV["CFLAGS"]}"
7
+ $includes = " -I#{HERE}/include"
8
+ $libraries = " -L#{HERE}/lib"
9
+ $LIBPATH = ["#{HERE}/lib"]
10
+ $CFLAGS = "#{$includes} #{$libraries} #{$CFLAGS}"
11
+ $LDFLAGS = "#{$libraries} #{$LDFLAGS}"
12
+
13
+ Dir.chdir(HERE) do
14
+ if File.exist?("lib")
15
+ puts "pHash already built; run 'rake clean' first if you need to rebuild."
16
+ else
17
+
18
+ puts(cmd = "tar xzf #{BUNDLE} 2>&1")
19
+ raise "'#{cmd}' failed" unless system(cmd)
20
+
21
+ Dir.chdir(BUNDLE_PATH) do
22
+ puts(cmd = "env CFLAGS='#{$CFLAGS}' LDFLAGS='#{$LDFLAGS}' ./configure --prefix=#{HERE} --disable-audio-hash --disable-video-hash --disable-shared --with-pic 2>&1")
23
+ raise "'#{cmd}' failed" unless system(cmd)
24
+
25
+ puts(cmd = "make || true 2>&1")
26
+ raise "'#{cmd}' failed" unless system(cmd)
27
+
28
+ puts(cmd = "make install || true 2>&1")
29
+ raise "'#{cmd}' failed" unless system(cmd)
30
+
31
+ puts(cmd = "mv CImg.h ../include 2>&1")
32
+ raise "'#{cmd}' failed" unless system(cmd)
33
+ end
34
+
35
+ system("rm -rf #{BUNDLE_PATH}") unless ENV['DEBUG'] or ENV['DEV']
36
+ end
37
+
38
+ Dir.chdir("#{HERE}/lib") do
39
+ system("cp -f libpHash.a libpHash_gem.a")
40
+ system("cp -f libpHash.la libpHash_gem.la")
41
+ end
42
+ $LIBS = " -lpHash_gem -lstdc++"
43
+ case RUBY_PLATFORM
44
+ when /linux/
45
+ uname = `uname -a`
46
+ if uname =~ /Ubuntu/
47
+ $LIBS += " -ljpeg"
48
+ end
49
+ end
50
+ end
51
+
52
+ create_makefile 'phashion_ext'
Binary file
@@ -0,0 +1,35 @@
1
+ #include "ruby.h"
2
+ #include "pHash.h"
3
+
4
+ static VALUE image_hash_for(VALUE self, VALUE _filename) { /* Phashion.image_hash_for(filename): returns the pHash DCT image hash of the file as a Ruby Integer; raises RuntimeError when pHash reports -1 */
5
+ char * filename = StringValueCStr(_filename); /* unlike StringValuePtr this guarantees NUL termination and raises ArgumentError on embedded NUL bytes */
6
+ ulong64 hash = 0; /* filled in by ph_dct_imagehash, which takes it by reference */
7
+ if (-1 == ph_dct_imagehash(filename, hash)) {
8
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
9
+ }
10
+ return ULL2NUM(hash);
11
+ }
12
+
13
+
14
+ static VALUE hamming_distance(VALUE self, VALUE a, VALUE b) { /* Phashion.hamming_distance(a, b): passes two Ruby Integers to ph_hamming_distance as unsigned 64-bit values and returns its int result */
15
+ int result = 0;
16
+ result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
17
+ if (-1 == result) { /* -1 is treated as the pHash error value and surfaced as a Ruby RuntimeError */
18
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
19
+ }
20
+ return INT2NUM(result);
21
+ }
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" { /* C linkage so the Init_ symbol is not name-mangled when compiled as C++ */
25
+ #endif
26
+ void Init_phashion_ext() { /* extension entry point: attaches the two native singleton methods to the existing Phashion constant */
27
+ VALUE c = rb_cObject;
28
+ c = rb_const_get(c, rb_intern("Phashion")); /* looks the constant up rather than defining it, so Phashion must already exist when the extension is loaded */
29
+
30
+ rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
31
+ rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
32
+ }
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{ign-phashion}
8
+ s.version = "1.0.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Mike Perham", "Bennett", "Thomas Nguyen"]
12
+ s.date = %q{2010-12-14}
13
+ s.description = %q{Simple wrapper around the pHash library}
14
+ s.email = %q{mperham@gmail.com}
15
+ s.extensions = ["ext/phashion_ext/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE",
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "CHANGES.md",
24
+ "LICENSE",
25
+ "README.md",
26
+ "Rakefile",
27
+ "TODO.md",
28
+ "ext/phashion_ext/extconf.rb",
29
+ "ext/phashion_ext/pHash-0.9.0.tar.gz",
30
+ "ext/phashion_ext/phashion_ext.c",
31
+ "ign-phashion.gemspec",
32
+ "lib/phashion.rb",
33
+ "test/86x86-0a1e.jpeg",
34
+ "test/86x86-83d6.jpeg",
35
+ "test/86x86-a855.jpeg",
36
+ "test/avatar.jpg",
37
+ "test/b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg",
38
+ "test/df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg",
39
+ "test/e7397898a7e395c2524978a5e64de0efabf08290.jpeg",
40
+ "test/helper.rb",
41
+ "test/test_phashion.rb"
42
+ ]
43
+ s.homepage = %q{http://github.com/ign/phashion}
44
+ s.rdoc_options = ["--charset=UTF-8"]
45
+ s.require_paths = ["lib"]
46
+ s.rubygems_version = %q{1.3.7}
47
+ s.summary = %q{Simple wrapper around the pHash library}
48
+ s.test_files = [
49
+ "test/helper.rb",
50
+ "test/test_phashion.rb"
51
+ ]
52
+
53
+ if s.respond_to? :specification_version then
54
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
55
+ s.specification_version = 3
56
+
57
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
58
+ s.add_development_dependency(%q<rake-compiler>, [">= 0.7.0"])
59
+ else
60
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.0"])
61
+ end
62
+ else
63
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.0"])
64
+ end
65
+ end
66
+
data/lib/phashion.rb ADDED
@@ -0,0 +1,33 @@
1
+ ##
2
+ # Provides a clean and simple API to detect duplicate image files using
3
+ # the pHash library under the covers.
4
+ #
5
+ # The C API:
6
+ # int ph_dct_imagehash(const char *file, ulong64 &hash);
7
+ # int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
8
+
9
+ module Phashion
10
+ VERSION = '1.0.2'
11
+
12
+ class Image
13
+ SETTINGS = {
14
+ :dupe_threshold => 15
15
+ }
16
+
17
+ attr_reader :filename
18
+ def initialize(filename)
19
+ @filename = filename
20
+ end
21
+
22
+ def duplicate?(other)
23
+ Phashion.hamming_distance(fingerprint, other.fingerprint) < SETTINGS[:dupe_threshold]
24
+ end
25
+
26
+ def fingerprint
27
+ @hash ||= Phashion.image_hash_for(@filename)
28
+ end
29
+ end
30
+
31
+ end
32
+
33
+ require 'phashion_ext'
Binary file
Binary file
Binary file
data/test/avatar.jpg ADDED
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'rubygems' # shared test bootstrap: loads test/unit and the library under test
2
+ require 'test/unit'
3
+ require 'phashion'
4
+
5
+ class Test::Unit::TestCase # reopened without adding anything
6
+ end
@@ -0,0 +1,38 @@
1
+ require 'helper'
2
+
3
+ class TestPhashion < Test::Unit::TestCase # exercises Phashion::Image#duplicate? against the JPEG fixtures in the test directory
4
+
5
+ def test_duplicate_detection # every pair of the three 86x86 fixtures must be reported as duplicates
6
+ files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg)
7
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
8
+ assert_duplicate images[0], images[1]
9
+ assert_duplicate images[1], images[2]
10
+ assert_duplicate images[0], images[2]
11
+ end
12
+
13
+ def test_duplicate_detection_2 # same pairwise check on a second set of three fixtures
14
+ files = %w(b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg e7397898a7e395c2524978a5e64de0efabf08290.jpeg)
15
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
16
+ assert_duplicate images[0], images[1]
17
+ assert_duplicate images[1], images[2]
18
+ assert_duplicate images[0], images[2]
19
+ end
20
+
21
+ def test_not_duplicate # avatar.jpg must not be reported as a duplicate of any 86x86 fixture
22
+ files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
23
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
24
+ assert_not_duplicate images[0], images[3]
25
+ assert_not_duplicate images[1], images[3]
26
+ assert_not_duplicate images[2], images[3]
27
+ end
28
+
29
+ private
30
+
31
+ def assert_duplicate(a, b) # fails, naming both files, unless a.duplicate?(b) is truthy
32
+ assert a.duplicate?(b), "#{a.filename} not dupe of #{b.filename}"
33
+ end
34
+
35
+ def assert_not_duplicate(a, b) # fails, naming both files, if a.duplicate?(b) is truthy
36
+ assert !a.duplicate?(b), "#{a.filename} dupe of #{b.filename}"
37
+ end
38
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ign-phashion
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 3
10
+ version: 1.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Mike Perham
14
+ - Bennett
15
+ - Thomas Nguyen
16
+ autorequire:
17
+ bindir: bin
18
+ cert_chain: []
19
+
20
+ date: 2010-12-14 00:00:00 -08:00
21
+ default_executable:
22
+ dependencies:
23
+ - !ruby/object:Gem::Dependency
24
+ name: rake-compiler
25
+ prerelease: false
26
+ requirement: &id001 !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ hash: 3
32
+ segments:
33
+ - 0
34
+ - 7
35
+ - 0
36
+ version: 0.7.0
37
+ type: :development
38
+ version_requirements: *id001
39
+ description: Simple wrapper around the pHash library
40
+ email: mperham@gmail.com
41
+ executables: []
42
+
43
+ extensions:
44
+ - ext/phashion_ext/extconf.rb
45
+ extra_rdoc_files:
46
+ - LICENSE
47
+ - README.md
48
+ files:
49
+ - .document
50
+ - .gitignore
51
+ - CHANGES.md
52
+ - LICENSE
53
+ - README.md
54
+ - Rakefile
55
+ - TODO.md
56
+ - ext/phashion_ext/extconf.rb
57
+ - ext/phashion_ext/pHash-0.9.0.tar.gz
58
+ - ext/phashion_ext/phashion_ext.c
59
+ - ign-phashion.gemspec
60
+ - lib/phashion.rb
61
+ - test/86x86-0a1e.jpeg
62
+ - test/86x86-83d6.jpeg
63
+ - test/86x86-a855.jpeg
64
+ - test/avatar.jpg
65
+ - test/b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg
66
+ - test/df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg
67
+ - test/e7397898a7e395c2524978a5e64de0efabf08290.jpeg
68
+ - test/helper.rb
69
+ - test/test_phashion.rb
70
+ has_rdoc: true
71
+ homepage: http://github.com/ign/phashion
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options:
76
+ - --charset=UTF-8
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.3.7
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Simple wrapper around the pHash library
104
+ test_files:
105
+ - test/helper.rb
106
+ - test/test_phashion.rb