ign-phashion 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.md
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ .idea
21
+
22
+ ## PROJECT::SPECIFIC
data/CHANGES.md ADDED
@@ -0,0 +1,27 @@
1
+ History
2
+ =========
3
+
4
+ 1.0.3
5
+ -------
6
+
7
+ * Only add libjpeg to the list of linked libraries when building on Ubuntu.
8
+
9
+ 1.0.2
10
+ -------
11
+
12
+ * Make installation much easier by distributing and building locally all the native dependencies.
13
+ This includes pHash 0.9.0 and CImg 1.3.4.
14
+
15
+ 1.0.1
16
+ -------
17
+
18
+ * Remove RubyInline, use standard Ruby extension infrastructure.
19
+ * Update duplicate threshold constant based on wider image testing.
20
+ * Make duplicate threshold variable so users can tune it based on their dataset.
21
+ * Add Phashion::Image#fingerprint method which exposes an Image's 64-bit hash.
22
+
23
+
24
+ 1.0.0
25
+ -------
26
+
27
+ Initial release.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Mike Perham
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ Phashion
2
+ ===========
3
+
4
+ Phashion is a Ruby wrapper around the pHash library, "perceptual hash", which detects duplicate
5
+ and near duplicate multimedia files (images, audio, video). The wrapper currently only supports images.
6
+
7
+ [See an overview of Phashion on my blog](http://www.mikeperham.com/2010/05/21/detecting-duplicate-images-with-phashion/).
8
+
9
+ Installation
10
+ -------------
11
+
12
+ You install it just like any other Ruby gem:
13
+
14
+ gem install ign-phashion
15
+
16
+ Phashion is somewhat involved to install as it has a few dependencies. I've wrapped up those
17
+ dependencies into a custom tarball that is built locally just for this gem so you don't have to
18
+ do anything special. See the code in `ext/phashion_ext` for more details.
19
+
20
+ Because of this complexity, it is possible the gem install will fail on your platform. I've tested
21
+ it on Mac OS X 10.6 and Ubuntu 8.04, but please contact me if you have installation problems.
22
+
23
+ Usage
24
+ ---------
25
+
26
+ require 'phashion'
27
+ img1 = Phashion::Image.new(filename1)
28
+ img2 = Phashion::Image.new(filename2)
29
+ img1.duplicate?(img2)
30
+ --> true
31
+
32
+ Author
33
+ ==========
34
+
35
+ Mike Perham, http://mikeperham.com, http://twitter.com/mperham, mperham AT gmail.com
36
+
37
+ Copyright
38
+ ----------
39
+
40
+ Copyright (c) 2010 Mike Perham. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "ign-phashion"
8
+ gem.summary = %Q{Simple wrapper around the pHash library}
9
+ gem.description = gem.summary
10
+ gem.email = "mperham@gmail.com"
11
+ gem.homepage = "http://github.com/ign/phashion"
12
+ gem.authors = ["Mike Perham", "Bennett", "Thomas Nguyen"]
13
+ gem.add_development_dependency 'rake-compiler', '>= 0.7.0'
14
+ gem.version = '1.0.3'
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test|
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/test_*.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/test_*.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+ task :test => :check_dependencies
43
+
44
+ task :default => :test
45
+
46
+ require 'rake/rdoctask'
47
+ Rake::RDocTask.new do |rdoc|
48
+ rdoc.rdoc_dir = 'rdoc'
49
+ rdoc.title = "phashion"
50
+ rdoc.rdoc_files.include('README*')
51
+ rdoc.rdoc_files.include('lib/**/*.rb')
52
+ end
53
+
54
+
55
+ gem 'rake-compiler', '>= 0.7.0'
56
+ require "rake/extensiontask"
57
+
58
+ Rake::ExtensionTask.new("phashion_ext")
data/TODO.md ADDED
@@ -0,0 +1,6 @@
1
+ TODO
2
+ =======
3
+
4
+ - Add video and audio APIs (someone else will have to do this; I have no need for it).
5
+ - Add broader support for the entire pHash API, including other types of hashes. Currently
6
+ the image support hardcodes use of the DCT hash.
@@ -0,0 +1,52 @@
1
+ require 'mkmf'
2
+
3
+ HERE = File.expand_path(File.dirname(__FILE__))
4
+ BUNDLE = Dir.glob("#{HERE}/pHash-*.tar.gz").first
5
+ BUNDLE_PATH = BUNDLE.gsub(".tar.gz", "")
6
+ $CFLAGS = " -x c++ #{ENV["CFLAGS"]}"
7
+ $includes = " -I#{HERE}/include"
8
+ $libraries = " -L#{HERE}/lib"
9
+ $LIBPATH = ["#{HERE}/lib"]
10
+ $CFLAGS = "#{$includes} #{$libraries} #{$CFLAGS}"
11
+ $LDFLAGS = "#{$libraries} #{$LDFLAGS}"
12
+
13
+ Dir.chdir(HERE) do
14
+ if File.exist?("lib")
15
+ puts "pHash already built; run 'rake clean' first if you need to rebuild."
16
+ else
17
+
18
+ puts(cmd = "tar xzf #{BUNDLE} 2>&1")
19
+ raise "'#{cmd}' failed" unless system(cmd)
20
+
21
+ Dir.chdir(BUNDLE_PATH) do
22
+ puts(cmd = "env CFLAGS='#{$CFLAGS}' LDFLAGS='#{$LDFLAGS}' ./configure --prefix=#{HERE} --disable-audio-hash --disable-video-hash --disable-shared --with-pic 2>&1")
23
+ raise "'#{cmd}' failed" unless system(cmd)
24
+
25
+ puts(cmd = "make 2>&1 || true") # best-effort build: merge make's stderr into stdout; `|| true` keeps the exit status successful
26
+ raise "'#{cmd}' failed" unless system(cmd)
27
+
28
+ puts(cmd = "make install 2>&1 || true") # best-effort install: merge stderr into stdout; `|| true` keeps the exit status successful
29
+ raise "'#{cmd}' failed" unless system(cmd)
30
+
31
+ puts(cmd = "mv CImg.h ../include 2>&1")
32
+ raise "'#{cmd}' failed" unless system(cmd)
33
+ end
34
+
35
+ system("rm -rf #{BUNDLE_PATH}") unless ENV['DEBUG'] or ENV['DEV']
36
+ end
37
+
38
+ Dir.chdir("#{HERE}/lib") do
39
+ system("cp -f libpHash.a libpHash_gem.a")
40
+ system("cp -f libpHash.la libpHash_gem.la")
41
+ end
42
+ $LIBS = " -lpHash_gem -lstdc++"
43
+ case RUBY_PLATFORM
44
+ when /linux/
45
+ uname = `uname -a`
46
+ if uname =~ /Ubuntu/
47
+ $LIBS += " -ljpeg"
48
+ end
49
+ end
50
+ end
51
+
52
+ create_makefile 'phashion_ext'
Binary file
@@ -0,0 +1,35 @@
1
+ #include "ruby.h"
2
+ #include "pHash.h"
3
+
4
+ static VALUE image_hash_for(VALUE self, VALUE _filename) {
5
+ char * filename = StringValuePtr(_filename);
6
+ ulong64 hash;
7
+ if (-1 == ph_dct_imagehash(filename, hash)) {
8
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
9
+ }
10
+ return ULL2NUM(hash);
11
+ }
12
+
13
+
14
+ static VALUE hamming_distance(VALUE self, VALUE a, VALUE b) {
15
+ int result = 0;
16
+ result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
17
+ if (-1 == result) {
18
+ rb_raise(rb_eRuntimeError, "Unknown pHash error");
19
+ }
20
+ return INT2NUM(result);
21
+ }
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+ void Init_phashion_ext() {
27
+ VALUE c = rb_cObject;
28
+ c = rb_const_get(c, rb_intern("Phashion"));
29
+
30
+ rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
31
+ rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
32
+ }
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{ign-phashion}
8
+ s.version = "1.0.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Mike Perham", "Bennett", "Thomas Nguyen"]
12
+ s.date = %q{2010-12-14}
13
+ s.description = %q{Simple wrapper around the pHash library}
14
+ s.email = %q{mperham@gmail.com}
15
+ s.extensions = ["ext/phashion_ext/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE",
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "CHANGES.md",
24
+ "LICENSE",
25
+ "README.md",
26
+ "Rakefile",
27
+ "TODO.md",
28
+ "ext/phashion_ext/extconf.rb",
29
+ "ext/phashion_ext/pHash-0.9.0.tar.gz",
30
+ "ext/phashion_ext/phashion_ext.c",
31
+ "ign-phashion.gemspec",
32
+ "lib/phashion.rb",
33
+ "test/86x86-0a1e.jpeg",
34
+ "test/86x86-83d6.jpeg",
35
+ "test/86x86-a855.jpeg",
36
+ "test/avatar.jpg",
37
+ "test/b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg",
38
+ "test/df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg",
39
+ "test/e7397898a7e395c2524978a5e64de0efabf08290.jpeg",
40
+ "test/helper.rb",
41
+ "test/test_phashion.rb"
42
+ ]
43
+ s.homepage = %q{http://github.com/ign/phashion}
44
+ s.rdoc_options = ["--charset=UTF-8"]
45
+ s.require_paths = ["lib"]
46
+ s.rubygems_version = %q{1.3.7}
47
+ s.summary = %q{Simple wrapper around the pHash library}
48
+ s.test_files = [
49
+ "test/helper.rb",
50
+ "test/test_phashion.rb"
51
+ ]
52
+
53
+ if s.respond_to? :specification_version then
54
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
55
+ s.specification_version = 3
56
+
57
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
58
+ s.add_development_dependency(%q<rake-compiler>, [">= 0.7.0"])
59
+ else
60
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.0"])
61
+ end
62
+ else
63
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.0"])
64
+ end
65
+ end
66
+
data/lib/phashion.rb ADDED
@@ -0,0 +1,33 @@
1
+ ##
2
+ # Provides a clean and simple API to detect duplicate image files using
3
+ # the pHash library under the covers.
4
+ #
5
+ # The C API:
6
+ # int ph_dct_imagehash(const char *file, ulong64 &hash);
7
+ # int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
8
+
9
+ module Phashion
10
+ VERSION = '1.0.3' # must match the gem version declared in the Rakefile / gemspec
11
+
12
+ class Image
13
+ SETTINGS = {
14
+ :dupe_threshold => 15
15
+ }
16
+
17
+ attr_reader :filename
18
+ def initialize(filename)
19
+ @filename = filename
20
+ end
21
+
22
+ def duplicate?(other)
23
+ Phashion.hamming_distance(fingerprint, other.fingerprint) < SETTINGS[:dupe_threshold]
24
+ end
25
+
26
+ def fingerprint
27
+ @hash ||= Phashion.image_hash_for(@filename)
28
+ end
29
+ end
30
+
31
+ end
32
+
33
+ require 'phashion_ext'
Binary file
Binary file
Binary file
data/test/avatar.jpg ADDED
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'phashion'
4
+
5
+ class Test::Unit::TestCase
6
+ end
@@ -0,0 +1,38 @@
1
+ require 'helper'
2
+
3
+ class TestPhashion < Test::Unit::TestCase
4
+
5
+ def test_duplicate_detection
6
+ files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg)
7
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
8
+ assert_duplicate images[0], images[1]
9
+ assert_duplicate images[1], images[2]
10
+ assert_duplicate images[0], images[2]
11
+ end
12
+
13
+ def test_duplicate_detection_2
14
+ files = %w(b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg e7397898a7e395c2524978a5e64de0efabf08290.jpeg)
15
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
16
+ assert_duplicate images[0], images[1]
17
+ assert_duplicate images[1], images[2]
18
+ assert_duplicate images[0], images[2]
19
+ end
20
+
21
+ def test_not_duplicate
22
+ files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
23
+ images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
24
+ assert_not_duplicate images[0], images[3]
25
+ assert_not_duplicate images[1], images[3]
26
+ assert_not_duplicate images[2], images[3]
27
+ end
28
+
29
+ private
30
+
31
+ def assert_duplicate(a, b)
32
+ assert a.duplicate?(b), "#{a.filename} not dupe of #{b.filename}"
33
+ end
34
+
35
+ def assert_not_duplicate(a, b)
36
+ assert !a.duplicate?(b), "#{a.filename} dupe of #{b.filename}"
37
+ end
38
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ign-phashion
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 3
10
+ version: 1.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Mike Perham
14
+ - Bennett
15
+ - Thomas Nguyen
16
+ autorequire:
17
+ bindir: bin
18
+ cert_chain: []
19
+
20
+ date: 2010-12-14 00:00:00 -08:00
21
+ default_executable:
22
+ dependencies:
23
+ - !ruby/object:Gem::Dependency
24
+ name: rake-compiler
25
+ prerelease: false
26
+ requirement: &id001 !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ hash: 3
32
+ segments:
33
+ - 0
34
+ - 7
35
+ - 0
36
+ version: 0.7.0
37
+ type: :development
38
+ version_requirements: *id001
39
+ description: Simple wrapper around the pHash library
40
+ email: mperham@gmail.com
41
+ executables: []
42
+
43
+ extensions:
44
+ - ext/phashion_ext/extconf.rb
45
+ extra_rdoc_files:
46
+ - LICENSE
47
+ - README.md
48
+ files:
49
+ - .document
50
+ - .gitignore
51
+ - CHANGES.md
52
+ - LICENSE
53
+ - README.md
54
+ - Rakefile
55
+ - TODO.md
56
+ - ext/phashion_ext/extconf.rb
57
+ - ext/phashion_ext/pHash-0.9.0.tar.gz
58
+ - ext/phashion_ext/phashion_ext.c
59
+ - ign-phashion.gemspec
60
+ - lib/phashion.rb
61
+ - test/86x86-0a1e.jpeg
62
+ - test/86x86-83d6.jpeg
63
+ - test/86x86-a855.jpeg
64
+ - test/avatar.jpg
65
+ - test/b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg
66
+ - test/df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg
67
+ - test/e7397898a7e395c2524978a5e64de0efabf08290.jpeg
68
+ - test/helper.rb
69
+ - test/test_phashion.rb
70
+ has_rdoc: true
71
+ homepage: http://github.com/ign/phashion
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options:
76
+ - --charset=UTF-8
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.3.7
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Simple wrapper around the pHash library
104
+ test_files:
105
+ - test/helper.rb
106
+ - test/test_phashion.rb