phashion 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES.md +16 -0
- data/README.md +1 -1
- data/Rakefile +8 -2
- data/TODO.md +6 -0
- data/ext/phashion_ext/extconf.rb +13 -0
- data/ext/phashion_ext/phashion_ext.c +35 -0
- data/lib/phashion.rb +8 -76
- metadata +13 -7
data/CHANGES.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
History
|
2
|
+
=========
|
3
|
+
|
4
|
+
1.0.1
|
5
|
+
-------
|
6
|
+
|
7
|
+
* Remove RubyInline, use standard Ruby extension infrastructure.
|
8
|
+
* Update duplicate threshold constant based on wider image testing.
|
9
|
+
* Make duplicate threshold variable so users can tune it based on their dataset.
|
10
|
+
* Add Phashion::Image#fingerprint method which exposes an Image's 64-bit hash.
|
11
|
+
|
12
|
+
|
13
|
+
1.0.0
|
14
|
+
-------
|
15
|
+
|
16
|
+
Initial release.
|
data/README.md
CHANGED
@@ -9,7 +9,7 @@ Installation
|
|
9
9
|
|
10
10
|
First you need to install pHash. pHash requires three libraries: CImg, ffmpeg and libjpeg. My system already came with libjpeg on it so I didn't have to do anything for it. YMMV.
|
11
11
|
|
12
|
-
Install CImg.h by downloading the latest version from cimg.sf.net and placing the CImg.h header file in /usr/local/include.
|
12
|
+
Install CImg.h by downloading the latest version from http://cimg.sf.net and placing the CImg.h header file in /usr/local/include.
|
13
13
|
|
14
14
|
If you are working with audio or video, you will need to install ffmpeg:
|
15
15
|
|
data/Rakefile
CHANGED
@@ -10,8 +10,8 @@ begin
|
|
10
10
|
gem.email = "mperham@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/mperham/phashion"
|
12
12
|
gem.authors = ["Mike Perham"]
|
13
|
-
gem.
|
14
|
-
gem.version = '1.0.0'
|
13
|
+
gem.add_development_dependency 'rake-compiler', '>= 0.7.0'
|
14
|
+
gem.version = '1.0.1'
|
15
15
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
16
|
end
|
17
17
|
Jeweler::GemcutterTasks.new
|
@@ -50,3 +50,9 @@ Rake::RDocTask.new do |rdoc|
|
|
50
50
|
rdoc.rdoc_files.include('README*')
|
51
51
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
52
52
|
end
|
53
|
+
|
54
|
+
|
55
|
+
gem 'rake-compiler', '>= 0.7.0'
|
56
|
+
require "rake/extensiontask"
|
57
|
+
|
58
|
+
Rake::ExtensionTask.new("phashion_ext")
|
data/TODO.md
ADDED
(+6 lines; file content is missing from this extract)
data/ext/phashion_ext/extconf.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
|
3
|
+
$CFLAGS << " -x c++ #{ENV["CFLAGS"]}"
|
4
|
+
$LIBS << " -lpHash #{ENV["LIBS"]}"
|
5
|
+
|
6
|
+
# TODO: need to figure this stuff out
|
7
|
+
# dir_config 'pHash'
|
8
|
+
# if !have_library('pHash', 'ph_dct_imagehash')
|
9
|
+
# puts "Unable to find pHash library, please use 'gem install phashion -- --with-pHash-dir=/phash/install/root'"
|
10
|
+
# exit 1
|
11
|
+
# end
|
12
|
+
|
13
|
+
create_makefile('phashion_ext')
|
data/ext/phashion_ext/phashion_ext.c
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "pHash.h"
|
3
|
+
|
4
|
+
static VALUE image_hash_for(VALUE self, VALUE _filename) {
|
5
|
+
char * filename = StringValuePtr(_filename);
|
6
|
+
ulong64 hash;
|
7
|
+
if (-1 == ph_dct_imagehash(filename, hash)) {
|
8
|
+
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
9
|
+
}
|
10
|
+
return ULL2NUM(hash);
|
11
|
+
}
|
12
|
+
|
13
|
+
|
14
|
+
static VALUE hamming_distance(VALUE self, VALUE a, VALUE b) {
|
15
|
+
int result = 0;
|
16
|
+
result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
|
17
|
+
if (-1 == result) {
|
18
|
+
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
19
|
+
}
|
20
|
+
return INT2NUM(result);
|
21
|
+
}
|
22
|
+
|
23
|
+
#ifdef __cplusplus
|
24
|
+
extern "C" {
|
25
|
+
#endif
|
26
|
+
void Init_phashion_ext() {
|
27
|
+
VALUE c = rb_cObject;
|
28
|
+
c = rb_const_get(c, rb_intern("Phashion"));
|
29
|
+
|
30
|
+
rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
|
31
|
+
rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
|
32
|
+
}
|
33
|
+
#ifdef __cplusplus
|
34
|
+
}
|
35
|
+
#endif
|
data/lib/phashion.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'inline'
|
3
2
|
|
4
3
|
##
|
5
4
|
# Provides a clean and simple API to detect duplicate image files using
|
@@ -9,11 +8,13 @@ require 'inline'
|
|
9
8
|
# int ph_dct_imagehash(const char *file, ulong64 &hash);
|
10
9
|
# int ph_hamming_distance(ulong64 hasha, ulong64 hashb);
|
11
10
|
|
12
|
-
|
13
|
-
VERSION = '1.0.0'
|
11
|
+
module Phashion
|
12
|
+
VERSION = '1.0.1'
|
14
13
|
|
15
14
|
class Image
|
16
|
-
|
15
|
+
SETTINGS = {
|
16
|
+
:dupe_threshold => 15
|
17
|
+
}
|
17
18
|
|
18
19
|
attr_reader :filename
|
19
20
|
def initialize(filename)
|
@@ -21,83 +22,14 @@ class Phashion
|
|
21
22
|
end
|
22
23
|
|
23
24
|
def duplicate?(other)
|
24
|
-
Phashion.hamming_distance(
|
25
|
+
Phashion.hamming_distance(fingerprint, other.fingerprint) < SETTINGS[:dupe_threshold]
|
25
26
|
end
|
26
|
-
|
27
|
-
private
|
28
27
|
|
29
|
-
def
|
28
|
+
def fingerprint
|
30
29
|
@hash ||= Phashion.image_hash_for(@filename)
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
34
|
-
def self.image_hash_for(filename)
|
35
|
-
end
|
36
|
-
|
37
|
-
def self.hamming_distance(hashA, hashB)
|
38
|
-
end
|
39
|
-
|
40
|
-
inline do |builder|
|
41
|
-
if test ?d, "/opt/local" then
|
42
|
-
builder.add_compile_flags "-I/opt/local/include"
|
43
|
-
builder.add_link_flags "-L/opt/local/lib"
|
44
|
-
end
|
45
|
-
|
46
|
-
builder.add_compile_flags '-x c++', '-lstdc++'
|
47
|
-
builder.add_link_flags "-lpHash"
|
48
|
-
builder.include '"pHash.h"'
|
49
|
-
|
50
|
-
builder.c_singleton <<-"END"
|
51
|
-
VALUE image_hash_for(const char *filename) {
|
52
|
-
ulong64 hash;
|
53
|
-
if (-1 == ph_dct_imagehash(filename, hash)) {
|
54
|
-
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
55
|
-
}
|
56
|
-
return ULL2NUM(hash);
|
57
|
-
}
|
58
|
-
END
|
59
|
-
|
60
|
-
builder.c_singleton <<-"END"
|
61
|
-
VALUE hamming_distance(VALUE a, VALUE b) {
|
62
|
-
int result = 0;
|
63
|
-
result = ph_hamming_distance(NUM2ULL(a), NUM2ULL(b));
|
64
|
-
if (-1 == result) {
|
65
|
-
rb_raise(rb_eRuntimeError, "Unknown pHash error");
|
66
|
-
}
|
67
|
-
return INT2NUM(result);
|
68
|
-
}
|
69
|
-
END
|
70
|
-
|
71
|
-
end
|
72
33
|
end
|
73
34
|
|
74
|
-
|
75
|
-
|
76
|
-
def memory
|
77
|
-
`ps -o vsz,rss -p #{$$}`.strip
|
78
|
-
end
|
79
|
-
|
80
|
-
def assert_duplicate(a, b)
|
81
|
-
raise ArgumentError, "#{a.filename} not dupe of #{b.filename}" unless a.duplicate?(b)
|
82
|
-
end
|
83
|
-
|
84
|
-
def assert_not_duplicate(a, b)
|
85
|
-
raise ArgumentError, "#{a.filename} dupe of #{b.filename}" if a.duplicate?(b)
|
86
|
-
end
|
87
|
-
|
88
|
-
FILES = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg)
|
89
|
-
|
90
|
-
images = FILES.map {|f| PHash::Image.new("#{File.dirname(__FILE__) + '/../test/'}#{f}")}
|
91
|
-
# GC.start
|
92
|
-
# puts memory
|
93
|
-
assert_duplicate images[0], images[1]
|
94
|
-
assert_duplicate images[1], images[2]
|
95
|
-
assert_duplicate images[0], images[2]
|
96
|
-
|
97
|
-
assert_not_duplicate images[0], images[3]
|
98
|
-
assert_not_duplicate images[1], images[3]
|
99
|
-
assert_not_duplicate images[2], images[3]
|
100
|
-
# GC.start
|
101
|
-
# puts memory
|
102
|
-
|
103
|
-
end
|
35
|
+
require 'phashion_ext'
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 0
|
9
|
-
version: 1.0.0
|
8
|
+
- 1
|
9
|
+
version: 1.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Mike Perham
|
@@ -18,7 +18,7 @@ date: 2010-05-20 00:00:00 -05:00
|
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
|
-
name:
|
21
|
+
name: rake-compiler
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
@@ -26,24 +26,30 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
segments:
|
28
28
|
- 0
|
29
|
-
|
30
|
-
|
29
|
+
- 7
|
30
|
+
- 0
|
31
|
+
version: 0.7.0
|
32
|
+
type: :development
|
31
33
|
version_requirements: *id001
|
32
34
|
description: Simple wrapper around the pHash library
|
33
35
|
email: mperham@gmail.com
|
34
36
|
executables: []
|
35
37
|
|
36
|
-
extensions:
|
37
|
-
|
38
|
+
extensions:
|
39
|
+
- ext/phashion_ext/extconf.rb
|
38
40
|
extra_rdoc_files:
|
39
41
|
- LICENSE
|
40
42
|
- README.md
|
41
43
|
files:
|
42
44
|
- .document
|
43
45
|
- .gitignore
|
46
|
+
- CHANGES.md
|
44
47
|
- LICENSE
|
45
48
|
- README.md
|
46
49
|
- Rakefile
|
50
|
+
- TODO.md
|
51
|
+
- ext/phashion_ext/extconf.rb
|
52
|
+
- ext/phashion_ext/phashion_ext.c
|
47
53
|
- lib/phashion.rb
|
48
54
|
- test/86x86-0a1e.jpeg
|
49
55
|
- test/86x86-83d6.jpeg
|