pHash 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +12 -0
  2. data/LICENSE.txt +20 -0
  3. data/README.markdown +53 -0
  4. data/audiophash.diff +17 -0
  5. data/lib/phash.rb +44 -0
  6. data/lib/phash/all.rb +3 -0
  7. data/lib/phash/audio.rb +116 -0
  8. data/lib/phash/image.rb +59 -0
  9. data/lib/phash/text.rb +100 -0
  10. data/lib/phash/video.rb +55 -0
  11. data/pHash.gemspec +20 -0
  12. data/spec/data/audiophash.cpp-0.9.3.txt +571 -0
  13. data/spec/data/audiophash.cpp-0.9.4.txt +572 -0
  14. data/spec/data/audiophash.h-0.9.3.txt +111 -0
  15. data/spec/data/audiophash.h-0.9.4.txt +108 -0
  16. data/spec/data/hal9000-m.mp3 +0 -0
  17. data/spec/data/hal9000-o.mp3 +0 -0
  18. data/spec/data/jug-0-10.jpg +0 -0
  19. data/spec/data/jug-0-120.png +0 -0
  20. data/spec/data/jug-0-50.jpg +0 -0
  21. data/spec/data/jug-0-70.jpg +0 -0
  22. data/spec/data/jug-1-10.jpg +0 -0
  23. data/spec/data/jug-1-120.png +0 -0
  24. data/spec/data/jug-1-50.jpg +0 -0
  25. data/spec/data/jug-1-70.jpg +0 -0
  26. data/spec/data/jug-120.mp4 +0 -0
  27. data/spec/data/jug-150.mp4 +0 -0
  28. data/spec/data/jug-180.mp4 +0 -0
  29. data/spec/data/jug-2-10.jpg +0 -0
  30. data/spec/data/jug-2-120.png +0 -0
  31. data/spec/data/jug-2-50.jpg +0 -0
  32. data/spec/data/jug-2-70.jpg +0 -0
  33. data/spec/data/mouse-0-10.jpg +0 -0
  34. data/spec/data/mouse-0-120.png +0 -0
  35. data/spec/data/mouse-0-50.jpg +0 -0
  36. data/spec/data/mouse-0-70.jpg +0 -0
  37. data/spec/data/mouse-1-10.jpg +0 -0
  38. data/spec/data/mouse-1-120.png +0 -0
  39. data/spec/data/mouse-1-50.jpg +0 -0
  40. data/spec/data/mouse-1-70.jpg +0 -0
  41. data/spec/data/mouse-120.mp4 +0 -0
  42. data/spec/data/mouse-150.mp4 +0 -0
  43. data/spec/data/mouse-180.mp4 +0 -0
  44. data/spec/data/mouse-2-10.jpg +0 -0
  45. data/spec/data/mouse-2-120.png +0 -0
  46. data/spec/data/mouse-2-50.jpg +0 -0
  47. data/spec/data/mouse-2-70.jpg +0 -0
  48. data/spec/data/scream-m.mp3 +0 -0
  49. data/spec/data/scream-o.mp3 +0 -0
  50. data/spec/data/vader-m.mp3 +0 -0
  51. data/spec/data/vader-o.mp3 +0 -0
  52. data/spec/phash_spec.rb +43 -0
  53. data/spec/spec_helper.rb +10 -0
  54. metadata +186 -0
@@ -0,0 +1,111 @@
1
+ /*
2
+
3
+ pHash, the open source perceptual hash library
4
+ Copyright (C) 2009 Aetilius, Inc.
5
+ All rights reserved.
6
+
7
+ This program is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ Evan Klinger - eklinger@phash.org
21
+ D Grant Starkweather - dstarkweather@phash.org
22
+
23
+ */
24
+
25
+ #ifndef _AUDIO_PHASH_H
26
+ #define _AUDIO_PHASH_H
27
+
28
+ #include <limits.h>
29
+ #include <math.h>
30
+ #include <unistd.h>
31
+ #include <stdlib.h>
32
+ #include <algorithm>
33
+ #include "pHash.h"
34
+
35
+ extern "C" {
36
+ #include "./libavformat/avformat.h"
37
+ #include "./libavcodec/avcodec.h"
38
+ #include "./libswscale/swscale.h"
39
+ #include "ph_fft.h"
40
+ }
41
+
42
+ /* /brief count number of samples in file
43
+ *
44
+ * /param filename - path and file name of audio file
45
+ * /param sr - sample rate conversion
46
+ * /param channels - channels number conversion
47
+ * /return int count of number of samples, negative for error
48
+ */
49
+ int ph_count_samples(const char *filename, int sr,int channels);
50
+
51
+
52
+
53
+
54
+ /* /brief read audio
55
+ *
56
+ * /param filename - path and name of audio file to read
57
+ * /param sr - sample rate conversion
58
+ * /param channels - nb channels to convert to (always 1) unused
59
+ * /param sigbuf - preallocated buffer
60
+ * /param buflen - (in/out) param for buf length
61
+ * /param nbsecs - float value for duration (in secs) to read from file
62
+ * /return float* - float pointer to start of buffer - one channel of audio, NULL if error
63
+ */
64
+ float* ph_readaudio(const char *filename, int sr, int channels, float *sigbuf, int &buflen, const float nbsecs = 0);
65
+
66
+ /* /brief audio hash calculation
67
+ * purpose: hash calculation for each frame in the buffer.
68
+ * Each value is computed from successive overlapping frames of the input buffer.
69
+ * The value is based on the bark scale values of the frame fft spectrum. The value
70
+ * is computed from temporal and spectral differences on the bark scale.
71
+ *
72
+ * /param buf - pointer to start of buffer
73
+ * /param nbbuf - length of buffer
74
+ * /param sr - sample rate on which to base the audiohash
75
+ * /param nbframes - (out) number of frames in audio buf and length of audiohash buffer returned
76
+ * /return uint32 pointer to audio hash, NULL for error
77
+ */
78
+ uint32_t* ph_audiohash(float *buf, int nbbuf, const int sr, int &nbframes);
79
+
80
+ DP **ph_audio_hashes(char *files[], int count, int sr = 8000, int channels = 1, int threads = 0);
81
+
82
+ /* /brief bit count set bits in 32bit variable
83
+ * /param n
84
+ * /return int number of bits set to 1, negative if error
85
+ */
86
+ int ph_bitcount(uint32_t n);
87
+
88
+
89
+ /* /brief compare 2 hash blocks
90
+ * /param ptr_blockA - pointer to the first block
91
+ * /param ptr_blockB - pointer to the second block
92
+ * /param block_size - length of both blocks to compare
93
+ * /return double bit error rate (ber) from comparing two blocks, neg for error
94
+ */
95
+ double ph_compare_blocks(const uint32_t *ptr_blockA,const uint32_t *ptr_blockB, const int block_size);
96
+
97
+
98
+ /* /brief distance function between two hashes
99
+ *
100
+ * /param hash_a - first hash
101
+ * /param Na - length of first hash
102
+ * /param hash_b - second hash
103
+ * /param Nb - length of second hash
104
+ * /param threshold - threshold value to compare successive blocks, 0.25, 0.30, 0.35
105
+ * /param block_size - length of block_size, 256
106
+ * /param Nc - (out) length of confidence score vector
107
+ * /return double* - ptr to confidence score vector
108
+ */
109
+ double* ph_audio_distance_ber(uint32_t *hash_a , const int Na, uint32_t *hash_b, const int Nb, const float threshold, const int block_size, int &Nc);
110
+
111
+ #endif
@@ -0,0 +1,108 @@
1
+ /*
2
+
3
+ pHash, the open source perceptual hash library
4
+ Copyright (C) 2009 Aetilius, Inc.
5
+ All rights reserved.
6
+
7
+ This program is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ Evan Klinger - eklinger@phash.org
21
+ D Grant Starkweather - dstarkweather@phash.org
22
+
23
+ */
24
+
25
+ #ifndef _AUDIO_PHASH_H
26
+ #define _AUDIO_PHASH_H
27
+
28
+ #include <limits.h>
29
+ #include <math.h>
30
+ #include <unistd.h>
31
+ #include <stdlib.h>
32
+ #include <algorithm>
33
+ #include "pHash.h"
34
+
35
+ extern "C" {
36
+ #include "ph_fft.h"
37
+ }
38
+
39
+ /* /brief count number of samples in file
40
+ *
41
+ * /param filename - path and file name of audio file
42
+ * /param sr - sample rate conversion
43
+ * /param channels - channels number conversion
44
+ * /return int count of number of samples, negative for error
45
+ */
46
+ int ph_count_samples(const char *filename, int sr,int channels);
47
+
48
+
49
+
50
+
51
+ /* /brief read audio
52
+ *
53
+ * /param filename - path and name of audio file to read
54
+ * /param sr - sample rate conversion
55
+ * /param channels - nb channels to convert to (always 1) unused
56
+ * /param sigbuf - preallocated buffer
57
+ * /param buflen - (in/out) param for buf length
58
+ * /param nbsecs - float value for duration (in secs) to read from file
59
+ * /return float* - float pointer to start of buffer - one channel of audio, NULL if error
60
+ */
61
+ float* ph_readaudio(const char *filename, int sr, int channels, float *sigbuf, int &buflen, const float nbsecs = 0);
62
+
63
+ /* /brief audio hash calculation
64
+ * purpose: hash calculation for each frame in the buffer.
65
+ * Each value is computed from successive overlapping frames of the input buffer.
66
+ * The value is based on the bark scale values of the frame fft spectrum. The value
67
+ * is computed from temporal and spectral differences on the bark scale.
68
+ *
69
+ * /param buf - pointer to start of buffer
70
+ * /param nbbuf - length of buffer
71
+ * /param sr - sample rate on which to base the audiohash
72
+ * /param nbframes - (out) number of frames in audio buf and length of audiohash buffer returned
73
+ * /return uint32 pointer to audio hash, NULL for error
74
+ */
75
+ uint32_t* ph_audiohash(float *buf, int nbbuf, const int sr, int &nbframes);
76
+
77
+ DP **ph_audio_hashes(char *files[], int count, int sr = 8000, int channels = 1, int threads = 0);
78
+
79
+ /* /brief bit count set bits in 32bit variable
80
+ * /param n
81
+ * /return int number of bits set to 1, negative if error
82
+ */
83
+ int ph_bitcount(uint32_t n);
84
+
85
+
86
+ /* /brief compare 2 hash blocks
87
+ * /param ptr_blockA - pointer to the first block
88
+ * /param ptr_blockB - pointer to the second block
89
+ * /param block_size - length of both blocks to compare
90
+ * /return double bit error rate (ber) from comparing two blocks, neg for error
91
+ */
92
+ double ph_compare_blocks(const uint32_t *ptr_blockA,const uint32_t *ptr_blockB, const int block_size);
93
+
94
+
95
+ /* /brief distance function between two hashes
96
+ *
97
+ * /param hash_a - first hash
98
+ * /param Na - length of first hash
99
+ * /param hash_b - second hash
100
+ * /param Nb - length of second hash
101
+ * /param threshold - threshold value to compare successive blocks, 0.25, 0.30, 0.35
102
+ * /param block_size - length of block_size, 256
103
+ * /param Nc - (out) length of confidence score vector
104
+ * /return double* - ptr to confidence score vector
105
+ */
106
+ double* ph_audio_distance_ber(uint32_t *hash_a , const int Na, uint32_t *hash_b, const int Nb, const float threshold, const int block_size, int &Nc);
107
+
108
+ #endif
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,43 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
+ describe :Phash do
4
+ data_dir = FSPath(__FILE__).dirname / 'data'
5
+
6
+ shared_examples :similarity do
7
+ it "should return valid similarities" do
8
+ collection.combination(2) do |a, b|
9
+ if a.path.main_name == b.path.main_name
10
+ (a % b).should > 0.8
11
+ else
12
+ (a % b).should <= 0.5
13
+ end
14
+ end
15
+ end
16
+
17
+ it "should return same similarity if swapping instances" do
18
+ collection.combination(2) do |a, b|
19
+ (a % b).should == (b % a)
20
+ end
21
+ end
22
+ end
23
+
24
+ describe :Audio do
25
+ let(:collection){ Phash::Audio.for_paths(data_dir.glob('*.mp3')) }
26
+ include_examples :similarity
27
+ end
28
+
29
+ describe :Image do
30
+ let(:collection){ Phash::Image.for_paths(data_dir.glob('**/*.{jpg,png}')) }
31
+ include_examples :similarity
32
+ end
33
+
34
+ describe :Text do
35
+ let(:collection){ Phash::Text.for_paths(data_dir.glob('*.txt')) }
36
+ include_examples :similarity
37
+ end
38
+
39
+ describe :Video do
40
+ let(:collection){ Phash::Video.for_paths(data_dir.glob('*.mp4')) }
41
+ include_examples :similarity
42
+ end
43
+ end
@@ -0,0 +1,10 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
2
+ require 'rspec'
3
+ require 'fspath'
4
+ require 'phash'
5
+
6
+ class FSPath
7
+ def main_name
8
+ basename.to_s.split('-', 2).first
9
+ end
10
+ end
metadata ADDED
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pHash
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Ivan Kuchin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-12-25 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: fspath
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id002
48
+ description:
49
+ email:
50
+ executables: []
51
+
52
+ extensions: []
53
+
54
+ extra_rdoc_files: []
55
+
56
+ files:
57
+ - .gitignore
58
+ - LICENSE.txt
59
+ - README.markdown
60
+ - audiophash.diff
61
+ - lib/phash.rb
62
+ - lib/phash/all.rb
63
+ - lib/phash/audio.rb
64
+ - lib/phash/image.rb
65
+ - lib/phash/text.rb
66
+ - lib/phash/video.rb
67
+ - pHash.gemspec
68
+ - spec/data/audiophash.cpp-0.9.3.txt
69
+ - spec/data/audiophash.cpp-0.9.4.txt
70
+ - spec/data/audiophash.h-0.9.3.txt
71
+ - spec/data/audiophash.h-0.9.4.txt
72
+ - spec/data/hal9000-m.mp3
73
+ - spec/data/hal9000-o.mp3
74
+ - spec/data/jug-0-10.jpg
75
+ - spec/data/jug-0-120.png
76
+ - spec/data/jug-0-50.jpg
77
+ - spec/data/jug-0-70.jpg
78
+ - spec/data/jug-1-10.jpg
79
+ - spec/data/jug-1-120.png
80
+ - spec/data/jug-1-50.jpg
81
+ - spec/data/jug-1-70.jpg
82
+ - spec/data/jug-120.mp4
83
+ - spec/data/jug-150.mp4
84
+ - spec/data/jug-180.mp4
85
+ - spec/data/jug-2-10.jpg
86
+ - spec/data/jug-2-120.png
87
+ - spec/data/jug-2-50.jpg
88
+ - spec/data/jug-2-70.jpg
89
+ - spec/data/mouse-0-10.jpg
90
+ - spec/data/mouse-0-120.png
91
+ - spec/data/mouse-0-50.jpg
92
+ - spec/data/mouse-0-70.jpg
93
+ - spec/data/mouse-1-10.jpg
94
+ - spec/data/mouse-1-120.png
95
+ - spec/data/mouse-1-50.jpg
96
+ - spec/data/mouse-1-70.jpg
97
+ - spec/data/mouse-120.mp4
98
+ - spec/data/mouse-150.mp4
99
+ - spec/data/mouse-180.mp4
100
+ - spec/data/mouse-2-10.jpg
101
+ - spec/data/mouse-2-120.png
102
+ - spec/data/mouse-2-50.jpg
103
+ - spec/data/mouse-2-70.jpg
104
+ - spec/data/scream-m.mp3
105
+ - spec/data/scream-o.mp3
106
+ - spec/data/vader-m.mp3
107
+ - spec/data/vader-o.mp3
108
+ - spec/phash_spec.rb
109
+ - spec/spec_helper.rb
110
+ homepage: http://github.com/toy/pHash
111
+ licenses:
112
+ - MIT
113
+ post_install_message:
114
+ rdoc_options: []
115
+
116
+ require_paths:
117
+ - lib
118
+ required_ruby_version: !ruby/object:Gem::Requirement
119
+ none: false
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ hash: 3
124
+ segments:
125
+ - 0
126
+ version: "0"
127
+ required_rubygems_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ hash: 3
133
+ segments:
134
+ - 0
135
+ version: "0"
136
+ requirements: []
137
+
138
+ rubyforge_project: pHash
139
+ rubygems_version: 1.8.12
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: Use pHash with ruby
143
+ test_files:
144
+ - spec/data/audiophash.cpp-0.9.3.txt
145
+ - spec/data/audiophash.cpp-0.9.4.txt
146
+ - spec/data/audiophash.h-0.9.3.txt
147
+ - spec/data/audiophash.h-0.9.4.txt
148
+ - spec/data/hal9000-m.mp3
149
+ - spec/data/hal9000-o.mp3
150
+ - spec/data/jug-0-10.jpg
151
+ - spec/data/jug-0-120.png
152
+ - spec/data/jug-0-50.jpg
153
+ - spec/data/jug-0-70.jpg
154
+ - spec/data/jug-1-10.jpg
155
+ - spec/data/jug-1-120.png
156
+ - spec/data/jug-1-50.jpg
157
+ - spec/data/jug-1-70.jpg
158
+ - spec/data/jug-120.mp4
159
+ - spec/data/jug-150.mp4
160
+ - spec/data/jug-180.mp4
161
+ - spec/data/jug-2-10.jpg
162
+ - spec/data/jug-2-120.png
163
+ - spec/data/jug-2-50.jpg
164
+ - spec/data/jug-2-70.jpg
165
+ - spec/data/mouse-0-10.jpg
166
+ - spec/data/mouse-0-120.png
167
+ - spec/data/mouse-0-50.jpg
168
+ - spec/data/mouse-0-70.jpg
169
+ - spec/data/mouse-1-10.jpg
170
+ - spec/data/mouse-1-120.png
171
+ - spec/data/mouse-1-50.jpg
172
+ - spec/data/mouse-1-70.jpg
173
+ - spec/data/mouse-120.mp4
174
+ - spec/data/mouse-150.mp4
175
+ - spec/data/mouse-180.mp4
176
+ - spec/data/mouse-2-10.jpg
177
+ - spec/data/mouse-2-120.png
178
+ - spec/data/mouse-2-50.jpg
179
+ - spec/data/mouse-2-70.jpg
180
+ - spec/data/scream-m.mp3
181
+ - spec/data/scream-o.mp3
182
+ - spec/data/vader-m.mp3
183
+ - spec/data/vader-o.mp3
184
+ - spec/phash_spec.rb
185
+ - spec/spec_helper.rb
186
+ has_rdoc: