pHash 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +12 -0
- data/LICENSE.txt +20 -0
- data/README.markdown +53 -0
- data/audiophash.diff +17 -0
- data/lib/phash.rb +44 -0
- data/lib/phash/all.rb +3 -0
- data/lib/phash/audio.rb +116 -0
- data/lib/phash/image.rb +59 -0
- data/lib/phash/text.rb +100 -0
- data/lib/phash/video.rb +55 -0
- data/pHash.gemspec +20 -0
- data/spec/data/audiophash.cpp-0.9.3.txt +571 -0
- data/spec/data/audiophash.cpp-0.9.4.txt +572 -0
- data/spec/data/audiophash.h-0.9.3.txt +111 -0
- data/spec/data/audiophash.h-0.9.4.txt +108 -0
- data/spec/data/hal9000-m.mp3 +0 -0
- data/spec/data/hal9000-o.mp3 +0 -0
- data/spec/data/jug-0-10.jpg +0 -0
- data/spec/data/jug-0-120.png +0 -0
- data/spec/data/jug-0-50.jpg +0 -0
- data/spec/data/jug-0-70.jpg +0 -0
- data/spec/data/jug-1-10.jpg +0 -0
- data/spec/data/jug-1-120.png +0 -0
- data/spec/data/jug-1-50.jpg +0 -0
- data/spec/data/jug-1-70.jpg +0 -0
- data/spec/data/jug-120.mp4 +0 -0
- data/spec/data/jug-150.mp4 +0 -0
- data/spec/data/jug-180.mp4 +0 -0
- data/spec/data/jug-2-10.jpg +0 -0
- data/spec/data/jug-2-120.png +0 -0
- data/spec/data/jug-2-50.jpg +0 -0
- data/spec/data/jug-2-70.jpg +0 -0
- data/spec/data/mouse-0-10.jpg +0 -0
- data/spec/data/mouse-0-120.png +0 -0
- data/spec/data/mouse-0-50.jpg +0 -0
- data/spec/data/mouse-0-70.jpg +0 -0
- data/spec/data/mouse-1-10.jpg +0 -0
- data/spec/data/mouse-1-120.png +0 -0
- data/spec/data/mouse-1-50.jpg +0 -0
- data/spec/data/mouse-1-70.jpg +0 -0
- data/spec/data/mouse-120.mp4 +0 -0
- data/spec/data/mouse-150.mp4 +0 -0
- data/spec/data/mouse-180.mp4 +0 -0
- data/spec/data/mouse-2-10.jpg +0 -0
- data/spec/data/mouse-2-120.png +0 -0
- data/spec/data/mouse-2-50.jpg +0 -0
- data/spec/data/mouse-2-70.jpg +0 -0
- data/spec/data/scream-m.mp3 +0 -0
- data/spec/data/scream-o.mp3 +0 -0
- data/spec/data/vader-m.mp3 +0 -0
- data/spec/data/vader-o.mp3 +0 -0
- data/spec/phash_spec.rb +43 -0
- data/spec/spec_helper.rb +10 -0
- metadata +186 -0
data/pHash.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8

# Gem specification for the pHash ruby bindings.
# The file lists are taken from the git index, so the gem has to be built
# from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name     = 'pHash'
  spec.version  = '1.0.0'
  spec.summary  = 'Use pHash with ruby'
  spec.homepage = "http://github.com/toy/#{spec.name}"
  spec.authors  = ['Ivan Kuchin']
  spec.license  = 'MIT'

  spec.rubyforge_project = spec.name

  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are listed by bare name, without the bin/ prefix.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  %w[rspec fspath].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
@@ -0,0 +1,571 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
pHash, the open source perceptual hash library
|
4
|
+
Copyright (C) 2009 Aetilius, Inc.
|
5
|
+
All rights reserved.
|
6
|
+
|
7
|
+
This program is free software: you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation, either version 3 of the License, or
|
10
|
+
(at your option) any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
|
20
|
+
Evan Klinger - eklinger@phash.org
|
21
|
+
David Starkweather - dstarkweather@phash.org
|
22
|
+
|
23
|
+
*/
|
24
|
+
|
25
|
+
#include "audiophash.h"
|
26
|
+
#include <sndfile.h>
|
27
|
+
#include <samplerate.h>
|
28
|
+
|
29
|
+
#ifdef HAVE_LIBMPG123
|
30
|
+
#include <mpg123.h>
|
31
|
+
#endif
|
32
|
+
|
33
|
+
int ph_count_samples(const char *filename, int sr,int channels){
|
34
|
+
|
35
|
+
SF_INFO sf_info;
|
36
|
+
sf_info.format=0;
|
37
|
+
SNDFILE *sndfile = sf_open(filename, SFM_READ, &sf_info);
|
38
|
+
if (sndfile == NULL){
|
39
|
+
return NULL;
|
40
|
+
}
|
41
|
+
int count = sf_info.frames;
|
42
|
+
sf_close(sndfile);
|
43
|
+
return count;
|
44
|
+
}
|
45
|
+
|
46
|
+
#ifdef HAVE_LIBMPG123
|
47
|
+
|
48
|
+
static
|
49
|
+
float* readaudio_mp3(const char *filename,long *sr, const float nbsecs, unsigned int *buflen){
|
50
|
+
mpg123_handle *m;
|
51
|
+
int ret;
|
52
|
+
|
53
|
+
if (mpg123_init() != MPG123_OK || ((m = mpg123_new(NULL,&ret)) == NULL)|| \
|
54
|
+
mpg123_open(m, filename) != MPG123_OK){
|
55
|
+
fprintf(stderr,"unable to init mpg\n");
|
56
|
+
return NULL;
|
57
|
+
}
|
58
|
+
|
59
|
+
/*turn off logging */
|
60
|
+
mpg123_param(m, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
|
61
|
+
|
62
|
+
off_t totalsamples;
|
63
|
+
|
64
|
+
mpg123_scan(m);
|
65
|
+
totalsamples = mpg123_length(m);
|
66
|
+
|
67
|
+
int meta = mpg123_meta_check(m);
|
68
|
+
|
69
|
+
int channels, encoding;
|
70
|
+
|
71
|
+
if (mpg123_getformat(m, sr, &channels, &encoding) != MPG123_OK){
|
72
|
+
fprintf(stderr,"unable to get format\n");
|
73
|
+
return NULL;
|
74
|
+
}
|
75
|
+
|
76
|
+
mpg123_format_none(m);
|
77
|
+
mpg123_format(m, *sr, channels, encoding);
|
78
|
+
|
79
|
+
size_t decbuflen = mpg123_outblock(m);
|
80
|
+
unsigned char *decbuf = (unsigned char*)malloc(decbuflen);
|
81
|
+
if (decbuf == NULL){
|
82
|
+
printf("mem alloc error\n");
|
83
|
+
return NULL;
|
84
|
+
}
|
85
|
+
|
86
|
+
unsigned int nbsamples = (nbsecs <= 0) ? totalsamples : nbsecs*(*sr);
|
87
|
+
nbsamples = (nbsamples < totalsamples) ? nbsamples : totalsamples;
|
88
|
+
|
89
|
+
size_t i, j, index = 0, done;
|
90
|
+
|
91
|
+
|
92
|
+
float *buffer = (float*)malloc(nbsamples*sizeof(float));
|
93
|
+
*buflen = nbsamples;
|
94
|
+
|
95
|
+
do {
|
96
|
+
|
97
|
+
ret = mpg123_read(m, decbuf, decbuflen, &done);
|
98
|
+
switch (encoding) {
|
99
|
+
case MPG123_ENC_SIGNED_16 :
|
100
|
+
for (i = 0; i < done/sizeof(short); i+=channels){
|
101
|
+
buffer[index] = 0.0f;
|
102
|
+
for (j = 0; j < channels ; j++){
|
103
|
+
buffer[index] += (float)(((short*)decbuf)[i+j])/(float)SHRT_MAX;
|
104
|
+
}
|
105
|
+
buffer[index++] /= channels;
|
106
|
+
if (index >= nbsamples) break;
|
107
|
+
}
|
108
|
+
break;
|
109
|
+
case MPG123_ENC_SIGNED_8:
|
110
|
+
for (i = 0; i < done/sizeof(char); i+=channels){
|
111
|
+
buffer[index] = 0.0f;
|
112
|
+
for (j = 0; j < channels ; j++){
|
113
|
+
buffer[index] += (float)(((char*)decbuf)[i+j])/(float)SCHAR_MAX;
|
114
|
+
}
|
115
|
+
buffer[index++] /= channels;
|
116
|
+
if (index >= nbsamples) break;
|
117
|
+
}
|
118
|
+
break;
|
119
|
+
case MPG123_ENC_FLOAT_32:
|
120
|
+
for (i = 0; i < done/sizeof(float); i+=channels){
|
121
|
+
buffer[index] = 0.0f;
|
122
|
+
for (j = 0; j < channels; j++){
|
123
|
+
buffer[index] += ((float*)decbuf)[i+j];
|
124
|
+
}
|
125
|
+
buffer[index++] /= channels;
|
126
|
+
if (index >= nbsamples) break;
|
127
|
+
}
|
128
|
+
break;
|
129
|
+
default:
|
130
|
+
done = 0;
|
131
|
+
}
|
132
|
+
|
133
|
+
} while (ret == MPG123_OK && index < nbsamples);
|
134
|
+
|
135
|
+
free(decbuf);
|
136
|
+
mpg123_close(m);
|
137
|
+
mpg123_delete(m);
|
138
|
+
mpg123_exit();
|
139
|
+
|
140
|
+
return buffer;
|
141
|
+
}
|
142
|
+
|
143
|
+
#endif /*HAVE_LIBMPG123*/
|
144
|
+
|
145
|
+
static
|
146
|
+
float *readaudio_snd(const char *filename, long *sr, const float nbsecs, unsigned int *buflen){
|
147
|
+
|
148
|
+
SF_INFO sf_info;
|
149
|
+
sf_info.format=0;
|
150
|
+
SNDFILE *sndfile = sf_open(filename, SFM_READ, &sf_info);
|
151
|
+
if (sndfile == NULL){
|
152
|
+
return NULL;
|
153
|
+
}
|
154
|
+
|
155
|
+
/* normalize */
|
156
|
+
sf_command(sndfile, SFC_SET_NORM_FLOAT, NULL, SF_TRUE);
|
157
|
+
|
158
|
+
*sr = (long)sf_info.samplerate;
|
159
|
+
|
160
|
+
//allocate input buffer for signal
|
161
|
+
unsigned int src_frames = (nbsecs <= 0) ? sf_info.frames : (nbsecs*sf_info.samplerate);
|
162
|
+
src_frames = (sf_info.frames < src_frames) ? sf_info.frames : src_frames;
|
163
|
+
float *inbuf = (float*)malloc(src_frames*sf_info.channels*sizeof(float));
|
164
|
+
|
165
|
+
/*read frames */
|
166
|
+
sf_count_t cnt_frames = sf_readf_float(sndfile, inbuf, src_frames);
|
167
|
+
|
168
|
+
float *buf = (float*)malloc(cnt_frames*sizeof(float));
|
169
|
+
|
170
|
+
//average across all channels
|
171
|
+
int i,j,indx=0;
|
172
|
+
for (i=0;i<cnt_frames*sf_info.channels;i+=sf_info.channels){
|
173
|
+
buf[indx] = 0;
|
174
|
+
for (j=0;j<sf_info.channels;j++){
|
175
|
+
buf[indx] += inbuf[i+j];
|
176
|
+
}
|
177
|
+
buf[indx++] /= sf_info.channels;
|
178
|
+
}
|
179
|
+
free(inbuf);
|
180
|
+
|
181
|
+
*buflen = indx;
|
182
|
+
return buf;
|
183
|
+
}
|
184
|
+
|
185
|
+
float* ph_readaudio2(const char *filename, int sr, float *sigbuf, int &buflen, const float nbsecs){
|
186
|
+
|
187
|
+
long orig_sr;
|
188
|
+
float *inbuffer = NULL;
|
189
|
+
unsigned int inbufferlength;
|
190
|
+
buflen = 0;
|
191
|
+
|
192
|
+
const char *suffix = strrchr(filename, '.');
|
193
|
+
if (suffix == NULL) return NULL;
|
194
|
+
if (!strcasecmp(suffix+1, "mp3")) {
|
195
|
+
#ifdef HAVE_LIBMPG123
|
196
|
+
inbuffer = readaudio_mp3(filename, &orig_sr, nbsecs, &inbufferlength);
|
197
|
+
#endif /* HAVE_LIBMPG123 */
|
198
|
+
} else {
|
199
|
+
inbuffer = readaudio_snd(filename, &orig_sr, nbsecs, &inbufferlength);
|
200
|
+
}
|
201
|
+
|
202
|
+
if (inbuffer == NULL){
|
203
|
+
return NULL;
|
204
|
+
}
|
205
|
+
|
206
|
+
/* resample float array */
|
207
|
+
/* set desired sr ratio */
|
208
|
+
double sr_ratio = (double)(sr)/(double)orig_sr;
|
209
|
+
if (src_is_valid_ratio(sr_ratio) == 0){
|
210
|
+
free(inbuffer);
|
211
|
+
return NULL;
|
212
|
+
}
|
213
|
+
|
214
|
+
/* allocate output buffer for conversion */
|
215
|
+
unsigned int outbufferlength = sr_ratio*inbufferlength;
|
216
|
+
float *outbuffer = (float*)malloc(outbufferlength*sizeof(float));
|
217
|
+
if (!outbuffer){
|
218
|
+
free(inbuffer);
|
219
|
+
return NULL;
|
220
|
+
}
|
221
|
+
|
222
|
+
int error;
|
223
|
+
SRC_STATE *src_state = src_new(SRC_LINEAR, 1, &error);
|
224
|
+
if (!src_state){
|
225
|
+
free(inbuffer);
|
226
|
+
free(outbuffer);
|
227
|
+
return NULL;
|
228
|
+
}
|
229
|
+
|
230
|
+
SRC_DATA src_data;
|
231
|
+
src_data.data_in = inbuffer;
|
232
|
+
src_data.data_out = outbuffer;
|
233
|
+
src_data.input_frames = inbufferlength;
|
234
|
+
src_data.output_frames = outbufferlength;
|
235
|
+
src_data.end_of_input = SF_TRUE;
|
236
|
+
src_data.src_ratio = sr_ratio;
|
237
|
+
|
238
|
+
/* sample rate conversion */
|
239
|
+
if (error = src_process(src_state, &src_data)){
|
240
|
+
free(inbuffer);
|
241
|
+
free(outbuffer);
|
242
|
+
src_delete(src_state);
|
243
|
+
return NULL;
|
244
|
+
}
|
245
|
+
|
246
|
+
buflen = src_data.output_frames;
|
247
|
+
|
248
|
+
src_delete(src_state);
|
249
|
+
free(inbuffer);
|
250
|
+
|
251
|
+
return outbuffer;
|
252
|
+
}
|
253
|
+
|
254
|
+
|
255
|
+
float* ph_readaudio(const char *filename, int sr, int channels, float *sigbuf, int &buflen,\
|
256
|
+
const float nbsecs){
|
257
|
+
if(!filename || sr <= 0)
|
258
|
+
return NULL;
|
259
|
+
return ph_readaudio2(filename, sr, sigbuf, buflen, nbsecs);
|
260
|
+
}
|
261
|
+
|
262
|
+
uint32_t* ph_audiohash(float *buf, int N, int sr, int &nb_frames){
|
263
|
+
|
264
|
+
int frame_length = 4096;//2^12
|
265
|
+
int nfft = frame_length;
|
266
|
+
int nfft_half = 2048;
|
267
|
+
int start = 0;
|
268
|
+
int end = start + frame_length - 1;
|
269
|
+
int overlap = (int)(31*frame_length/32);
|
270
|
+
int advance = frame_length - overlap;
|
271
|
+
int index = 0;
|
272
|
+
nb_frames = (int)(floor(N/advance) - floor(frame_length/advance) + 1);
|
273
|
+
double window[frame_length];
|
274
|
+
for (int i = 0;i<frame_length;i++){
|
275
|
+
//hamming window
|
276
|
+
window[i] = 0.54 - 0.46*cos(2*M_PI*i/(frame_length-1));
|
277
|
+
}
|
278
|
+
|
279
|
+
double frame[frame_length];
|
280
|
+
//fftw_complex *pF;
|
281
|
+
//fftw_plan p;
|
282
|
+
//pF = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*nfft);
|
283
|
+
complex double *pF = (complex double*)malloc(sizeof(complex double)*nfft);
|
284
|
+
|
285
|
+
double magnF[nfft_half];
|
286
|
+
double maxF = 0.0;
|
287
|
+
double maxB = 0.0;
|
288
|
+
|
289
|
+
double minfreq = 300;
|
290
|
+
double maxfreq = 3000;
|
291
|
+
double minbark = 6*asinh(minfreq/600.0);
|
292
|
+
double maxbark = 6*asinh(maxfreq/600.0);
|
293
|
+
double nyqbark = maxbark - minbark;
|
294
|
+
int nfilts = 33;
|
295
|
+
double stepbarks = nyqbark/(nfilts - 1);
|
296
|
+
int nb_barks = (int)(floor(nfft_half/2 + 1));
|
297
|
+
double barkwidth = 1.06;
|
298
|
+
|
299
|
+
double freqs[nb_barks];
|
300
|
+
double binbarks[nb_barks];
|
301
|
+
double curr_bark[nfilts];
|
302
|
+
double prev_bark[nfilts];
|
303
|
+
for (int i=0;i< nfilts;i++){
|
304
|
+
prev_bark[i] = 0.0;
|
305
|
+
}
|
306
|
+
uint32_t *hash = (uint32_t*)malloc(nb_frames*sizeof(uint32_t));
|
307
|
+
double lof,hif;
|
308
|
+
|
309
|
+
for (int i=0; i < nb_barks;i++){
|
310
|
+
binbarks[i] = 6*asinh(i*sr/nfft_half/600.0);
|
311
|
+
freqs[i] = i*sr/nfft_half;
|
312
|
+
}
|
313
|
+
double **wts = new double*[nfilts];
|
314
|
+
for (int i=0;i<nfilts;i++){
|
315
|
+
wts[i] = new double[nfft_half];
|
316
|
+
}
|
317
|
+
for (int i=0;i<nfilts;i++){
|
318
|
+
for (int j=0;j<nfft_half;j++){
|
319
|
+
wts[i][j] = 0.0;
|
320
|
+
}
|
321
|
+
}
|
322
|
+
|
323
|
+
//calculate wts for each filter
|
324
|
+
for (int i=0;i<nfilts;i++){
|
325
|
+
double f_bark_mid = minbark + i*stepbarks;
|
326
|
+
for (int j=0;j<nb_barks;j++){
|
327
|
+
double barkdiff = binbarks[j] - f_bark_mid;
|
328
|
+
lof = -2.5*(barkdiff/barkwidth - 0.5);
|
329
|
+
hif = barkdiff/barkwidth + 0.5;
|
330
|
+
double m = std::min(lof,hif);
|
331
|
+
m = std::min(0.0,m);
|
332
|
+
m = pow(10,m);
|
333
|
+
wts[i][j] = m;
|
334
|
+
}
|
335
|
+
}
|
336
|
+
|
337
|
+
//p = fftw_plan_dft_r2c_1d(frame_length,frame,pF,FFTW_ESTIMATE);
|
338
|
+
|
339
|
+
while (end < N){
|
340
|
+
maxF = 0.0;
|
341
|
+
maxB = 0.0;
|
342
|
+
for (int i = 0;i<frame_length;i++){
|
343
|
+
frame[i] = window[i]*buf[start+i];
|
344
|
+
}
|
345
|
+
//fftw_execute(p);
|
346
|
+
if (fft(frame, frame_length, pF) < 0){
|
347
|
+
return NULL;
|
348
|
+
}
|
349
|
+
for (int i=0; i < nfft_half;i++){
|
350
|
+
//magnF[i] = sqrt(pF[i][0]*pF[i][0] + pF[i][1]*pF[i][1] );
|
351
|
+
magnF[i] = cabs(pF[i]);
|
352
|
+
if (magnF[i] > maxF){
|
353
|
+
maxF = magnF[i];
|
354
|
+
}
|
355
|
+
}
|
356
|
+
|
357
|
+
for (int i=0;i<nfilts;i++){
|
358
|
+
curr_bark[i] = 0;
|
359
|
+
for (int j=0;j < nfft_half;j++){
|
360
|
+
curr_bark[i] += wts[i][j]*magnF[j];
|
361
|
+
}
|
362
|
+
if (curr_bark[i] > maxB)
|
363
|
+
maxB = curr_bark[i];
|
364
|
+
}
|
365
|
+
|
366
|
+
uint32_t curr_hash = 0x00000000u;
|
367
|
+
for (int m=0;m<nfilts-1;m++){
|
368
|
+
double H = curr_bark[m] - curr_bark[m+1] - (prev_bark[m] - prev_bark[m+1]);
|
369
|
+
curr_hash = curr_hash << 1;
|
370
|
+
if (H > 0)
|
371
|
+
curr_hash |= 0x00000001;
|
372
|
+
}
|
373
|
+
|
374
|
+
|
375
|
+
hash[index] = curr_hash;
|
376
|
+
for (int i=0;i<nfilts;i++){
|
377
|
+
prev_bark[i] = curr_bark[i];
|
378
|
+
}
|
379
|
+
index += 1;
|
380
|
+
start += advance;
|
381
|
+
end += advance;
|
382
|
+
}
|
383
|
+
|
384
|
+
|
385
|
+
|
386
|
+
//fftw_destroy_plan(p);
|
387
|
+
//fftw_free(pF);
|
388
|
+
free(pF);
|
389
|
+
for (int i=0;i<nfilts;i++){
|
390
|
+
delete [] wts[i];
|
391
|
+
}
|
392
|
+
delete [] wts;
|
393
|
+
return hash;
|
394
|
+
}
|
395
|
+
|
396
|
+
|
397
|
+
/*
 * Count the bits set in a 32-bit word.
 *
 * Neighbouring fields are added in parallel: first bit pairs, then nibbles,
 * then bytes. The four byte totals are finally summed by a multiplication
 * that gathers them in the top byte.
 */
int ph_bitcount(uint32_t n){
    const uint32_t every_other_bit = 0x55555555u;
    const uint32_t every_other_pair = 0x33333333u;
    const uint32_t low_nibbles = 0x0F0F0F0Fu;

    uint32_t v = n;
    v = ((v >> 1) & every_other_bit) + (v & every_other_bit);
    v = ((v >> 2) & every_other_pair) + (v & every_other_pair);
    v = ((v >> 4) & low_nibbles) + (v & low_nibbles);

    /* each byte holds 0..8, so the sum (at most 32) cannot carry out of the top byte */
    return (int)((v * 0x01010101u) >> 24);
}
|
410
|
+
|
411
|
+
double ph_compare_blocks(const uint32_t *ptr_blockA,const uint32_t *ptr_blockB, const int block_size){
|
412
|
+
double result = 0;
|
413
|
+
for (int i=0;i<block_size;i++){
|
414
|
+
uint32_t xordhash = ptr_blockA[i]^ptr_blockB[i];
|
415
|
+
result += ph_bitcount(xordhash);
|
416
|
+
}
|
417
|
+
result = result/(32*block_size);
|
418
|
+
return result;
|
419
|
+
}
|
420
|
+
double* ph_audio_distance_ber(uint32_t *hash_a , const int Na, uint32_t *hash_b, const int Nb, const float threshold, const int block_size, int &Nc){
|
421
|
+
|
422
|
+
uint32_t *ptrA, *ptrB;
|
423
|
+
int N1, N2;
|
424
|
+
if (Na <= Nb){
|
425
|
+
ptrA = hash_a;
|
426
|
+
ptrB = hash_b;
|
427
|
+
Nc = Nb - Na + 1;
|
428
|
+
N1 = Na;
|
429
|
+
N2 = Nb;
|
430
|
+
} else {
|
431
|
+
ptrB = hash_a;
|
432
|
+
ptrA = hash_b;
|
433
|
+
Nc = Na - Nb + 1;
|
434
|
+
N1 = Nb;
|
435
|
+
N2 = Na;
|
436
|
+
}
|
437
|
+
|
438
|
+
double *pC = new double[Nc];
|
439
|
+
if (!pC)
|
440
|
+
return NULL;
|
441
|
+
int k,M,nb_above, nb_below, hash1_index,hash2_index;
|
442
|
+
double sum_above, sum_below,above_factor, below_factor;
|
443
|
+
|
444
|
+
uint32_t *pha,*phb;
|
445
|
+
double *dist = NULL;
|
446
|
+
|
447
|
+
for (int i=0; i < Nc;i++){
|
448
|
+
|
449
|
+
M = (int)floor(std::min(N1,N2-i)/block_size);
|
450
|
+
|
451
|
+
pha = ptrA;
|
452
|
+
phb = ptrB + i;
|
453
|
+
|
454
|
+
double *tmp_dist = (double*)realloc(dist, M*sizeof(double));
|
455
|
+
if (!tmp_dist){
|
456
|
+
return NULL;
|
457
|
+
}
|
458
|
+
dist = tmp_dist;
|
459
|
+
dist[0] = ph_compare_blocks(pha,phb,block_size);
|
460
|
+
|
461
|
+
k = 1;
|
462
|
+
|
463
|
+
pha += block_size;
|
464
|
+
phb += block_size;
|
465
|
+
|
466
|
+
hash1_index = block_size;
|
467
|
+
hash2_index = i + block_size;
|
468
|
+
|
469
|
+
while ((hash1_index < N1 - block_size) && (hash2_index < N2 - block_size)){
|
470
|
+
dist[k++] = ph_compare_blocks(pha,phb,block_size);
|
471
|
+
hash1_index += block_size;
|
472
|
+
hash2_index += block_size;
|
473
|
+
pha += block_size;
|
474
|
+
phb += block_size;
|
475
|
+
}
|
476
|
+
sum_above = 0;
|
477
|
+
sum_below = 0;
|
478
|
+
nb_above = 0;
|
479
|
+
nb_below = 0;
|
480
|
+
for (int n = 0; n < M; n++){
|
481
|
+
|
482
|
+
if (dist[n] <= threshold){
|
483
|
+
sum_below += 1-dist[n];
|
484
|
+
nb_below++;
|
485
|
+
} else {
|
486
|
+
sum_above += 1-dist[n];
|
487
|
+
nb_above++;
|
488
|
+
}
|
489
|
+
}
|
490
|
+
above_factor = sum_above/M;
|
491
|
+
below_factor = sum_below/M;
|
492
|
+
pC[i] = 0.5*(1 + below_factor - above_factor);
|
493
|
+
}
|
494
|
+
|
495
|
+
free(dist);
|
496
|
+
return pC;
|
497
|
+
}
|
498
|
+
#ifdef HAVE_PTHREAD
|
499
|
+
|
500
|
+
void *ph_audio_thread(void *p)
|
501
|
+
{
|
502
|
+
slice *s = (slice *)p;
|
503
|
+
for(int i = 0; i < s->n; ++i)
|
504
|
+
{
|
505
|
+
DP *dp = (DP *)s->hash_p[i];
|
506
|
+
int N, count;
|
507
|
+
pair<int,int> *p = (pair<int,int> *)s->hash_params;
|
508
|
+
float *buf = ph_readaudio(dp->id, p->first, p->second, NULL, N);
|
509
|
+
uint32_t *hash = ph_audiohash(buf, N, p->first, count);
|
510
|
+
free(buf);
|
511
|
+
buf = NULL;
|
512
|
+
dp->hash = hash;
|
513
|
+
dp->hash_length = count;
|
514
|
+
}
|
515
|
+
}
|
516
|
+
|
517
|
+
/*
 * Hash a list of audio files on several threads.
 *
 * files    - array of `count` file paths.
 * count    - number of files.
 * sr       - sample rate every file is resampled to before hashing.
 * channels - forwarded to the reader through the slice parameters.
 * threads  - number of worker threads; <= 0 asks ph_num_threads().
 *            The number actually used is clamped to the range [1, count].
 *
 * Returns an array of `count` data points allocated with malloc, each with
 * id set to a strdup'ed copy of the path and hash / hash_length filled in by
 * the workers (hash is NULL for a file that could not be processed).
 * Returns NULL when files is NULL, count is not positive, or an allocation
 * fails.
 */
DP** ph_audio_hashes(char *files[], int count, int sr, int channels, int threads)
{
    if(!files || count <= 0)
        return NULL;

    int num_threads = (threads > 0) ? threads : ph_num_threads();
    if(num_threads > count)
        num_threads = count;
    if(num_threads < 1)
        num_threads = 1;

    DP **hashes = (DP**)malloc(count*sizeof(DP*));
    if(!hashes)
        return NULL;

    for(int i = 0; i < count; ++i)
    {
        hashes[i] = (DP *)calloc(1, sizeof(DP));
        if(hashes[i])
            hashes[i]->id = strdup(files[i]);
        if(!hashes[i] || !hashes[i]->id)
        {
            /* undo everything allocated so far */
            for(int j = 0; j <= i; ++j)
            {
                if(hashes[j])
                {
                    free(hashes[j]->id);
                    free(hashes[j]);
                }
            }
            free(hashes);
            return NULL;
        }
    }

    pthread_t *thds = new pthread_t[num_threads];
    bool *started = new bool[num_threads];
    slice *s = new slice[num_threads];

    /* Every slice gets count/num_threads files and the first
       count % num_threads slices take one more, so the sizes add up to
       exactly count. */
    const int base = count / num_threads;
    const int extra = count % num_threads;
    int start = 0;
    for(int n = 0; n < num_threads; ++n)
    {
        const int len = base + (n < extra ? 1 : 0);

        s[n].hash_p = &hashes[start];
        s[n].n = len;
        s[n].hash_params = new pair<int,int>(sr,channels);
        start += len;

        started[n] = (pthread_create(&thds[n], NULL, ph_audio_thread, &s[n]) == 0);
        if(!started[n])
        {
            /* no thread available: do this slice's work on the calling thread */
            ph_audio_thread(&s[n]);
        }
    }
    for(int i = 0; i < num_threads; ++i)
    {
        if(started[i])
            pthread_join(thds[i], NULL);
        delete (pair<int,int>*)s[i].hash_params;
    }
    delete[] s;
    delete[] started;
    delete[] thds;

    return hashes;

}
|
571
|
+
#endif
|