hmsearch-postgres 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.travis.yml +0 -10
- data/README.md +4 -0
- data/Rakefile +50 -11
- data/ext/hmsearch/extconf.rb +2 -9
- data/ext/hmsearch/hmsearch.cc +104 -92
- data/hmsearch-postgres.gemspec +1 -1
- data/test/hmsearch_test.rb +20 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ebd6e4d5fd861317d794ccfd7fc135ae815c157
|
4
|
+
data.tar.gz: 08f0655edbc17a90762046442c155b643e36ced6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 081e81c4357ff8ffc56ed28033c7f83319b1a981e7a6b754700b9c52ffc923e988dec484e832df7d0ea6a6429eb22b5968cd4a32e3ba75dfe5d8a39577ed656e
|
7
|
+
data.tar.gz: af5f422032d83d3e9437f5942aeb60f4e81355d0fc4698d22f2af5945f2204367af6566664e438623718551ed1cbc33ad5c409c4768af7804489989d2020df94
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,13 +1,3 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
3
|
- 2.1.5
|
4
|
-
before_install:
|
5
|
-
- mkdir -p $TRAVIS_BUILD_DIR/vendor
|
6
|
-
- curl -O http://pqxx.org/download/software/libpqxx/libpqxx-4.0.1.tar.gz
|
7
|
-
- tar -xvf libpqxx-4.0.1.tar.gz
|
8
|
-
- cd libpqxx-4.0.1
|
9
|
-
- ./configure --prefix=$TRAVIS_BUILD_DIR/vendor
|
10
|
-
- make
|
11
|
-
- make install
|
12
|
-
- cd ..
|
13
|
-
- bundle config --local build.hmsearch-postgres -- --with-pqxx-include=$TRAVIS_BUILD_DIR/vendor/include\ --with-pqxx-lib=$TRAVIS_BUILD_DIR/vendor/lib
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -1,26 +1,65 @@
|
|
1
1
|
require 'rake/clean'
|
2
|
+
require 'rbconfig'
|
2
3
|
|
3
|
-
|
4
|
+
$root_dir = File.expand_path('..', __FILE__)
|
5
|
+
$build_dir = File.join($root_dir, 'build')
|
6
|
+
$vendor_dir = File.join($build_dir, 'vendor')
|
7
|
+
$lib_dir = File.join($root_dir, 'lib/hmsearch')
|
8
|
+
$extconf = File.join($root_dir, 'ext/hmsearch/extconf.rb')
|
9
|
+
$libpqxx_url = "http://pqxx.org/download/software/libpqxx/libpqxx-4.0.1.tar.gz"
|
10
|
+
$libpqxx_tar = "#{$build_dir}/libpqxx-4.0.1.tar.gz"
|
11
|
+
$libpqxx_dir = "#{$build_dir}/libpqxx-4.0.1"
|
12
|
+
$libpqxx_header = "#{$vendor_dir}/include/pqxx/pqxx"
|
13
|
+
$libpqxx_include = "#{$vendor_dir}/include"
|
14
|
+
$libpqxx_lib = "#{$vendor_dir}/lib"
|
15
|
+
$dlext = RbConfig::CONFIG['DLEXT']
|
16
|
+
$build_ext_dylib = "#{$build_dir}/postgres_ext.#{$dlext}"
|
17
|
+
$lib_ext_dylib = "#{$lib_dir}/postgres_ext.#{$dlext}"
|
4
18
|
|
5
|
-
|
19
|
+
CLEAN.include($build_dir)
|
20
|
+
CLOBBER.include($lib_ext_dylib)
|
6
21
|
|
7
22
|
task default: :test
|
8
23
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
24
|
+
directory $build_dir
|
25
|
+
|
26
|
+
directory $vendor_dir
|
27
|
+
|
28
|
+
file $libpqxx_tar => $build_dir do
|
29
|
+
chdir $build_dir do
|
30
|
+
sh "curl -O #{$libpqxx_url}"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
file $libpqxx_dir => $libpqxx_tar do
|
35
|
+
chdir $build_dir do
|
36
|
+
sh 'tar xf libpqxx-4.0.1.tar.gz'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
file $libpqxx_header => [$vendor_dir, $libpqxx_dir] do
|
41
|
+
chdir $libpqxx_dir do
|
42
|
+
sh "./configure --enable-shared --prefix=#{$vendor_dir}"
|
14
43
|
sh 'make'
|
44
|
+
sh 'make install'
|
15
45
|
end
|
46
|
+
end
|
16
47
|
|
17
|
-
|
18
|
-
|
19
|
-
|
48
|
+
file $build_ext_dylib => $libpqxx_header do
|
49
|
+
chdir $build_dir do
|
50
|
+
sh "ruby ../ext/hmsearch/extconf.rb -- --with-pqxx-include=#{$libpqxx_include}\ --with-pqxx-lib=#{$libpqxx_lib}"
|
51
|
+
sh 'make'
|
52
|
+
end
|
20
53
|
end
|
21
54
|
|
55
|
+
file $lib_ext_dylib => $build_ext_dylib do
|
56
|
+
cp $build_ext_dylib, $lib_ext_dylib
|
57
|
+
end
|
58
|
+
|
59
|
+
task build: $lib_ext_dylib
|
60
|
+
|
22
61
|
desc 'run tests'
|
23
|
-
task test:
|
62
|
+
task test: $lib_ext_dylib do
|
24
63
|
$:.unshift('lib')
|
25
64
|
FileList.new('./test/*_test.rb').each do |test|
|
26
65
|
load test
|
data/ext/hmsearch/extconf.rb
CHANGED
@@ -5,14 +5,7 @@ dir_config('pqxx')
|
|
5
5
|
|
6
6
|
have_library('pqxx')
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
#include <string>
|
11
|
-
int main() {
|
12
|
-
std::to_string(0);
|
13
|
-
return 0;
|
14
|
-
}
|
15
|
-
CPP
|
16
|
-
end
|
8
|
+
$CPPFLAGS += ' --std=c++0x'
|
9
|
+
$LDFLAGS += ' --std=c++0x'
|
17
10
|
|
18
11
|
create_makefile("hmsearch/postgres_ext")
|
data/ext/hmsearch/hmsearch.cc
CHANGED
@@ -38,6 +38,18 @@
|
|
38
38
|
|
39
39
|
#include "hmsearch.h"
|
40
40
|
|
41
|
+
#include <sstream>
|
42
|
+
|
43
|
+
namespace patch
|
44
|
+
{
|
45
|
+
template < typename T > std::string to_string( const T& n )
|
46
|
+
{
|
47
|
+
std::ostringstream stm ;
|
48
|
+
stm << n ;
|
49
|
+
return stm.str() ;
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
41
53
|
/** The actual implementation of the HmSearch database.
|
42
54
|
*
|
43
55
|
* A difference between this implementation and the HmSearch algorithm
|
@@ -74,24 +86,24 @@ public:
|
|
74
86
|
{
|
75
87
|
_db = new pqxx::connection(connstr);
|
76
88
|
}
|
77
|
-
|
89
|
+
|
78
90
|
~HmSearchImpl() {
|
79
91
|
close();
|
80
92
|
}
|
81
|
-
|
93
|
+
|
82
94
|
bool insert(const hash_string& hash,
|
83
95
|
std::string* error_msg = NULL);
|
84
|
-
|
96
|
+
|
85
97
|
bool print_copystring(const hash_string& hash,
|
86
98
|
std::string* error_msg = NULL);
|
87
|
-
|
99
|
+
|
88
100
|
bool lookup(const hash_string& query,
|
89
101
|
LookupResultList& result,
|
90
102
|
int max_error = -1,
|
91
103
|
std::string* error_msg = NULL);
|
92
|
-
|
104
|
+
|
93
105
|
bool close(std::string* error_msg = NULL);
|
94
|
-
|
106
|
+
|
95
107
|
private:
|
96
108
|
struct Candidate {
|
97
109
|
Candidate() : matches(0), first_match(0), second_match(0) {}
|
@@ -99,18 +111,18 @@ private:
|
|
99
111
|
int first_match;
|
100
112
|
int second_match;
|
101
113
|
};
|
102
|
-
|
114
|
+
|
103
115
|
typedef std::map<hash_string, Candidate> CandidateMap;
|
104
|
-
|
116
|
+
|
105
117
|
void get_candidates(const hash_string& query, CandidateMap& candidates);
|
106
118
|
void add_hash_candidates(CandidateMap& candidates, int match,
|
107
119
|
const uint8_t* hashes, size_t length);
|
108
120
|
hash_string get_multiple_keys(uint8_t *key, int partition);
|
109
121
|
bool valid_candidate(const Candidate& candidate);
|
110
122
|
int hamming_distance(const hash_string& query, const hash_string& hash);
|
111
|
-
|
123
|
+
|
112
124
|
int get_partition_key(const hash_string& hash, int partition, uint8_t *key);
|
113
|
-
|
125
|
+
|
114
126
|
pqxx::connection *_db;
|
115
127
|
int _hash_bits;
|
116
128
|
int _max_error;
|
@@ -118,7 +130,7 @@ private:
|
|
118
130
|
int _partitions;
|
119
131
|
int _partition_bits;
|
120
132
|
int _partition_bytes;
|
121
|
-
|
133
|
+
|
122
134
|
static int one_bits[256];
|
123
135
|
};
|
124
136
|
|
@@ -133,35 +145,35 @@ bool HmSearch::init(const std::string& path,
|
|
133
145
|
error_msg = &dummy;
|
134
146
|
}
|
135
147
|
*error_msg = "";
|
136
|
-
|
148
|
+
|
137
149
|
if (hash_bits == 0 || (hash_bits & 7)) {
|
138
150
|
*error_msg = "invalid hash_bits value";
|
139
151
|
return false;
|
140
152
|
}
|
141
|
-
|
153
|
+
|
142
154
|
if (max_error == 0 || max_error >= hash_bits || max_error > 518) {
|
143
155
|
*error_msg = "invalid max_error value";
|
144
156
|
return false;
|
145
157
|
}
|
146
|
-
|
158
|
+
|
147
159
|
pqxx::connection db(path);
|
148
160
|
if (!db.is_open()) {
|
149
161
|
*error_msg = "Can't open database";
|
150
162
|
return false;
|
151
163
|
}
|
152
|
-
|
164
|
+
|
153
165
|
std::string sql;
|
154
166
|
sql = "INSERT INTO config VALUES ($1, $2)";
|
155
167
|
db.prepare("hash_max", sql);
|
156
|
-
|
168
|
+
|
157
169
|
pqxx::work W(db);
|
158
170
|
sql = "CREATE TABLE IF NOT EXISTS config ("\
|
159
171
|
" hash_bits int,"\
|
160
172
|
" max_error int); TRUNCATE config";
|
161
173
|
W.exec(sql);
|
162
|
-
|
174
|
+
|
163
175
|
W.prepared("hash_max")(hash_bits)(max_error).exec();
|
164
|
-
|
176
|
+
|
165
177
|
for (unsigned int i = 0; i < ((max_error + 3) / 2); i++) {
|
166
178
|
{
|
167
179
|
std::stringstream s;
|
@@ -170,24 +182,24 @@ bool HmSearch::init(const std::string& path,
|
|
170
182
|
<< " key bytea); TRUNCATE partition" << i;
|
171
183
|
W.exec(s.str());
|
172
184
|
}
|
173
|
-
|
185
|
+
|
174
186
|
{
|
175
187
|
std::stringstream s;
|
176
188
|
s << "DROP INDEX IF EXISTS ix_key_" << i;
|
177
189
|
W.exec(s.str());
|
178
190
|
}
|
179
|
-
|
191
|
+
|
180
192
|
{
|
181
193
|
std::stringstream s;
|
182
194
|
s << "CREATE INDEX ix_key_" << i << " ON partition" << i << "(key)";
|
183
195
|
W.exec(s.str());
|
184
196
|
}
|
185
197
|
}
|
186
|
-
|
198
|
+
|
187
199
|
W.commit();
|
188
|
-
|
200
|
+
|
189
201
|
db.disconnect();
|
190
|
-
|
202
|
+
|
191
203
|
return true;
|
192
204
|
}
|
193
205
|
|
@@ -196,37 +208,37 @@ HmSearch* HmSearch::open(const std::string& path,
|
|
196
208
|
std::string* error_msg)
|
197
209
|
{
|
198
210
|
std::string dummy;
|
199
|
-
|
211
|
+
|
200
212
|
if (!error_msg) {
|
201
213
|
error_msg = &dummy;
|
202
214
|
}
|
203
215
|
*error_msg = "";
|
204
|
-
|
216
|
+
|
205
217
|
try {
|
206
218
|
pqxx::connection db(path);
|
207
|
-
|
219
|
+
|
208
220
|
std::string sql;
|
209
|
-
|
221
|
+
|
210
222
|
sql = "SELECT max_error, hash_bits FROM config";
|
211
223
|
pqxx::nontransaction n(db);
|
212
224
|
pqxx::result res(n.exec(sql));
|
213
|
-
|
225
|
+
|
214
226
|
pqxx::result::const_iterator c = res.begin(); // We retrieve just one row
|
215
|
-
|
227
|
+
|
216
228
|
unsigned long hash_bits, max_error;
|
217
229
|
max_error = c[0].as<long>();
|
218
230
|
hash_bits = c[1].as<long>();
|
219
|
-
|
231
|
+
|
220
232
|
db.disconnect();
|
221
|
-
|
233
|
+
|
222
234
|
HmSearch* hm = new HmSearchImpl(path, (int)hash_bits, (int)max_error);
|
223
235
|
if (!hm) {
|
224
236
|
*error_msg = "out of memory";
|
225
237
|
return NULL;
|
226
238
|
}
|
227
|
-
|
239
|
+
|
228
240
|
return hm;
|
229
|
-
|
241
|
+
|
230
242
|
}
|
231
243
|
catch (const pqxx::broken_connection& e) {
|
232
244
|
*error_msg = e.what();
|
@@ -239,33 +251,33 @@ HmSearch::hash_string HmSearch::parse_hexhash(const std::string& hexhash)
|
|
239
251
|
{
|
240
252
|
int len = (int)hexhash.length() / 2;
|
241
253
|
uint8_t hash[len];
|
242
|
-
|
254
|
+
|
243
255
|
for (int i = 0; i < len; i++) {
|
244
256
|
char buf[3];
|
245
257
|
char* err;
|
246
|
-
|
258
|
+
|
247
259
|
buf[0] = hexhash[i * 2];
|
248
260
|
buf[1] = hexhash[i * 2 + 1];
|
249
261
|
buf[2] = 0;
|
250
|
-
|
262
|
+
|
251
263
|
hash[i] = strtoul(buf, &err, 16);
|
252
|
-
|
264
|
+
|
253
265
|
if (*err != '\0') {
|
254
266
|
return hash_string();
|
255
267
|
}
|
256
268
|
}
|
257
|
-
|
269
|
+
|
258
270
|
return hash_string(hash, len);
|
259
271
|
}
|
260
272
|
|
261
273
|
std::string HmSearch::format_hexhash(const HmSearch::hash_string& hash)
|
262
274
|
{
|
263
275
|
char hex[hash.length() * 2 + 1];
|
264
|
-
|
276
|
+
|
265
277
|
for (size_t i = 0; i < hash.length(); i++) {
|
266
278
|
sprintf(hex + 2 * i, "%02x", hash[i]);
|
267
279
|
}
|
268
|
-
|
280
|
+
|
269
281
|
return hex;
|
270
282
|
}
|
271
283
|
|
@@ -278,26 +290,26 @@ bool HmSearchImpl::print_copystring(const hash_string& hash,
|
|
278
290
|
error_msg = &dummy;
|
279
291
|
}
|
280
292
|
*error_msg = "";
|
281
|
-
|
293
|
+
|
282
294
|
if (hash.length() != (size_t) _hash_bytes) {
|
283
295
|
*error_msg = "incorrect hash length";
|
284
296
|
return false;
|
285
297
|
}
|
286
|
-
|
298
|
+
|
287
299
|
for (int i = 0; i < _partitions; i++) {
|
288
300
|
uint8_t key[_partition_bytes];
|
289
|
-
|
301
|
+
|
290
302
|
get_partition_key(hash, i, key);
|
291
|
-
|
303
|
+
|
292
304
|
std::cout << "\\\\x" << format_hexhash(hash)
|
293
305
|
<< " "
|
294
306
|
<< int(i)
|
295
307
|
<< " "
|
296
308
|
<< "\\\\x" << format_hexhash(hash_string(key, _partition_bytes))
|
297
309
|
<< std::endl;
|
298
|
-
|
310
|
+
|
299
311
|
}
|
300
|
-
|
312
|
+
|
301
313
|
return true;
|
302
314
|
}
|
303
315
|
|
@@ -309,37 +321,37 @@ bool HmSearchImpl::insert(const hash_string& hash,
|
|
309
321
|
error_msg = &dummy;
|
310
322
|
}
|
311
323
|
*error_msg = "";
|
312
|
-
|
324
|
+
|
313
325
|
if (hash.length() != (size_t) _hash_bytes) {
|
314
326
|
*error_msg = "incorrect hash length";
|
315
327
|
return false;
|
316
328
|
}
|
317
|
-
|
329
|
+
|
318
330
|
if (!_db->is_open()) {
|
319
331
|
*error_msg = "database is closed";
|
320
332
|
return false;
|
321
333
|
}
|
322
|
-
|
334
|
+
|
323
335
|
for (int i = 0; i < _partitions; i++) {
|
324
336
|
std::stringstream s;
|
325
337
|
s << "INSERT INTO partition" << i
|
326
338
|
<< " VALUES ($1, $2)";
|
327
|
-
_db->prepare("insert_"+
|
339
|
+
_db->prepare("insert_"+patch::to_string(static_cast<long long>(i)), s.str());
|
328
340
|
}
|
329
341
|
pqxx::work W(*_db);
|
330
342
|
for (int i = 0; i < _partitions; i++) {
|
331
343
|
uint8_t key[_partition_bytes];
|
332
|
-
|
344
|
+
|
333
345
|
get_partition_key(hash, i, key);
|
334
|
-
|
346
|
+
|
335
347
|
pqxx::binarystring key_blob(key, _partition_bytes);
|
336
348
|
pqxx::binarystring hash_blob(hash.data(), hash.length());
|
337
|
-
|
338
|
-
W.prepared("insert_"+
|
349
|
+
|
350
|
+
W.prepared("insert_"+patch::to_string(static_cast<long long>(i)))(hash_blob)(key_blob).exec();
|
339
351
|
}
|
340
|
-
|
352
|
+
|
341
353
|
W.commit();
|
342
|
-
|
354
|
+
|
343
355
|
return true;
|
344
356
|
}
|
345
357
|
|
@@ -354,25 +366,25 @@ bool HmSearchImpl::lookup(const hash_string& query,
|
|
354
366
|
error_msg = &dummy;
|
355
367
|
}
|
356
368
|
*error_msg = "";
|
357
|
-
|
369
|
+
|
358
370
|
if (query.length() != (size_t) _hash_bytes) {
|
359
371
|
*error_msg = "incorrect hash length";
|
360
372
|
return false;
|
361
373
|
}
|
362
|
-
|
374
|
+
|
363
375
|
if (!_db->is_open()) {
|
364
376
|
*error_msg = "database is closed";
|
365
377
|
return false;
|
366
378
|
}
|
367
|
-
|
379
|
+
|
368
380
|
try {
|
369
381
|
CandidateMap candidates;
|
370
382
|
get_candidates(query, candidates);
|
371
|
-
|
383
|
+
|
372
384
|
for (CandidateMap::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
|
373
385
|
if (valid_candidate(i->second)) {
|
374
386
|
int distance = hamming_distance(query, i->first);
|
375
|
-
|
387
|
+
|
376
388
|
if (distance <= _max_error
|
377
389
|
&& (reduced_error < 0 || distance <= reduced_error)) {
|
378
390
|
result.push_back(LookupResult(i->first, distance));
|
@@ -384,7 +396,7 @@ bool HmSearchImpl::lookup(const hash_string& query,
|
|
384
396
|
*error_msg = e.base().what();
|
385
397
|
return false;
|
386
398
|
}
|
387
|
-
|
399
|
+
|
388
400
|
return true;
|
389
401
|
}
|
390
402
|
|
@@ -396,14 +408,14 @@ bool HmSearchImpl::close(std::string* error_msg)
|
|
396
408
|
error_msg = &dummy;
|
397
409
|
}
|
398
410
|
*error_msg = "";
|
399
|
-
|
411
|
+
|
400
412
|
if (!_db->is_open()) {
|
401
413
|
// Already closed
|
402
414
|
return true;
|
403
415
|
}
|
404
|
-
|
416
|
+
|
405
417
|
_db->disconnect();
|
406
|
-
|
418
|
+
|
407
419
|
return true;
|
408
420
|
}
|
409
421
|
|
@@ -413,16 +425,16 @@ HmSearchImpl::hash_string HmSearchImpl::get_multiple_keys(
|
|
413
425
|
int partition)
|
414
426
|
{
|
415
427
|
hash_string hashes;
|
416
|
-
|
428
|
+
|
417
429
|
pqxx::nontransaction n(*_db);
|
418
430
|
pqxx::binarystring key_blob(key, _partition_bytes);
|
419
|
-
pqxx::result res = n.prepared("select_"+
|
420
|
-
|
431
|
+
pqxx::result res = n.prepared("select_"+patch::to_string(static_cast<long long>(partition)))(key_blob).exec();
|
432
|
+
|
421
433
|
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
422
434
|
pqxx::binarystring hash_result(c[0]);
|
423
435
|
hashes.append(hash_string(hash_result.data(), hash_result.size()));
|
424
436
|
}
|
425
|
-
|
437
|
+
|
426
438
|
return hashes;
|
427
439
|
}
|
428
440
|
|
@@ -432,7 +444,7 @@ void HmSearchImpl::get_candidates(
|
|
432
444
|
{
|
433
445
|
uint8_t key[_partition_bytes];
|
434
446
|
memset(key, 0, _partition_bytes);
|
435
|
-
|
447
|
+
|
436
448
|
for (int i = 0; i < _partitions; i++) {
|
437
449
|
int psize = _hash_bits - i * _partition_bits;
|
438
450
|
if (psize > _partition_bits) {
|
@@ -441,7 +453,7 @@ void HmSearchImpl::get_candidates(
|
|
441
453
|
std::stringstream single;
|
442
454
|
single << "SELECT hash FROM partition" << i
|
443
455
|
<< " WHERE key=$1";
|
444
|
-
|
456
|
+
|
445
457
|
std::stringstream s;
|
446
458
|
s << "SELECT hash FROM partition"
|
447
459
|
<< i
|
@@ -456,28 +468,28 @@ void HmSearchImpl::get_candidates(
|
|
456
468
|
<< ".key = x.key";
|
457
469
|
std::string sql;
|
458
470
|
sql.append(s.str());
|
459
|
-
_db->prepare("select_multiple_"+
|
460
|
-
|
471
|
+
_db->prepare("select_multiple_"+patch::to_string(static_cast<long long>(i)), sql);
|
472
|
+
|
461
473
|
sql.clear();
|
462
474
|
sql.append(single.str());
|
463
|
-
_db->prepare("select_"+
|
475
|
+
_db->prepare("select_"+patch::to_string(static_cast<long long>(i)), sql);
|
464
476
|
}
|
465
477
|
for (int i = 0; i < _partitions; i++) {
|
466
478
|
hash_string hashes;
|
467
|
-
|
479
|
+
|
468
480
|
int bits = get_partition_key(query, i, key);
|
469
|
-
|
481
|
+
|
470
482
|
// Get exact matches
|
471
|
-
|
483
|
+
|
472
484
|
hashes = get_multiple_keys(key, i);
|
473
|
-
|
485
|
+
|
474
486
|
if (hashes.length() > 0) {
|
475
487
|
add_hash_candidates(candidates, 0, (const uint8_t*)hashes.data(), hashes.length());
|
476
488
|
}
|
477
|
-
|
489
|
+
|
478
490
|
// Get 1-variant matches
|
479
491
|
pqxx::nontransaction n(*_db);
|
480
|
-
pqxx::prepare::invocation prep = n.prepared("select_multiple_"+
|
492
|
+
pqxx::prepare::invocation prep = n.prepared("select_multiple_"+patch::to_string(static_cast<long long>(i)));
|
481
493
|
int pbyte = (i * _partition_bits) / 8;
|
482
494
|
int count = 0;
|
483
495
|
for (int pbit = i * _partition_bits; bits > 0; pbit++, bits--, count++) {
|
@@ -488,7 +500,7 @@ void HmSearchImpl::get_candidates(
|
|
488
500
|
key[pbit / 8 - pbyte] ^= flip;
|
489
501
|
}
|
490
502
|
pqxx::result res = prep.exec();
|
491
|
-
|
503
|
+
|
492
504
|
hashes.clear();
|
493
505
|
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
494
506
|
pqxx::binarystring hash_result(c[0]);
|
@@ -506,7 +518,7 @@ void HmSearchImpl::add_hash_candidates(
|
|
506
518
|
for (size_t n = 0; n < length; n += _hash_bytes) {
|
507
519
|
hash_string hash = hash_string(hashes + n, _hash_bytes);
|
508
520
|
Candidate& cand = candidates[hash];
|
509
|
-
|
521
|
+
|
510
522
|
++cand.matches;
|
511
523
|
if (cand.matches == 1) {
|
512
524
|
cand.first_match = match;
|
@@ -536,7 +548,7 @@ bool HmSearchImpl::valid_candidate(
|
|
536
548
|
return false;
|
537
549
|
}
|
538
550
|
}
|
539
|
-
|
551
|
+
|
540
552
|
return true;
|
541
553
|
}
|
542
554
|
|
@@ -546,11 +558,11 @@ int HmSearchImpl::hamming_distance(
|
|
546
558
|
const HmSearchImpl::hash_string& hash)
|
547
559
|
{
|
548
560
|
int distance = 0;
|
549
|
-
|
561
|
+
|
550
562
|
for (size_t i = 0; i < query.length(); i++) {
|
551
563
|
distance += one_bits[query[i] ^ hash[i]];
|
552
564
|
}
|
553
|
-
|
565
|
+
|
554
566
|
return distance;
|
555
567
|
}
|
556
568
|
|
@@ -558,31 +570,31 @@ int HmSearchImpl::hamming_distance(
|
|
558
570
|
int HmSearchImpl::get_partition_key(const hash_string& hash, int partition, uint8_t *key)
|
559
571
|
{
|
560
572
|
int psize, hash_bit, bits_left;
|
561
|
-
|
573
|
+
|
562
574
|
psize = _hash_bits - partition * _partition_bits;
|
563
575
|
if (psize > _partition_bits) {
|
564
576
|
psize = _partition_bits;
|
565
577
|
}
|
566
|
-
|
578
|
+
|
567
579
|
// Copy bytes, masking out some bits at the start and end
|
568
580
|
bits_left = psize;
|
569
581
|
hash_bit = partition * _partition_bits;
|
570
|
-
|
582
|
+
|
571
583
|
for (int i = 0; i < _partition_bytes; i++) {
|
572
584
|
int byte = hash_bit / 8;
|
573
585
|
int bit = hash_bit % 8;
|
574
586
|
int bits = 8 - bit;
|
575
|
-
|
587
|
+
|
576
588
|
if (bits > bits_left) {
|
577
589
|
bits = bits_left;
|
578
590
|
}
|
579
|
-
|
591
|
+
|
580
592
|
bits_left -= bits;
|
581
593
|
hash_bit += bits;
|
582
|
-
|
594
|
+
|
583
595
|
key[i] = hash[byte] & (((1 << bits) - 1) << (8 - bit - bits));
|
584
596
|
}
|
585
|
-
|
597
|
+
|
586
598
|
return psize;
|
587
599
|
}
|
588
600
|
|
data/hmsearch-postgres.gemspec
CHANGED
data/test/hmsearch_test.rb
CHANGED
@@ -4,37 +4,43 @@ require 'pg'
|
|
4
4
|
|
5
5
|
class TestHmsearchPostgres < MiniTest::Unit::TestCase
|
6
6
|
def setup
|
7
|
-
conn = PG::Connection.open('host=localhost port=5432')
|
7
|
+
conn = PG::Connection.open('host=localhost port=5432 user=postgres')
|
8
8
|
conn.exec('drop database hmsearch_test') rescue nil
|
9
9
|
conn.exec('create database hmsearch_test')
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_hmsearch
|
13
|
-
HmSearch::Postgres.initdb('host=localhost port=5432 dbname=hmsearch_test', 256, 10, 100000000)
|
13
|
+
HmSearch::Postgres.initdb('host=localhost port=5432 user=postgres dbname=hmsearch_test', 256, 10, 100000000)
|
14
14
|
|
15
15
|
conn = HmSearch::Postgres.open('host=localhost port=5432 dbname=hmsearch_test');
|
16
16
|
|
17
17
|
conn.insert('6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df0')
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
conn.insert('6e6fb315fa8c43fe9c2687d5be14575a4d7252104236747d571b97e003563df0')
|
19
|
+
|
20
|
+
expected = [{
|
21
|
+
hash: '6e6fb315fa8c43fe9c2687d5be14575a4d7252104236747d571b97e003563df0',
|
22
|
+
distance: 7
|
23
|
+
},{
|
24
|
+
hash: '6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df0',
|
25
|
+
distance: 1
|
26
|
+
}]
|
27
|
+
|
28
|
+
actual = conn.lookup('6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df2')
|
29
|
+
|
25
30
|
conn.close
|
26
31
|
|
27
|
-
|
32
|
+
conn = nil
|
33
|
+
|
34
|
+
|
35
|
+
assert_equal(expected, actual)
|
28
36
|
|
29
|
-
|
30
|
-
|
37
|
+
HmSearch::Postgres.open('host=localhost port=5432 dbname=hmsearch_test') do |conn|
|
31
38
|
expected = [{
|
32
39
|
hash: '6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df0',
|
33
40
|
distance: 1
|
34
41
|
}]
|
35
|
-
|
42
|
+
actual = conn.lookup('6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df2', 1)
|
36
43
|
assert_equal(expected, actual)
|
37
|
-
|
38
44
|
end
|
39
45
|
end
|
40
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hmsearch-postgres
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Selden
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|