hmsearch-postgres 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/hmsearch/hmsearch.cc +139 -138
- data/hmsearch-postgres.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f4777db8ccdba08da153ddf1ce3bb44a5b74a7a
|
4
|
+
data.tar.gz: 15033367730bc27eb38137fe97b4e74d04512d5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 963af2c221832611a18146dbbb83230d1e758b780743d027a2e23b7aa8e4f756d486e2e2f7c96ff60057ed7041e54a58e1b1dc5084a15ef0d6c7d655cfef22b9
|
7
|
+
data.tar.gz: f5a49475ba134cb7dcbc87adfd382bd5c74668b5639c1dca3afb78d6823054b231cb36508d82164a4262e42669ec4e20b60942526aa63edab016dccfc5b6e5fb
|
data/ext/hmsearch/hmsearch.cc
CHANGED
@@ -2,19 +2,19 @@
|
|
2
2
|
*
|
3
3
|
* http://commonsmachinery.se/
|
4
4
|
* Distributed under an MIT license
|
5
|
-
*
|
5
|
+
*
|
6
6
|
* Copyright (c) 2014 Commons Machinery
|
7
|
-
*
|
7
|
+
*
|
8
8
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
9
|
* of this software and associated documentation files (the "Software"), to deal
|
10
10
|
* in the Software without restriction, including without limitation the rights
|
11
11
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
12
|
* copies of the Software, and to permit persons to whom the Software is
|
13
13
|
* furnished to do so, subject to the following conditions:
|
14
|
-
*
|
14
|
+
*
|
15
15
|
* The above copyright notice and this permission notice shall be included in all
|
16
16
|
* copies or substantial portions of the Software.
|
17
|
-
*
|
17
|
+
*
|
18
18
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
19
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
20
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
@@ -65,33 +65,33 @@ class HmSearchImpl : public HmSearch
|
|
65
65
|
{
|
66
66
|
public:
|
67
67
|
HmSearchImpl(std::string connstr, int hash_bits, int max_error)
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
68
|
+
: _hash_bits(hash_bits)
|
69
|
+
, _max_error(max_error)
|
70
|
+
, _hash_bytes((hash_bits + 7) / 8)
|
71
|
+
, _partitions((max_error + 3) / 2)
|
72
|
+
, _partition_bits(ceil((double)hash_bits / _partitions))
|
73
|
+
, _partition_bytes((_partition_bits + 7) / 8 + 1)
|
74
74
|
{
|
75
|
-
|
75
|
+
_db = new pqxx::connection(connstr);
|
76
76
|
}
|
77
|
-
|
77
|
+
|
78
78
|
~HmSearchImpl() {
|
79
79
|
close();
|
80
80
|
}
|
81
81
|
|
82
82
|
bool insert(const hash_string& hash,
|
83
83
|
std::string* error_msg = NULL);
|
84
|
-
|
84
|
+
|
85
85
|
bool print_copystring(const hash_string& hash,
|
86
|
-
|
87
|
-
|
86
|
+
std::string* error_msg = NULL);
|
87
|
+
|
88
88
|
bool lookup(const hash_string& query,
|
89
89
|
LookupResultList& result,
|
90
90
|
int max_error = -1,
|
91
91
|
std::string* error_msg = NULL);
|
92
|
-
|
92
|
+
|
93
93
|
bool close(std::string* error_msg = NULL);
|
94
|
-
|
94
|
+
|
95
95
|
private:
|
96
96
|
struct Candidate {
|
97
97
|
Candidate() : matches(0), first_match(0), second_match(0) {}
|
@@ -99,7 +99,7 @@ private:
|
|
99
99
|
int first_match;
|
100
100
|
int second_match;
|
101
101
|
};
|
102
|
-
|
102
|
+
|
103
103
|
typedef std::map<hash_string, Candidate> CandidateMap;
|
104
104
|
|
105
105
|
void get_candidates(const hash_string& query, CandidateMap& candidates);
|
@@ -110,7 +110,7 @@ private:
|
|
110
110
|
int hamming_distance(const hash_string& query, const hash_string& hash);
|
111
111
|
|
112
112
|
int get_partition_key(const hash_string& hash, int partition, uint8_t *key);
|
113
|
-
|
113
|
+
|
114
114
|
pqxx::connection *_db;
|
115
115
|
int _hash_bits;
|
116
116
|
int _max_error;
|
@@ -118,7 +118,7 @@ private:
|
|
118
118
|
int _partitions;
|
119
119
|
int _partition_bits;
|
120
120
|
int _partition_bytes;
|
121
|
-
|
121
|
+
|
122
122
|
static int one_bits[256];
|
123
123
|
};
|
124
124
|
|
@@ -133,61 +133,61 @@ bool HmSearch::init(const std::string& path,
|
|
133
133
|
error_msg = &dummy;
|
134
134
|
}
|
135
135
|
*error_msg = "";
|
136
|
-
|
136
|
+
|
137
137
|
if (hash_bits == 0 || (hash_bits & 7)) {
|
138
138
|
*error_msg = "invalid hash_bits value";
|
139
139
|
return false;
|
140
140
|
}
|
141
|
-
|
141
|
+
|
142
142
|
if (max_error == 0 || max_error >= hash_bits || max_error > 518) {
|
143
143
|
*error_msg = "invalid max_error value";
|
144
144
|
return false;
|
145
145
|
}
|
146
|
-
|
146
|
+
|
147
147
|
pqxx::connection db(path);
|
148
148
|
if (!db.is_open()) {
|
149
|
-
|
150
|
-
|
149
|
+
*error_msg = "Can't open database";
|
150
|
+
return false;
|
151
151
|
}
|
152
|
-
|
152
|
+
|
153
153
|
std::string sql;
|
154
154
|
sql = "INSERT INTO config VALUES ($1, $2)";
|
155
155
|
db.prepare("hash_max", sql);
|
156
|
-
|
156
|
+
|
157
157
|
pqxx::work W(db);
|
158
158
|
sql = "CREATE TABLE IF NOT EXISTS config ("\
|
159
|
-
|
160
|
-
|
159
|
+
" hash_bits int,"\
|
160
|
+
" max_error int); TRUNCATE config";
|
161
161
|
W.exec(sql);
|
162
|
-
|
162
|
+
|
163
163
|
W.prepared("hash_max")(hash_bits)(max_error).exec();
|
164
|
-
|
164
|
+
|
165
165
|
for (unsigned int i = 0; i < ((max_error + 3) / 2); i++) {
|
166
166
|
{
|
167
167
|
std::stringstream s;
|
168
168
|
s << "CREATE TABLE IF NOT EXISTS partition" << i << " ("
|
169
|
-
|
170
|
-
|
169
|
+
<< " hash bytea,"
|
170
|
+
<< " key bytea); TRUNCATE partition" << i;
|
171
171
|
W.exec(s.str());
|
172
172
|
}
|
173
|
-
|
173
|
+
|
174
174
|
{
|
175
175
|
std::stringstream s;
|
176
176
|
s << "DROP INDEX IF EXISTS ix_key_" << i;
|
177
177
|
W.exec(s.str());
|
178
178
|
}
|
179
|
-
|
179
|
+
|
180
180
|
{
|
181
181
|
std::stringstream s;
|
182
182
|
s << "CREATE INDEX ix_key_" << i << " ON partition" << i << "(key)";
|
183
183
|
W.exec(s.str());
|
184
184
|
}
|
185
185
|
}
|
186
|
-
|
186
|
+
|
187
187
|
W.commit();
|
188
|
-
|
188
|
+
|
189
189
|
db.disconnect();
|
190
|
-
|
190
|
+
|
191
191
|
return true;
|
192
192
|
}
|
193
193
|
|
@@ -196,37 +196,37 @@ HmSearch* HmSearch::open(const std::string& path,
|
|
196
196
|
std::string* error_msg)
|
197
197
|
{
|
198
198
|
std::string dummy;
|
199
|
-
|
199
|
+
|
200
200
|
if (!error_msg) {
|
201
201
|
error_msg = &dummy;
|
202
202
|
}
|
203
203
|
*error_msg = "";
|
204
|
-
|
204
|
+
|
205
205
|
try {
|
206
206
|
pqxx::connection db(path);
|
207
|
-
|
207
|
+
|
208
208
|
std::string sql;
|
209
|
-
|
209
|
+
|
210
210
|
sql = "SELECT max_error, hash_bits FROM config";
|
211
211
|
pqxx::nontransaction n(db);
|
212
212
|
pqxx::result res(n.exec(sql));
|
213
|
-
|
213
|
+
|
214
214
|
pqxx::result::const_iterator c = res.begin(); // We retrieve just one row
|
215
|
-
|
215
|
+
|
216
216
|
unsigned long hash_bits, max_error;
|
217
217
|
max_error = c[0].as<long>();
|
218
218
|
hash_bits = c[1].as<long>();
|
219
|
-
|
219
|
+
|
220
220
|
db.disconnect();
|
221
|
-
|
222
|
-
HmSearch* hm = new HmSearchImpl(path, hash_bits, max_error);
|
221
|
+
|
222
|
+
HmSearch* hm = new HmSearchImpl(path, (int)hash_bits, (int)max_error);
|
223
223
|
if (!hm) {
|
224
224
|
*error_msg = "out of memory";
|
225
225
|
return NULL;
|
226
226
|
}
|
227
|
-
|
227
|
+
|
228
228
|
return hm;
|
229
|
-
|
229
|
+
|
230
230
|
}
|
231
231
|
catch (const pqxx::broken_connection& e) {
|
232
232
|
*error_msg = e.what();
|
@@ -237,7 +237,7 @@ HmSearch* HmSearch::open(const std::string& path,
|
|
237
237
|
|
238
238
|
HmSearch::hash_string HmSearch::parse_hexhash(const std::string& hexhash)
|
239
239
|
{
|
240
|
-
int len = hexhash.length() / 2;
|
240
|
+
int len = (int)hexhash.length() / 2;
|
241
241
|
uint8_t hash[len];
|
242
242
|
|
243
243
|
for (int i = 0; i < len; i++) {
|
@@ -247,57 +247,57 @@ HmSearch::hash_string HmSearch::parse_hexhash(const std::string& hexhash)
|
|
247
247
|
buf[0] = hexhash[i * 2];
|
248
248
|
buf[1] = hexhash[i * 2 + 1];
|
249
249
|
buf[2] = 0;
|
250
|
-
|
250
|
+
|
251
251
|
hash[i] = strtoul(buf, &err, 16);
|
252
252
|
|
253
253
|
if (*err != '\0') {
|
254
254
|
return hash_string();
|
255
255
|
}
|
256
256
|
}
|
257
|
-
|
257
|
+
|
258
258
|
return hash_string(hash, len);
|
259
259
|
}
|
260
260
|
|
261
261
|
std::string HmSearch::format_hexhash(const HmSearch::hash_string& hash)
|
262
262
|
{
|
263
263
|
char hex[hash.length() * 2 + 1];
|
264
|
-
|
264
|
+
|
265
265
|
for (size_t i = 0; i < hash.length(); i++) {
|
266
266
|
sprintf(hex + 2 * i, "%02x", hash[i]);
|
267
267
|
}
|
268
|
-
|
268
|
+
|
269
269
|
return hex;
|
270
270
|
}
|
271
271
|
|
272
272
|
|
273
273
|
bool HmSearchImpl::print_copystring(const hash_string& hash,
|
274
|
-
|
274
|
+
std::string* error_msg)
|
275
275
|
{
|
276
276
|
std::string dummy;
|
277
277
|
if (!error_msg) {
|
278
278
|
error_msg = &dummy;
|
279
279
|
}
|
280
280
|
*error_msg = "";
|
281
|
-
|
281
|
+
|
282
282
|
if (hash.length() != (size_t) _hash_bytes) {
|
283
283
|
*error_msg = "incorrect hash length";
|
284
284
|
return false;
|
285
285
|
}
|
286
|
-
|
286
|
+
|
287
287
|
for (int i = 0; i < _partitions; i++) {
|
288
288
|
uint8_t key[_partition_bytes];
|
289
|
-
|
289
|
+
|
290
290
|
get_partition_key(hash, i, key);
|
291
|
-
|
291
|
+
|
292
292
|
std::cout << "\\\\x" << format_hexhash(hash)
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
293
|
+
<< " "
|
294
|
+
<< int(i)
|
295
|
+
<< " "
|
296
|
+
<< "\\\\x" << format_hexhash(hash_string(key, _partition_bytes))
|
297
|
+
<< std::endl;
|
298
|
+
|
299
299
|
}
|
300
|
-
|
300
|
+
|
301
301
|
return true;
|
302
302
|
}
|
303
303
|
|
@@ -309,37 +309,37 @@ bool HmSearchImpl::insert(const hash_string& hash,
|
|
309
309
|
error_msg = &dummy;
|
310
310
|
}
|
311
311
|
*error_msg = "";
|
312
|
-
|
312
|
+
|
313
313
|
if (hash.length() != (size_t) _hash_bytes) {
|
314
314
|
*error_msg = "incorrect hash length";
|
315
315
|
return false;
|
316
316
|
}
|
317
|
-
|
317
|
+
|
318
318
|
if (!_db->is_open()) {
|
319
319
|
*error_msg = "database is closed";
|
320
320
|
return false;
|
321
321
|
}
|
322
|
-
|
322
|
+
|
323
323
|
for (int i = 0; i < _partitions; i++) {
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
324
|
+
std::stringstream s;
|
325
|
+
s << "INSERT INTO partition" << i
|
326
|
+
<< " VALUES ($1, $2)";
|
327
|
+
_db->prepare("insert_"+std::to_string(i), s.str());
|
328
328
|
}
|
329
329
|
pqxx::work W(*_db);
|
330
330
|
for (int i = 0; i < _partitions; i++) {
|
331
331
|
uint8_t key[_partition_bytes];
|
332
|
-
|
332
|
+
|
333
333
|
get_partition_key(hash, i, key);
|
334
|
-
|
334
|
+
|
335
335
|
pqxx::binarystring key_blob(key, _partition_bytes);
|
336
336
|
pqxx::binarystring hash_blob(hash.data(), hash.length());
|
337
|
-
|
338
|
-
W.prepared("insert_"+i)(hash_blob)(key_blob).exec();
|
337
|
+
|
338
|
+
W.prepared("insert_"+std::to_string(i))(hash_blob)(key_blob).exec();
|
339
339
|
}
|
340
|
-
|
340
|
+
|
341
341
|
W.commit();
|
342
|
-
|
342
|
+
|
343
343
|
return true;
|
344
344
|
}
|
345
345
|
|
@@ -354,25 +354,25 @@ bool HmSearchImpl::lookup(const hash_string& query,
|
|
354
354
|
error_msg = &dummy;
|
355
355
|
}
|
356
356
|
*error_msg = "";
|
357
|
-
|
357
|
+
|
358
358
|
if (query.length() != (size_t) _hash_bytes) {
|
359
359
|
*error_msg = "incorrect hash length";
|
360
360
|
return false;
|
361
361
|
}
|
362
|
-
|
362
|
+
|
363
363
|
if (!_db->is_open()) {
|
364
364
|
*error_msg = "database is closed";
|
365
365
|
return false;
|
366
366
|
}
|
367
|
-
|
367
|
+
|
368
368
|
try {
|
369
369
|
CandidateMap candidates;
|
370
370
|
get_candidates(query, candidates);
|
371
|
-
|
371
|
+
|
372
372
|
for (CandidateMap::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
|
373
373
|
if (valid_candidate(i->second)) {
|
374
374
|
int distance = hamming_distance(query, i->first);
|
375
|
-
|
375
|
+
|
376
376
|
if (distance <= _max_error
|
377
377
|
&& (reduced_error < 0 || distance <= reduced_error)) {
|
378
378
|
result.push_back(LookupResult(i->first, distance));
|
@@ -384,7 +384,7 @@ bool HmSearchImpl::lookup(const hash_string& query,
|
|
384
384
|
*error_msg = e.base().what();
|
385
385
|
return false;
|
386
386
|
}
|
387
|
-
|
387
|
+
|
388
388
|
return true;
|
389
389
|
}
|
390
390
|
|
@@ -396,42 +396,43 @@ bool HmSearchImpl::close(std::string* error_msg)
|
|
396
396
|
error_msg = &dummy;
|
397
397
|
}
|
398
398
|
*error_msg = "";
|
399
|
-
|
399
|
+
|
400
400
|
if (!_db->is_open()) {
|
401
401
|
// Already closed
|
402
402
|
return true;
|
403
403
|
}
|
404
|
-
|
404
|
+
|
405
405
|
_db->disconnect();
|
406
|
-
|
406
|
+
|
407
407
|
return true;
|
408
408
|
}
|
409
409
|
|
410
410
|
|
411
411
|
HmSearchImpl::hash_string HmSearchImpl::get_multiple_keys(
|
412
|
-
|
413
|
-
|
412
|
+
uint8_t *key,
|
413
|
+
int partition)
|
414
414
|
{
|
415
415
|
hash_string hashes;
|
416
|
-
|
416
|
+
|
417
417
|
pqxx::nontransaction n(*_db);
|
418
418
|
pqxx::binarystring key_blob(key, _partition_bytes);
|
419
|
-
pqxx::result res = n.prepared("select_"+partition)(key_blob).exec();
|
420
|
-
|
419
|
+
pqxx::result res = n.prepared("select_"+std::to_string(partition))(key_blob).exec();
|
420
|
+
|
421
421
|
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
422
|
-
|
423
|
-
|
422
|
+
pqxx::binarystring hash_result(c[0]);
|
423
|
+
hashes.append(hash_string(hash_result.data(), hash_result.size()));
|
424
424
|
}
|
425
|
-
|
425
|
+
|
426
426
|
return hashes;
|
427
427
|
}
|
428
428
|
|
429
429
|
void HmSearchImpl::get_candidates(
|
430
|
-
|
431
|
-
|
430
|
+
const HmSearchImpl::hash_string& query,
|
431
|
+
HmSearchImpl::CandidateMap& candidates)
|
432
432
|
{
|
433
433
|
uint8_t key[_partition_bytes];
|
434
|
-
|
434
|
+
memset(key, 0, _partition_bytes);
|
435
|
+
|
435
436
|
for (int i = 0; i < _partitions; i++) {
|
436
437
|
int psize = _hash_bits - i * _partition_bits;
|
437
438
|
if (psize > _partition_bits) {
|
@@ -439,44 +440,44 @@ void HmSearchImpl::get_candidates(
|
|
439
440
|
}
|
440
441
|
std::stringstream single;
|
441
442
|
single << "SELECT hash FROM partition" << i
|
442
|
-
|
443
|
-
|
443
|
+
<< " WHERE key=$1";
|
444
|
+
|
444
445
|
std::stringstream s;
|
445
446
|
s << "SELECT hash FROM partition"
|
446
|
-
|
447
|
-
|
447
|
+
<< i
|
448
|
+
<< " INNER JOIN (SELECT $1::bytea AS key";
|
448
449
|
for (int j = 2; j <= psize; j++) {
|
449
|
-
|
450
|
-
|
451
|
-
|
450
|
+
s << " UNION ALL SELECT "
|
451
|
+
<< "$"
|
452
|
+
<< j;
|
452
453
|
}
|
453
454
|
s << ") AS x ON partition"
|
454
|
-
|
455
|
-
|
455
|
+
<< i
|
456
|
+
<< ".key = x.key";
|
456
457
|
std::string sql;
|
457
458
|
sql.append(s.str());
|
458
|
-
_db->prepare("select_multiple_"+i, sql);
|
459
|
-
|
459
|
+
_db->prepare("select_multiple_"+std::to_string(i), sql);
|
460
|
+
|
460
461
|
sql.clear();
|
461
462
|
sql.append(single.str());
|
462
|
-
_db->prepare("select_"+i, sql);
|
463
|
+
_db->prepare("select_"+std::to_string(i), sql);
|
463
464
|
}
|
464
465
|
for (int i = 0; i < _partitions; i++) {
|
465
466
|
hash_string hashes;
|
466
467
|
|
467
468
|
int bits = get_partition_key(query, i, key);
|
468
|
-
|
469
|
+
|
469
470
|
// Get exact matches
|
470
|
-
|
471
|
+
|
471
472
|
hashes = get_multiple_keys(key, i);
|
472
|
-
|
473
|
+
|
473
474
|
if (hashes.length() > 0) {
|
474
475
|
add_hash_candidates(candidates, 0, (const uint8_t*)hashes.data(), hashes.length());
|
475
476
|
}
|
476
|
-
|
477
|
+
|
477
478
|
// Get 1-variant matches
|
478
479
|
pqxx::nontransaction n(*_db);
|
479
|
-
pqxx::prepare::invocation prep = n.prepared("select_multiple_"+i);
|
480
|
+
pqxx::prepare::invocation prep = n.prepared("select_multiple_"+std::to_string(i));
|
480
481
|
int pbyte = (i * _partition_bits) / 8;
|
481
482
|
int count = 0;
|
482
483
|
for (int pbit = i * _partition_bits; bits > 0; pbit++, bits--, count++) {
|
@@ -487,7 +488,7 @@ void HmSearchImpl::get_candidates(
|
|
487
488
|
key[pbit / 8 - pbyte] ^= flip;
|
488
489
|
}
|
489
490
|
pqxx::result res = prep.exec();
|
490
|
-
|
491
|
+
|
491
492
|
hashes.clear();
|
492
493
|
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
493
494
|
pqxx::binarystring hash_result(c[0]);
|
@@ -499,13 +500,13 @@ void HmSearchImpl::get_candidates(
|
|
499
500
|
|
500
501
|
|
501
502
|
void HmSearchImpl::add_hash_candidates(
|
502
|
-
|
503
|
-
|
503
|
+
HmSearchImpl::CandidateMap& candidates, int match,
|
504
|
+
const uint8_t* hashes, size_t length)
|
504
505
|
{
|
505
506
|
for (size_t n = 0; n < length; n += _hash_bytes) {
|
506
507
|
hash_string hash = hash_string(hashes + n, _hash_bytes);
|
507
508
|
Candidate& cand = candidates[hash];
|
508
|
-
|
509
|
+
|
509
510
|
++cand.matches;
|
510
511
|
if (cand.matches == 1) {
|
511
512
|
cand.first_match = match;
|
@@ -518,7 +519,7 @@ void HmSearchImpl::add_hash_candidates(
|
|
518
519
|
|
519
520
|
|
520
521
|
bool HmSearchImpl::valid_candidate(
|
521
|
-
|
522
|
+
const HmSearchImpl::Candidate& candidate)
|
522
523
|
{
|
523
524
|
if (_max_error & 1) {
|
524
525
|
// Odd k
|
@@ -535,21 +536,21 @@ bool HmSearchImpl::valid_candidate(
|
|
535
536
|
return false;
|
536
537
|
}
|
537
538
|
}
|
538
|
-
|
539
|
+
|
539
540
|
return true;
|
540
541
|
}
|
541
542
|
|
542
543
|
|
543
544
|
int HmSearchImpl::hamming_distance(
|
544
|
-
|
545
|
-
|
545
|
+
const HmSearchImpl::hash_string& query,
|
546
|
+
const HmSearchImpl::hash_string& hash)
|
546
547
|
{
|
547
548
|
int distance = 0;
|
548
|
-
|
549
|
+
|
549
550
|
for (size_t i = 0; i < query.length(); i++) {
|
550
551
|
distance += one_bits[query[i] ^ hash[i]];
|
551
552
|
}
|
552
|
-
|
553
|
+
|
553
554
|
return distance;
|
554
555
|
}
|
555
556
|
|
@@ -557,31 +558,31 @@ int HmSearchImpl::hamming_distance(
|
|
557
558
|
int HmSearchImpl::get_partition_key(const hash_string& hash, int partition, uint8_t *key)
|
558
559
|
{
|
559
560
|
int psize, hash_bit, bits_left;
|
560
|
-
|
561
|
+
|
561
562
|
psize = _hash_bits - partition * _partition_bits;
|
562
563
|
if (psize > _partition_bits) {
|
563
564
|
psize = _partition_bits;
|
564
565
|
}
|
565
|
-
|
566
|
+
|
566
567
|
// Copy bytes, masking out some bits at the start and end
|
567
568
|
bits_left = psize;
|
568
569
|
hash_bit = partition * _partition_bits;
|
569
|
-
|
570
|
+
|
570
571
|
for (int i = 0; i < _partition_bytes; i++) {
|
571
572
|
int byte = hash_bit / 8;
|
572
573
|
int bit = hash_bit % 8;
|
573
574
|
int bits = 8 - bit;
|
574
|
-
|
575
|
+
|
575
576
|
if (bits > bits_left) {
|
576
577
|
bits = bits_left;
|
577
578
|
}
|
578
|
-
|
579
|
+
|
579
580
|
bits_left -= bits;
|
580
581
|
hash_bit += bits;
|
581
|
-
|
582
|
+
|
582
583
|
key[i] = hash[byte] & (((1 << bits) - 1) << (8 - bit - bits));
|
583
584
|
}
|
584
|
-
|
585
|
+
|
585
586
|
return psize;
|
586
587
|
}
|
587
588
|
|
@@ -606,8 +607,8 @@ int HmSearchImpl::one_bits[256] = {
|
|
606
607
|
};
|
607
608
|
|
608
609
|
/*
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
*/
|
610
|
+
Local Variables:
|
611
|
+
c-file-style: "stroustrup"
|
612
|
+
indent-tabs-mode:nil
|
613
|
+
End:
|
614
|
+
*/
|
data/hmsearch-postgres.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hmsearch-postgres
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Selden
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|