hmsearch-postgres 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +0 -0
- data/Rakefile +27 -0
- data/ext/hmsearch/extconf.rb +6 -0
- data/ext/hmsearch/hmsearch.cc +613 -0
- data/ext/hmsearch/hmsearch.h +204 -0
- data/ext/hmsearch/postgres_ext.cc +127 -0
- data/hmsearch-postgres.gemspec +22 -0
- data/lib/hmsearch/postgres.rb +1 -0
- data/test/hmsearch_test.rb +40 -0
- metadata +85 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6819501caf23acccdaa5b8fd3f06087395c29322
|
4
|
+
data.tar.gz: 9d065fdd3196ba0404529f744d84a80f361c8124
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bd3b9373805b316ddf0269fafe616cb22a2b70b40c39ad60c8d42783f2a6a08818ecd50ebf69a3e5f143d820f84272eb8322edd5c9771049f0509d3a8adcb51c
|
7
|
+
data.tar.gz: f1864f94b7069b97b646c84d4b32b04029951648dd64ee32497a1554178861d9ab687d9cc3a39fb12ad07d921539ea00e12e4b733c20bb676ad72684b4f81e1b
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Mine
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rake/clean'
require 'rbconfig'

# `rake clean` removes intermediate build output, `rake clobber` additionally
# removes the compiled extension that :build copies into lib/.
CLEAN.include('lib/hmsearch/Makefile')
# The :build task runs extconf.rb and make inside tmp/, so that directory
# holds the generated Makefile and object files and must be cleaned too.
CLEAN.include('tmp')
CLOBBER.include('lib/hmsearch/postgres_ext.*')

task default: :test

desc 'build native extension'
task :build do
  mkdir_p 'tmp'
  chdir 'tmp' do
    # Configure with the interpreter that is running rake rather than
    # whichever `ruby` happens to be first on PATH, so the extension is
    # built against the Ruby that will later load it.
    sh RbConfig.ruby, '../ext/hmsearch/extconf.rb'
    sh 'make'
  end

  dlext = RbConfig::CONFIG['DLEXT']
  cp "tmp/postgres_ext.#{dlext}", 'lib/hmsearch/'
end

desc 'run tests'
task test: :build do
  FileList.new('./test/*_test.rb').each { |test| load test }
end
|
@@ -0,0 +1,613 @@
|
|
1
|
+
/* HmSearch hash lookup library
|
2
|
+
*
|
3
|
+
* http://commonsmachinery.se/
|
4
|
+
* Distributed under an MIT license
|
5
|
+
*
|
6
|
+
* Copyright (c) 2014 Commons Machinery
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
* of this software and associated documentation files (the "Software"), to deal
|
10
|
+
* in the Software without restriction, including without limitation the rights
|
11
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
|
+
* copies of the Software, and to permit persons to whom the Software is
|
13
|
+
* furnished to do so, subject to the following conditions:
|
14
|
+
*
|
15
|
+
* The above copyright notice and this permission notice shall be included in all
|
16
|
+
* copies or substantial portions of the Software.
|
17
|
+
*
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
* SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include <math.h>
|
28
|
+
#include <stdlib.h>
|
29
|
+
#include <stdio.h>
|
30
|
+
#include <string.h>
|
31
|
+
|
32
|
+
#include <memory>
|
33
|
+
#include <algorithm>
|
34
|
+
#include <map>
|
35
|
+
#include <iostream>
|
36
|
+
|
37
|
+
#include <pqxx/pqxx>
|
38
|
+
|
39
|
+
#include "hmsearch.h"
|
40
|
+
|
41
|
+
/** The actual implementation of the HmSearch database.
|
42
|
+
*
|
43
|
+
* A difference between this implementation and the HmSearch algorithm
|
44
|
+
* in the paper is that only exact-matches are stored in the database,
|
45
|
+
* not the 1-matches. The 1-var partitions are instead generated
|
46
|
+
* during lookup. This drastically reduces database size, which
|
47
|
+
* speeds up insertion and probably lookups too.
|
48
|
+
*
|
49
|
+
* The database contains some setting records controlling the
|
50
|
+
* operation:
|
51
|
+
*
|
52
|
+
* _hb: hash bits
|
53
|
+
* _me: max errors
|
54
|
+
* _se: global sequence when adding new keys
|
55
|
+
*
|
56
|
+
* These can't be changed once the database has been initialised.
|
57
|
+
*
|
58
|
+
* Each partition is stored as a key on the following format:
|
59
|
+
* Byte 0: 'P'
|
60
|
+
* Byte 1: Partition number (thus limiting to max error 518)
|
61
|
+
* Bytes 2-N: Partition bits.
|
62
|
+
* Bytes N-M: Sequence number (globally incremented with each addition)
|
63
|
+
*/
|
64
|
+
class HmSearchImpl : public HmSearch
|
65
|
+
{
|
66
|
+
public:
|
67
|
+
HmSearchImpl(std::string connstr, int hash_bits, int max_error)
|
68
|
+
: _hash_bits(hash_bits)
|
69
|
+
, _max_error(max_error)
|
70
|
+
, _hash_bytes((hash_bits + 7) / 8)
|
71
|
+
, _partitions((max_error + 3) / 2)
|
72
|
+
, _partition_bits(ceil((double)hash_bits / _partitions))
|
73
|
+
, _partition_bytes((_partition_bits + 7) / 8 + 1)
|
74
|
+
{
|
75
|
+
_db = new pqxx::connection(connstr);
|
76
|
+
}
|
77
|
+
|
78
|
+
~HmSearchImpl() {
|
79
|
+
close();
|
80
|
+
}
|
81
|
+
|
82
|
+
bool insert(const hash_string& hash,
|
83
|
+
std::string* error_msg = NULL);
|
84
|
+
|
85
|
+
bool print_copystring(const hash_string& hash,
|
86
|
+
std::string* error_msg = NULL);
|
87
|
+
|
88
|
+
bool lookup(const hash_string& query,
|
89
|
+
LookupResultList& result,
|
90
|
+
int max_error = -1,
|
91
|
+
std::string* error_msg = NULL);
|
92
|
+
|
93
|
+
bool close(std::string* error_msg = NULL);
|
94
|
+
|
95
|
+
private:
|
96
|
+
struct Candidate {
|
97
|
+
Candidate() : matches(0), first_match(0), second_match(0) {}
|
98
|
+
int matches;
|
99
|
+
int first_match;
|
100
|
+
int second_match;
|
101
|
+
};
|
102
|
+
|
103
|
+
typedef std::map<hash_string, Candidate> CandidateMap;
|
104
|
+
|
105
|
+
void get_candidates(const hash_string& query, CandidateMap& candidates);
|
106
|
+
void add_hash_candidates(CandidateMap& candidates, int match,
|
107
|
+
const uint8_t* hashes, size_t length);
|
108
|
+
hash_string get_multiple_keys(uint8_t *key, int partition);
|
109
|
+
bool valid_candidate(const Candidate& candidate);
|
110
|
+
int hamming_distance(const hash_string& query, const hash_string& hash);
|
111
|
+
|
112
|
+
int get_partition_key(const hash_string& hash, int partition, uint8_t *key);
|
113
|
+
|
114
|
+
pqxx::connection *_db;
|
115
|
+
int _hash_bits;
|
116
|
+
int _max_error;
|
117
|
+
int _hash_bytes;
|
118
|
+
int _partitions;
|
119
|
+
int _partition_bits;
|
120
|
+
int _partition_bytes;
|
121
|
+
|
122
|
+
static int one_bits[256];
|
123
|
+
};
|
124
|
+
|
125
|
+
|
126
|
+
bool HmSearch::init(const std::string& path,
|
127
|
+
unsigned hash_bits, unsigned max_error,
|
128
|
+
uint64_t num_hashes,
|
129
|
+
std::string* error_msg)
|
130
|
+
{
|
131
|
+
std::string dummy;
|
132
|
+
if (!error_msg) {
|
133
|
+
error_msg = &dummy;
|
134
|
+
}
|
135
|
+
*error_msg = "";
|
136
|
+
|
137
|
+
if (hash_bits == 0 || (hash_bits & 7)) {
|
138
|
+
*error_msg = "invalid hash_bits value";
|
139
|
+
return false;
|
140
|
+
}
|
141
|
+
|
142
|
+
if (max_error == 0 || max_error >= hash_bits || max_error > 518) {
|
143
|
+
*error_msg = "invalid max_error value";
|
144
|
+
return false;
|
145
|
+
}
|
146
|
+
|
147
|
+
pqxx::connection db(path);
|
148
|
+
if (!db.is_open()) {
|
149
|
+
*error_msg = "Can't open database";
|
150
|
+
return false;
|
151
|
+
}
|
152
|
+
|
153
|
+
std::string sql;
|
154
|
+
sql = "INSERT INTO config VALUES ($1, $2)";
|
155
|
+
db.prepare("hash_max", sql);
|
156
|
+
|
157
|
+
pqxx::work W(db);
|
158
|
+
sql = "CREATE TABLE IF NOT EXISTS config ("\
|
159
|
+
" hash_bits int,"\
|
160
|
+
" max_error int); TRUNCATE config";
|
161
|
+
W.exec(sql);
|
162
|
+
|
163
|
+
W.prepared("hash_max")(hash_bits)(max_error).exec();
|
164
|
+
|
165
|
+
for (unsigned int i = 0; i < ((max_error + 3) / 2); i++) {
|
166
|
+
{
|
167
|
+
std::stringstream s;
|
168
|
+
s << "CREATE TABLE IF NOT EXISTS partition" << i << " ("
|
169
|
+
<< " hash bytea,"
|
170
|
+
<< " key bytea); TRUNCATE partition" << i;
|
171
|
+
W.exec(s.str());
|
172
|
+
}
|
173
|
+
|
174
|
+
{
|
175
|
+
std::stringstream s;
|
176
|
+
s << "DROP INDEX IF EXISTS ix_key_" << i;
|
177
|
+
W.exec(s.str());
|
178
|
+
}
|
179
|
+
|
180
|
+
{
|
181
|
+
std::stringstream s;
|
182
|
+
s << "CREATE INDEX ix_key_" << i << " ON partition" << i << "(key)";
|
183
|
+
W.exec(s.str());
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
W.commit();
|
188
|
+
|
189
|
+
db.disconnect();
|
190
|
+
|
191
|
+
return true;
|
192
|
+
}
|
193
|
+
|
194
|
+
|
195
|
+
/* Open an initialised database.
 *
 * Reads max_error and hash_bits from the config table over a temporary
 * connection, then returns a new HmSearchImpl (which opens its own
 * connection).  The caller owns the returned object.
 *
 * Returns NULL, with a description in *error_msg when provided, if the
 * database cannot be reached, the query fails, or the config table is
 * empty.
 */
HmSearch* HmSearch::open(const std::string& path,
                         std::string* error_msg)
{
    std::string dummy;

    if (!error_msg) {
        error_msg = &dummy;
    }
    *error_msg = "";

    try {
        pqxx::connection db(path);

        std::string sql;

        sql = "SELECT max_error, hash_bits FROM config";
        pqxx::nontransaction n(db);
        pqxx::result res(n.exec(sql));

        // An empty config table means init() was never run; reading the
        // first row would dereference an end iterator.
        if (res.empty()) {
            *error_msg = "database has not been initialised";
            return NULL;
        }

        pqxx::result::const_iterator c = res.begin(); // We retrieve just one row

        unsigned long hash_bits, max_error;
        max_error = c[0].as<long>();
        hash_bits = c[1].as<long>();

        db.disconnect();

        // new reports failure by throwing, so no NULL check is needed.
        return new HmSearchImpl(path, hash_bits, max_error);
    }
    // pqxx_exception covers broken connections as well as SQL errors
    // such as a missing config table.
    catch (const pqxx::pqxx_exception& e) {
        *error_msg = e.base().what();
        return NULL;
    }
}
|
236
|
+
|
237
|
+
|
238
|
+
/* Value of one hexadecimal digit, or -1 if c is not a hex digit. */
static int hmsearch_hex_nibble(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/* Decode a hexadecimal string into raw bytes.
 *
 * Two characters are consumed per output byte; a trailing odd character
 * is ignored.  Returns an empty string if any consumed character is not
 * a hexadecimal digit.  Signs and whitespace are rejected, unlike with
 * strtoul(), which would have accepted e.g. "-1" as 0xff.
 */
HmSearch::hash_string HmSearch::parse_hexhash(const std::string& hexhash)
{
    const size_t len = hexhash.length() / 2;

    // Build the result directly instead of using a variable-length
    // array, which is not standard C++ and is zero-sized for "".
    hash_string hash;
    hash.reserve(len);

    for (size_t i = 0; i < len; i++) {
        const int high = hmsearch_hex_nibble(hexhash[i * 2]);
        const int low = hmsearch_hex_nibble(hexhash[i * 2 + 1]);

        if (high < 0 || low < 0) {
            return hash_string();
        }

        hash.push_back(static_cast<uint8_t>((high << 4) | low));
    }

    return hash;
}
|
260
|
+
|
261
|
+
/* Format raw hash bytes as a lower-case hexadecimal string, two
 * characters per byte.  An empty hash yields an empty string.
 */
std::string HmSearch::format_hexhash(const HmSearch::hash_string& hash)
{
    static const char digits[] = "0123456789abcdef";

    // Appending to a std::string avoids the variable-length char buffer,
    // which was left without a terminating NUL when the hash was empty.
    std::string hex;
    hex.reserve(hash.length() * 2);

    for (size_t i = 0; i < hash.length(); i++) {
        const uint8_t byte = hash[i];
        hex.push_back(digits[byte >> 4]);
        hex.push_back(digits[byte & 0x0f]);
    }

    return hex;
}
|
271
|
+
|
272
|
+
|
273
|
+
bool HmSearchImpl::print_copystring(const hash_string& hash,
|
274
|
+
std::string* error_msg)
|
275
|
+
{
|
276
|
+
std::string dummy;
|
277
|
+
if (!error_msg) {
|
278
|
+
error_msg = &dummy;
|
279
|
+
}
|
280
|
+
*error_msg = "";
|
281
|
+
|
282
|
+
if (hash.length() != (size_t) _hash_bytes) {
|
283
|
+
*error_msg = "incorrect hash length";
|
284
|
+
return false;
|
285
|
+
}
|
286
|
+
|
287
|
+
for (int i = 0; i < _partitions; i++) {
|
288
|
+
uint8_t key[_partition_bytes];
|
289
|
+
|
290
|
+
get_partition_key(hash, i, key);
|
291
|
+
|
292
|
+
std::cout << "\\\\x" << format_hexhash(hash)
|
293
|
+
<< " "
|
294
|
+
<< int(i)
|
295
|
+
<< " "
|
296
|
+
<< "\\\\x" << format_hexhash(hash_string(key, _partition_bytes))
|
297
|
+
<< std::endl;
|
298
|
+
|
299
|
+
}
|
300
|
+
|
301
|
+
return true;
|
302
|
+
}
|
303
|
+
|
304
|
+
bool HmSearchImpl::insert(const hash_string& hash,
|
305
|
+
std::string* error_msg)
|
306
|
+
{
|
307
|
+
std::string dummy;
|
308
|
+
if (!error_msg) {
|
309
|
+
error_msg = &dummy;
|
310
|
+
}
|
311
|
+
*error_msg = "";
|
312
|
+
|
313
|
+
if (hash.length() != (size_t) _hash_bytes) {
|
314
|
+
*error_msg = "incorrect hash length";
|
315
|
+
return false;
|
316
|
+
}
|
317
|
+
|
318
|
+
if (!_db->is_open()) {
|
319
|
+
*error_msg = "database is closed";
|
320
|
+
return false;
|
321
|
+
}
|
322
|
+
|
323
|
+
for (int i = 0; i < _partitions; i++) {
|
324
|
+
std::stringstream s;
|
325
|
+
s << "INSERT INTO partition" << i
|
326
|
+
<< " VALUES ($1, $2)";
|
327
|
+
_db->prepare("insert_"+i, s.str());
|
328
|
+
}
|
329
|
+
pqxx::work W(*_db);
|
330
|
+
for (int i = 0; i < _partitions; i++) {
|
331
|
+
uint8_t key[_partition_bytes];
|
332
|
+
|
333
|
+
get_partition_key(hash, i, key);
|
334
|
+
|
335
|
+
pqxx::binarystring key_blob(key, _partition_bytes);
|
336
|
+
pqxx::binarystring hash_blob(hash.data(), hash.length());
|
337
|
+
|
338
|
+
W.prepared("insert_"+i)(hash_blob)(key_blob).exec();
|
339
|
+
}
|
340
|
+
|
341
|
+
W.commit();
|
342
|
+
|
343
|
+
return true;
|
344
|
+
}
|
345
|
+
|
346
|
+
|
347
|
+
bool HmSearchImpl::lookup(const hash_string& query,
|
348
|
+
LookupResultList& result,
|
349
|
+
int reduced_error,
|
350
|
+
std::string* error_msg)
|
351
|
+
{
|
352
|
+
std::string dummy;
|
353
|
+
if (!error_msg) {
|
354
|
+
error_msg = &dummy;
|
355
|
+
}
|
356
|
+
*error_msg = "";
|
357
|
+
|
358
|
+
if (query.length() != (size_t) _hash_bytes) {
|
359
|
+
*error_msg = "incorrect hash length";
|
360
|
+
return false;
|
361
|
+
}
|
362
|
+
|
363
|
+
if (!_db->is_open()) {
|
364
|
+
*error_msg = "database is closed";
|
365
|
+
return false;
|
366
|
+
}
|
367
|
+
|
368
|
+
try {
|
369
|
+
CandidateMap candidates;
|
370
|
+
get_candidates(query, candidates);
|
371
|
+
|
372
|
+
for (CandidateMap::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
|
373
|
+
if (valid_candidate(i->second)) {
|
374
|
+
int distance = hamming_distance(query, i->first);
|
375
|
+
|
376
|
+
if (distance <= _max_error
|
377
|
+
&& (reduced_error < 0 || distance <= reduced_error)) {
|
378
|
+
result.push_back(LookupResult(i->first, distance));
|
379
|
+
}
|
380
|
+
}
|
381
|
+
}
|
382
|
+
}
|
383
|
+
catch (const pqxx::pqxx_exception &e) {
|
384
|
+
*error_msg = e.base().what();
|
385
|
+
return false;
|
386
|
+
}
|
387
|
+
|
388
|
+
return true;
|
389
|
+
}
|
390
|
+
|
391
|
+
|
392
|
+
bool HmSearchImpl::close(std::string* error_msg)
|
393
|
+
{
|
394
|
+
std::string dummy;
|
395
|
+
if (!error_msg) {
|
396
|
+
error_msg = &dummy;
|
397
|
+
}
|
398
|
+
*error_msg = "";
|
399
|
+
|
400
|
+
if (!_db->is_open()) {
|
401
|
+
// Already closed
|
402
|
+
return true;
|
403
|
+
}
|
404
|
+
|
405
|
+
_db->disconnect();
|
406
|
+
|
407
|
+
return true;
|
408
|
+
}
|
409
|
+
|
410
|
+
|
411
|
+
HmSearchImpl::hash_string HmSearchImpl::get_multiple_keys(
|
412
|
+
uint8_t *key,
|
413
|
+
int partition)
|
414
|
+
{
|
415
|
+
hash_string hashes;
|
416
|
+
|
417
|
+
pqxx::nontransaction n(*_db);
|
418
|
+
pqxx::binarystring key_blob(key, _partition_bytes);
|
419
|
+
pqxx::result res = n.prepared("select_"+partition)(key_blob).exec();
|
420
|
+
|
421
|
+
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
422
|
+
pqxx::binarystring hash_result(c[0]);
|
423
|
+
hashes.append(hash_string(hash_result.data(), hash_result.size()));
|
424
|
+
}
|
425
|
+
|
426
|
+
return hashes;
|
427
|
+
}
|
428
|
+
|
429
|
+
void HmSearchImpl::get_candidates(
|
430
|
+
const HmSearchImpl::hash_string& query,
|
431
|
+
HmSearchImpl::CandidateMap& candidates)
|
432
|
+
{
|
433
|
+
uint8_t key[_partition_bytes];
|
434
|
+
|
435
|
+
for (int i = 0; i < _partitions; i++) {
|
436
|
+
int psize = _hash_bits - i * _partition_bits;
|
437
|
+
if (psize > _partition_bits) {
|
438
|
+
psize = _partition_bits;
|
439
|
+
}
|
440
|
+
std::stringstream single;
|
441
|
+
single << "SELECT hash FROM partition" << i
|
442
|
+
<< " WHERE key=$1";
|
443
|
+
|
444
|
+
std::stringstream s;
|
445
|
+
s << "SELECT hash FROM partition"
|
446
|
+
<< i
|
447
|
+
<< " INNER JOIN (SELECT $1::bytea AS key";
|
448
|
+
for (int j = 2; j <= psize; j++) {
|
449
|
+
s << " UNION ALL SELECT "
|
450
|
+
<< "$"
|
451
|
+
<< j;
|
452
|
+
}
|
453
|
+
s << ") AS x ON partition"
|
454
|
+
<< i
|
455
|
+
<< ".key = x.key";
|
456
|
+
std::string sql;
|
457
|
+
sql.append(s.str());
|
458
|
+
_db->prepare("select_multiple_"+i, sql);
|
459
|
+
|
460
|
+
sql.clear();
|
461
|
+
sql.append(single.str());
|
462
|
+
_db->prepare("select_"+i, sql);
|
463
|
+
}
|
464
|
+
for (int i = 0; i < _partitions; i++) {
|
465
|
+
hash_string hashes;
|
466
|
+
|
467
|
+
int bits = get_partition_key(query, i, key);
|
468
|
+
|
469
|
+
// Get exact matches
|
470
|
+
|
471
|
+
hashes = get_multiple_keys(key, i);
|
472
|
+
|
473
|
+
if (hashes.length() > 0) {
|
474
|
+
add_hash_candidates(candidates, 0, (const uint8_t*)hashes.data(), hashes.length());
|
475
|
+
}
|
476
|
+
|
477
|
+
// Get 1-variant matches
|
478
|
+
pqxx::nontransaction n(*_db);
|
479
|
+
pqxx::prepare::invocation prep = n.prepared("select_multiple_"+i);
|
480
|
+
int pbyte = (i * _partition_bits) / 8;
|
481
|
+
int count = 0;
|
482
|
+
for (int pbit = i * _partition_bits; bits > 0; pbit++, bits--, count++) {
|
483
|
+
uint8_t flip = 1 << (7 - (pbit % 8));
|
484
|
+
key[pbit / 8 - pbyte] ^= flip;
|
485
|
+
pqxx::binarystring key_blob(key, _partition_bytes);
|
486
|
+
prep(key_blob);
|
487
|
+
key[pbit / 8 - pbyte] ^= flip;
|
488
|
+
}
|
489
|
+
pqxx::result res = prep.exec();
|
490
|
+
|
491
|
+
hashes.clear();
|
492
|
+
for (pqxx::result::const_iterator c = res.begin(); c != res.end(); ++c) {
|
493
|
+
pqxx::binarystring hash_result(c[0]);
|
494
|
+
hashes.append(hash_string(hash_result.data(), hash_result.size()));
|
495
|
+
}
|
496
|
+
add_hash_candidates(candidates, 1, (const uint8_t*)hashes.data(), hashes.length());
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
|
501
|
+
/* Record every hash in a concatenated buffer as a candidate.
 *
 * hashes holds length bytes, read in consecutive chunks of _hash_bytes.
 * Each chunk's counter is incremented, and the match kind is stored for
 * the first and second time the hash is seen.
 */
void HmSearchImpl::add_hash_candidates(
    HmSearchImpl::CandidateMap& candidates, int match,
    const uint8_t* hashes, size_t length)
{
    size_t offset = 0;

    while (offset < length) {
        Candidate& entry = candidates[hash_string(hashes + offset, _hash_bytes)];

        entry.matches += 1;

        switch (entry.matches) {
        case 1:
            entry.first_match = match;
            break;
        case 2:
            entry.second_match = match;
            break;
        default:
            // Only the first two match kinds are tracked.
            break;
        }

        offset += _hash_bytes;
    }
}
|
518
|
+
|
519
|
+
|
520
|
+
bool HmSearchImpl::valid_candidate(
|
521
|
+
const HmSearchImpl::Candidate& candidate)
|
522
|
+
{
|
523
|
+
if (_max_error & 1) {
|
524
|
+
// Odd k
|
525
|
+
if (candidate.matches < 3) {
|
526
|
+
if (candidate.matches == 1 || (candidate.first_match && candidate.second_match)) {
|
527
|
+
return false;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
}
|
531
|
+
else {
|
532
|
+
// Even k
|
533
|
+
if (candidate.matches < 2) {
|
534
|
+
if (candidate.first_match)
|
535
|
+
return false;
|
536
|
+
}
|
537
|
+
}
|
538
|
+
|
539
|
+
return true;
|
540
|
+
}
|
541
|
+
|
542
|
+
|
543
|
+
int HmSearchImpl::hamming_distance(
|
544
|
+
const HmSearchImpl::hash_string& query,
|
545
|
+
const HmSearchImpl::hash_string& hash)
|
546
|
+
{
|
547
|
+
int distance = 0;
|
548
|
+
|
549
|
+
for (size_t i = 0; i < query.length(); i++) {
|
550
|
+
distance += one_bits[query[i] ^ hash[i]];
|
551
|
+
}
|
552
|
+
|
553
|
+
return distance;
|
554
|
+
}
|
555
|
+
|
556
|
+
|
557
|
+
/* Extract the key for one partition of a hash.
 *
 * Parameters:
 *  - hash: the full hash, as raw bytes
 *  - partition: zero-based partition number
 *  - key: output buffer; exactly _partition_bytes bytes are written
 *
 * The partition's bits are copied byte by byte and keep their original
 * position within each byte (nothing is shifted); bits belonging to
 * neighbouring partitions are masked to zero.
 *
 * Returns the number of bits in the partition.
 */
int HmSearchImpl::get_partition_key(const hash_string& hash, int partition, uint8_t *key)
{
    int psize, hash_bit, bits_left;

    // Bits from the start of this partition to the end of the hash,
    // capped at the nominal partition width.
    psize = _hash_bits - partition * _partition_bits;
    if (psize > _partition_bits) {
        psize = _partition_bits;
    }

    // Copy bytes, masking out some bits at the start and end
    bits_left = psize;
    hash_bit = partition * _partition_bits;

    for (int i = 0; i < _partition_bytes; i++) {
        int byte = hash_bit / 8;   // source byte in the hash
        int bit = hash_bit % 8;    // offset of the first wanted bit, counted from the MSB
        int bits = 8 - bit;        // bits available in this byte from that offset

        if (bits > bits_left) {
            bits = bits_left;
        }

        bits_left -= bits;
        hash_bit += bits;

        // Mask of `bits` ones starting `bit` positions below the MSB.
        // Once bits_left has reached zero, bits is 0, the mask is 0 and
        // the remaining key bytes are written as zero.
        key[i] = hash[byte] & (((1 << bits) - 1) << (8 - bit - bits));
    }

    return psize;
}
|
587
|
+
|
588
|
+
|
589
|
+
int HmSearchImpl::one_bits[256] = {
|
590
|
+
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
591
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
592
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
593
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
594
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
595
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
596
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
597
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
598
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
599
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
600
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
601
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
602
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
603
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
604
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
605
|
+
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
606
|
+
};
|
607
|
+
|
608
|
+
/*
|
609
|
+
Local Variables:
|
610
|
+
c-file-style: "stroustrup"
|
611
|
+
indent-tabs-mode:nil
|
612
|
+
End:
|
613
|
+
*/
|
@@ -0,0 +1,204 @@
|
|
1
|
+
/* HmSearch hash lookup library
|
2
|
+
*
|
3
|
+
* http://commonsmachinery.se/
|
4
|
+
* Distributed under an MIT license
|
5
|
+
*
|
6
|
+
* Copyright (c) 2014 Commons Machinery
|
7
|
+
*
|
8
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
* of this software and associated documentation files (the "Software"), to deal
|
10
|
+
* in the Software without restriction, including without limitation the rights
|
11
|
+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
|
+
* copies of the Software, and to permit persons to whom the Software is
|
13
|
+
* furnished to do so, subject to the following conditions:
|
14
|
+
*
|
15
|
+
* The above copyright notice and this permission notice shall be included in all
|
16
|
+
* copies or substantial portions of the Software.
|
17
|
+
*
|
18
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
* SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#ifndef __HMSEARCH_H_INCLUDED__
|
28
|
+
#define __HMSEARCH_H_INCLUDED__
|
29
|
+
|
30
|
+
#include <string>
|
31
|
+
#include <list>
|
32
|
+
#include <stdint.h>
|
33
|
+
|
34
|
+
/** Interface to a HmSearch database.
 *
 * It cannot be instantiated directly, instead open() must be used to
 * get a pointer to a database object.
 *
 * The database is closed when the object is deleted.
 *
 * NOTE(review): earlier revisions of this comment promised lock-free
 * multi-threaded insert()/lookup() and described a single-process
 * limitation of LevelDB.  Both statements predate the PostgreSQL
 * backend and should be re-verified before being relied upon.
 *
 * HmSearch::close() is not thread-safe, so the caller must ensure
 * that no inserts or lookups are in progress.
 */
class HmSearch
{
public:
    /** A string for passing around raw (i.e. not hexadecimal) hashes.
     */
    typedef std::basic_string<uint8_t> hash_string;

    /** A record holding a hash found by lookup() and its hamming distance
     * from the query hash.
     */
    struct LookupResult {
        LookupResult(const hash_string& h, int d) : hash(h), distance(d) {}
        hash_string hash;
        int distance;
    };

    typedef std::list<LookupResult> LookupResultList;

    /** Initialise a new hash database.
     *
     * Parameters:
     *
     * - path: identifies the database to initialise
     *
     * - hash_bits: number of bits in the hash (must be a multiple of 8)
     *
     * - max_error: maximum hamming distance, must be less than hash_bits
     *
     * - num_hashes: target number of hashes, used for tuning the database
     *
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns true if the database could be initialised, false on errors.
     */
    static bool init(const std::string& path,
                     unsigned hash_bits, unsigned max_error,
                     uint64_t num_hashes,
                     std::string* error_msg = NULL);

    /** Open a database.
     *
     * The returned object must be deleted when not used any longer to
     * ensure that the database is closed.
     *
     * Parameters:
     *
     * - path: identifies the database to open
     *
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns the new object on success, or NULL on error.
     */
    static HmSearch* open(const std::string& path,
                          std::string* error_msg = NULL);


    /** Parse a hash in hexadecimal format, returning
     * a string of raw bytes.
     */
    static hash_string parse_hexhash(const std::string& hexhash);

    /** Format a hash of raw bytes into a hexadecimal string.
     */
    static std::string format_hexhash(const hash_string& hash);


    /** Prints a string suitable for PostgreSQL COPY onto stdout.
     *
     * No check is made if the hash already exists in the database,
     * so this may result in duplicate records.
     *
     * Parameters:
     * - hash: The hash to print, as raw bytes
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns true if the command succeeded, false on any error.
     */
    virtual bool print_copystring(const hash_string& hash,
                                  std::string* error_msg = NULL) = 0;

    /** Insert a hash into the database.
     *
     * No check is made if the hash already exists in the database,
     * so this may result in duplicate records.
     *
     * Parameters:
     * - hash: The hash to insert, as raw bytes
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns true if the insert succeeded, false on any error.
     */
    virtual bool insert(const hash_string& hash,
                        std::string* error_msg = NULL) = 0;

    /** Lookup a hash in the database, returning a list of matches.
     *
     * Parameters:
     *
     * - query: query hash string
     *
     * - result: matches are added to this list (which is not emptied)
     *
     * - max_error: if >= 0, reduce the maximum accepted error
     *   from the database default
     *
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns true if the lookup could be performed (even if no
     * hashes were found), false if an error occurred.
     */
    virtual bool lookup(const hash_string& query,
                        LookupResultList& result,
                        int max_error = -1,
                        std::string* error_msg = NULL) = 0;

    /** Explicitly close the database.
     *
     * Parameter:
     * - error_msg: if provided, will be set to a string describing any
     *   error, or to an empty string if no error occurred.
     *
     * Returns true if all went well, false on errors.
     */
    virtual bool close(std::string* error_msg = NULL) = 0;

    /** Delete the database object, closing the database if not
     * already done.
     */
    virtual ~HmSearch() {}

protected:
    HmSearch() {}
};
|
194
|
+
|
195
|
+
|
196
|
+
/*
|
197
|
+
Local Variables:
|
198
|
+
c-file-style: "stroustrup"
|
199
|
+
indent-tabs-mode:nil
|
200
|
+
End:
|
201
|
+
*/
|
202
|
+
|
203
|
+
#endif // __HMSEARCH_H_INCLUDED__
|
204
|
+
|
@@ -0,0 +1,127 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "hmsearch.h"
|
3
|
+
|
4
|
+
extern "C" void Init_postgres_ext();
|
5
|
+
|
6
|
+
static VALUE eHmSearchError;
|
7
|
+
static ID id_hash;
|
8
|
+
static ID id_distance;
|
9
|
+
|
10
|
+
static HmSearch *HmSearch_ptr(VALUE obj);
|
11
|
+
static void HmSearch_free(void *p);
|
12
|
+
static VALUE HmSearch_initdb(VALUE klass, VALUE _path, VALUE _hash_bits, VALUE _max_error, VALUE _num_hashes);
|
13
|
+
static VALUE HmSearch_open(VALUE klass, VALUE _path);
|
14
|
+
static VALUE HmSearch_close(VALUE self);
|
15
|
+
static VALUE HmSearch_insert(VALUE self, VALUE _hash);
|
16
|
+
static VALUE HmSearch_lookup(int argc, VALUE *argv, VALUE self);
|
17
|
+
|
18
|
+
/* Return the HmSearch pointer wrapped inside a Ruby data object. */
static HmSearch *HmSearch_ptr(VALUE obj)
{
    HmSearch *wrapped;

    Data_Get_Struct(obj, HmSearch, wrapped);

    return wrapped;
}
|
24
|
+
|
25
|
+
static void HmSearch_free(void *p)
|
26
|
+
{
|
27
|
+
static_cast<HmSearch*>(p)->~HmSearch();
|
28
|
+
}
|
29
|
+
|
30
|
+
/* Ruby binding for HmSearch::init().
 *
 * Raises eHmSearchError with the library's message if initialisation
 * fails; returns nil otherwise.
 */
static VALUE HmSearch_initdb(VALUE klass, VALUE _path, VALUE _hash_bits, VALUE _max_error, VALUE _num_hashes)
{
    // Convert all arguments first: these calls may raise, and must do so
    // before any C++ object with a destructor is alive.  NUM2UINT
    // type-checks its argument, whereas FIX2UINT blindly decodes the
    // VALUE and yields garbage for anything that is not a Fixnum.
    const char *path = StringValueCStr(_path);
    unsigned hash_bits = NUM2UINT(_hash_bits);
    unsigned max_error = NUM2UINT(_max_error);
    uint64_t num_hashes = NUM2ULONG(_num_hashes);

    // rb_raise() leaves this function with longjmp, which skips C++
    // destructors.  Copy the message into a Ruby string and let the
    // std::string go out of scope before raising.
    VALUE failure = Qnil;
    {
        std::string error_msg;
        if (!HmSearch::init(path, hash_bits, max_error, num_hashes, &error_msg)) {
            failure = rb_str_new2(error_msg.c_str());
        }
    }

    if (!NIL_P(failure)) {
        rb_raise(eHmSearchError, "%s", StringValueCStr(failure));
    }

    return Qnil;
}
|
44
|
+
|
45
|
+
/* HmSearch::Postgres.open(path) -> Postgres
 * HmSearch::Postgres.open(path) { |db| ... } -> result of the block
 *
 * Opens the database through HmSearch::open and wraps the returned handle
 * in a Ruby object of class `klass`, with HmSearch_free registered as its
 * free callback. With a block, the object is yielded and HmSearch_close is
 * run afterwards via rb_ensure, whether or not the block raised.
 *
 * Raises HmSearch::HmSearchError carrying the library's error message when
 * HmSearch::open returns NULL.
 */
static VALUE HmSearch_open(VALUE klass, VALUE _path)
{
    const char *path = StringValueCStr(_path);

    // rb_raise leaves via longjmp and skips C++ destructors, so the message
    // is copied into a Ruby string and the std::string is destroyed before
    // raising.
    HmSearch *p = NULL;
    VALUE failure = Qnil;
    {
        std::string error_msg;
        p = HmSearch::open(path, &error_msg);
        if (!p) {
            failure = rb_str_new2(error_msg.c_str());
        }
    }

    if (!p) {
        rb_raise(eHmSearchError, "%s", StringValueCStr(failure));
    }

    VALUE obj = Data_Wrap_Struct(klass, NULL, HmSearch_free, p);

    if (rb_block_given_p()) {
        return rb_ensure(RUBY_METHOD_FUNC(rb_yield), obj, RUBY_METHOD_FUNC(HmSearch_close), obj);
    }

    return obj;
}
|
63
|
+
|
64
|
+
/* HmSearch::Postgres#close -> nil
 *
 * Calls close() on the wrapped handle. The value returned by close() is
 * discarded and no error message is requested, so this method never
 * reports a close failure to Ruby. It also serves as the ensure callback
 * of the block form of HmSearch_open.
 */
static VALUE HmSearch_close(VALUE self)
{
    HmSearch *db = HmSearch_ptr(self);

    db->close();

    return Qnil;
}
|
69
|
+
|
70
|
+
/* HmSearch::Postgres#insert(hexhash) -> nil
 *
 * Parses the hexadecimal hash string with HmSearch::parse_hexhash and
 * inserts it into the wrapped database.
 *
 * Raises HmSearch::HmSearchError when the library reports failure, matching
 * what HmSearch_lookup does, instead of silently dropping the error.
 *
 * NOTE(review): assumes insert() has the shape
 * `bool insert(hash, std::string* error_msg)`, parallel to lookup() -
 * confirm against hmsearch.h.
 */
static VALUE HmSearch_insert(VALUE self, VALUE _hash)
{
    // Conversions that may raise run before any C++ object with a destructor
    // exists.
    const char *hash = StringValueCStr(_hash);
    HmSearch *db = HmSearch_ptr(self);

    // rb_raise leaves via longjmp and skips C++ destructors, so the message
    // is copied into a Ruby string and the std::string is destroyed before
    // raising.
    bool ok = false;
    VALUE failure = Qnil;
    {
        std::string error_msg;
        ok = db->insert(HmSearch::parse_hexhash(hash), &error_msg);
        if (!ok) {
            failure = rb_str_new2(error_msg.c_str());
        }
    }

    if (!ok) {
        rb_raise(eHmSearchError, "%s", StringValueCStr(failure));
    }

    return Qnil;
}
|
75
|
+
|
76
|
+
/* HmSearch::Postgres#lookup(hexhash, reduced_error = nil) -> Array of Hash
 *
 * Looks up hashes near `hexhash`. `reduced_error` is optional; when it is
 * omitted or nil, -1 is passed to the library.
 *
 * Returns an array with one hash per match, each of the form
 * { hash: <hex string>, distance: <integer> }.
 *
 * Raises HmSearch::HmSearchError carrying the library's error message when
 * the lookup fails.
 */
static VALUE HmSearch_lookup(int argc, VALUE *argv, VALUE self)
{
    VALUE _hash, _reduced_error;
    rb_scan_args(argc, argv, "11", &_hash, &_reduced_error);

    // Conversions that may raise run before any C++ object with a destructor
    // exists. NUM2INT validates its argument; FIX2INT would blindly
    // reinterpret a non-Fixnum VALUE.
    const char *hash = StringValueCStr(_hash);
    int reduced_error = NIL_P(_reduced_error) ? -1 : NUM2INT(_reduced_error);
    HmSearch *db = HmSearch_ptr(self);

    // rb_raise leaves via longjmp and skips C++ destructors, so the outcome
    // is moved into Ruby values and the result list / message are destroyed
    // before raising.
    VALUE results = Qnil;
    VALUE failure = Qnil;
    {
        HmSearch::LookupResultList matches;
        std::string error_msg;

        if (db->lookup(HmSearch::parse_hexhash(hash), matches, reduced_error, &error_msg)) {
            results = rb_ary_new2(matches.size());

            for (HmSearch::LookupResultList::const_iterator it = matches.begin(); it != matches.end(); ++it) {
                VALUE result = rb_hash_new();

                rb_hash_aset(result, ID2SYM(id_hash), rb_str_new2(HmSearch::format_hexhash(it->hash).c_str()));
                rb_hash_aset(result, ID2SYM(id_distance), INT2NUM(it->distance));

                rb_ary_push(results, result);
            }
        }
        else {
            failure = rb_str_new2(error_msg.c_str());
        }
    }

    if (!NIL_P(failure)) {
        rb_raise(eHmSearchError, "%s", StringValueCStr(failure));
    }

    return results;
}
|
104
|
+
|
105
|
+
void Init_postgres_ext() {
|
106
|
+
VALUE mHmSearch = rb_define_module("HmSearch");
|
107
|
+
|
108
|
+
eHmSearchError = rb_define_class_under(mHmSearch, "HmSearchError", rb_eStandardError);
|
109
|
+
|
110
|
+
VALUE cPostgres = rb_define_class_under(mHmSearch, "Postgres", rb_cObject);
|
111
|
+
|
112
|
+
VALUE cPostgresSingleton = rb_singleton_class(cPostgres);
|
113
|
+
|
114
|
+
id_hash = rb_intern("hash");
|
115
|
+
id_distance = rb_intern("distance");
|
116
|
+
|
117
|
+
rb_define_method(cPostgresSingleton, "initdb", RUBY_METHOD_FUNC(HmSearch_initdb), 4);
|
118
|
+
|
119
|
+
rb_define_method(cPostgresSingleton, "open", RUBY_METHOD_FUNC(HmSearch_open), 1);
|
120
|
+
// we have to allocate via open
|
121
|
+
rb_undef_method(cPostgresSingleton, "new");
|
122
|
+
rb_undef_method(cPostgresSingleton, "allocate");
|
123
|
+
|
124
|
+
rb_define_method(cPostgres, "close", RUBY_METHOD_FUNC(HmSearch_close), 0);
|
125
|
+
rb_define_method(cPostgres, "insert", RUBY_METHOD_FUNC(HmSearch_insert), 1);
|
126
|
+
rb_define_method(cPostgres, "lookup", RUBY_METHOD_FUNC(HmSearch_lookup), -1);
|
127
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for hmsearch-postgres, a native-extension gem whose
# extension is built from ext/hmsearch/extconf.rb.
Gem::Specification.new do |s|
  s.name    = 'hmsearch-postgres'
  s.version = '0.1.0'

  s.summary = 'hmsearch postgres client'
  # A plain string is used on purpose: an indented `<<-` heredoc would carry
  # its source indentation and a trailing newline into the published
  # description.
  s.description = 'A ruby wrapper for the hmsearch postgres client.'

  # Package exactly the files tracked by git.
  s.files        = `git ls-files`.split("\n")
  s.require_path = 'lib'

  s.add_development_dependency 'rake'
  s.add_development_dependency 'pg'

  s.extensions = ['ext/hmsearch/extconf.rb']

  s.authors  = ['Kris Selden']
  s.email    = 'kris.selden@gmail.com'
  s.homepage = 'https://github.com/denisnazarov/hmsearch-postgres-ruby'
  s.license  = 'MIT'
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'hmsearch/postgres_ext'
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'hmsearch/postgres'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'pg'
|
4
|
+
|
5
|
+
# Integration test for HmSearch::Postgres. It needs a PostgreSQL server
# listening on localhost:5432 and recreates the `hmsearch_test` database
# before every test.
#
# The base class is Minitest::Test where available, falling back to the
# legacy MiniTest::Unit::TestCase name on old Minitest releases.
class TestHmsearchPostgres < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  SERVER_CONNINFO = 'host=localhost port=5432'
  TEST_CONNINFO   = "#{SERVER_CONNINFO} dbname=hmsearch_test"

  # Recreate an empty hmsearch_test database.
  def setup
    admin = PG::Connection.open(SERVER_CONNINFO)
    # `if exists` tolerates a missing database without hiding real failures
    # the way a blanket `rescue nil` would.
    admin.exec('drop database if exists hmsearch_test')
    admin.exec('create database hmsearch_test')
  ensure
    # Always release the admin connection so it does not leak per test.
    admin.close if admin
  end

  # Insert one hash, then look up a hash that differs from it only in the
  # last hex digit (0 vs 2, i.e. one bit) and expect the stored hash back
  # at distance 1.
  def test_hmsearch
    HmSearch::Postgres.initdb(TEST_CONNINFO, 256, 10, 100000000)

    # We can't do more than one op per open because of a bug in the
    # underlying library:
    #
    #   warning: adding 'int' to a string does not append to the string [-Wstring-plus-int]
    #   pqxx::prepare::invocation prep = n.prepared("select_multiple_"+i);
    #
    # Instead of appending to the string this just moves the char pointer,
    # which happens to work out on the first run through but not on
    # subsequent ones. Hence one open per operation; the block form closes
    # the handle even when the operation raises.
    HmSearch::Postgres.open(TEST_CONNINFO) do |db|
      db.insert('6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df0')
    end

    HmSearch::Postgres.open(TEST_CONNINFO) do |db|
      actual = db.lookup('6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df2', -1)

      expected = [{
        hash: '6e6fb315fa8c43fe9c2687d5be14575abb7252104236747d571b97e003563df0',
        distance: 1
      }]

      assert_equal(expected, actual)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hmsearch-postgres
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kris Selden
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: |2
|
42
|
+
A ruby wrapper for the hmsearch postgres client.
|
43
|
+
email: kris.selden@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions:
|
46
|
+
- ext/hmsearch/extconf.rb
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- ext/hmsearch/extconf.rb
|
55
|
+
- ext/hmsearch/hmsearch.cc
|
56
|
+
- ext/hmsearch/hmsearch.h
|
57
|
+
- ext/hmsearch/postgres_ext.cc
|
58
|
+
- hmsearch-postgres.gemspec
|
59
|
+
- lib/hmsearch/postgres.rb
|
60
|
+
- test/hmsearch_test.rb
|
61
|
+
homepage: https://github.com/denisnazarov/hmsearch-postgres-ruby
|
62
|
+
licenses:
|
63
|
+
- MIT
|
64
|
+
metadata: {}
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 2.2.2
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: hmsearch postgres client
|
85
|
+
test_files: []
|