triez 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/changes +7 -0
- data/ext/hat-trie/hat-trie.c +57 -1
- data/ext/hat-trie/hat-trie.h +14 -3
- data/ext/triez.cc +26 -0
- data/lib/triez.rb +6 -1
- data/readme.md +12 -0
- data/test/triez_test.rb +28 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88f05351da6b711cefe44c0e5060f5c04380e5b6
|
4
|
+
data.tar.gz: 41ff89af13fd30d8def221fa396d1097816f39c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8047e6c017a559b9916dfcdf235f39b560e486d190a14963a32c2cf565725f128e26c2b3f07d83273fe79ab566349220cc048a1eec347f35aa6308832887124
|
7
|
+
data.tar.gz: 8f64787cd1db06201d18e5ddc235d960a5ab6d08a63c212e13619094c47bc688feb59ababc950ec4e70bc8382ff89a255154a03913eb16ac784690aeb2d6f787
|
data/changes
CHANGED
data/ext/hat-trie/hat-trie.c
CHANGED
@@ -408,6 +408,60 @@ value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
|
|
408
408
|
}
|
409
409
|
|
410
410
|
|
411
|
+
void hattrie_walk (hattrie_t* T, const char* key, size_t len, void* user_data, hattrie_walk_cb cb) {
|
412
|
+
unsigned char* k = (unsigned char*)key;
|
413
|
+
node_ptr node = T->root;
|
414
|
+
size_t i, j;
|
415
|
+
ahtable_iter_t* it;
|
416
|
+
|
417
|
+
/* go down until a bucket is reached */
|
418
|
+
for (i = 0; i < len; i++, k++) {
|
419
|
+
if (!(*node.flag & NODE_TYPE_TRIE))
|
420
|
+
break;
|
421
|
+
node = node.t->xs[*k];
|
422
|
+
if (*node.flag & NODE_HAS_VAL) {
|
423
|
+
if (hattrie_walk_stop == cb(key, i, &node.t->val, user_data))
|
424
|
+
return;
|
425
|
+
}
|
426
|
+
}
|
427
|
+
if (i == len)
|
428
|
+
return;
|
429
|
+
|
430
|
+
assert(i);
|
431
|
+
if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
|
432
|
+
i--;
|
433
|
+
k--;
|
434
|
+
} else {
|
435
|
+
assert(*node.flag & NODE_TYPE_PURE_BUCKET);
|
436
|
+
}
|
437
|
+
|
438
|
+
/* dict order ensured short => long */
|
439
|
+
it = ahtable_iter_begin(node.b, true);
|
440
|
+
for(; !ahtable_iter_finished(it); ahtable_iter_next(it)) {
|
441
|
+
size_t stored_len;
|
442
|
+
unsigned char* stored_key = (unsigned char*)ahtable_iter_key(it, &stored_len);
|
443
|
+
int matched = 1;
|
444
|
+
if (stored_len + i > len) {
|
445
|
+
continue;
|
446
|
+
}
|
447
|
+
for (j = 0; j < stored_len; j++) {
|
448
|
+
if (stored_key[j] != k[j]) {
|
449
|
+
matched = 0;
|
450
|
+
break;
|
451
|
+
}
|
452
|
+
}
|
453
|
+
if (matched) {
|
454
|
+
value_t* val = ahtable_iter_val(it);
|
455
|
+
if (hattrie_walk_stop == cb(key, i + stored_len, val, user_data)) {
|
456
|
+
ahtable_iter_free(it);
|
457
|
+
return;
|
458
|
+
}
|
459
|
+
}
|
460
|
+
}
|
461
|
+
ahtable_iter_free(it);
|
462
|
+
}
|
463
|
+
|
464
|
+
|
411
465
|
int hattrie_del(hattrie_t* T, const char* key, size_t len)
|
412
466
|
{
|
413
467
|
node_ptr parent = T->root;
|
@@ -545,7 +599,9 @@ static void hattrie_iter_nextnode(hattrie_iter_t* i)
|
|
545
599
|
}
|
546
600
|
|
547
601
|
|
548
|
-
|
602
|
+
/** next non-nil-key node
|
603
|
+
* TODO pick a better name
|
604
|
+
*/
|
549
605
|
static void hattrie_iter_step(hattrie_iter_t* i)
|
550
606
|
{
|
551
607
|
while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
|
data/ext/hat-trie/hat-trie.h
CHANGED
@@ -46,11 +46,22 @@ size_t hattrie_size (hattrie_t*);
|
|
46
46
|
*/
|
47
47
|
value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
|
48
48
|
|
49
|
-
|
50
49
|
/** Find a given key in the table, returning a NULL pointer if it does not
|
51
50
|
* exist. */
|
52
51
|
value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
|
53
52
|
|
53
|
+
/** hattrie_walk callback signature */
|
54
|
+
typedef int (*hattrie_walk_cb)(const char* key, size_t len, value_t* val, void* user_data);
|
55
|
+
|
56
|
+
/** hattrie_walk callback return values, controls whether should stop the walk or not */
|
57
|
+
#define hattrie_walk_stop 0
|
58
|
+
#define hattrie_walk_continue 1
|
59
|
+
|
60
|
+
/** Find stored keys which are prefices of key, and invoke callback for every found key and val.
|
61
|
+
* The invocation order is: short key to long key.
|
62
|
+
*/
|
63
|
+
void hattrie_walk (hattrie_t*, const char* key, size_t len, void* user_data, hattrie_walk_cb);
|
64
|
+
|
54
65
|
/** Delete a given key from trie. Returns 0 if successful or -1 if not found.
|
55
66
|
*/
|
56
67
|
int hattrie_del(hattrie_t* T, const char* key, size_t len);
|
@@ -64,6 +75,8 @@ void hattrie_iter_free (hattrie_iter_t*);
|
|
64
75
|
const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
|
65
76
|
value_t* hattrie_iter_val (hattrie_iter_t*);
|
66
77
|
|
78
|
+
/** Note the hattrie_iter_key() for prefixed search gets the suffix instead of the whole key
|
79
|
+
*/
|
67
80
|
hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const char* prefix, size_t prefix_len);
|
68
81
|
|
69
82
|
#ifdef __cplusplus
|
@@ -71,5 +84,3 @@ hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const ch
|
|
71
84
|
#endif
|
72
85
|
|
73
86
|
#endif
|
74
|
-
|
75
|
-
|
data/ext/triez.cc
CHANGED
@@ -62,6 +62,7 @@ static void hat_mark(void* p_ht) {
|
|
62
62
|
if (!IMMEDIATE_P(*v)) {
|
63
63
|
rb_gc_mark(*v);
|
64
64
|
}
|
65
|
+
hattrie_iter_next(it);
|
65
66
|
}
|
66
67
|
hattrie_iter_free(it);
|
67
68
|
}
|
@@ -247,6 +248,30 @@ static VALUE hat_search(VALUE self, VALUE key, VALUE vlimit, VALUE vsort, VALUE
|
|
247
248
|
return self;
|
248
249
|
}
|
249
250
|
|
251
|
+
typedef struct {
|
252
|
+
bool obj_value;
|
253
|
+
VALUE arr;
|
254
|
+
} HatWalkData;
|
255
|
+
|
256
|
+
static int hat_walk_cb(const char* key, size_t len, value_t* v, void* data_p) {
|
257
|
+
HatWalkData* data = (HatWalkData*)data_p;
|
258
|
+
volatile VALUE r = rb_ary_new();
|
259
|
+
rb_ary_push(r, rb_str_new(key, len));
|
260
|
+
rb_ary_push(r, data->obj_value ? (*v) : LL2NUM(*v));
|
261
|
+
rb_ary_push(data->arr, r);
|
262
|
+
return hattrie_walk_continue;
|
263
|
+
}
|
264
|
+
|
265
|
+
static VALUE hat_walk(VALUE self, VALUE key) {
|
266
|
+
PRE_HAT;
|
267
|
+
size_t len = (size_t)RSTRING_LEN(key);
|
268
|
+
volatile HatWalkData data = {ht->obj_value, rb_ary_new()};
|
269
|
+
|
270
|
+
// to prevent leak by break/next, we have to collect the array first
|
271
|
+
hattrie_walk(p, RSTRING_PTR(key), len, (void*)&data, hat_walk_cb);
|
272
|
+
return data.arr;
|
273
|
+
}
|
274
|
+
|
250
275
|
#define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
|
251
276
|
|
252
277
|
extern "C"
|
@@ -266,4 +291,5 @@ void Init_triez() {
|
|
266
291
|
DEF(hat_class, "has_key?", hat_check, 1);
|
267
292
|
DEF(hat_class, "delete", hat_del, 1);
|
268
293
|
DEF(hat_class, "_internal_search", hat_search, 4);
|
294
|
+
DEF(hat_class, "_internal_walk", hat_walk, 1);
|
269
295
|
}
|
data/lib/triez.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require_relative "../ext/triez"
|
2
2
|
|
3
3
|
class Triez
|
4
|
-
VERSION = '1.0.1'
|
4
|
+
VERSION = '1.0.2'
|
5
5
|
|
6
6
|
private :_internal_set_type
|
7
7
|
private :_internal_search
|
8
|
+
private :_internal_walk
|
8
9
|
|
9
10
|
def initialize opts={}
|
10
11
|
opts = opts.dup
|
@@ -36,6 +37,10 @@ class Triez
|
|
36
37
|
_internal_search '', nil, true, p
|
37
38
|
end
|
38
39
|
|
40
|
+
def walk s, &p
|
41
|
+
_internal_walk(s).each &p
|
42
|
+
end
|
43
|
+
|
39
44
|
def search_with_prefix prefix, opts={}, &p
|
40
45
|
opts = opts.dup
|
41
46
|
|
data/readme.md
CHANGED
@@ -74,6 +74,11 @@ t.search_with_prefix('prefix')
|
|
74
74
|
t.each do |key, value|
|
75
75
|
...
|
76
76
|
end
|
77
|
+
|
78
|
+
# iterate stored keys which are prefices of a given string, from shallow to deep
|
79
|
+
t.walk string do |k, v|
|
80
|
+
...
|
81
|
+
end
|
77
82
|
```
|
78
83
|
|
79
84
|
\* Note: By default, *triez* store signed integers within 64bits, you can use them as weights, counts or database IDs. In case you need to store arbitrary object in a node, use `value_type: :object`:
|
@@ -206,6 +211,13 @@ rake glob_src
|
|
206
211
|
rake
|
207
212
|
```
|
208
213
|
|
214
|
+
To update vendor lib and re-compile:
|
215
|
+
|
216
|
+
``` bash
|
217
|
+
rake glob_src
|
218
|
+
rake
|
219
|
+
```
|
220
|
+
|
209
221
|
## Note
|
210
222
|
|
211
223
|
Although HAT trie uses MurMurHash3 instead of SipHash in Ruby, It is still safe under hashDoS because bucket size is limited.
|
data/test/triez_test.rb
CHANGED
@@ -157,6 +157,34 @@ class TriezTest < Test::Unit::TestCase
|
|
157
157
|
assert_equal %w[a b c ab bc abc].sort, keys.sort
|
158
158
|
end
|
159
159
|
|
160
|
+
def test_walk
|
161
|
+
urls = %w[
|
162
|
+
/users/
|
163
|
+
/users/12/edit
|
164
|
+
/posts
|
165
|
+
]
|
166
|
+
t = Triez.new value_type: :object
|
167
|
+
urls.each_with_index do |url, i|
|
168
|
+
t[url] = i.to_s
|
169
|
+
end
|
170
|
+
|
171
|
+
assert_equal [%w'/users/ 0'], t.walk('/users/12/delete').to_a
|
172
|
+
assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit').to_a
|
173
|
+
assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit/3').to_a
|
174
|
+
|
175
|
+
assert_raise TypeError do
|
176
|
+
t.walk :'/post' do
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
t.walk '' do |k, v|
|
181
|
+
assert_equal [nil, nil], [k, v]
|
182
|
+
end
|
183
|
+
|
184
|
+
# try to trigger rb_gc_mark(), it can stuck if hattrie_iter_next() not called properly
|
185
|
+
100000.times{ 'a' + 'b' }
|
186
|
+
end
|
187
|
+
|
160
188
|
def test_solve_longest_common_substring
|
161
189
|
sentences = %w[
|
162
190
|
万塘路一锅鸡
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: triez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
|
14
14
|
email:
|