triez 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/changes +7 -0
- data/ext/hat-trie/hat-trie.c +57 -1
- data/ext/hat-trie/hat-trie.h +14 -3
- data/ext/triez.cc +26 -0
- data/lib/triez.rb +6 -1
- data/readme.md +12 -0
- data/test/triez_test.rb +28 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88f05351da6b711cefe44c0e5060f5c04380e5b6
|
4
|
+
data.tar.gz: 41ff89af13fd30d8def221fa396d1097816f39c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8047e6c017a559b9916dfcdf235f39b560e486d190a14963a32c2cf565725f128e26c2b3f07d83273fe79ab566349220cc048a1eec347f35aa6308832887124
|
7
|
+
data.tar.gz: 8f64787cd1db06201d18e5ddc235d960a5ab6d08a63c212e13619094c47bc688feb59ababc950ec4e70bc8382ff89a255154a03913eb16ac784690aeb2d6f787
|
data/changes
CHANGED
data/ext/hat-trie/hat-trie.c
CHANGED
@@ -408,6 +408,60 @@ value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
|
|
408
408
|
}
|
409
409
|
|
410
410
|
|
411
|
+
/** Invoke cb once for every stored key that is a prefix of key[0..len).
 *
 * T         trie to search
 * key, len  query string and its length in bytes
 * user_data opaque pointer forwarded unchanged to every cb invocation
 * cb        called as cb(key, prefix_len, val, user_data); key is always the
 *           query pointer and prefix_len the length of the stored key that
 *           matched. Returning hattrie_walk_stop ends the walk early.
 *
 * Values held on trie nodes are reported first, in order of increasing depth,
 * then the matching entries of the bucket the descent ends in (if any).
 * An empty query (len == 0) reports nothing.
 */
void hattrie_walk (hattrie_t* T, const char* key, size_t len, void* user_data, hattrie_walk_cb cb) {
    const unsigned char* k = (const unsigned char*)key;
    node_ptr node = T->root;
    size_t i, j;
    ahtable_iter_t* it;

    /* go down until a bucket is reached or the key is exhausted */
    for (i = 0; i < len && (*node.flag & NODE_TYPE_TRIE); i++, k++) {
        node = node.t->xs[*k];
        if (*node.flag & NODE_HAS_VAL) {
            /* node was reached by consuming key[0..i], so the matched
             * prefix is i + 1 bytes long (i is only incremented afterwards) */
            if (hattrie_walk_stop == cb(key, i + 1, &node.t->val, user_data))
                return;
        }
    }

    /* the loop only stops on a trie node when the key is exhausted; every
     * value on the path has been reported and there is no bucket to scan */
    if (*node.flag & NODE_TYPE_TRIE)
        return;

    /* the root is tested as a trie node before the first step, so reaching a
     * bucket means at least one byte was consumed */
    assert(i);
    if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
        /* step back one byte so the comparison below starts at the byte that
         * selected this bucket */
        i--;
        k--;
    } else {
        assert(*node.flag & NODE_TYPE_PURE_BUCKET);
    }

    /* sorted iteration (second argument true); per the original note this
     * visits short keys before long ones */
    it = ahtable_iter_begin(node.b, true);
    for (; !ahtable_iter_finished(it); ahtable_iter_next(it)) {
        size_t stored_len;
        const unsigned char* stored_key = (const unsigned char*)ahtable_iter_key(it, &stored_len);
        int matched = 1;

        /* i <= len here, so len - i cannot wrap; skip keys longer than the
         * unconsumed remainder of the query */
        if (stored_len > len - i) {
            continue;
        }
        for (j = 0; j < stored_len; j++) {
            if (stored_key[j] != k[j]) {
                matched = 0;
                break;
            }
        }
        if (matched) {
            value_t* val = ahtable_iter_val(it);
            if (hattrie_walk_stop == cb(key, i + stored_len, val, user_data)) {
                break;
            }
        }
    }
    ahtable_iter_free(it);
}
|
463
|
+
|
464
|
+
|
411
465
|
int hattrie_del(hattrie_t* T, const char* key, size_t len)
|
412
466
|
{
|
413
467
|
node_ptr parent = T->root;
|
@@ -545,7 +599,9 @@ static void hattrie_iter_nextnode(hattrie_iter_t* i)
|
|
545
599
|
}
|
546
600
|
|
547
601
|
|
548
|
-
|
602
|
+
/** next non-nil-key node
|
603
|
+
* TODO pick a better name
|
604
|
+
*/
|
549
605
|
static void hattrie_iter_step(hattrie_iter_t* i)
|
550
606
|
{
|
551
607
|
while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
|
data/ext/hat-trie/hat-trie.h
CHANGED
@@ -46,11 +46,22 @@ size_t hattrie_size (hattrie_t*);
|
|
46
46
|
*/
|
47
47
|
value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
|
48
48
|
|
49
|
-
|
50
49
|
/** Find a given key in the table, returning a NULL pointer if it does not
|
51
50
|
* exist. */
|
52
51
|
value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
|
53
52
|
|
53
|
+
/** hattrie_walk callback signature */
|
54
|
+
typedef int (*hattrie_walk_cb)(const char* key, size_t len, value_t* val, void* user_data);
|
55
|
+
|
56
|
+
/** hattrie_walk callback return values: they control whether the walk stops or continues */
|
57
|
+
#define hattrie_walk_stop 0
|
58
|
+
#define hattrie_walk_continue 1
|
59
|
+
|
60
|
+
/** Find stored keys which are prefixes of key, and invoke the callback for every key and value found.
|
61
|
+
* The invocation order is: short key to long key.
|
62
|
+
*/
|
63
|
+
void hattrie_walk (hattrie_t*, const char* key, size_t len, void* user_data, hattrie_walk_cb);
|
64
|
+
|
54
65
|
/** Delete a given key from trie. Returns 0 if successful or -1 if not found.
|
55
66
|
*/
|
56
67
|
int hattrie_del(hattrie_t* T, const char* key, size_t len);
|
@@ -64,6 +75,8 @@ void hattrie_iter_free (hattrie_iter_t*);
|
|
64
75
|
const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
|
65
76
|
value_t* hattrie_iter_val (hattrie_iter_t*);
|
66
77
|
|
78
|
+
/** Note: for a prefixed search, hattrie_iter_key() returns the suffix instead of the whole key
|
79
|
+
*/
|
67
80
|
hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const char* prefix, size_t prefix_len);
|
68
81
|
|
69
82
|
#ifdef __cplusplus
|
@@ -71,5 +84,3 @@ hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const ch
|
|
71
84
|
#endif
|
72
85
|
|
73
86
|
#endif
|
74
|
-
|
75
|
-
|
data/ext/triez.cc
CHANGED
@@ -62,6 +62,7 @@ static void hat_mark(void* p_ht) {
|
|
62
62
|
if (!IMMEDIATE_P(*v)) {
|
63
63
|
rb_gc_mark(*v);
|
64
64
|
}
|
65
|
+
hattrie_iter_next(it);
|
65
66
|
}
|
66
67
|
hattrie_iter_free(it);
|
67
68
|
}
|
@@ -247,6 +248,30 @@ static VALUE hat_search(VALUE self, VALUE key, VALUE vlimit, VALUE vsort, VALUE
|
|
247
248
|
return self;
|
248
249
|
}
|
249
250
|
|
251
|
+
typedef struct {
|
252
|
+
bool obj_value;
|
253
|
+
VALUE arr;
|
254
|
+
} HatWalkData;
|
255
|
+
|
256
|
+
static int hat_walk_cb(const char* key, size_t len, value_t* v, void* data_p) {
|
257
|
+
HatWalkData* data = (HatWalkData*)data_p;
|
258
|
+
volatile VALUE r = rb_ary_new();
|
259
|
+
rb_ary_push(r, rb_str_new(key, len));
|
260
|
+
rb_ary_push(r, data->obj_value ? (*v) : LL2NUM(*v));
|
261
|
+
rb_ary_push(data->arr, r);
|
262
|
+
return hattrie_walk_continue;
|
263
|
+
}
|
264
|
+
|
265
|
+
// Ruby method `_internal_walk(key)`: returns an array of [stored_key, value]
// pairs, one per hat_walk_cb invocation made by hattrie_walk for `key`.
static VALUE hat_walk(VALUE self, VALUE key) {
    PRE_HAT;
    const size_t len = (size_t)RSTRING_LEN(key);

    // Gather every pair into an array before any Ruby block sees them, so a
    // break/next in the caller's block cannot cause a leak (original note).
    volatile HatWalkData ctx = {ht->obj_value, rb_ary_new()};
    hattrie_walk(p, RSTRING_PTR(key), len, (void*)&ctx, hat_walk_cb);

    return ctx.arr;
}
|
274
|
+
|
250
275
|
#define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
|
251
276
|
|
252
277
|
extern "C"
|
@@ -266,4 +291,5 @@ void Init_triez() {
|
|
266
291
|
DEF(hat_class, "has_key?", hat_check, 1);
|
267
292
|
DEF(hat_class, "delete", hat_del, 1);
|
268
293
|
DEF(hat_class, "_internal_search", hat_search, 4);
|
294
|
+
DEF(hat_class, "_internal_walk", hat_walk, 1);
|
269
295
|
}
|
data/lib/triez.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require_relative "../ext/triez"
|
2
2
|
|
3
3
|
class Triez
|
4
|
-
VERSION = '1.0.1'
|
4
|
+
VERSION = '1.0.2'
|
5
5
|
|
6
6
|
private :_internal_set_type
|
7
7
|
private :_internal_search
|
8
|
+
private :_internal_walk
|
8
9
|
|
9
10
|
def initialize opts={}
|
10
11
|
opts = opts.dup
|
@@ -36,6 +37,10 @@ class Triez
|
|
36
37
|
_internal_search '', nil, true, p
|
37
38
|
end
|
38
39
|
|
40
|
+
# Passes each [key, value] pair returned by _internal_walk(s) to the block.
# The pairs are materialised first and then iterated with Array#each.
def walk s, &p
  pairs = _internal_walk s
  pairs.each(&p)
end
|
43
|
+
|
39
44
|
def search_with_prefix prefix, opts={}, &p
|
40
45
|
opts = opts.dup
|
41
46
|
|
data/readme.md
CHANGED
@@ -74,6 +74,11 @@ t.search_with_prefix('prefix')
|
|
74
74
|
t.each do |key, value|
|
75
75
|
...
|
76
76
|
end
|
77
|
+
|
78
|
+
# iterate stored keys which are prefixes of a given string, from shallow to deep
|
79
|
+
t.walk string do |k, v|
|
80
|
+
...
|
81
|
+
end
|
77
82
|
```
|
78
83
|
|
79
84
|
\* Note: By default, *triez* stores signed 64-bit integers; you can use them as weights, counts or database IDs. If you need to store arbitrary objects in a node, use `value_type: :object`:
|
@@ -206,6 +211,13 @@ rake glob_src
|
|
206
211
|
rake
|
207
212
|
```
|
208
213
|
|
214
|
+
To update vendor lib and re-compile:
|
215
|
+
|
216
|
+
``` bash
|
217
|
+
rake glob_src
|
218
|
+
rake
|
219
|
+
```
|
220
|
+
|
209
221
|
## Note
|
210
222
|
|
211
223
|
Although HAT trie uses MurmurHash3 instead of SipHash in Ruby, it is still safe under hashDoS because bucket size is limited.
|
data/test/triez_test.rb
CHANGED
@@ -157,6 +157,34 @@ class TriezTest < Test::Unit::TestCase
|
|
157
157
|
assert_equal %w[a b c ab bc abc].sort, keys.sort
|
158
158
|
end
|
159
159
|
|
160
|
+
# Triez#walk yields the stored keys that are prefixes of the query, paired
# with their values, and rejects non-String queries.
def test_walk
  urls = %w[
    /users/
    /users/12/edit
    /posts
  ]
  t = Triez.new value_type: :object
  urls.each_with_index do |url, i|
    t[url] = i.to_s
  end

  assert_equal [%w'/users/ 0'], t.walk('/users/12/delete').to_a
  assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit').to_a
  assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit/3').to_a

  assert_raise TypeError do
    t.walk :'/post' do
    end
  end

  # an empty query must not yield anything; record the yields and assert on
  # them outside the block so the check cannot pass by never running
  yielded = []
  t.walk '' do |k, v|
    yielded << [k, v]
  end
  assert_equal [], yielded

  # try to trigger rb_gc_mark(), it can get stuck if hattrie_iter_next() is not called properly
  100000.times{ 'a' + 'b' }
end
|
187
|
+
|
160
188
|
def test_solve_longest_common_substring
|
161
189
|
sentences = %w[
|
162
190
|
万塘路一锅鸡
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: triez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zete Lui
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-05-
|
11
|
+
date: 2013-05-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
|
14
14
|
email:
|