triez 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8624d508bf82330cf354730f21341f21c7e0989b
4
- data.tar.gz: cc9e500bf179457d77361acef1c399da936b0375
3
+ metadata.gz: 88f05351da6b711cefe44c0e5060f5c04380e5b6
4
+ data.tar.gz: 41ff89af13fd30d8def221fa396d1097816f39c1
5
5
  SHA512:
6
- metadata.gz: c73e070211b80ceb0c50fb0af6c09bc726b9ffe1a396d4faf104a74bad75d4a2484606444636c9d5c9390a0d1b28f6eca4c656aef422411e2c8c83984ff4894c
7
- data.tar.gz: bdf8cc170ce95ba7370b2d163730ce42b78a2a84de823cfdcfe220f07898aa8a08385819acd01e217ce829fc44ca662845776f4de33297c69ef7a5daf4cf7b4a
6
+ metadata.gz: b8047e6c017a559b9916dfcdf235f39b560e486d190a14963a32c2cf565725f128e26c2b3f07d83273fe79ab566349220cc048a1eec347f35aa6308832887124
7
+ data.tar.gz: 8f64787cd1db06201d18e5ddc235d960a5ab6d08a63c212e13619094c47bc688feb59ababc950ec4e70bc8382ff89a255154a03913eb16ac784690aeb2d6f787
data/changes CHANGED
@@ -1,3 +1,10 @@
1
+ 1.0.2
2
+
3
+ 2013-06-01
4
+ add #walk
5
+ 2013-05-31
6
+ fix rb_gc_mark() stuck for value_type: :object
7
+
1
8
  1.0.1
2
9
 
3
10
  2013-05-30
@@ -408,6 +408,60 @@ value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
408
408
  }
409
409
 
410
410
 
411
+ void hattrie_walk (hattrie_t* T, const char* key, size_t len, void* user_data, hattrie_walk_cb cb) {
412
+ unsigned char* k = (unsigned char*)key;
413
+ node_ptr node = T->root;
414
+ size_t i, j;
415
+ ahtable_iter_t* it;
416
+
417
+ /* go down until a bucket is reached */
418
+ for (i = 0; i < len; i++, k++) {
419
+ if (!(*node.flag & NODE_TYPE_TRIE))
420
+ break;
421
+ node = node.t->xs[*k];
422
+ if (*node.flag & NODE_HAS_VAL) {
423
+ if (hattrie_walk_stop == cb(key, i, &node.t->val, user_data))
424
+ return;
425
+ }
426
+ }
427
+ if (i == len)
428
+ return;
429
+
430
+ assert(i);
431
+ if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
432
+ i--;
433
+ k--;
434
+ } else {
435
+ assert(*node.flag & NODE_TYPE_PURE_BUCKET);
436
+ }
437
+
438
+ /* dict order ensured short => long */
439
+ it = ahtable_iter_begin(node.b, true);
440
+ for(; !ahtable_iter_finished(it); ahtable_iter_next(it)) {
441
+ size_t stored_len;
442
+ unsigned char* stored_key = (unsigned char*)ahtable_iter_key(it, &stored_len);
443
+ int matched = 1;
444
+ if (stored_len + i > len) {
445
+ continue;
446
+ }
447
+ for (j = 0; j < stored_len; j++) {
448
+ if (stored_key[j] != k[j]) {
449
+ matched = 0;
450
+ break;
451
+ }
452
+ }
453
+ if (matched) {
454
+ value_t* val = ahtable_iter_val(it);
455
+ if (hattrie_walk_stop == cb(key, i + stored_len, val, user_data)) {
456
+ ahtable_iter_free(it);
457
+ return;
458
+ }
459
+ }
460
+ }
461
+ ahtable_iter_free(it);
462
+ }
463
+
464
+
411
465
  int hattrie_del(hattrie_t* T, const char* key, size_t len)
412
466
  {
413
467
  node_ptr parent = T->root;
@@ -545,7 +599,9 @@ static void hattrie_iter_nextnode(hattrie_iter_t* i)
545
599
  }
546
600
 
547
601
 
548
- // TODO pick a better name
602
+ /** next non-nil-key node
603
+ * TODO pick a better name
604
+ */
549
605
  static void hattrie_iter_step(hattrie_iter_t* i)
550
606
  {
551
607
  while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
@@ -46,11 +46,22 @@ size_t hattrie_size (hattrie_t*);
46
46
  */
47
47
  value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
48
48
 
49
-
50
49
  /** Find a given key in the table, returning a NULL pointer if it does not
51
50
  * exist. */
52
51
  value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
53
52
 
53
+ /** hattrie_walk callback signature */
54
+ typedef int (*hattrie_walk_cb)(const char* key, size_t len, value_t* val, void* user_data);
55
+
56
+ /** hattrie_walk callback return values; they control whether the walk stops or continues */
57
+ #define hattrie_walk_stop 0
58
+ #define hattrie_walk_continue 1
59
+
60
+ /** Find stored keys which are prefixes of key, and invoke the callback for every found key and val.
61
+ * The invocation order is: short key to long key.
62
+ */
63
+ void hattrie_walk (hattrie_t*, const char* key, size_t len, void* user_data, hattrie_walk_cb);
64
+
54
65
  /** Delete a given key from trie. Returns 0 if successful or -1 if not found.
55
66
  */
56
67
  int hattrie_del(hattrie_t* T, const char* key, size_t len);
@@ -64,6 +75,8 @@ void hattrie_iter_free (hattrie_iter_t*);
64
75
  const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
65
76
  value_t* hattrie_iter_val (hattrie_iter_t*);
66
77
 
78
+ /** Note: for a prefixed search, hattrie_iter_key() returns the suffix instead of the whole key
79
+ */
67
80
  hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const char* prefix, size_t prefix_len);
68
81
 
69
82
  #ifdef __cplusplus
@@ -71,5 +84,3 @@ hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const ch
71
84
  #endif
72
85
 
73
86
  #endif
74
-
75
-
@@ -62,6 +62,7 @@ static void hat_mark(void* p_ht) {
62
62
  if (!IMMEDIATE_P(*v)) {
63
63
  rb_gc_mark(*v);
64
64
  }
65
+ hattrie_iter_next(it);
65
66
  }
66
67
  hattrie_iter_free(it);
67
68
  }
@@ -247,6 +248,30 @@ static VALUE hat_search(VALUE self, VALUE key, VALUE vlimit, VALUE vsort, VALUE
247
248
  return self;
248
249
  }
249
250
 
251
+ typedef struct {
252
+ bool obj_value;
253
+ VALUE arr;
254
+ } HatWalkData;
255
+
256
+ static int hat_walk_cb(const char* key, size_t len, value_t* v, void* data_p) {
257
+ HatWalkData* data = (HatWalkData*)data_p;
258
+ volatile VALUE r = rb_ary_new();
259
+ rb_ary_push(r, rb_str_new(key, len));
260
+ rb_ary_push(r, data->obj_value ? (*v) : LL2NUM(*v));
261
+ rb_ary_push(data->arr, r);
262
+ return hattrie_walk_continue;
263
+ }
264
+
265
+ static VALUE hat_walk(VALUE self, VALUE key) {
266
+ PRE_HAT;
267
+ size_t len = (size_t)RSTRING_LEN(key);
268
+ volatile HatWalkData data = {ht->obj_value, rb_ary_new()};
269
+
270
+ // to prevent leak by break/next, we have to collect the array first
271
+ hattrie_walk(p, RSTRING_PTR(key), len, (void*)&data, hat_walk_cb);
272
+ return data.arr;
273
+ }
274
+
250
275
  #define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
251
276
 
252
277
  extern "C"
@@ -266,4 +291,5 @@ void Init_triez() {
266
291
  DEF(hat_class, "has_key?", hat_check, 1);
267
292
  DEF(hat_class, "delete", hat_del, 1);
268
293
  DEF(hat_class, "_internal_search", hat_search, 4);
294
+ DEF(hat_class, "_internal_walk", hat_walk, 1);
269
295
  }
@@ -1,10 +1,11 @@
1
1
  require_relative "../ext/triez"
2
2
 
3
3
  class Triez
4
- VERSION = '1.0.1'
4
+ VERSION = '1.0.2'
5
5
 
6
6
  private :_internal_set_type
7
7
  private :_internal_search
8
+ private :_internal_walk
8
9
 
9
10
  def initialize opts={}
10
11
  opts = opts.dup
@@ -36,6 +37,10 @@ class Triez
36
37
  _internal_search '', nil, true, p
37
38
  end
38
39
 
40
+ def walk s, &p
41
+ _internal_walk(s).each &p
42
+ end
43
+
39
44
  def search_with_prefix prefix, opts={}, &p
40
45
  opts = opts.dup
41
46
 
data/readme.md CHANGED
@@ -74,6 +74,11 @@ t.search_with_prefix('prefix')
74
74
  t.each do |key, value|
75
75
  ...
76
76
  end
77
+
78
+ # iterate stored keys which are prefixes of a given string, from shallow to deep
79
+ t.walk string do |k, v|
80
+ ...
81
+ end
77
82
  ```
78
83
 
79
84
  \* Note: By default, *triez* stores signed integers within 64 bits; you can use them as weights, counts or database IDs. In case you need to store an arbitrary object in a node, use `value_type: :object`:
@@ -206,6 +211,13 @@ rake glob_src
206
211
  rake
207
212
  ```
208
213
 
214
+ To update vendor lib and re-compile:
215
+
216
+ ``` bash
217
+ rake glob_src
218
+ rake
219
+ ```
220
+
209
221
  ## Note
210
222
 
211
223
  Although HAT trie uses MurMurHash3 instead of SipHash in Ruby, it is still safe under hashDoS because bucket size is limited.
@@ -157,6 +157,34 @@ class TriezTest < Test::Unit::TestCase
157
157
  assert_equal %w[a b c ab bc abc].sort, keys.sort
158
158
  end
159
159
 
160
+ def test_walk
161
+ urls = %w[
162
+ /users/
163
+ /users/12/edit
164
+ /posts
165
+ ]
166
+ t = Triez.new value_type: :object
167
+ urls.each_with_index do |url, i|
168
+ t[url] = i.to_s
169
+ end
170
+
171
+ assert_equal [%w'/users/ 0'], t.walk('/users/12/delete').to_a
172
+ assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit').to_a
173
+ assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit/3').to_a
174
+
175
+ assert_raise TypeError do
176
+ t.walk :'/post' do
177
+ end
178
+ end
179
+
180
+ t.walk '' do |k, v|
181
+ assert_equal [nil, nil], [k, v]
182
+ end
183
+
184
+ # try to trigger rb_gc_mark(), it can get stuck if hattrie_iter_next() is not called properly
185
+ 100000.times{ 'a' + 'b' }
186
+ end
187
+
160
188
  def test_solve_longest_common_substring
161
189
  sentences = %w[
162
190
  万塘路一锅鸡
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: triez
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-29 00:00:00.000000000 Z
11
+ date: 2013-05-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
14
14
  email: