triez 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8624d508bf82330cf354730f21341f21c7e0989b
4
- data.tar.gz: cc9e500bf179457d77361acef1c399da936b0375
3
+ metadata.gz: 88f05351da6b711cefe44c0e5060f5c04380e5b6
4
+ data.tar.gz: 41ff89af13fd30d8def221fa396d1097816f39c1
5
5
  SHA512:
6
- metadata.gz: c73e070211b80ceb0c50fb0af6c09bc726b9ffe1a396d4faf104a74bad75d4a2484606444636c9d5c9390a0d1b28f6eca4c656aef422411e2c8c83984ff4894c
7
- data.tar.gz: bdf8cc170ce95ba7370b2d163730ce42b78a2a84de823cfdcfe220f07898aa8a08385819acd01e217ce829fc44ca662845776f4de33297c69ef7a5daf4cf7b4a
6
+ metadata.gz: b8047e6c017a559b9916dfcdf235f39b560e486d190a14963a32c2cf565725f128e26c2b3f07d83273fe79ab566349220cc048a1eec347f35aa6308832887124
7
+ data.tar.gz: 8f64787cd1db06201d18e5ddc235d960a5ab6d08a63c212e13619094c47bc688feb59ababc950ec4e70bc8382ff89a255154a03913eb16ac784690aeb2d6f787
data/changes CHANGED
@@ -1,3 +1,10 @@
1
+ 1.0.2
2
+
3
+ 2013-06-01
4
+ add #walk
5
+ 2013-05-31
6
+ fix rb_gc_mark() getting stuck for value_type: :object
7
+
1
8
  1.0.1
2
9
 
3
10
  2013-05-30
@@ -408,6 +408,60 @@ value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
408
408
  }
409
409
 
410
410
 
411
+ void hattrie_walk (hattrie_t* T, const char* key, size_t len, void* user_data, hattrie_walk_cb cb) {
412
+ unsigned char* k = (unsigned char*)key;
413
+ node_ptr node = T->root;
414
+ size_t i, j;
415
+ ahtable_iter_t* it;
416
+
417
+ /* go down until a bucket is reached */
418
+ for (i = 0; i < len; i++, k++) {
419
+ if (!(*node.flag & NODE_TYPE_TRIE))
420
+ break;
421
+ node = node.t->xs[*k];
422
+ if (*node.flag & NODE_HAS_VAL) {
423
+ if (hattrie_walk_stop == cb(key, i, &node.t->val, user_data))
424
+ return;
425
+ }
426
+ }
427
+ if (i == len)
428
+ return;
429
+
430
+ assert(i);
431
+ if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
432
+ i--;
433
+ k--;
434
+ } else {
435
+ assert(*node.flag & NODE_TYPE_PURE_BUCKET);
436
+ }
437
+
438
+ /* dict order ensured short => long */
439
+ it = ahtable_iter_begin(node.b, true);
440
+ for(; !ahtable_iter_finished(it); ahtable_iter_next(it)) {
441
+ size_t stored_len;
442
+ unsigned char* stored_key = (unsigned char*)ahtable_iter_key(it, &stored_len);
443
+ int matched = 1;
444
+ if (stored_len + i > len) {
445
+ continue;
446
+ }
447
+ for (j = 0; j < stored_len; j++) {
448
+ if (stored_key[j] != k[j]) {
449
+ matched = 0;
450
+ break;
451
+ }
452
+ }
453
+ if (matched) {
454
+ value_t* val = ahtable_iter_val(it);
455
+ if (hattrie_walk_stop == cb(key, i + stored_len, val, user_data)) {
456
+ ahtable_iter_free(it);
457
+ return;
458
+ }
459
+ }
460
+ }
461
+ ahtable_iter_free(it);
462
+ }
463
+
464
+
411
465
  int hattrie_del(hattrie_t* T, const char* key, size_t len)
412
466
  {
413
467
  node_ptr parent = T->root;
@@ -545,7 +599,9 @@ static void hattrie_iter_nextnode(hattrie_iter_t* i)
545
599
  }
546
600
 
547
601
 
548
- // TODO pick a better name
602
+ /** next non-nil-key node
603
+ * TODO pick a better name
604
+ */
549
605
  static void hattrie_iter_step(hattrie_iter_t* i)
550
606
  {
551
607
  while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
@@ -46,11 +46,22 @@ size_t hattrie_size (hattrie_t*);
46
46
  */
47
47
  value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
48
48
 
49
-
50
49
  /** Find a given key in the table, returning a NULL pointer if it does not
51
50
  * exist. */
52
51
  value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
53
52
 
53
+ /** hattrie_walk callback signature */
54
+ typedef int (*hattrie_walk_cb)(const char* key, size_t len, value_t* val, void* user_data);
55
+
56
+ /** hattrie_walk callback return values: control whether the walk stops or continues */
57
+ #define hattrie_walk_stop 0
58
+ #define hattrie_walk_continue 1
59
+
60
+ /** Find stored keys which are prefixes of key, and invoke the callback for every found key and val.
61
+ * The invocation order is: short key to long key.
62
+ */
63
+ void hattrie_walk (hattrie_t*, const char* key, size_t len, void* user_data, hattrie_walk_cb);
64
+
54
65
  /** Delete a given key from trie. Returns 0 if successful or -1 if not found.
55
66
  */
56
67
  int hattrie_del(hattrie_t* T, const char* key, size_t len);
@@ -64,6 +75,8 @@ void hattrie_iter_free (hattrie_iter_t*);
64
75
  const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
65
76
  value_t* hattrie_iter_val (hattrie_iter_t*);
66
77
 
78
+ /** Note: for a prefixed search, hattrie_iter_key() returns the suffix instead of the whole key
79
+ */
67
80
  hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const char* prefix, size_t prefix_len);
68
81
 
69
82
  #ifdef __cplusplus
@@ -71,5 +84,3 @@ hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const ch
71
84
  #endif
72
85
 
73
86
  #endif
74
-
75
-
@@ -62,6 +62,7 @@ static void hat_mark(void* p_ht) {
62
62
  if (!IMMEDIATE_P(*v)) {
63
63
  rb_gc_mark(*v);
64
64
  }
65
+ hattrie_iter_next(it);
65
66
  }
66
67
  hattrie_iter_free(it);
67
68
  }
@@ -247,6 +248,30 @@ static VALUE hat_search(VALUE self, VALUE key, VALUE vlimit, VALUE vsort, VALUE
247
248
  return self;
248
249
  }
249
250
 
251
+ typedef struct {
252
+ bool obj_value;
253
+ VALUE arr;
254
+ } HatWalkData;
255
+
256
+ static int hat_walk_cb(const char* key, size_t len, value_t* v, void* data_p) {
257
+ HatWalkData* data = (HatWalkData*)data_p;
258
+ volatile VALUE r = rb_ary_new();
259
+ rb_ary_push(r, rb_str_new(key, len));
260
+ rb_ary_push(r, data->obj_value ? (*v) : LL2NUM(*v));
261
+ rb_ary_push(data->arr, r);
262
+ return hattrie_walk_continue;
263
+ }
264
+
265
+ static VALUE hat_walk(VALUE self, VALUE key) {
266
+ PRE_HAT;
267
+ size_t len = (size_t)RSTRING_LEN(key);
268
+ volatile HatWalkData data = {ht->obj_value, rb_ary_new()};
269
+
270
+ // to prevent leak by break/next, we have to collect the array first
271
+ hattrie_walk(p, RSTRING_PTR(key), len, (void*)&data, hat_walk_cb);
272
+ return data.arr;
273
+ }
274
+
250
275
  #define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
251
276
 
252
277
  extern "C"
@@ -266,4 +291,5 @@ void Init_triez() {
266
291
  DEF(hat_class, "has_key?", hat_check, 1);
267
292
  DEF(hat_class, "delete", hat_del, 1);
268
293
  DEF(hat_class, "_internal_search", hat_search, 4);
294
+ DEF(hat_class, "_internal_walk", hat_walk, 1);
269
295
  }
@@ -1,10 +1,11 @@
1
1
  require_relative "../ext/triez"
2
2
 
3
3
  class Triez
4
- VERSION = '1.0.1'
4
+ VERSION = '1.0.2'
5
5
 
6
6
  private :_internal_set_type
7
7
  private :_internal_search
8
+ private :_internal_walk
8
9
 
9
10
  def initialize opts={}
10
11
  opts = opts.dup
@@ -36,6 +37,10 @@ class Triez
36
37
  _internal_search '', nil, true, p
37
38
  end
38
39
 
40
+ def walk s, &p
41
+ _internal_walk(s).each &p
42
+ end
43
+
39
44
  def search_with_prefix prefix, opts={}, &p
40
45
  opts = opts.dup
41
46
 
data/readme.md CHANGED
@@ -74,6 +74,11 @@ t.search_with_prefix('prefix')
74
74
  t.each do |key, value|
75
75
  ...
76
76
  end
77
+
78
+ # iterate stored keys which are prefixes of a given string, from shallow to deep
79
+ t.walk string do |k, v|
80
+ ...
81
+ end
77
82
  ```
78
83
 
79
84
  \* Note: By default, *triez* stores signed integers within 64 bits; you can use them as weights, counts or database IDs. In case you need to store arbitrary objects in a node, use `value_type: :object`:
@@ -206,6 +211,13 @@ rake glob_src
206
211
  rake
207
212
  ```
208
213
 
214
+ To update vendor lib and re-compile:
215
+
216
+ ``` bash
217
+ rake glob_src
218
+ rake
219
+ ```
220
+
209
221
  ## Note
210
222
 
211
223
  Although HAT trie uses MurmurHash3 instead of SipHash as in Ruby, it is still safe under hashDoS because bucket size is limited.
@@ -157,6 +157,34 @@ class TriezTest < Test::Unit::TestCase
157
157
  assert_equal %w[a b c ab bc abc].sort, keys.sort
158
158
  end
159
159
 
160
+ def test_walk
161
+ urls = %w[
162
+ /users/
163
+ /users/12/edit
164
+ /posts
165
+ ]
166
+ t = Triez.new value_type: :object
167
+ urls.each_with_index do |url, i|
168
+ t[url] = i.to_s
169
+ end
170
+
171
+ assert_equal [%w'/users/ 0'], t.walk('/users/12/delete').to_a
172
+ assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit').to_a
173
+ assert_equal [%w'/users/ 0', %w'/users/12/edit 1'], t.walk('/users/12/edit/3').to_a
174
+
175
+ assert_raise TypeError do
176
+ t.walk :'/post' do
177
+ end
178
+ end
179
+
180
+ t.walk '' do |k, v|
181
+ assert_equal [nil, nil], [k, v]
182
+ end
183
+
184
+ # try to trigger rb_gc_mark(), it can stuck if hattrie_iter_next() not called properly
185
+ 100000.times{ 'a' + 'b' }
186
+ end
187
+
160
188
  def test_solve_longest_common_substring
161
189
  sentences = %w[
162
190
  万塘路一锅鸡
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: triez
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-29 00:00:00.000000000 Z
11
+ date: 2013-05-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
14
14
  email: