triez 1.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/changes +4 -0
- data/ext/extconf.rb +1 -0
- data/ext/hat-trie/ahtable.c +1 -1
- data/ext/triez.cc +5 -0
- data/lib/triez.rb +1 -1
- data/readme.md +15 -11
- metadata +8 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8624d508bf82330cf354730f21341f21c7e0989b
|
4
|
+
data.tar.gz: cc9e500bf179457d77361acef1c399da936b0375
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c73e070211b80ceb0c50fb0af6c09bc726b9ffe1a396d4faf104a74bad75d4a2484606444636c9d5c9390a0d1b28f6eca4c656aef422411e2c8c83984ff4894c
|
7
|
+
data.tar.gz: bdf8cc170ce95ba7370b2d163730ce42b78a2a84de823cfdcfe220f07898aa8a08385819acd01e217ce829fc44ca662845776f4de33297c69ef7a5daf4cf7b4a
|
data/changes
ADDED
data/ext/extconf.rb
CHANGED
data/ext/hat-trie/ahtable.c
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */
|
17
|
-
const
|
17
|
+
const size_t ahtable_initial_size = 4096;
|
18
18
|
static const uint16_t LONG_KEYLEN_MASK = 0x7fff;
|
19
19
|
|
20
20
|
static size_t keylen(slot_t s) {
|
data/ext/triez.cc
CHANGED
data/lib/triez.rb
CHANGED
data/readme.md
CHANGED
@@ -1,14 +1,18 @@
|
|
1
|
-
##
|
1
|
+
## Triez
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/luikore/triez.png)](https://travis-ci.org/luikore/triez)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/luikore/triez.png)](https://codeclimate.com/github/luikore/triez)
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/triez.png)](http://badge.fury.io/rb/triez)
|
2
6
|
|
3
7
|
Pragmatic [tries](http://en.wikipedia.org/wiki/Trie) for Ruby, spelled in lolcat.
|
4
8
|
|
5
9
|
It is fast, memory efficient, unicode aware, prefix searchable, and enhanced with prefix/suffix/substring keys.
|
6
10
|
|
7
|
-
The backend of *triez* is a cache oblivious data structure: the [HAT trie](https://github.com/dcjones/hat-trie) (In fact
|
11
|
+
The backend of *triez* is a cache oblivious data structure: the [HAT trie](https://github.com/dcjones/hat-trie) (In fact it is a [modified version](https://github.com/luikore/hat-trie) for improved functionality). HAT trie is generally faster and more memory efficient than [double array](http://linux.thai.net/~thep/datrie/datrie.html) or [burst trie](http://ww2.cs.mu.oz.au/~jz/fulltext/acmtois02.pdf).
|
8
12
|
|
9
13
|
## Requirement
|
10
14
|
|
11
|
-
-
|
15
|
+
- CRuby 1.9 / 2.0
|
12
16
|
- `g++` or `clang`
|
13
17
|
|
14
18
|
## Install
|
@@ -72,10 +76,10 @@ t.each do |key, value|
|
|
72
76
|
end
|
73
77
|
```
|
74
78
|
|
75
|
-
\* Note: By default, *triez* stores signed 64-bit integers, which you can use as weights, counts or database IDs. If you need to store arbitrary objects in a node, use `
|
79
|
+
\* Note: By default, *triez* stores signed 64-bit integers, which you can use as weights, counts or database IDs. If you need to store arbitrary objects in a node, use `value_type: :object`:
|
76
80
|
|
77
81
|
``` ruby
|
78
|
-
t = Triez.new
|
82
|
+
t = Triez.new value_type: :object
|
79
83
|
t['Tom'] = {name: 'Tom', sex: 'Female'}
|
80
84
|
t['Tree'] = [:leaf, :trunk, :root]
|
81
85
|
```
|
@@ -91,8 +95,8 @@ t = Triez.new
|
|
91
95
|
words.each do |word|
|
92
96
|
t[word] = 1
|
93
97
|
end
|
94
|
-
t.search_with_prefix 're' do |
|
95
|
-
puts "candidate: #{
|
98
|
+
t.search_with_prefix 're' do |suffix|
|
99
|
+
puts "candidate: re#{suffix}"
|
96
100
|
end
|
97
101
|
```
|
98
102
|
|
@@ -116,9 +120,9 @@ sequences = {
|
|
116
120
|
}
|
117
121
|
t = Triez.new
|
118
122
|
|
119
|
-
# build suffix
|
123
|
+
# build suffix tree
|
120
124
|
sequences.each do |seq, id|
|
121
|
-
t.change_all
|
125
|
+
t.change_all(:suffix, seq){id}
|
122
126
|
end
|
123
127
|
|
124
128
|
t.search_with_prefix 'CGGT' do |_, id|
|
@@ -126,7 +130,7 @@ t.search_with_prefix 'CGGT' do |_, id|
|
|
126
130
|
end
|
127
131
|
```
|
128
132
|
|
129
|
-
The search time is linear in the length of the substring.
|
133
|
+
The search time is linear in the length of the substring. You may also be interested in the example of a simple [full text search server](https://github.com/luikore/triez/tree/master/examples/full-text-search-server) built with *triez*.
|
130
134
|
|
131
135
|
---
|
132
136
|
|
@@ -185,7 +189,7 @@ fast_trie/double array* | 155.6 M | 130.7 s | 0.4359 s
|
|
185
189
|
triez/HAT trie | 121.7 M | 3.872 s | 0.3472 s
|
186
190
|
```
|
187
191
|
|
188
|
-
Note: `
|
192
|
+
Note: `fast_trie/double array` -> https://github.com/tyler/trie
|
189
193
|
|
190
194
|
## Caveats
|
191
195
|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: triez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Zete Lui
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-05-29 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
|
15
14
|
email:
|
@@ -19,6 +18,7 @@ extensions:
|
|
19
18
|
extra_rdoc_files: []
|
20
19
|
files:
|
21
20
|
- copying
|
21
|
+
- changes
|
22
22
|
- readme.md
|
23
23
|
- lib/triez.rb
|
24
24
|
- test/triez_test.rb
|
@@ -37,27 +37,26 @@ files:
|
|
37
37
|
- ext/hat-trie/pstdint.h
|
38
38
|
homepage: https://github.com/luikore/triez
|
39
39
|
licenses: []
|
40
|
+
metadata: {}
|
40
41
|
post_install_message:
|
41
42
|
rdoc_options: []
|
42
43
|
require_paths:
|
43
44
|
- lib
|
44
45
|
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
46
|
requirements:
|
47
|
-
- -
|
47
|
+
- - '>='
|
48
48
|
- !ruby/object:Gem::Version
|
49
49
|
version: 1.9.2
|
50
50
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
51
|
requirements:
|
53
|
-
- -
|
52
|
+
- - '>='
|
54
53
|
- !ruby/object:Gem::Version
|
55
54
|
version: '0'
|
56
55
|
requirements: []
|
57
56
|
rubyforge_project:
|
58
|
-
rubygems_version:
|
57
|
+
rubygems_version: 2.0.3
|
59
58
|
signing_key:
|
60
|
-
specification_version:
|
59
|
+
specification_version: 4
|
61
60
|
summary: fast, efficient, unicode aware HAT trie with prefix / suffix support
|
62
61
|
test_files: []
|
63
62
|
has_rdoc: false
|