triez 1.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/changes +4 -0
- data/ext/extconf.rb +1 -0
- data/ext/hat-trie/ahtable.c +1 -1
- data/ext/triez.cc +5 -0
- data/lib/triez.rb +1 -1
- data/readme.md +15 -11
- metadata +8 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8624d508bf82330cf354730f21341f21c7e0989b
|
4
|
+
data.tar.gz: cc9e500bf179457d77361acef1c399da936b0375
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c73e070211b80ceb0c50fb0af6c09bc726b9ffe1a396d4faf104a74bad75d4a2484606444636c9d5c9390a0d1b28f6eca4c656aef422411e2c8c83984ff4894c
|
7
|
+
data.tar.gz: bdf8cc170ce95ba7370b2d163730ce42b78a2a84de823cfdcfe220f07898aa8a08385819acd01e217ce829fc44ca662845776f4de33297c69ef7a5daf4cf7b4a
|
data/changes
ADDED
data/ext/extconf.rb
CHANGED
data/ext/hat-trie/ahtable.c
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */
|
17
|
-
const
|
17
|
+
const size_t ahtable_initial_size = 4096;
|
18
18
|
static const uint16_t LONG_KEYLEN_MASK = 0x7fff;
|
19
19
|
|
20
20
|
static size_t keylen(slot_t s) {
|
data/ext/triez.cc
CHANGED
data/lib/triez.rb
CHANGED
data/readme.md
CHANGED
@@ -1,14 +1,18 @@
|
|
1
|
-
##
|
1
|
+
## Triez
|
2
|
+
|
3
|
+
[](https://travis-ci.org/luikore/triez)
|
4
|
+
[](https://codeclimate.com/github/luikore/triez)
|
5
|
+
[](http://badge.fury.io/rb/triez)
|
2
6
|
|
3
7
|
Pragmatic [tries](http://en.wikipedia.org/wiki/Trie) for Ruby, spelled in lolcat.
|
4
8
|
|
5
9
|
It is fast, memory efficient, Unicode aware, prefix searchable, and enhanced with prefix/suffix/substring keys.
|
6
10
|
|
7
|
-
The backend of *triez* is a cache oblivious data structure: the [HAT trie](https://github.com/dcjones/hat-trie) (In fact
|
11
|
+
The backend of *triez* is a cache oblivious data structure: the [HAT trie](https://github.com/dcjones/hat-trie) (In fact it is a [modified version](https://github.com/luikore/hat-trie) for improved functionality). HAT trie is generally faster and more memory efficient than [double array](http://linux.thai.net/~thep/datrie/datrie.html) or [burst trie](http://ww2.cs.mu.oz.au/~jz/fulltext/acmtois02.pdf).
|
8
12
|
|
9
13
|
## Requirement
|
10
14
|
|
11
|
-
-
|
15
|
+
- CRuby 1.9 / 2.0
|
12
16
|
- `g++` or `clang`
|
13
17
|
|
14
18
|
## Install
|
@@ -72,10 +76,10 @@ t.each do |key, value|
|
|
72
76
|
end
|
73
77
|
```
|
74
78
|
|
75
|
-
\* Note: By default, *triez* store signed integers within 64bits, you can use them as weights, counts or database IDs. In case you need to store arbitrary object in a node, use `
|
79
|
+
\* Note: By default, *triez* stores signed integers that fit within 64 bits; you can use them as weights, counts or database IDs. In case you need to store arbitrary objects in a node, use `value_type: :object`:
|
76
80
|
|
77
81
|
``` ruby
|
78
|
-
t = Triez.new
|
82
|
+
t = Triez.new value_type: :object
|
79
83
|
t['Tom'] = {name: 'Tom', sex: 'Female'}
|
80
84
|
t['Tree'] = [:leaf, :trunk, :root]
|
81
85
|
```
|
@@ -91,8 +95,8 @@ t = Triez.new
|
|
91
95
|
words.each do |word|
|
92
96
|
t[word] = 1
|
93
97
|
end
|
94
|
-
t.search_with_prefix 're' do |
|
95
|
-
puts "candidate: #{
|
98
|
+
t.search_with_prefix 're' do |suffix|
|
99
|
+
puts "candidate: re#{suffix}"
|
96
100
|
end
|
97
101
|
```
|
98
102
|
|
@@ -116,9 +120,9 @@ sequences = {
|
|
116
120
|
}
|
117
121
|
t = Triez.new
|
118
122
|
|
119
|
-
# build suffix
|
123
|
+
# build suffix tree
|
120
124
|
sequences.each do |seq, id|
|
121
|
-
t.change_all
|
125
|
+
t.change_all(:suffix, seq){id}
|
122
126
|
end
|
123
127
|
|
124
128
|
t.search_with_prefix 'CGGT' do |_, id|
|
@@ -126,7 +130,7 @@ t.search_with_prefix 'CGGT' do |_, id|
|
|
126
130
|
end
|
127
131
|
```
|
128
132
|
|
129
|
-
The searching time is linear to the length of the substring.
|
133
|
+
The search time is linear in the length of the substring. You may also be interested in the example of a simple [full text search server](https://github.com/luikore/triez/tree/master/examples/full-text-search-server) built with *triez*.
|
130
134
|
|
131
135
|
---
|
132
136
|
|
@@ -185,7 +189,7 @@ fast_trie/double array* | 155.6 M | 130.7 s | 0.4359 s
|
|
185
189
|
triez/HAT trie | 121.7 M | 3.872 s | 0.3472 s
|
186
190
|
```
|
187
191
|
|
188
|
-
Note: `
|
192
|
+
Note: `fast_trie/double array` -> https://github.com/tyler/trie
|
189
193
|
|
190
194
|
## Caveats
|
191
195
|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: triez
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Zete Lui
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-05-29 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
|
15
14
|
email:
|
@@ -19,6 +18,7 @@ extensions:
|
|
19
18
|
extra_rdoc_files: []
|
20
19
|
files:
|
21
20
|
- copying
|
21
|
+
- changes
|
22
22
|
- readme.md
|
23
23
|
- lib/triez.rb
|
24
24
|
- test/triez_test.rb
|
@@ -37,27 +37,26 @@ files:
|
|
37
37
|
- ext/hat-trie/pstdint.h
|
38
38
|
homepage: https://github.com/luikore/triez
|
39
39
|
licenses: []
|
40
|
+
metadata: {}
|
40
41
|
post_install_message:
|
41
42
|
rdoc_options: []
|
42
43
|
require_paths:
|
43
44
|
- lib
|
44
45
|
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
-
none: false
|
46
46
|
requirements:
|
47
|
-
- -
|
47
|
+
- - '>='
|
48
48
|
- !ruby/object:Gem::Version
|
49
49
|
version: 1.9.2
|
50
50
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
51
|
requirements:
|
53
|
-
- -
|
52
|
+
- - '>='
|
54
53
|
- !ruby/object:Gem::Version
|
55
54
|
version: '0'
|
56
55
|
requirements: []
|
57
56
|
rubyforge_project:
|
58
|
-
rubygems_version:
|
57
|
+
rubygems_version: 2.0.3
|
59
58
|
signing_key:
|
60
|
-
specification_version:
|
59
|
+
specification_version: 4
|
61
60
|
summary: fast, efficient, unicode aware HAT trie with prefix / suffix support
|
62
61
|
test_files: []
|
63
62
|
has_rdoc: false
|