trie_matcher 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/trie_matcher.rb +59 -43
- data/lib/trie_matcher/version.rb +1 -1
- data/spec/ips_benchmark.rb +32 -0
- data/spec/profiling_trie_matcher.rb +66 -0
- data/spec/trie_matcher_spec.rb +13 -13
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c5273b23bb88d114d2f8f1aae29bd364900718b
|
4
|
+
data.tar.gz: ef5db432e345ee9f8be827ab36447ac15c1059b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e12f179872cc28b5190ff69352991a25522d151e5b9604292756395e0017243c5faecaeacf039aa97c673fdb2c48aef6ddb04ce4b17f7ac6292f0ae645330eac
|
7
|
+
data.tar.gz: c23a87b8d7637a8cf9b4fc93f2af2b43fb35bcd21f40904bd91791b4b8b52263212cefe9715ac955b7ee592ed1dd6bf7cf12511288bdab84d97f181316784fd2
|
data/lib/trie_matcher.rb
CHANGED
@@ -3,11 +3,11 @@ require File.expand_path("trie_matcher/version", __dir__)
|
|
3
3
|
# Trie implementation that acts as a weak mapping
|
4
4
|
#
|
5
5
|
# Values can be stored for a given prefix, and are returned for the longest prefix.
|
6
|
-
# Lookup searches
|
6
|
+
# Lookup searches based on a fixed prefix size. This can cause extra memory use and performance degradation on saturated tries with many lexemes.
|
7
7
|
class TrieMatcher
|
8
8
|
# Build an empty trie
|
9
9
|
def initialize
|
10
|
-
@root = { nodes: {}, value: nil,
|
10
|
+
@root = { nodes: {}, value: nil, key_length: nil }
|
11
11
|
end
|
12
12
|
|
13
13
|
# Store a prefix in the trie, and associate a value with it
|
@@ -35,7 +35,7 @@ class TrieMatcher
|
|
35
35
|
current = @root
|
36
36
|
current_prefix = prefix
|
37
37
|
|
38
|
-
while current
|
38
|
+
while !current.nil? && current_prefix != ""
|
39
39
|
previous = current
|
40
40
|
current, current_prefix = next_node(current, current_prefix)
|
41
41
|
end
|
@@ -95,58 +95,74 @@ class TrieMatcher
|
|
95
95
|
end
|
96
96
|
|
97
97
|
private
|
98
|
-
|
98
|
+
def insert_node(root, key)
|
99
|
+
new_node = {
|
100
|
+
nodes: {},
|
101
|
+
value: nil,
|
102
|
+
key_length: nil,
|
103
|
+
}
|
104
|
+
root[:nodes][key] = new_node
|
105
|
+
return new_node
|
106
|
+
end
|
107
|
+
|
108
|
+
# get the node for insertion, splitting intermediary nodes as necessary
|
99
109
|
def find_canididate_insertion_node(current, key)
|
100
|
-
|
101
|
-
|
110
|
+
if current[:key_length].nil?
|
111
|
+
new_node = insert_node(current, key)
|
112
|
+
current[:key_length] = key.length
|
113
|
+
return new_node, ""
|
114
|
+
end
|
115
|
+
|
116
|
+
# check if we have an existing shared prefix already
|
117
|
+
current_key = key[0...current[:key_length]]
|
118
|
+
|
119
|
+
# look for an existing key path
|
120
|
+
if current[:nodes].has_key?(current_key)
|
121
|
+
return current[:nodes][current_key], key[current_key.length..-1]
|
122
|
+
end
|
123
|
+
|
124
|
+
# search for a shared prefix, and split all the nodes if necessary
|
125
|
+
current[:nodes].keys.each do |prefix|
|
102
126
|
common_prefix = shared_prefix(key, prefix)
|
103
127
|
next unless common_prefix
|
104
128
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
new_suffix = prefix[common_prefix.length..-1]
|
110
|
-
new_node = {
|
111
|
-
nodes: {
|
112
|
-
new_suffix => old
|
113
|
-
},
|
114
|
-
value: nil,
|
115
|
-
longest_node_length: new_suffix.length,
|
116
|
-
longest_node: new_suffix,
|
117
|
-
}
|
118
|
-
current[:nodes][common_prefix] = new_node
|
119
|
-
if current[:longest_node] == prefix
|
120
|
-
longest_prefix = current[:nodes].keys.max_by(&:length)
|
121
|
-
current[:longest_node_length] = longest_prefix.length
|
122
|
-
current[:longest_node] = longest_prefix
|
123
|
-
end
|
124
|
-
return new_node, key[common_prefix.length..-1]
|
125
|
-
end
|
129
|
+
new_key_length = common_prefix.length
|
130
|
+
|
131
|
+
split_nodes(current, new_key_length)
|
132
|
+
return current[:nodes][common_prefix], key[new_key_length..-1]
|
126
133
|
end
|
127
134
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
135
|
+
# potentially split all other keys
|
136
|
+
if current_key.length < current[:key_length]
|
137
|
+
split_nodes(current, current_key.length)
|
138
|
+
end
|
139
|
+
|
140
|
+
new_node = insert_node(current, current_key)
|
141
|
+
return new_node, key[current_key.length..-1]
|
142
|
+
end
|
143
|
+
|
144
|
+
# split all the branches in the given root to the given length
|
145
|
+
def split_nodes(root, new_length)
|
146
|
+
old_nodes = root[:nodes]
|
147
|
+
split_length = root[:key_length] - new_length
|
148
|
+
root[:key_length] = new_length
|
149
|
+
root[:nodes] = {}
|
150
|
+
old_nodes.each do |key, old|
|
151
|
+
new_node = insert_node(root, key[0...new_length])
|
152
|
+
new_node[:nodes][key[new_length..-1]] = old
|
153
|
+
new_node[:key_length] = split_length
|
137
154
|
end
|
138
|
-
current[:nodes][key] = new_node
|
139
|
-
return new_node, ""
|
140
155
|
end
|
141
156
|
|
142
157
|
# find the next node from the current one based on the given key
|
143
158
|
def next_node(current, key)
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
159
|
+
return nil, nil unless current[:key_length]
|
160
|
+
next_key = key[0...current[:key_length]]
|
161
|
+
if current[:nodes].has_key?(next_key)
|
162
|
+
return current[:nodes][next_key], key[next_key.length..-1]
|
163
|
+
else
|
164
|
+
return nil, nil
|
148
165
|
end
|
149
|
-
return nil, nil
|
150
166
|
end
|
151
167
|
|
152
168
|
# finds a shared prefix between the two strings, or nil if there isn't any
|
data/lib/trie_matcher/version.rb
CHANGED
(hunk for this file is not shown in this listing)
data/spec/ips_benchmark.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'benchmark/ips'
|
2
|
+
require File.expand_path('../lib/trie_matcher', __dir__)
|
3
|
+
|
4
|
+
def run_user_agent_sim
|
5
|
+
trie = TrieMatcher.new
|
6
|
+
uas = <<-UAS.gsub(/^ +/, "").lines
|
7
|
+
Tiny Tiny RSS/1.2
|
8
|
+
Windows-RSS-Platform/1.0 Mozilla compatible
|
9
|
+
RSSOwl/2.1
|
10
|
+
Bloglovin/1.0 (http://www.bloglovin.com;
|
11
|
+
NewsBlur Feed Fetcher - (
|
12
|
+
g2reader-bot/1.0 (+http://www.g2reader.com;
|
13
|
+
Mozilla/5.0 Vienna/3.1.1
|
14
|
+
Mozilla/5.0 (compatible; theoldreader.com;
|
15
|
+
Feedbin -
|
16
|
+
Feed Wrangler/1.0 (
|
17
|
+
Mozilla 5.0 (compatible; BazQux/2.4 +http://bazqux.com/fetcher;
|
18
|
+
livedoor FeedFetcher/0.01 (http://reader.livedoor.com/;
|
19
|
+
HanRSS/1.1 (http://www.hanrss.com;
|
20
|
+
UAS
|
21
|
+
uas.each_with_index do |line, i|
|
22
|
+
trie[line] = i
|
23
|
+
end
|
24
|
+
Benchmark.ips do |x|
|
25
|
+
x.report { uas.sample }
|
26
|
+
x.report { trie[uas.sample] }
|
27
|
+
|
28
|
+
x.compare!
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
run_user_agent_sim
|
data/spec/profiling_trie_matcher.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'ruby-prof'
|
2
|
+
require 'stringio'
|
3
|
+
require File.expand_path('../lib/trie_matcher', __dir__)
|
4
|
+
|
5
|
+
# RubyProf.measure_mode = RubyProf::WALL_TIME
|
6
|
+
# RubyProf.measure_mode = RubyProf::PROCESS_TIME
|
7
|
+
# RubyProf.measure_mode = RubyProf::CPU_TIME
|
8
|
+
# RubyProf.measure_mode = RubyProf::ALLOCATIONS
|
9
|
+
RubyProf.measure_mode = RubyProf::MEMORY
|
10
|
+
# RubyProf.measure_mode = RubyProf::GC_TIME
|
11
|
+
# RubyProf.measure_mode = RubyProf::GC_RUNS
|
12
|
+
|
13
|
+
def profile
|
14
|
+
RubyProf.start
|
15
|
+
yield
|
16
|
+
return RubyProf.stop
|
17
|
+
end
|
18
|
+
|
19
|
+
# we investigate two use cases: user agent matching, and t9 prediction.
|
20
|
+
# TODO: add third use case - route matching
|
21
|
+
|
22
|
+
def run_user_agent_sim
|
23
|
+
trie = TrieMatcher.new
|
24
|
+
uas = <<-UAS.gsub(/^ +/, "").lines
|
25
|
+
Tiny Tiny RSS/1.2
|
26
|
+
Windows-RSS-Platform/1.0 Mozilla compatible
|
27
|
+
RSSOwl/2.1
|
28
|
+
Bloglovin/1.0 (http://www.bloglovin.com;
|
29
|
+
NewsBlur Feed Fetcher - (
|
30
|
+
g2reader-bot/1.0 (+http://www.g2reader.com;
|
31
|
+
Mozilla/5.0 Vienna/3.1.1
|
32
|
+
Mozilla/5.0 (compatible; theoldreader.com;
|
33
|
+
Feedbin -
|
34
|
+
Feed Wrangler/1.0 (
|
35
|
+
Mozilla 5.0 (compatible; BazQux/2.4 +http://bazqux.com/fetcher;
|
36
|
+
livedoor FeedFetcher/0.01 (http://reader.livedoor.com/;
|
37
|
+
HanRSS/1.1 (http://www.hanrss.com;
|
38
|
+
UAS
|
39
|
+
uas.each_with_index do |line, i|
|
40
|
+
trie[line] = i
|
41
|
+
end
|
42
|
+
simulation = (uas.size * 10000).times.map do
|
43
|
+
uas.sample
|
44
|
+
end
|
45
|
+
profile do
|
46
|
+
simulation.each do |test_case|
|
47
|
+
trie[test_case]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def report_profile(profile, outfile = "profile.html")
|
53
|
+
# print the call graph
|
54
|
+
output = StringIO.new
|
55
|
+
|
56
|
+
printer = RubyProf::CallStackPrinter.new(profile)
|
57
|
+
printer.print(output, min_percent: 2)
|
58
|
+
|
59
|
+
File.write(outfile, output.string)
|
60
|
+
|
61
|
+
# Print the raw report
|
62
|
+
printer = RubyProf::FlatPrinter.new(profile)
|
63
|
+
printer.print(STDOUT)
|
64
|
+
end
|
65
|
+
|
66
|
+
report_profile(run_user_agent_sim)
|
data/spec/trie_matcher_spec.rb
CHANGED
@@ -6,7 +6,7 @@ describe TrieMatcher do
|
|
6
6
|
end
|
7
7
|
|
8
8
|
it 'has a version number' do
|
9
|
-
expect(TrieMatcher::VERSION).not_to
|
9
|
+
expect(TrieMatcher::VERSION).not_to eq nil
|
10
10
|
end
|
11
11
|
|
12
12
|
describe "Hash access" do
|
@@ -17,55 +17,55 @@ describe TrieMatcher do
|
|
17
17
|
it 'should retrieve stored values' do
|
18
18
|
value = "bar"
|
19
19
|
@t["foo"] = value
|
20
|
-
expect(@t["foo"]).to
|
20
|
+
expect(@t["foo"]).to eq value
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'should return the stored value' do
|
24
24
|
value = "bar"
|
25
|
-
expect(@t["foo"] = value).to
|
25
|
+
expect(@t["foo"] = value).to eq value
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'should store values with shared prefixes' do
|
29
29
|
@t["cat"] = 1
|
30
30
|
@t["car"] = 2
|
31
|
-
expect(@t["cat"]).to
|
32
|
-
expect(@t["car"]).to
|
31
|
+
expect(@t["cat"]).to eq 1
|
32
|
+
expect(@t["car"]).to eq 2
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'should store keys that are a prefix of other keys' do
|
36
36
|
@t["catch"] = 1
|
37
37
|
@t["cat"] = 2
|
38
|
-
expect(@t["catch"]).to
|
39
|
-
expect(@t["cat"]).to
|
38
|
+
expect(@t["catch"]).to eq 1
|
39
|
+
expect(@t["cat"]).to eq 2
|
40
40
|
end
|
41
41
|
|
42
42
|
it 'should store keys that have a prefix of another key' do
|
43
43
|
@t["cat"] = 1
|
44
44
|
@t["catch"] = 2
|
45
|
-
expect(@t["cat"]).to
|
46
|
-
expect(@t["catch"]).to
|
45
|
+
expect(@t["cat"]).to eq 1
|
46
|
+
expect(@t["catch"]).to eq 2
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'should do prefix searching' do
|
50
50
|
@t["cat"] = 1
|
51
|
-
expect(@t["cats"]).to
|
51
|
+
expect(@t["cats"]).to eq 1
|
52
52
|
end
|
53
53
|
|
54
54
|
it 'should do partial prefix matching' do
|
55
55
|
@t["cat"] = 1
|
56
56
|
@t["cats in the cradle"] = 2
|
57
|
-
expect(@t["cats"]).to
|
57
|
+
expect(@t["cats"]).to eq 1
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'should return the more specific prefix value' do
|
61
61
|
@t["cat"] = 1
|
62
62
|
@t["catch"] = 2
|
63
|
-
expect(@t["catcher"]).to
|
63
|
+
expect(@t["catcher"]).to eq 2
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'should not give a longer prefix value' do
|
67
67
|
@t["catch"] = 2
|
68
|
-
expect(@t["cat"]).to
|
68
|
+
expect(@t["cat"]).to eq nil
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trie_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Steven Karas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -69,6 +69,8 @@ files:
|
|
69
69
|
- lib/trie_matcher.rb
|
70
70
|
- lib/trie_matcher/pattern_matcher.rb
|
71
71
|
- lib/trie_matcher/version.rb
|
72
|
+
- spec/ips_benchmark.rb
|
73
|
+
- spec/profiling_trie_matcher.rb
|
72
74
|
- spec/spec_helper.rb
|
73
75
|
- spec/trie_matcher_spec.rb
|
74
76
|
- trie_matcher.gemspec
|
@@ -97,5 +99,7 @@ signing_key:
|
|
97
99
|
specification_version: 4
|
98
100
|
summary: Fast prefix matching
|
99
101
|
test_files:
|
102
|
+
- spec/ips_benchmark.rb
|
103
|
+
- spec/profiling_trie_matcher.rb
|
100
104
|
- spec/spec_helper.rb
|
101
105
|
- spec/trie_matcher_spec.rb
|