trie_matcher 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be6709d49263fbdc6baf9dcf04766f2e44710216
4
- data.tar.gz: 65705c4113f69078469d79a97967cb25ec513329
3
+ metadata.gz: 0c5273b23bb88d114d2f8f1aae29bd364900718b
4
+ data.tar.gz: ef5db432e345ee9f8be827ab36447ac15c1059b8
5
5
  SHA512:
6
- metadata.gz: 61a82d7c000c5c076f240c8ea4221554006de143917a9e1e5210ab820c628ff640d88d3f70ce3a3df35497c03afd25915357fe3304707e67d145aad1abbfd1a5
7
- data.tar.gz: f26bbae5d67f77d84665253312b9f1c342bc4523f8d1bbf4cd30c81f94c1e952504f3ec41faee6a67c5a1a98c039bbf6183c68ff70409234e09db24aa2f54985
6
+ metadata.gz: e12f179872cc28b5190ff69352991a25522d151e5b9604292756395e0017243c5faecaeacf039aa97c673fdb2c48aef6ddb04ce4b17f7ac6292f0ae645330eac
7
+ data.tar.gz: c23a87b8d7637a8cf9b4fc93f2af2b43fb35bcd21f40904bd91791b4b8b52263212cefe9715ac955b7ee592ed1dd6bf7cf12511288bdab84d97f181316784fd2
data/lib/trie_matcher.rb CHANGED
@@ -3,11 +3,11 @@ require File.expand_path("trie_matcher/version", __dir__)
3
3
  # Trie implementation that acts as a weak mapping
4
4
  #
5
5
  # Values can be stored for a given prefix, and are returned for the longest prefix.
6
- # Lookup searches for longer prefixes optimistically, so saturated tries with many lexemes in them will be less efficient
6
+ # Lookup searches based on a fixed prefix size. This can cause extra memory use and performance degradation on saturated tries with many lexemes.
7
7
  class TrieMatcher
8
8
  # Build an empty trie
9
9
  def initialize
10
- @root = { nodes: {}, value: nil, longest_node_length: 0, longest_node: nil }
10
+ @root = { nodes: {}, value: nil, key_length: nil }
11
11
  end
12
12
 
13
13
  # Store a prefix in the trie, and associate a value with it
@@ -35,7 +35,7 @@ class TrieMatcher
35
35
  current = @root
36
36
  current_prefix = prefix
37
37
 
38
- while current != nil && current_prefix != ""
38
+ while !current.nil? && current_prefix != ""
39
39
  previous = current
40
40
  current, current_prefix = next_node(current, current_prefix)
41
41
  end
@@ -95,58 +95,74 @@ class TrieMatcher
95
95
  end
96
96
 
97
97
  private
98
- # get the node for insertion, splitting shared prefixes into subnodes if necessary
98
+ def insert_node(root, key)
99
+ new_node = {
100
+ nodes: {},
101
+ value: nil,
102
+ key_length: nil,
103
+ }
104
+ root[:nodes][key] = new_node
105
+ return new_node
106
+ end
107
+
108
+ # get the node for insertion, splitting intermediary nodes as necessary
99
109
  def find_canididate_insertion_node(current, key)
100
- # look for a common prefix
101
- current[:nodes].keys.find do |prefix|
110
+ if current[:key_length].nil?
111
+ new_node = insert_node(current, key)
112
+ current[:key_length] = key.length
113
+ return new_node, ""
114
+ end
115
+
116
+ # check if we have an existing shared prefix already
117
+ current_key = key[0...current[:key_length]]
118
+
119
+ # look for an existing key path
120
+ if current[:nodes].has_key?(current_key)
121
+ return current[:nodes][current_key], key[current_key.length..-1]
122
+ end
123
+
124
+ # search for a shared prefix, and split all the nodes if necessary
125
+ current[:nodes].keys.each do |prefix|
102
126
  common_prefix = shared_prefix(key, prefix)
103
127
  next unless common_prefix
104
128
 
105
- if common_prefix == prefix
106
- return current[:nodes][prefix], key[common_prefix.length..-1]
107
- else
108
- old = current[:nodes].delete(prefix)
109
- new_suffix = prefix[common_prefix.length..-1]
110
- new_node = {
111
- nodes: {
112
- new_suffix => old
113
- },
114
- value: nil,
115
- longest_node_length: new_suffix.length,
116
- longest_node: new_suffix,
117
- }
118
- current[:nodes][common_prefix] = new_node
119
- if current[:longest_node] == prefix
120
- longest_prefix = current[:nodes].keys.max_by(&:length)
121
- current[:longest_node_length] = longest_prefix.length
122
- current[:longest_node] = longest_prefix
123
- end
124
- return new_node, key[common_prefix.length..-1]
125
- end
129
+ new_key_length = common_prefix.length
130
+
131
+ split_nodes(current, new_key_length)
132
+ return current[:nodes][common_prefix], key[new_key_length..-1]
126
133
  end
127
134
 
128
- new_node = {
129
- nodes: {},
130
- value: nil,
131
- longest_node: nil,
132
- longest_node_length: 0,
133
- }
134
- if key.length > current[:longest_node_length]
135
- current[:longest_node_length] = key.length
136
- current[:longest_node] = key
135
+ # potentially split all other keys
136
+ if current_key.length < current[:key_length]
137
+ split_nodes(current, current_key.length)
138
+ end
139
+
140
+ new_node = insert_node(current, current_key)
141
+ return new_node, key[current_key.length..-1]
142
+ end
143
+
144
+ # split all the branches in the given root to the given length
145
+ def split_nodes(root, new_length)
146
+ old_nodes = root[:nodes]
147
+ split_length = root[:key_length] - new_length
148
+ root[:key_length] = new_length
149
+ root[:nodes] = {}
150
+ old_nodes.each do |key, old|
151
+ new_node = insert_node(root, key[0...new_length])
152
+ new_node[:nodes][key[new_length..-1]] = old
153
+ new_node[:key_length] = split_length
137
154
  end
138
- current[:nodes][key] = new_node
139
- return new_node, ""
140
155
  end
141
156
 
142
157
  # find the next node from the current one based on the given key
143
158
  def next_node(current, key)
144
- ([key.length, current[:longest_node_length]].max).times do |l|
145
- if current[:nodes].has_key?(key[0..-l-1])
146
- return current[:nodes][key[0..-l-1]], key[-l,l]
147
- end
159
+ return nil, nil unless current[:key_length]
160
+ next_key = key[0...current[:key_length]]
161
+ if current[:nodes].has_key?(next_key)
162
+ return current[:nodes][next_key], key[next_key.length..-1]
163
+ else
164
+ return nil, nil
148
165
  end
149
- return nil, nil
150
166
  end
151
167
 
152
168
  # finds a shared prefix between the two strings, or nil if there isn't any
@@ -1,3 +1,3 @@
1
1
  class TrieMatcher
2
- VERSION = "1.3.0"
2
+ VERSION = "1.3.1"
3
3
  end
@@ -0,0 +1,32 @@
1
+ require 'benchmark/ips'
2
+ require File.expand_path('../lib/trie_matcher', __dir__)
3
+
4
+ def run_user_agent_sim
5
+ trie = TrieMatcher.new
6
+ uas = <<-UAS.gsub(/^ +/, "").lines
7
+ Tiny Tiny RSS/1.2
8
+ Windows-RSS-Platform/1.0 Mozilla compatible
9
+ RSSOwl/2.1
10
+ Bloglovin/1.0 (http://www.bloglovin.com;
11
+ NewsBlur Feed Fetcher - (
12
+ g2reader-bot/1.0 (+http://www.g2reader.com;
13
+ Mozilla/5.0 Vienna/3.1.1
14
+ Mozilla/5.0 (compatible; theoldreader.com;
15
+ Feedbin -
16
+ Feed Wrangler/1.0 (
17
+ Mozilla 5.0 (compatible; BazQux/2.4 +http://bazqux.com/fetcher;
18
+ livedoor FeedFetcher/0.01 (http://reader.livedoor.com/;
19
+ HanRSS/1.1 (http://www.hanrss.com;
20
+ UAS
21
+ uas.each_with_index do |line, i|
22
+ trie[line] = i
23
+ end
24
+ Benchmark.ips do |x|
25
+ x.report { uas.sample }
26
+ x.report { trie[uas.sample] }
27
+
28
+ x.compare!
29
+ end
30
+ end
31
+
32
+ run_user_agent_sim
@@ -0,0 +1,66 @@
1
+ require 'ruby-prof'
2
+ require 'stringio'
3
+ require File.expand_path('../lib/trie_matcher', __dir__)
4
+
5
+ # RubyProf.measure_mode = RubyProf::WALL_TIME
6
+ # RubyProf.measure_mode = RubyProf::PROCESS_TIME
7
+ # RubyProf.measure_mode = RubyProf::CPU_TIME
8
+ # RubyProf.measure_mode = RubyProf::ALLOCATIONS
9
+ RubyProf.measure_mode = RubyProf::MEMORY
10
+ # RubyProf.measure_mode = RubyProf::GC_TIME
11
+ # RubyProf.measure_mode = RubyProf::GC_RUNS
12
+
13
+ def profile
14
+ RubyProf.start
15
+ yield
16
+ return RubyProf.stop
17
+ end
18
+
19
+ # we investigate two use cases: user agent matching, and t9 prediction.
20
+ # TODO: add third use case - route matching
21
+
22
+ def run_user_agent_sim
23
+ trie = TrieMatcher.new
24
+ uas = <<-UAS.gsub(/^ +/, "").lines
25
+ Tiny Tiny RSS/1.2
26
+ Windows-RSS-Platform/1.0 Mozilla compatible
27
+ RSSOwl/2.1
28
+ Bloglovin/1.0 (http://www.bloglovin.com;
29
+ NewsBlur Feed Fetcher - (
30
+ g2reader-bot/1.0 (+http://www.g2reader.com;
31
+ Mozilla/5.0 Vienna/3.1.1
32
+ Mozilla/5.0 (compatible; theoldreader.com;
33
+ Feedbin -
34
+ Feed Wrangler/1.0 (
35
+ Mozilla 5.0 (compatible; BazQux/2.4 +http://bazqux.com/fetcher;
36
+ livedoor FeedFetcher/0.01 (http://reader.livedoor.com/;
37
+ HanRSS/1.1 (http://www.hanrss.com;
38
+ UAS
39
+ uas.each_with_index do |line, i|
40
+ trie[line] = i
41
+ end
42
+ simulation = (uas.size * 10000).times.map do
43
+ uas.sample
44
+ end
45
+ profile do
46
+ simulation.each do |test_case|
47
+ trie[test_case]
48
+ end
49
+ end
50
+ end
51
+
52
+ def report_profile(profile, outfile = "profile.html")
53
+ # print the call graph
54
+ output = StringIO.new
55
+
56
+ printer = RubyProf::CallStackPrinter.new(profile)
57
+ printer.print(output, min_percent: 2)
58
+
59
+ File.write(outfile, output.string)
60
+
61
+ # Print the raw report
62
+ printer = RubyProf::FlatPrinter.new(profile)
63
+ printer.print(STDOUT)
64
+ end
65
+
66
+ report_profile(run_user_agent_sim)
@@ -6,7 +6,7 @@ describe TrieMatcher do
6
6
  end
7
7
 
8
8
  it 'has a version number' do
9
- expect(TrieMatcher::VERSION).not_to be nil
9
+ expect(TrieMatcher::VERSION).not_to eq nil
10
10
  end
11
11
 
12
12
  describe "Hash access" do
@@ -17,55 +17,55 @@ describe TrieMatcher do
17
17
  it 'should retrieve stored values' do
18
18
  value = "bar"
19
19
  @t["foo"] = value
20
- expect(@t["foo"]).to be value
20
+ expect(@t["foo"]).to eq value
21
21
  end
22
22
 
23
23
  it 'should return the stored value' do
24
24
  value = "bar"
25
- expect(@t["foo"] = value).to be value
25
+ expect(@t["foo"] = value).to eq value
26
26
  end
27
27
 
28
28
  it 'should store values with shared prefixes' do
29
29
  @t["cat"] = 1
30
30
  @t["car"] = 2
31
- expect(@t["cat"]).to be 1
32
- expect(@t["car"]).to be 2
31
+ expect(@t["cat"]).to eq 1
32
+ expect(@t["car"]).to eq 2
33
33
  end
34
34
 
35
35
  it 'should store keys that are a prefix of other keys' do
36
36
  @t["catch"] = 1
37
37
  @t["cat"] = 2
38
- expect(@t["catch"]).to be 1
39
- expect(@t["cat"]).to be 2
38
+ expect(@t["catch"]).to eq 1
39
+ expect(@t["cat"]).to eq 2
40
40
  end
41
41
 
42
42
  it 'should store keys that have a prefix of another key' do
43
43
  @t["cat"] = 1
44
44
  @t["catch"] = 2
45
- expect(@t["cat"]).to be 1
46
- expect(@t["catch"]).to be 2
45
+ expect(@t["cat"]).to eq 1
46
+ expect(@t["catch"]).to eq 2
47
47
  end
48
48
 
49
49
  it 'should do prefix searching' do
50
50
  @t["cat"] = 1
51
- expect(@t["cats"]).to be 1
51
+ expect(@t["cats"]).to eq 1
52
52
  end
53
53
 
54
54
  it 'should do partial prefix matching' do
55
55
  @t["cat"] = 1
56
56
  @t["cats in the cradle"] = 2
57
- expect(@t["cats"]).to be 1
57
+ expect(@t["cats"]).to eq 1
58
58
  end
59
59
 
60
60
  it 'should return the more specific prefix value' do
61
61
  @t["cat"] = 1
62
62
  @t["catch"] = 2
63
- expect(@t["catcher"]).to be 2
63
+ expect(@t["catcher"]).to eq 2
64
64
  end
65
65
 
66
66
  it 'should not give a longer prefix value' do
67
67
  @t["catch"] = 2
68
- expect(@t["cat"]).to be nil
68
+ expect(@t["cat"]).to eq nil
69
69
  end
70
70
  end
71
71
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trie_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steven Karas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-16 00:00:00.000000000 Z
11
+ date: 2015-08-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -69,6 +69,8 @@ files:
69
69
  - lib/trie_matcher.rb
70
70
  - lib/trie_matcher/pattern_matcher.rb
71
71
  - lib/trie_matcher/version.rb
72
+ - spec/ips_benchmark.rb
73
+ - spec/profiling_trie_matcher.rb
72
74
  - spec/spec_helper.rb
73
75
  - spec/trie_matcher_spec.rb
74
76
  - trie_matcher.gemspec
@@ -97,5 +99,7 @@ signing_key:
97
99
  specification_version: 4
98
100
  summary: Fast prefix matching
99
101
  test_files:
102
+ - spec/ips_benchmark.rb
103
+ - spec/profiling_trie_matcher.rb
100
104
  - spec/spec_helper.rb
101
105
  - spec/trie_matcher_spec.rb