stringtree 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
1
module StringTree
  # Represents an individual tokenized item in a dataset, i.e. a match flag,
  # its offset and its terminating node.
  class Item
    # The offset of the Item in the dataset, in chars
    attr_accessor :offset
    # The match flag/value handed to the constructor
    attr_accessor :match
    # The terminating node of the match, or nil
    attr_accessor :node

    # Create a new Item with the specified offset, match and, optionally, node.
    def initialize(offset, match, node = nil)
      @offset = offset
      @match = match
      @node = node
    end

    # Returns the stored match flag unchanged (it is not coerced to a
    # boolean), so the result is truthy exactly when the flag is truthy.
    def match?
      @match
    end

    # Returns the value of the terminating node, or nil when there is no node.
    def value
      @node.value unless @node.nil?
    end
  end
end
@@ -0,0 +1,216 @@
1
module StringTree
  # Node represents a node in a StringTree::Tree.
  #
  # This is essentially a binary tree node with additional up and down pointers:
  # +left+/+right+ form a binary search tree ordered by +char+, +down+ points at
  # the binary tree holding the next character of longer keys, and +up+ points
  # back at the node of the previous character.
  class Node
    # The character value of this node
    attr_accessor :char
    # The next left node to this node, or nil
    attr_accessor :left
    # The next right node to this node, or nil
    attr_accessor :right
    # The child (down) node of this node, or nil
    attr_accessor :down
    # The parent (up) node to this node, or nil
    attr_accessor :up
    # The value of this node, or nil
    attr_accessor :value

    # Create a new Node with the given char and optionally, parent node and value
    def initialize(char, parent = nil, value = nil)
      @char = char
      @up = parent
      @value = value
    end

    # Add another node horizontally
    # (within the left-right binary tree of this Node).
    # Nodes with a greater char go right; equal or smaller chars go left.
    def add_horizontal(node)
      if node.char > @char
        if @right.nil?
          @right = node
        else
          @right.add_horizontal(node)
        end
      elsif @left.nil?
        @left = node
      else
        @left.add_horizontal(node)
      end
    end

    # Add and return a new, or return the existing, node with the given char horizontally
    # (within the left-right binary tree of this Node).
    # A newly created node shares this node's +up+ pointer.
    def add_horizontal_char(char)
      existing = find_horizontal(char)
      return existing unless existing.nil?

      node = Node.new(char, up)
      add_horizontal(node)
      node
    end

    # Find and return the node corresponding to the given char horizontally, or nil if not found
    # (within the left-right binary tree of this Node)
    def find_horizontal(char)
      return self if @char == char

      branch = char > @char ? @right : @left
      branch.find_horizontal(char) unless branch.nil?
    end

    # Add the given String str and value vertically to this node, by adding or finding each character
    # horizontally, then stepping down and repeating with the next character and so on, writing the
    # value to the last node. Returns the value.
    #
    # Raises ArgumentError when str is empty, because there would be no node to
    # carry the value.
    def add_vertical(str, value)
      raise ArgumentError, 'str must not be empty' if str.empty?

      node = nil
      str.each_char do |c|
        node = if node.nil?
                 # First character: lives in this node's own horizontal tree.
                 add_horizontal_char(c)
               elsif node.down.nil?
                 # No deeper level yet: start one; the assignment yields the new node.
                 node.down = Node.new(c, node)
               else
                 node.down.add_horizontal_char(c)
               end
      end
      node.value = value
    end

    # Find the given String str vertically by finding each character horizontally, then stepping down
    # and repeating with the next character and so on. Return the last node if found, or nil if any
    # horizontal search fails.
    # Optionally, set the offset into the string and its length. A range that
    # runs past the end of str cannot match and yields nil.
    def find_vertical(str, offset = 0, length = str.length)
      node = nil
      (offset...(offset + length)).each do |i|
        c = str[i]
        return nil if c.nil? # ran off the end of str

        level = node.nil? ? self : node.down
        return nil if level.nil?

        node = level.find_horizontal(c)
        return nil if node.nil?
      end
      node
    end

    # Find the next match (terminating node with value non-nil) in the String data.
    # Returns the deepest node carrying a value along the matched path, or nil.
    # Optionally, set the offset into the data and its length. Scanning stops at
    # the end of data even when offset + length reaches further.
    def find_forward(data, offset = 0, length = data.length)
      node = nil
      last_value_node = nil
      (offset...(offset + length)).each do |i|
        c = data[i]
        break if c.nil? # ran off the end of data

        level = node.nil? ? self : node.down
        break if level.nil?

        node = level.find_horizontal(c)
        break if node.nil?

        last_value_node = node unless node.value.nil?
      end
      last_value_node
    end

    # Count the number of nodes in the given direction (:up,:down,:left,:right) until
    # the edge of the tree. The count includes this node itself.
    def count(direction)
      i = 0
      node = self
      until node.nil?
        # public_send: only the public accessors are meant to be followed.
        node = node.public_send(direction)
        i += 1
      end
      i
    end

    # Recursively balance this node in its own tree, and every node reachable
    # via left, right and down, and return the new root node.
    def balance
      node = self
      # Half the difference between the right and left spine lengths is the
      # number of single steps the root is shifted (integer division).
      shift = (node.count(:right) - node.count(:left)) / 2
      until shift.zero?
        if shift > 0
          pivot = node.right
          node.right = nil
          shift -= 1
        else
          pivot = node.left
          node.left = nil
          shift += 1
        end
        # The old root is re-inserted beneath the node that takes its place.
        pivot.add_horizontal(node)
        node = pivot
      end
      node.left = node.left.balance unless node.left.nil?
      node.right = node.right.balance unless node.right.nil?
      node.down = node.down.balance unless node.down.nil?
      node
    end

    # Walk the tree from this node, yielding all strings and values where the
    # value is not nil. Optionally, use the given prefix String str.
    # Visiting order is: down subtree, left subtree, this node, right subtree.
    def walk(str = "", &block)
      @down.walk(str + char, &block) unless @down.nil?
      @left.walk(str, &block) unless @left.nil?
      yield(str + @char, @value) unless @value.nil?
      @right.walk(str, &block) unless @right.nil?
    end

    # Return the complete string from the tree root up to and including this node
    # i.e. The total string key for this node from root.
    def to_s
      st = @char
      node = up
      until node.nil?
        st = node.char + st
        node = node.up
      end
      st
    end

    # Return the length of the total string key for this node from root.
    def length
      count(:up)
    end

    # Return an Array of Strings of all possible partial string from this node.
    # Optionally, use the given prefix String str.
    def all_partials(str = "")
      list = []
      @down.walk(str) { |partial| list << partial } unless @down.nil?
      list
    end
  end
end
@@ -0,0 +1,98 @@
1
module StringTree
  # Tree represents a complete StringTree, and has functionality resembling a Hash.
  class Tree
    # The root StringTree::Node, or nil if empty
    attr_accessor :root

    # Create a new empty Tree
    def initialize
      @root = nil
    end

    # Add a key and value to this Tree.
    #
    # Raises ArgumentError for a nil or empty key. The check happens before the
    # root is created, so a rejected key cannot leave a root node with a nil
    # char behind.
    def add(key, value)
      raise ArgumentError, 'key must be a non-empty String' if key.nil? || key.empty?

      @root ||= Node.new(key[0], nil)
      @root.add_vertical(key, value)
    end

    # Find a specified key in the Tree, and return the value, or nil if not found.
    def find(key)
      return nil if @root.nil?

      node = @root.find_vertical(key)
      node.value unless node.nil?
    end

    # Return true if the given key exists (i.e. its node carries a non-nil value)
    def has_key?(key)
      return false if @root.nil?

      node = @root.find_vertical(key)
      !(node.nil? || node.value.nil?)
    end

    # Return an Array of Strings representing all partial matches forward of key in the Tree.
    # Please note the key itself is not included, even if it exists as a value.
    # Returns nil when the tree is empty or the key's path does not exist.
    #
    # E.g.: A tree containing 'ant','antler','deer','anthropic','beer'
    #       tree.partials('ant') would return ['antler','anthropic']
    def partials(key)
      return nil if @root.nil?

      node = @root.find_vertical(key)
      return nil if node.nil?

      node.all_partials(key)
    end

    # Rebalance the tree for faster access.
    # Returns the new root, or nil when the tree is empty.
    def optimize!
      return nil if @root.nil?

      @root = @root.balance
    end

    # Tokenize the string Data by finding all instances of any key in the Tree.
    # yields each instance as a StringTree::Item. Returns nil.
    def match_all(data, &block)
      return nil if @root.nil?

      each_match(data) { |offset, node| yield Item.new(offset, true, node) }
      nil
    end

    # Alias for find
    def [](key)
      find(key)
    end

    # Alias for add
    def []=(key, value)
      add(key, value)
    end

    # Return a Hash of terminating nodes to Integer counts for a given String data,
    # i.e. Find the count of instances of each String in the tree in the given data.
    # Counts are accumulated into +list+, which is also the return value;
    # an empty tree returns nil.
    def match_count(data, list = {})
      return nil if @root.nil?

      each_match(data) do |_offset, node|
        list[node] = list.has_key?(node) ? list[node] + 1 : 1
      end
      list
    end

    private

    # Scan data left to right, yielding (offset, node) for every match found.
    # After a match the scan resumes behind it, otherwise one char further on.
    def each_match(data)
      i = 0
      while i < data.length
        node = @root.find_forward(data, i, data.length - i)
        if !node.nil? && !node.value.nil?
          yield i, node
          i += node.length
        else
          i += 1
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
# The StringTree module contains all the classes and constants for the `stringtree` gem.
module StringTree
  # The current version of the `stringtree` gem (frozen so it cannot be mutated in place)
  VERSION = '0.1.0'.freeze
end
data/lib/stringtree.rb ADDED
@@ -0,0 +1,6 @@
1
# Load the gem's components in the order listed; paths are resolved relative to this file.
%w[version node item tree].each { |file| require_relative "stringtree/#{file}" }
data/spec/item_spec.rb ADDED
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
describe StringTree::Item do
  describe "#initialize" do
    it "should initialize correctly" do
      item = StringTree::Item.new(1, 2, 3)
      expect(item.offset).to eq(1)
      expect(item.match).to eq(2)
      expect(item.node).to eq(3)
    end

    it "should default node to nil" do
      item = StringTree::Item.new(1, 2)
      expect(item.node).to be_nil
    end
  end

  describe "#match?" do
    it "should return @match" do
      item = StringTree::Item.new(1, 2, 3)
      expect(item.match?).to eq(2)
    end
  end

  describe "#value" do
    it "should return @node.value if not nil" do
      node = OpenStruct.new(:value => "foo")
      item = StringTree::Item.new(1, 2, node)
      expect(item.value).to eq("foo")
    end

    it "should return nil if node is nil" do
      item = StringTree::Item.new(1, 2, nil)
      expect(item.value).to be_nil
    end
  end
end
data/spec/node_spec.rb ADDED
@@ -0,0 +1,187 @@
1
+ require 'spec_helper'
2
+
3
describe StringTree::Node do
  describe "when instantiated" do
    it "has correct char" do
      node = StringTree::Node.new 'a'
      expect(node.char).to eq('a')
    end
  end

  describe "add tests" do
    before do
      @nodeb = StringTree::Node.new 'b'
      @nodec = @nodeb.add_horizontal_char('c')
      @nodea = @nodeb.add_horizontal_char('a')
    end
    it "adds a on correct side" do
      expect(@nodeb.left).to be(@nodea)
    end
    it "adds c on correct side" do
      expect(@nodeb.right).to be(@nodec)
    end
    it "does not add a again" do
      expect(@nodeb.add_horizontal_char('a')).to be(@nodea)
    end
    it "count left correct" do
      # count includes the starting node: b -> a
      expect(@nodeb.count(:left)).to eq(2)
    end
    it "count right correct" do
      # count includes the starting node: b -> c
      expect(@nodeb.count(:right)).to eq(2)
    end
  end

  describe "find tests" do
    before do
      @nodeb = StringTree::Node.new 'b'
      @nodec = @nodeb.add_horizontal_char('c')
      @nodea = @nodeb.add_horizontal_char('a')
    end
    it "finds a" do
      expect(@nodeb.find_horizontal('a')).to be(@nodea)
    end
    it "finds c" do
      expect(@nodeb.find_horizontal('c')).to be(@nodec)
    end
    it "does not find a missing char" do
      expect(@nodeb.find_horizontal('z')).to be_nil
    end
  end

  describe "vertical tests" do
    before do
      @node = StringTree::Node.new 'a'
      @str = "testing"
      @node.add_vertical(@str, 'one')
    end
    it "string exists in stringtree" do
      cursor = @node
      @str.each_char { |c|
        cursor = cursor.find_horizontal(c)
        expect(cursor).not_to be(nil)
        expect(cursor.char).to eq(c)
        cursor = cursor.down
      }
    end
    it "find_vertical returns correct value" do
      expect(@node.find_vertical(@str).value).to eq('one')
    end
    it "find_vertical does not find superstring" do
      expect(@node.find_vertical("testing1")).to be_nil
    end
    it "find_vertical finds a prefix node without a value" do
      prefix = @node.find_vertical("testin")
      expect(prefix).not_to be_nil
      expect(prefix.value).to be_nil
    end
  end

  describe "optimize tests" do
    before do
      @node = StringTree::Node.new 'a'
      @val = {
        "one"=>1, "two"=>2, "three"=>3, "four"=>4, "five"=>5, "six"=>6, "seven"=>7, "eight"=>8, "nine"=>9, "ten"=>10,
        "eleven"=>11, "twelve"=>12, "thirteen"=>13, "fourteen"=>14, "fifteen"=>15, "sixteen"=>16, "seventeen"=>17, "eighteen"=>18, "nineteen"=>19, "twenty"=>20
      }
      @val.each { |c,d| @node.add_vertical(c,d) }
      @node = @node.balance
    end
    it "string exists in stringtree" do
      cursor = @node
      'one'.each_char { |c|
        cursor = cursor.find_horizontal(c)
        expect(cursor).not_to be(nil)
        expect(cursor.char).to eq(c)
        cursor = cursor.down
      }
    end
    it "find_vertical returns correct value" do
      expect(@node.find_vertical("one").value).to eq(1)
    end
    it "find_vertical still finds every key after balancing" do
      @val.each { |key, value| expect(@node.find_vertical(key).value).to eq(value) }
    end
    it "find_vertical does not find superstring" do
      expect(@node.find_vertical("testing1")).to be_nil
      expect(@node.find_vertical("testin")).to be_nil
    end
  end

  describe '#walk' do
    it 'should yield nothing when no node has a value' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2'
      inst.down.left = StringTree::Node.new '1'
      inst.down.right = StringTree::Node.new '3'

      list = []
      inst.walk do |k,v|
        list << { k => v }
      end
      expect(list).to eq []
    end

    it 'should yield keys and values of nodes that have a value' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a', nil, 'A'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'two'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst

      list = []
      inst.walk do |k,v|
        list << { k => v }
      end
      # Down subtree is visited first, then left, self and right.
      expect(list).to eq [{ 'b2' => 'two' }, { 'a' => 'A' }]
    end
  end

  describe '#all_partials' do
    it 'should call @down.walk if it exists' do
      inst = StringTree::Node.new 'a'
      inst.down = StringTree::Node.new 'b'

      expect(inst.down).to receive(:walk).with('one')
      inst.all_partials('one')
    end

    it 'should not call @down.walk if it doesnt exist' do
      inst = StringTree::Node.new 'a'
      inst.down = nil
      expect(inst.all_partials('one')).to eq([])
    end
  end

  describe '#find_forward' do
    it 'should return the correct node' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst

      expect(inst.find_forward("b2",0,2)).to be(inst.down)
      expect(inst.find_forward("asoicbasicn",0,2)).to be_nil
    end
  end

  describe '#to_s' do
    it 'should return the correct string' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst

      expect(inst.down.left.to_s).to eq("b1")
      expect(inst.down.right.to_s).to eq("b3")
      expect(inst.down.to_s).to eq("b2")
      expect(inst.right.to_s).to eq("c")
      expect(inst.left.to_s).to eq("a")
      expect(inst.to_s).to eq("b")
    end
  end

  describe '#length' do
    it 'should return the correct count' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst
      inst.down.left.down = StringTree::Node.new 'z', inst.down.left
      inst.down.left.down.left = StringTree::Node.new 'v', inst.down.left

      expect(inst.down.left.down.left.length).to eq(3)
      expect(inst.down.left.down.length).to eq(3)
      expect(inst.down.left.length).to eq(2)
      expect(inst.down.length).to eq(2)
      expect(inst.length).to eq(1)
    end
  end
end
@@ -0,0 +1,19 @@
1
# Suppress warnings while the coverage tooling and the gem are being loaded,
# then put the previous warning level back.
warn_level = $VERBOSE
$VERBOSE = nil

begin
  if ENV.key?('SIMPLECOV')
    require 'simplecov'
    require 'simplecov-rcov'

    SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
    SimpleCov.start do
      add_filter '/spec/'
    end
  else
    require 'coveralls'
    Coveralls.wear!
  end

  require 'stringtree'
  require 'ostruct'
ensure
  # NOTE(review): the original saved warn_level but never used it; restoring it
  # here is assumed to be the intent — confirm warnings during the run are acceptable.
  $VERBOSE = warn_level
end