stringtree 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
module StringTree
  # Represents an individual tokenized item in a dataset, i.e. a match, its
  # offset and its terminating node.
  class Item
    # The offset of the Item in the dataset, in chars
    attr_accessor :offset
    # The match itself
    attr_accessor :match
    # The terminating node of the match
    attr_accessor :node

    # Create a new Item with the specified offset, match and, optionally, node.
    def initialize(offset, match, node = nil)
      @offset, @match, @node = offset, match, node
    end

    # Returns the stored match as-is (truthy when this item is a match).
    def match?
      @match
    end

    # Returns the value of the terminating node, or nil when no node is set.
    def value
      return nil if @node.nil?

      @node.value
    end
  end
end
@@ -0,0 +1,216 @@
module StringTree
  # Node represents a node in a StringTree::Tree.
  #
  # Each node belongs to a binary search tree of sibling characters (left and
  # right pointers) and additionally carries an up pointer to its parent node
  # and a down pointer to the sibling tree holding the next character of the
  # stored keys.
  class Node
    # The char Character value of this node
    attr_accessor :char
    # The next left node to this node, or nil
    attr_accessor :left
    # The next right node to this node, or nil
    attr_accessor :right
    # The child (down) node of this node, or nil
    attr_accessor :down
    # The parent (up) node to this node, or nil
    attr_accessor :up
    # The value of this node, or nil
    attr_accessor :value

    # Create a new Node with the given char and optionally, parent node and value
    def initialize(char, parent = nil, value = nil)
      @char = char
      @up = parent
      @value = value
    end

    # Add another node horizontally
    # (within the left-right binary tree of this Node).
    # Nodes whose char is greater than the current char go right, all others
    # go left. Returns the node that was added.
    def add_horizontal(node)
      current = self
      # Iterative descent so that long sibling chains cannot exhaust the stack.
      loop do
        if node.char > current.char
          return current.right = node if current.right.nil?

          current = current.right
        else
          return current.left = node if current.left.nil?

          current = current.left
        end
      end
    end

    # Add and return a new, or return the existing, node with the given char horizontally
    # (within the left-right binary tree of this Node).
    # A newly created node shares this node's parent (up) pointer.
    def add_horizontal_char(char)
      existing = find_horizontal(char)
      return existing unless existing.nil?

      node = Node.new(char, up)
      add_horizontal(node)
      node
    end

    # Find and return the node corresponding to the given char horizontally, or nil if not found
    # (within the left-right binary tree of this Node)
    def find_horizontal(char)
      current = self
      until current.nil?
        return current if current.char == char

        current = char > current.char ? current.right : current.left
      end
      nil
    end

    # Add the given String str and value vertically to this node, by adding or finding each character
    # horizontally, then stepping down and repeating with the next character and so on, writing the
    # value to the last node. Returns the value.
    #
    # Raises ArgumentError when str is empty, because there is then no last
    # node to write the value to.
    def add_vertical(str, value)
      raise ArgumentError, 'str must not be empty' if str.empty?

      node = nil
      str.each_char do |c|
        node =
          if node.nil?
            add_horizontal_char(c)
          elsif node.down.nil?
            # First key to continue past this node: start its child tree.
            node.down = Node.new(c, node)
          else
            node.down.add_horizontal_char(c)
          end
      end
      node.value = value
    end

    # Find the given String str vertically by finding each character horizontally, then stepping down
    # and repeating with the next character and so on. Return the last node if found, or nil if any
    # horizontal search fails.
    # Optionally, set the offset into the string and its length. A length that
    # runs past the end of str is truncated to the end of str.
    def find_vertical(str, offset = 0, length = str.length)
      node = nil
      stop = [offset + length, str.length].min
      (offset...stop).each do |i|
        node = descend(node, str[i])
        return nil if node.nil?
      end
      node
    end

    # Find the next match (terminating node with value non-nil) in the String data,
    # i.e. the deepest node carrying a value that is reached before the
    # characters of data stop matching. Returns nil when there is none.
    # Optionally, set the offset into the data and its length. A length that
    # runs past the end of data is truncated to the end of data.
    def find_forward(data, offset = 0, length = data.length)
      node = nil
      last_value_node = nil
      stop = [offset + length, data.length].min
      (offset...stop).each do |i|
        node = descend(node, data[i])
        break if node.nil?

        last_value_node = node unless node.value.nil?
      end
      last_value_node
    end

    # Count the number of nodes in the given direction (:up,:down,:left,:right) until
    # the edge of the tree. This node itself is included in the count.
    def count(direction)
      steps = 0
      node = self
      until node.nil?
        node = node.public_send(direction)
        steps += 1
      end
      steps
    end

    # Balance this node within its own left-right tree, then recursively
    # balance its left, right and down subtrees, and return the new root node.
    def balance
      node = self
      # Half the difference between the right and left spine lengths is the
      # number of single steps the root has to move (positive: to the right).
      shift = (node.count(:right) - node.count(:left)) / 2
      until shift.zero?
        if shift > 0
          pivot = node.right
          node.right = nil
          shift -= 1
        else
          pivot = node.left
          node.left = nil
          shift += 1
        end
        # The detached child becomes the root; the old root is re-inserted below it.
        pivot.add_horizontal(node)
        node = pivot
      end
      node.left = node.left.balance unless node.left.nil?
      node.right = node.right.balance unless node.right.nil?
      node.down = node.down.balance unless node.down.nil?
      node
    end

    # Walk the tree from this node, yielding all strings and values where the
    # value is not nil. Optionally, use the given prefix String str.
    def walk(str = "", &block)
      @down.walk(str + char, &block) unless @down.nil?
      @left.walk(str, &block) unless @left.nil?
      yield str + @char, @value unless @value.nil?
      @right.walk(str, &block) unless @right.nil?
    end

    # Return the complete string from the tree root up to and including this node
    # i.e. The total string key for this node from root.
    def to_s
      key = @char
      node = up
      until node.nil?
        key = node.char + key
        node = node.up
      end
      key
    end

    # Return the length of the total string key for this node from root.
    def length
      count(:up)
    end

    # Return an Array of Strings of all possible partial string from this node.
    # Optionally, use the given prefix String str.
    def all_partials(str = "")
      list = []
      @down.walk(str) { |partial| list << partial } unless @down.nil?
      list
    end

    private

    # One step of a vertical search: look char up in this node's own sibling
    # tree when no node has been matched yet, otherwise in the child tree of
    # the node matched so far. Returns nil when there is nothing to descend into.
    def descend(node, char)
      return find_horizontal(char) if node.nil?
      return nil if node.down.nil?

      node.down.find_horizontal(char)
    end
  end
end
@@ -0,0 +1,98 @@
module StringTree
  # Tree represents a complete StringTree, and has functionality resembling a Hash.
  class Tree
    # The root StringTree::Node, or nil if empty
    attr_accessor :root

    # Create a new empty Tree
    def initialize
      @root = nil
    end

    # Add a key and value to this Tree.
    #
    # Raises ArgumentError for a nil or empty key. The check runs before the
    # root is created so that a rejected key leaves the tree untouched.
    def add(key, value)
      raise ArgumentError, 'key must be a non-empty String' if key.nil? || key.empty?

      @root = Node.new(key[0], nil) if @root.nil?
      @root.add_vertical(key, value)
    end

    # Find a specified key in the Tree, and return the value, or nil if not found.
    def find(key)
      node = lookup(key)
      node.nil? ? nil : node.value
    end

    # Return true if the given key exists, i.e. its node is present and
    # carries a non-nil value.
    def has_key?(key)
      node = lookup(key)
      !node.nil? && !node.value.nil?
    end

    # Return an Array of Strings representing all partial matches forward of key in the Tree.
    # Please note the key itself is not included, even if it exists as a value.
    # Returns nil when the tree is empty or key is not present in it.
    #
    # E.g.: A tree containing 'ant','antler','deer','anthropic','beer'
    #       tree.partials('ant') would return ['antler','anthropic']
    def partials(key)
      node = lookup(key)
      node.nil? ? nil : node.all_partials(key)
    end

    # Rebalance the tree for faster access.
    # Returns the new root node, or nil when the tree is empty.
    def optimize!
      return nil if @root.nil?

      @root = @root.balance
    end

    # Tokenize the string Data by finding all instances of any key in the Tree.
    # yields each instance as a StringTree::Item. Returns nil.
    def match_all(data, &block)
      return nil if @root.nil?

      each_match(data) { |offset, node| yield Item.new(offset, true, node) }
      nil
    end

    # Alias for find
    def [](key)
      find(key)
    end

    # Alias for add
    def []=(key, value)
      add(key, value)
    end

    # Return a Hash of terminating nodes to Integer counts for a given String data,
    # i.e. Find the count of instances of each String in the tree in the given data.
    # Counts are accumulated into list, which is also the return value.
    # Returns nil when the tree is empty.
    def match_count(data, list = {})
      return nil if @root.nil?

      each_match(data) { |_offset, node| list[node] = list.fetch(node, 0) + 1 }
      list
    end

    private

    # Return the node reached by following key from the root, or nil when the
    # tree is empty or the key's characters cannot all be followed.
    def lookup(key)
      return nil if @root.nil?

      @root.find_vertical(key)
    end

    # Scan data from left to right, yielding the offset and terminating node
    # of every match. After a match the scan resumes just past it; otherwise
    # it advances by a single character.
    def each_match(data)
      i = 0
      while i < data.length
        node = @root.find_forward(data, i, data.length - i)
        if !node.nil? && !node.value.nil?
          yield i, node
          i += node.length
        else
          i += 1
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
# The StringTree module contains all the classes and constants for the `stringtree` gem.
module StringTree
  # The current version of the `stringtree` gem.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = '0.1.0'.freeze
end
data/lib/stringtree.rb ADDED
@@ -0,0 +1,6 @@
# Require each component of the gem, in the order listed, from the
# stringtree/ directory that sits next to this file.
%w[version node item tree].each do |file|
  require "#{File.dirname(__FILE__)}/stringtree/#{file}"
end
data/spec/item_spec.rb ADDED
@@ -0,0 +1,32 @@
require 'spec_helper'

# Specs for StringTree::Item: construction, the match? predicate and the
# value delegation to the terminating node.
describe StringTree::Item do
  describe "#initialize" do
    it "should initialize correctly" do
      item = StringTree::Item.new(1, 2, 3)
      expect(item.offset).to eq(1)
      expect(item.match).to eq(2)
      expect(item.node).to eq(3)
    end
  end

  describe "#match?" do
    it "should return @match" do
      item = StringTree::Item.new(1, 2, 3)
      expect(item.match?).to eq(2)
    end
  end

  describe "#value" do
    it "should return @node.value if not nil" do
      # Any object responding to #value can stand in for a node here.
      node = OpenStruct.new(:value => "foo")
      item = StringTree::Item.new(1, 2, node)
      expect(item.value).to eq("foo")
    end

    it "should return nil if value is nil" do
      item = StringTree::Item.new(1, 2, nil)
      expect(item.value).to be_nil
    end
  end
end
data/spec/node_spec.rb ADDED
@@ -0,0 +1,187 @@
require 'spec_helper'

# Specs for StringTree::Node. Expectations compare against independently
# known values rather than against the expression under test itself.
describe StringTree::Node do
  describe "when instantiated" do
    it "has correct char" do
      node = StringTree::Node.new 'a'
      expect(node.char).to eq('a')
    end
  end

  describe "add tests" do
    before do
      @nodeb = StringTree::Node.new 'b'
      @nodec = @nodeb.add_horizontal_char('c')
      @nodea = @nodeb.add_horizontal_char('a')
    end
    it "adds a on correct side" do
      expect(@nodeb.left).to be(@nodea)
    end
    it "adds c on correct side" do
      expect(@nodeb.right).to be(@nodec)
    end
    it "does not add a again" do
      expect(@nodeb.add_horizontal_char('a')).to be(@nodea)
    end
    it "count left correct" do
      # count includes the starting node: b and a
      expect(@nodeb.count(:left)).to eq(2)
    end
    it "count right correct" do
      # count includes the starting node: b and c
      expect(@nodeb.count(:right)).to eq(2)
    end
  end

  describe "find tests" do
    before do
      @nodeb = StringTree::Node.new 'b'
      @nodec = @nodeb.add_horizontal_char('c')
      @nodea = @nodeb.add_horizontal_char('a')
    end
    it "finds a" do
      expect(@nodeb.find_horizontal('a')).to be(@nodea)
    end
    it "finds c" do
      expect(@nodeb.find_horizontal('c')).to be(@nodec)
    end
  end

  describe "vertical tests" do
    before do
      @node = StringTree::Node.new 'a'
      @str = "testing"
      @node.add_vertical(@str, 'one')
    end
    it "string exists in stringtree" do
      node = @node
      @str.each_char do |c|
        node = node.find_horizontal(c)
        expect(node).not_to be(nil)
        expect(node.char).to eq(c)
        node = node.down
      end
    end
    it "find_vertical returns correct value" do
      expect(@node.find_vertical(@str).value).to eq('one')
    end
    it "find_vertical does not find superstring" do
      expect(@node.find_vertical("testing1")).to be_nil
      # The prefix node exists but carries no value.
      expect(@node.find_vertical("testin").value).to be_nil
    end
  end

  describe "optimize tests" do
    before do
      @node = StringTree::Node.new 'a'
      @val = {
        "one"=>1, "two"=>2, "three"=>3, "four"=>4, "five"=>5, "six"=>6, "seven"=>7, "eight"=>8, "nine"=>9, "ten"=>10,
        "eleven"=>11, "twelve"=>12, "thirteen"=>13, "fourteen"=>14, "fifteen"=>15, "sixteen"=>16, "seventeen"=>17, "eighteen"=>18, "nineteen"=>19, "twenty"=>20
      }
      @val.each { |c,d| @node.add_vertical(c,d) }
      @node = @node.balance
    end
    it "string exists in stringtree" do
      node = @node
      'one'.each_char do |c|
        node = node.find_horizontal(c)
        expect(node).not_to be(nil)
        expect(node.char).to eq(c)
        node = node.down
      end
    end
    it "find_vertical returns correct value" do
      expect(@node.find_vertical("one").value).to eq(1)
    end
    it "find_vertical does not find superstring" do
      expect(@node.find_vertical("testing1")).to be_nil
      expect(@node.find_vertical("testin")).to be_nil
    end
  end

  describe '#walk' do
    it 'should walk a tree correctly' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2'
      inst.down.left = StringTree::Node.new '1'
      inst.down.right = StringTree::Node.new '3'

      # None of the nodes carries a value, so nothing is yielded.
      list = []
      inst.walk do |k,v|
        list << { k => v }
      end
      expect(list).to eq []
    end

  end

  describe '#all_partials' do
    it 'should call @down.walk if it exists' do

      inst = StringTree::Node.new 'a'
      inst.down = StringTree::Node.new 'b'

      expect(inst.down).to receive(:walk).with('one')
      inst.all_partials('one')
    end

    it 'should not call @down.walk if it doesnt exist' do
      # Spec will fail with exception if faulty.
      inst = StringTree::Node.new 'a'
      inst.down = nil
      inst.all_partials('one')
    end
  end

  describe '#find_forward' do
    it 'should return the correct node' do

      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst

      expect(inst.find_forward("b2",0,2)).to be(inst.down)
      expect(inst.find_forward("asoicbasicn",0,2)).to be_nil
    end
  end

  describe '#to_s' do
    it 'should return the correct string' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst

      expect(inst.down.left.to_s).to eq("b1")
      expect(inst.down.right.to_s).to eq("b3")
      expect(inst.down.to_s).to eq("b2")
      expect(inst.right.to_s).to eq("c")
      expect(inst.left.to_s).to eq("a")
      expect(inst.to_s).to eq("b")
    end
  end

  describe '#length' do
    it 'should return the correct count' do
      inst = StringTree::Node.new 'b'
      inst.left = StringTree::Node.new 'a'
      inst.right = StringTree::Node.new 'c'
      inst.down = StringTree::Node.new '2', inst, 'one'
      inst.down.left = StringTree::Node.new '1', inst
      inst.down.right = StringTree::Node.new '3', inst
      inst.down.left.down = StringTree::Node.new 'z', inst.down.left
      inst.down.left.down.left = StringTree::Node.new 'v', inst.down.left

      expect(inst.down.left.down.left.length).to eq(3)
      expect(inst.down.left.down.length).to eq(3)
      expect(inst.down.left.length).to eq(2)
      expect(inst.down.length).to eq(2)
      expect(inst.length).to eq(1)
    end
  end

end
@@ -0,0 +1,19 @@
# Suppress warnings.
# NOTE(review): the previous $VERBOSE level is saved in warn_level but is not
# restored anywhere in the lines shown here - confirm whether that is intended.
warn_level = $VERBOSE
$VERBOSE = nil

# Coverage reporting: SimpleCov with the rcov formatter when the SIMPLECOV
# environment variable is set, Coveralls otherwise.
if ENV.key?('SIMPLECOV')
  require 'simplecov'
  require 'simplecov-rcov'

  SimpleCov.formatter = SimpleCov::Formatter::RcovFormatter
  SimpleCov.start { add_filter '/spec/' }
else
  require 'coveralls'
  Coveralls.wear!
end

require 'stringtree'
require 'ostruct'