suffix_tree 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/lib/data/base_data_source.rb +44 -0
  3. data/lib/data/data_source_factory.rb +16 -0
  4. data/lib/data/file_data_source.rb +29 -0
  5. data/lib/data/line_state_machine.rb +86 -0
  6. data/lib/data/string_data_source.rb +31 -0
  7. data/lib/data/word_data_source.rb +229 -0
  8. data/lib/location.rb +165 -0
  9. data/lib/node.rb +63 -0
  10. data/lib/node_factory.rb +169 -0
  11. data/lib/persist/suffix_tree_db.rb +148 -0
  12. data/lib/search/searcher.rb +68 -0
  13. data/lib/suffix_linker.rb +16 -0
  14. data/lib/suffix_tree.rb +122 -0
  15. data/lib/visitor/base_visitor.rb +17 -0
  16. data/lib/visitor/bfs.rb +22 -0
  17. data/lib/visitor/data_source_visitor.rb +15 -0
  18. data/lib/visitor/dfs.rb +34 -0
  19. data/lib/visitor/k_common_visitor.rb +71 -0
  20. data/lib/visitor/leaf_count_visitor.rb +15 -0
  21. data/lib/visitor/node_count_visitor.rb +16 -0
  22. data/lib/visitor/numbering_visitor.rb +230 -0
  23. data/lib/visitor/suffix_offset_visitor.rb +23 -0
  24. data/lib/visitor/tree_print_visitor.rb +44 -0
  25. data/lib/visitor/value_depth_visitor.rb +34 -0
  26. data/spec/constant_lca_spec.rb +27 -0
  27. data/spec/data_source_spec.rb +51 -0
  28. data/spec/fixtures/arizona.txt +1 -0
  29. data/spec/fixtures/chapter1.txt +371 -0
  30. data/spec/fixtures/chapter1.txt.summary +3 -0
  31. data/spec/fixtures/chapter1.txt.values +0 -0
  32. data/spec/fixtures/chapter1.txt.words +1329 -0
  33. data/spec/fixtures/mississippi.txt +1 -0
  34. data/spec/fixtures/singlePara.txt +41 -0
  35. data/spec/fixtures/smallFile.txt +3 -0
  36. data/spec/fixtures/smallFile.txt.summary +2 -0
  37. data/spec/fixtures/smallFile.txt.values +0 -0
  38. data/spec/fixtures/smallFile.txt.words +14 -0
  39. data/spec/fixtures/testbook.txt +5414 -0
  40. data/spec/location_spec.rb +149 -0
  41. data/spec/node_factory_spec.rb +199 -0
  42. data/spec/search_spec.rb +182 -0
  43. data/spec/suffix_tree_spec.rb +270 -0
  44. data/spec/util_spec.rb +47 -0
  45. data/spec/visitor_spec.rb +310 -0
  46. metadata +87 -0
@@ -0,0 +1,149 @@
1
+ require 'rspec'
2
+ require_relative '../lib/location'
3
+ require_relative '../lib/node'
4
+ require_relative '../lib/node_factory'
5
+ require_relative '../lib/data/string_data_source'
6
+
7
# Specs for Location. The examples treat a Location as a position in the
# tree made of a node, an onNode flag and an incoming-edge offset.
#
# Shared fixture: the alphabet written twice (offsets 0..51). Once every
# split below has run, in declaration order, the tree is
#
#   root -> level2(0,25) -> level3(26,28) -> level1(29,51)
#             a..z     "a"     a..c     "d"     d..z
describe 'Location class' do

  let(:dataSource) { StringDataSource.new("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz") }
  let(:nodeFactory) { NodeFactory.new dataSource }
  let(:root) { nodeFactory.newRoot }
  # level1 starts out as a leaf covering the entire string
  let(:level1) { nodeFactory.addLeaf(root, 'a', 0) }
  # root -> level2(0, 25) -> level1(26, 51)
  let(:level2) { nodeFactory.splitEdgeAt(level1, 26) }
  # root -> level2(0, 25) -> level3(26,28) -> level1 (29,51)
  let(:level3) { nodeFactory.splitEdgeAt(level1, 29) }
  let(:linkNode) { nodeFactory.addLeaf(root, 'b', 2) }

  # Builds the whole fixture tree in declaration order.
  #
  # `let` is lazy: a node is only created the first time an example mentions
  # it. level3 is defined purely in terms of level1, so an example that only
  # mentions level3 would never perform the level2 split and would get a
  # differently shaped tree. Touching all four nodes here guarantees the
  # documented shape. The Location objects are deliberately discarded.
  def forceLazyLoad
    Location.new(root)
    Location.new(level1)
    Location.new(level2)
    Location.new(level3)
  end

  # Asserts that +location+ rests exactly on +node+ (not part-way along an edge).
  def expectOnNode(location, node)
    expect(location.node).to eq node
    expect(location.onNode).to eq true
    expect(location.incomingEdgeOffset).to eq Node::UNSPECIFIED_OFFSET
  end

  describe "#new" do
    it "starts at a node" do
      expectOnNode(Location.new(root), root)
    end
  end

  describe "#jumpToNode" do
    it "sets location to node starting at another" do
      location = Location.new(level1)
      location.jumpToNode(level2)
      expectOnNode(location, level2)
    end
  end

  describe "#traverseUp" do
    # root -> level2(0, 25) -> level3(26,28) -> level1 (29,51)
    # Without the full fixture these examples fail, see forceLazyLoad.
    before { forceLazyLoad }

    it "goes to parent node starting from node" do
      location = Location.new(level3)
      startOffset, endOffset = location.traverseUp
      expectOnNode(location, level2)
      expect(startOffset).to eq 26
      expect(endOffset).to eq 28
    end

    it "goes to parent node starting from leaf" do
      leafNode = nodeFactory.addLeaf(level3, 'c', 3)
      location = Location.new(leafNode, false, 6)
      startOffset, endOffset = location.traverseUp
      expectOnNode(location, level3)
      expect(startOffset).to eq 3
      expect(endOffset).to eq 5
    end

    it "goes to parent node starting at mid-edge from internal node" do
      leafNode = nodeFactory.addLeaf(level3, 'x', 38)
      location = Location.new(leafNode, false, 48)
      startOffset, endOffset = location.traverseUp
      expectOnNode(location, level3)
      expect(startOffset).to eq 38
      expect(endOffset).to eq 47
    end
  end

  describe "#traverseSuffixLink" do
    before { forceLazyLoad }

    it "follows suffix link" do
      level3.suffixLink = linkNode
      location = Location.new(level3)
      location.traverseSuffixLink
      expectOnNode(location, linkNode)
    end
  end

  # root -> level2(0, 25) -> level3(26,28) -> level1 (29,51)
  #            a..z             a..c             d..z
  describe "#traverseDownChildValue" do
    before { forceLazyLoad }

    it "ends on child node if child edge has one value" do
      testNode = nodeFactory.addLeaf(level2, 'c', 2)
      location = Location.new(level2)
      location.traverseDownChildValue('c')
      expect(location.node).to eq testNode
      expect(location.onNode).to eq false
      expect(location.incomingEdgeOffset).to eq 3
    end

    it "ends on second character of child edge when that edge has more than one value" do
      leafNode = nodeFactory.addLeaf(level2, 'c', 2)
      internalNode = nodeFactory.splitEdgeAt(leafNode, 7)
      location = Location.new(internalNode)
      location.traverseDownChildValue('h')
      expect(location.node).to eq leafNode
      expect(location.onNode).to eq false
      expect(location.incomingEdgeOffset).to eq 8
    end
  end

  # root -> level2(0, 25) -> level3(26,28) -> level1 (29,51)
  #            a..z     "a"     a..c     "d"     d..z
  describe "#traverseSkipCountDown" do
    before { forceLazyLoad }

    it "checks single character to get to next node" do
      # Adds a second child under level2:
      #   level2 -"c"-> leaf(2,51)
      leaf = nodeFactory.addLeaf(level2, 'c', 2)
      location = Location.new(level2)
      location.traverseSkipCountDown(dataSource, 2, 10)
      expect(location.node).to eq leaf
      expect(location.onNode).to eq false
      expect(location.incomingEdgeOffset).to eq 11
    end

    # 1:root -> 3:level2(0, 25) -> 4:level3(26,28) -> 2:level1 (29,-1)
    it "traverses multiple nodes down" do
      location = Location.new(root)
      location.traverseSkipCountDown(dataSource, 0, 33)
      expect(location.node).to eq level1
      expect(location.incomingEdgeOffset).to eq 34
      expect(location.onNode).to eq false
    end
  end

end
@@ -0,0 +1,199 @@
1
+ require 'rspec'
2
+ require_relative '../lib/node'
3
+ require_relative '../lib/node_factory'
4
+ require_relative '../lib/data/string_data_source'
5
+
6
# Specs for NodeFactory: construction, configuration flags, root creation,
# leaf insertion and edge splitting.
#
# Two fixtures are used: "mississippi" for the configuration examples and the
# alphabet written twice (offsets 0..51) for the edge-splitting examples.
describe "NodeFactory class" do

  let(:dataSource) { StringDataSource.new "mississippi" }
  let(:nodeFactory) { NodeFactory.new dataSource }
  let(:alphaDataSource) { StringDataSource.new "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" }
  let(:alphaNodeFactory) { NodeFactory.new alphaDataSource }

  describe '#initialize' do
    it "accepts a dataSource" do
      expect(nodeFactory.dataSource).to eq dataSource
    end
  end

  describe '#setConfiguration' do
    it "keeps a hash of configuration options for how nodes get built" do
      options = {
        generalized_suffix_tree: true,
        track_value_depth: true,
        # :previousValue is left out on purpose: its default (false) must remain
      }
      nodeFactory.setConfiguration(options)
      configuration = nodeFactory.configuration
      expect(configuration[:generalized_suffix_tree]).to eq options[:generalized_suffix_tree]
      # Must read back the key that was actually set. A key absent from both
      # hashes would compare nil with nil and pass without testing anything.
      expect(configuration[:track_value_depth]).to eq options[:track_value_depth]
      expect(configuration[:previousValue]).to eq false
    end

    it "by default does not save previous value" do
      root = nodeFactory.newRoot
      leaf = nodeFactory.addLeaf(root, 'i', 1)
      internal = nodeFactory.splitEdgeAt(leaf, 4)
      leaf2 = nodeFactory.addLeaf(internal, 'x', 7)
      # defined? yields nil when the node has no previousValue method at all
      [leaf, internal, leaf2].each do |node|
        expect(defined?(node.previousValue)).to eq nil
      end
    end

    it "configuration turns on previous value saving" do
      nodeFactory.setConfiguration({ previousValue: true })
      root = nodeFactory.newRoot

      # offset 0 has nothing before it
      leaf = nodeFactory.addLeaf(root, 'm', 0)
      expect(leaf.previousValue).to eq nil

      leaf = nodeFactory.addLeaf(root, 'i', 1)
      expect(leaf.previousValue).to eq 'm'

      leaf2 = nodeFactory.addLeaf(root, 's', 2)
      expect(leaf2.previousValue).to eq 'i'

      internal = nodeFactory.splitEdgeAt(leaf, 4)
      expect(internal.previousValue).to eq nil

      leaf3 = nodeFactory.addLeaf(internal, 'x', 7)
      expect(leaf3.previousValue).to eq 's'
      # NOTE(review): the same leaf is added a second time under the same key.
      # Presumably this checks that re-adding gives the same result; confirm
      # it is not a copy/paste leftover.
      leaf3 = nodeFactory.addLeaf(internal, 'x', 7)
      expect(leaf3.previousValue).to eq 's'
    end

    it "tracks value depth of nodes" do
      nodeFactory.setConfiguration({ valueDepth: true })
      root = nodeFactory.newRoot
      leaf = nodeFactory.addLeaf(root, 'i', 1)
      internal = nodeFactory.splitEdgeAt(leaf, 4)
      expect(internal.valueDepth).to eq 3
    end

    it "tracks value depth of nodes across repeated splits of one edge" do
      nodeFactory.setConfiguration({ valueDepth: true })
      root = nodeFactory.newRoot
      expect(root.valueDepth).to eq 0
      leaf = nodeFactory.addLeaf(root, 'i', 1)
      internal = nodeFactory.splitEdgeAt(leaf, 8)
      internal2 = nodeFactory.splitEdgeAt(internal, 4)
      expect(internal.valueDepth).to eq 7
      expect(internal2.valueDepth).to eq 3
      internal3 = nodeFactory.splitEdgeAt(internal, 6)
      expect(internal3.valueDepth).to eq 5
    end
  end

  describe "#newRoot" do
    it "creates a new root node with node_id=1" do
      root = nodeFactory.newRoot
      expect(root.nodeId).to eq 1
      expect(root.children.length).to eq 0
      expect(nodeFactory.nextNodeId).to eq 2
    end

    it "resets each time newRoot is called" do
      root1 = nodeFactory.newRoot
      root2 = nodeFactory.newRoot
      expect(root1.nodeId).to eq 1
      expect(root2.nodeId).to eq 1
    end
  end

  describe "#addLeaf" do
    it "adds a leaf node" do
      root = nodeFactory.newRoot
      child = nodeFactory.addLeaf(root, 'a', 3)
      aChild = root.children['a']
      expect(aChild).to eq child
      expect(root.children['a'].parent).to eq root
    end
  end

  describe "#splitEdgeAt" do
    it "splits a long edge" do
      root = alphaNodeFactory.newRoot
      level1 = alphaNodeFactory.addLeaf(root, 'a', 0)
      expect(level1.parent).to eq root
      expect(level1.incomingEdgeStartOffset).to eq 0
      expect(level1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET

      # first split: root -> level2(0,25) -> level1(26,..)
      level2 = alphaNodeFactory.splitEdgeAt(level1, 26)
      expect(level2.parent).to eq root
      expect(level1.parent).to eq level2
      expect(level2.incomingEdgeStartOffset).to eq 0
      expect(level2.incomingEdgeEndOffset).to eq 25
      expect(level1.incomingEdgeStartOffset).to eq 26
      expect(level1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET

      # second split: root -> level2(0,25) -> level3(26,28) -> level1(29,..)
      level3 = alphaNodeFactory.splitEdgeAt(level1, 29)
      expect(level3.parent).to eq level2
      expect(level2.parent).to eq root
      expect(level3.incomingEdgeStartOffset).to eq 26
      expect(level3.incomingEdgeEndOffset).to eq 28
      expect(level1.incomingEdgeStartOffset).to eq 29
      expect(level1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET
    end

    it "splits edge and returns that node" do
      root = nodeFactory.newRoot
      child = nodeFactory.addLeaf(root, 'm', 0)
      middleNode = nodeFactory.splitEdgeAt(child, 3)
      expect(middleNode.parent).to eq root
      expect(middleNode.incomingEdgeStartOffset).to eq 0
      expect(middleNode.incomingEdgeEndOffset).to eq 2
      expect(middleNode.isInternal).to eq true
      expect(child.parent).to eq middleNode
      expect(child.incomingEdgeStartOffset).to eq 3
      expect(child.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET
      expect(child.isLeaf).to eq true
    end

    it "splits nodes correctly" do
      root = alphaNodeFactory.newRoot
      level1 = alphaNodeFactory.addLeaf(root, 'a', 0)
      expect(root.children['a']).to eq level1
      level2 = alphaNodeFactory.splitEdgeAt(level1, 26)
      # the new internal node takes over the parent's child slot
      expect(root.children['a']).to eq level2
      expect(level2.parent).to eq root
      expect(level2.incomingEdgeStartOffset).to eq 0
      expect(level2.incomingEdgeEndOffset).to eq 25
      expect(level1.parent).to eq level2
      expect(level1.incomingEdgeStartOffset).to eq 26
      expect(level1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET
    end

    it "handles multiple splits" do
      root2 = alphaNodeFactory.newRoot
      rLevel1 = alphaNodeFactory.addLeaf(root2, 'a', 0)
      expect(rLevel1.incomingEdgeStartOffset).to eq 0
      expect(rLevel1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET
      rLevel2 = alphaNodeFactory.splitEdgeAt(rLevel1, 26)
      expect(rLevel2.children.length).to eq 1
      expect(rLevel2.isInternal).to eq true
      expect(rLevel2.incomingEdgeStartOffset).to eq 0
      expect(rLevel2.incomingEdgeEndOffset).to eq 25
      expect(rLevel1.isLeaf).to eq true
      expect(rLevel1.incomingEdgeStartOffset).to eq 26
      expect(rLevel1.incomingEdgeEndOffset).to eq Node::CURRENT_ENDING_OFFSET
    end
  end
end
@@ -0,0 +1,182 @@
1
+ require 'rspec'
2
+ require_relative '../lib/location'
3
+ require_relative '../lib/node'
4
+ require_relative '../lib/node_factory'
5
+ require_relative '../lib/data/string_data_source'
6
+ require_relative '../lib/data/file_data_source'
7
+ require_relative '../lib/visitor/bfs'
8
+ require_relative '../lib/visitor/leaf_count_visitor'
9
+ require_relative '../lib/visitor/value_depth_visitor'
10
+ require_relative '../lib/visitor/dfs'
11
+ require_relative '../lib/visitor/node_count_visitor'
12
+ require_relative '../lib/search/searcher'
13
+ require_relative '../lib/suffix_tree'
14
+
15
# Specs for Searcher: looking up every substring of "mississippi" in a suffix
# tree, and matching a whole data source against the tree.
describe "Search class" do

  let(:dataSource) { StringDataSource.new("mississippi") }
  let(:fileDataSource) { FileDataSource.new(File.join('spec', 'fixtures', "mississippi.txt")) }

  describe '#findString' do

    # Search string => offsets findString is expected to return for a tree
    # built from "mississippi" alone. Includes three strings that do not
    # occur ("x", "ssx", "ssissippix") and must return an empty list.
    let(:mississippiOffsets) do
      {
        "m" => [0],
        "i" => [1,4,7],
        "s" => [2,3,5,6],
        "p" => [8,9],
        "x" => [],
        "mi" => [0],
        "is" => [1,4],
        "ss" => [2,5],
        "si" => [3,6],
        "ip" => [7],
        "pp" => [8],
        "pi" => [9],
        "mis" => [0],
        "iss" => [1,4],
        "ssi" => [2,5],
        "ssx" => [],
        "sis" => [3],
        "sip" => [6],
        "ipp" => [7],
        "ppi" => [8],
        "miss" => [0],
        "issi" => [1,4],
        "ssis" => [2],
        "siss" => [3],
        "ssip" => [5],
        "sipp" => [6],
        "ippi" => [7],
        "missi" => [0],
        "issis" => [1],
        "ssiss" => [2],
        "sissi" => [3],
        "issip" => [4],
        "ssipp" => [5],
        "sippi" => [6],
        "missis" => [0],
        "ississ" => [1],
        "ssissi" => [2],
        "sissip" => [3],
        "issipp" => [4],
        "ssippi" => [5],
        "mississ" => [0],
        "ississi" => [1],
        "ssissip" => [2],
        "sissipp" => [3],
        "issippi" => [4],
        "mississi" => [0],
        "ississip" => [1],
        "ssissipp" => [2],
        "sissippi" => [3],
        "mississip" => [0],
        "ississipp" => [1],
        "ssissippi" => [2],
        "ssissippix" => [],
        "mississipp" => [0],
        "ississippi" => [1],
        "mississippi" => [0],
      }
    end

    # Runs findString for every key of +expected+ and compares the result
    # with the mapped offsets. Stops at the first mismatch and names the
    # search string that failed.
    def expectOffsets(searcher, expected)
      expected.each do |needle, offsets|
        found = searcher.findString(needle)
        expect(found).to eq(offsets),
          "findString(#{needle.inspect}): expected #{offsets.inspect}, got #{found.inspect}"
      end
    end

    it 'finds all substrings' do
      st = SuffixTree.new(nil, { valueDepth: true })
      st.addDataSource(dataSource)

      expectOffsets(Searcher.new(dataSource, st.root), mississippiOffsets)
    end

    it 'finds all substrings after more values are added' do
      st = SuffixTree.new(nil, { valueDepth: true })
      st.addDataSource(fileDataSource)

      st.addValue('m', 11)
      st.addValue('i', 12)
      st.addValue('$', 13)

      # NOTE(review): the searcher is given the in-memory string source even
      # though the tree was built from the file source. Confirm this is
      # intentional.
      searcher = Searcher.new(dataSource, st.root)

      expected = mississippiOffsets.merge({
        "m" => [0,11],          # 2 m's now
        "i" => [1,4,7,10,12],   # final "i" of mississippi now explicit, and there's another as well
        "mi" => [0,11],         # "mi" is now at end as well
      })
      expectOffsets(searcher, expected)
    end
  end

  describe "#matchDataSource" do
    it "returns root location if nothing matches" do
      st = SuffixTree.new(nil, { valueDepth: true })
      st.addDataSource(dataSource)
      searcher = Searcher.new(dataSource, st.root)
      xDataSource = StringDataSource.new("xxx")
      location = searcher.matchDataSource(xDataSource)
      expect(location.onNode).to eq true
      expect(location.node).to eq st.root
      expect(location.depth).to eq 0
    end

    it "finds location that we can use to get suffix offset" do
      st = SuffixTree.new('$', { valueDepth: true })
      st.addDataSource(dataSource)
      searcher = Searcher.new(dataSource, st.root)
      location = searcher.matchDataSource(StringDataSource.new("i"))
      result = []
      location.node.each_suffix { |suffixOffset| result << suffixOffset }
      expect(result).to eq [ 10, 7, 4, 1 ]
    end
  end
end