suffix_tree 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/lib/data/base_data_source.rb +44 -0
  3. data/lib/data/data_source_factory.rb +16 -0
  4. data/lib/data/file_data_source.rb +29 -0
  5. data/lib/data/line_state_machine.rb +86 -0
  6. data/lib/data/string_data_source.rb +31 -0
  7. data/lib/data/word_data_source.rb +229 -0
  8. data/lib/location.rb +165 -0
  9. data/lib/node.rb +63 -0
  10. data/lib/node_factory.rb +169 -0
  11. data/lib/persist/suffix_tree_db.rb +148 -0
  12. data/lib/search/searcher.rb +68 -0
  13. data/lib/suffix_linker.rb +16 -0
  14. data/lib/suffix_tree.rb +122 -0
  15. data/lib/visitor/base_visitor.rb +17 -0
  16. data/lib/visitor/bfs.rb +22 -0
  17. data/lib/visitor/data_source_visitor.rb +15 -0
  18. data/lib/visitor/dfs.rb +34 -0
  19. data/lib/visitor/k_common_visitor.rb +71 -0
  20. data/lib/visitor/leaf_count_visitor.rb +15 -0
  21. data/lib/visitor/node_count_visitor.rb +16 -0
  22. data/lib/visitor/numbering_visitor.rb +230 -0
  23. data/lib/visitor/suffix_offset_visitor.rb +23 -0
  24. data/lib/visitor/tree_print_visitor.rb +44 -0
  25. data/lib/visitor/value_depth_visitor.rb +34 -0
  26. data/spec/constant_lca_spec.rb +27 -0
  27. data/spec/data_source_spec.rb +51 -0
  28. data/spec/fixtures/arizona.txt +1 -0
  29. data/spec/fixtures/chapter1.txt +371 -0
  30. data/spec/fixtures/chapter1.txt.summary +3 -0
  31. data/spec/fixtures/chapter1.txt.values +0 -0
  32. data/spec/fixtures/chapter1.txt.words +1329 -0
  33. data/spec/fixtures/mississippi.txt +1 -0
  34. data/spec/fixtures/singlePara.txt +41 -0
  35. data/spec/fixtures/smallFile.txt +3 -0
  36. data/spec/fixtures/smallFile.txt.summary +2 -0
  37. data/spec/fixtures/smallFile.txt.values +0 -0
  38. data/spec/fixtures/smallFile.txt.words +14 -0
  39. data/spec/fixtures/testbook.txt +5414 -0
  40. data/spec/location_spec.rb +149 -0
  41. data/spec/node_factory_spec.rb +199 -0
  42. data/spec/search_spec.rb +182 -0
  43. data/spec/suffix_tree_spec.rb +270 -0
  44. data/spec/util_spec.rb +47 -0
  45. data/spec/visitor_spec.rb +310 -0
  46. metadata +87 -0
@@ -0,0 +1,169 @@
1
+ require_relative 'node'
2
+
3
#
# Creates suffix tree nodes and links them to their parents.
#
# Optional per-node properties (leafCount, valueDepth, previousValue,
# dataSourceBit) are only maintained when enabled in the configuration.
# When a persister is supplied, every node that is created or changed
# is handed to it.
#
class NodeFactory
  attr_reader :nextNodeId, :root, :dataSource, :configuration, :db

  # dataSource may be nil and supplied later through extendDataSource;
  # persister (optional) must respond to persist(node)
  def initialize(dataSource, persister=nil)
    @dataSource = dataSource
    @suffixOffset = 0
    @configuration = {
      leafCount: false,
      valueDepth: false,
      previousValue: false,
      dataSourceBit: false
    }
    @db = persister
    reset
  end

  # restart node numbering at 1
  def reset
    @nextNodeId = 1
  end

  # move to the bit for the next data source (only when dataSourceBit is
  # configured and a data source is already present)
  def nextDataSourceBit
    return unless @configuration[:dataSourceBit] && !@dataSource.nil?

    @dataSourceBit = @dataSourceBit << 1
  end

  # adopt the first data source, or append a later one to the existing one
  def extendDataSource(dataSource, startOffset)
    nextDataSourceBit
    return @dataSource = dataSource if @dataSource.nil?

    @dataSource.extendWith(dataSource, startOffset)
  end

  # after every modForSwitch leaves the data source bit is advanced
  def nextDataSourceSetSize(modForSwitch)
    @nextDataSourceSwitch = modForSwitch
  end

  # merge the given options into the configuration, returns self
  def setConfiguration configurationHash
    configurationHash.each { |option, setting| @configuration[option] = setting }
    self
  end

  # restart numbering and create the root node (returned)
  def newRoot
    reset
    @root = newNode
    @root.children = {}

    # the root defines the accessor for every enabled option
    @configuration.each do |option, enabled|
      @root.createAccessor(option.to_s) if enabled
    end

    # configuration controlled accessors
    @root.valueDepth = 0 if @configuration[:valueDepth]
    @root.leafCount = 0 if @configuration[:leafCount]
    if @configuration[:dataSourceBit]
      @dataSourceBit = 1
      @root.dataSourceBit = @dataSourceBit
    end

    persist(@root)
  end

  #
  # The algorithm adds leaf nodes in order, so each new leaf takes the
  # next suffix offset
  #
  def addLeaf(node, value, offset)
    leaf = newChild(node, value, @suffixOffset, offset, Node::CURRENT_ENDING_OFFSET)

    # optional configuration based properties
    leaf.leafCount = 1 if @configuration[:leafCount]
    if @suffixOffset > 0 && @configuration[:previousValue]
      leaf.previousValue = @dataSource.valueAt(@suffixOffset - 1)
    end
    leaf.dataSourceBit = @dataSourceBit if @configuration[:dataSourceBit]

    @suffixOffset += 1
    switchEvery = @nextDataSourceSwitch
    nextDataSourceBit if !switchEvery.nil? && (@suffixOffset % switchEvery) == 0

    persist(leaf)
  end

  #
  # Insert a new node on the edge leading to node: the new node takes the
  # edge values before incomingEdgeOffset, node keeps the rest and becomes
  # its child.  Returns the new node.
  #
  def splitEdgeAt(node, incomingEdgeOffset)
    edgeStart = node.incomingEdgeStartOffset
    upper = newChild(node.parent, @dataSource.valueAt(edgeStart), node.suffixOffset, edgeStart, incomingEdgeOffset - 1)
    node.incomingEdgeStartOffset = incomingEdgeOffset
    addChild(upper, @dataSource.valueAt(incomingEdgeOffset), node)

    # optional configuration based properties
    if @configuration[:valueDepth]
      upper.valueDepth = upper.parent.valueDepth + upper.incomingEdgeLength
    end
    if @configuration[:dataSourceBit]
      upper.dataSourceBit = node.dataSourceBit | @dataSourceBit
    end

    persist(node)
    persist(upper)
  end

  #
  # return a sequence of all values on the path to this node, joined by delimiter
  #
  def valuePath(node, delimiter=' ')
    collected = []
    until node.parent.nil?
      reverseAddValues(collected, node.incomingEdgeStartOffset, node.incomingEdgeEndOffset)
      node = node.parent
    end
    collected.reverse.join(delimiter)
  end

  #
  # internal private methods
  #
  private

  # append edge values in reverse order (the path is collected walking up to the root);
  # an edge that is still open ended contributes only its first value
  def reverseAddValues(result, startOffset, endOffset)
    if endOffset == Node::CURRENT_ENDING_OFFSET
      result << @dataSource.valueAt(startOffset)
    else
      endOffset.downto(startOffset) do |position|
        result << @dataSource.valueAt(position)
      end
    end
  end

  # create a node with the given edge and attach it to node under key
  def newChild(node, key, suffixOffset, incomingEdgeStartOffset, incomingEdgeEndOffset)
    created = newNode
    created.suffixOffset = suffixOffset
    created.incomingEdgeStartOffset = incomingEdgeStartOffset
    created.incomingEdgeEndOffset = incomingEdgeEndOffset
    addChild(node, key, created)
    created.valueDepth = 0 if @configuration[:valueDepth]
    created
  end

  # create a node carrying the next node id
  def newNode
    created = Node.new(@nextNodeId)

    # newRoot defines the accessors, so the root (id 1) is initialized in newRoot after the node is created
    beyondRoot = @nextNodeId > 1
    created.leafCount = 0 if @configuration[:leafCount] && beyondRoot
    created.dataSourceBit = @dataSourceBit if @configuration[:dataSourceBit] && beyondRoot
    @nextNodeId += 1
    created
  end

  # link childNode below parentNode under value, persisting both
  def addChild(parentNode, value, childNode)
    parentNode.children = {} if parentNode.children.nil?
    parentNode.children[value] = childNode
    childNode.parent = parentNode
    persist(parentNode)
    persist(childNode)
  end

  # hand the node to the persister (when there is one), returns the node
  def persist(node)
    @db.persist(node) unless @db.nil?
    node
  end
end
@@ -0,0 +1,148 @@
1
+ require_relative '../node'
2
+
3
#
# Text file store for suffix tree nodes.
#
# Every persisted node becomes one line of space separated integers:
#
#   nodeId parentId edgeStart edgeEnd suffixOffset suffixLinkId childId... 0
#
# where an id of 0 means "none" and the trailing 0 ends the child list.
# readInt returns these integers one at a time, in the order written.
#
class SuffixTreeDB
  #
  # textFile is the path of the file, it is created (or truncated).
  # The file is opened read/write: a write-only handle cannot serve readInt.
  #
  def initialize(textFile)
    @textFile = File.open(textFile, "w+")
    @dataValues = []
    @dataValueIdx = 0
    # where the next unread line starts, kept apart from the write position
    @readPosition = 0
  end

  # id stored for a node reference, 0 stands for "no node"
  def val(node)
    node.nil? ? 0 : node.nodeId
  end

  # append one line describing node
  def persist(node)
    fields = [
      node.nodeId,
      val(node.parent),
      node.incomingEdgeStartOffset,
      node.incomingEdgeEndOffset,
      node.suffixOffset,
      val(node.suffixLink)
    ]
    fields.concat(node.children.values.map { |childNode| childNode.nodeId }) unless node.children.nil?

    # reading may have moved the file position, always write at the end
    @textFile.seek(0, IO::SEEK_END)
    @textFile.print "#{fields.join(' ')} 0\n"
  end

  # next integer of the file, 0 once everything has been read
  def readInt()
    if @dataValueIdx >= @dataValues.length
      line = readNextLine
      return 0 if line.nil?

      @dataValueIdx = 0
      @dataValues = line.split
    end

    # an empty line has no values, nil.to_i reports it as 0
    result = @dataValues[@dataValueIdx].to_i
    @dataValueIdx += 1
    result
  end

  # flush pending writes and release the file
  def close
    @textFile.close unless @textFile.closed?
  end

  private

  # next unread line without its line ending, nil at the end of the file
  def readNextLine
    # seek also flushes pending writes, so lines persisted so far are visible
    @textFile.seek(@readPosition, IO::SEEK_SET)
    return nil if @textFile.eof?

    line = @textFile.readline
    @readPosition = @textFile.pos
    line.chomp
  end
end
48
+
49
#
# Rebuilds suffix tree nodes from the integer stream of a SuffixTreeDB.
#
# Each call to buildNode consumes one record:
#
#   nodeId parentId edgeStart edgeEnd suffixOffset suffixLinkId childId... 0
#
# A record may refer to nodes that have not been read yet.  Those references
# are remembered and completed when the node they point to shows up.
#
class SuffixTreeBuilder
  # suffix offset of the record read most recently
  attr_reader :suffixCount

  #
  # stdb must respond to readInt, dataSource to valueAt(offset)
  # (used to find the key under which a child is stored)
  #
  def initialize(stdb, dataSource)
    @suffixTreeDB = stdb
    @dataSource = dataSource
    @root = nil
    # each maps the id of a node not yet read to the list of nodes waiting for it
    @unresolvedParents = {}
    @unresolvedSuffixLinks = {}
    @unresolvedChildren = {}
    @allNodes = {}
  end

  #
  # Read the next record, returns the node or false when there are no more records.
  # The first node read is taken as the root.
  #
  def buildNode
    nodeId = @suffixTreeDB.readInt()
    return false unless nodeId > 0

    node = resolveNodeId(nodeId)
    @allNodes[nodeId] = node
    @root = node if @root.nil?
    resolveParent(node, @suffixTreeDB.readInt())
    node.incomingEdgeStartOffset = @suffixTreeDB.readInt()
    node.incomingEdgeEndOffset = @suffixTreeDB.readInt()
    @suffixCount = node.suffixOffset = @suffixTreeDB.readInt()
    resolveSuffixLink(node, @suffixTreeDB.readInt())

    # complete references to this node only now: storing it as a child
    # needs the incoming edge start offset that was just read
    resolve(nodeId, node)

    childNodeId = @suffixTreeDB.readInt()
    until childNodeId == 0
      resolveChild(node, childNodeId)
      childNodeId = @suffixTreeDB.readInt()
    end
    node
  end

  private

  # set the parent, or wait for it when it has not been read yet
  def resolveParent(node, nodeId)
    if @allNodes.has_key?(nodeId)
      node.parent = @allNodes[nodeId]
    else
      resolveEntry(node, nodeId, @unresolvedParents)
    end
  end

  # set the suffix link, or wait for its target when it has not been read yet
  def resolveSuffixLink(node, nodeId)
    if @allNodes.has_key?(nodeId)
      node.suffixLink = @allNodes[nodeId]
    else
      resolveEntry(node, nodeId, @unresolvedSuffixLinks)
    end
  end

  # store the child under the first value of its incoming edge, or wait for it
  def resolveChild(node, nodeId)
    if @allNodes.has_key?(nodeId)
      attachChild(node, @allNodes[nodeId])
    else
      resolveEntry(node, nodeId, @unresolvedChildren)
    end
  end

  # remember that node waits for nodeId (0 means there is nothing to wait for)
  def resolveEntry(node, nodeId, theList)
    return unless nodeId > 0

    # several nodes may wait for the same id, so keep all of them
    waiting = (theList[nodeId] ||= [])
    waiting << node unless waiting.any? { |entry| entry.equal?(node) }
  end

  # the node already known under this id, or a new one
  def resolveNodeId(nodeId)
    if @allNodes.has_key?(nodeId)
      @allNodes[nodeId]
    else
      Node.new(nodeId)
    end
  end

  # complete every reference that was waiting for this node
  def resolve(nodeId, node)
    waitingForParent = @unresolvedParents.delete(nodeId)
    unless waitingForParent.nil?
      print "Unresolved parent value #{nodeId}\n"
      waitingForParent.each { |childNode| childNode.parent = node }
    end

    # a negative start offset gives no key, the entry then stays unresolved
    if @unresolvedChildren.has_key?(nodeId) && (node.incomingEdgeStartOffset >= 0)
      @unresolvedChildren.delete(nodeId).each do |unfinishedNode|
        attachChild(unfinishedNode, node)
      end
    end

    waitingForLink = @unresolvedSuffixLinks.delete(nodeId)
    unless waitingForLink.nil?
      waitingForLink.each { |unfinishedNode| unfinishedNode.suffixLink = node }
    end
  end

  # store childNode in parentNode's children, keyed by the first value of its incoming edge
  def attachChild(parentNode, childNode)
    parentNode.children = {} if parentNode.children.nil?
    parentNode.children[@dataSource.valueAt(childNode.incomingEdgeStartOffset)] = childNode
  end
end
@@ -0,0 +1,68 @@
1
+ require_relative '../visitor/bfs'
2
+ require_relative '../visitor/suffix_offset_visitor'
3
+ require_relative '../data/string_data_source'
4
+
5
+ #
6
+ # Searcher finds matches in a tree
7
+ #
8
+ # It needs the tree root, and the data source used to make the tree
9
+ # This assumes the tree was made with a single data source.
10
+ #
11
+ # "find" really should be finding matches from a different data source (not a string)
12
+ #
13
class Searcher
  # treeDataSource is the data source the tree was built from, treeRoot the root of that tree
  def initialize(treeDataSource, treeRoot)
    @dataSource, @root = treeDataSource, treeRoot
  end

  #
  # match dataSource values from the root,
  # return the location in the suffix tree where the match stopped
  #
  def matchDataSource(dataSource)
    stoppedAt = Location.new(@root)
    stoppedAt.matchDataSource(@dataSource, dataSource)
    stoppedAt
  end

  #
  # node reached when every value of dataSource was matched, nil when the match stopped early
  #
  def findNode(dataSource)
    start = Location.new(@root)
    matched = start.matchDataSource(@dataSource, dataSource)
    matched.depth == dataSource.numberValues ? start.node : nil
  end

  #
  # returns the list of suffix offset values where the searchString has been found
  #
  def findString(searchString)
    findResults(findNode(StringDataSource.new(searchString)))
  end

  #
  # same as findString, but searchString is wrapped in a SingleWordDataSource
  #
  def findWord(searchString)
    findResults(findNode(SingleWordDataSource.new(searchString)))
  end

  #
  # sorted suffix offsets collected breadth first below node, empty when node is nil
  #
  def findResults(node)
    return [] if node.nil?

    offsets = SuffixOffsetVisitor.new
    BFS.new(offsets).traverse(node)
    offsets.result.sort
  end

  # match a string starting at a specific location,
  # returning the character depth of the resulting match
  def findAtLocation(location, s)
    location.matchString(@dataSource, s)
    location.depth
  end
end
@@ -0,0 +1,16 @@
1
#
# Remembers the node that still needs a suffix link and sets the link
# as soon as its target is known.
#
class SuffixLinker

  #
  # When a node is waiting and location sits on a different node,
  # that node becomes the suffix link of the waiting node.
  #
  def update(location)
    waiting = @nodeNeedingSuffixLink
    return if waiting.nil? || location.node == waiting || !location.onNode

    waiting.suffixLink = location.node
    @nodeNeedingSuffixLink = nil
  end

  #
  # node now waits for its suffix link; a node that was still waiting is linked to node
  #
  def nodeNeedingSuffixLink(node)
    earlier = @nodeNeedingSuffixLink
    earlier.suffixLink = node unless earlier.nil?
    @nodeNeedingSuffixLink = node
  end
end
@@ -0,0 +1,122 @@
1
+ require_relative 'location'
2
+ require_relative 'node_factory'
3
+ require_relative 'suffix_linker'
4
+
5
+ #
6
+ # Builds a suffix tree from one or more DataSource instances
7
+ #
8
class SuffixTree
  NO_SUFFIX_OFFSET = -1

  # first data source we use
  attr_reader :rootDataSource

  # when there are a sequence of data sources, treat them as one long one, this is where next source starts
  attr_reader :startOffset

  # where we are in the implicit tree building process
  attr_reader :location

  attr_reader :nodeFactory

  # the root of the tree, and the terminal value (for making implicit trees explicit)
  attr_reader :root, :terminalValue

  # keep track of which nodes need suffix links
  attr_reader :suffixLinker

  #
  # terminalValue  when not nil, added after the values of every data source
  # configuration  when not nil, options handed to the node factory
  # persister      when not nil, handed to the node factory
  #
  def initialize(terminalValue = nil, configuration = nil, persister = nil)
    @nodeFactory = NodeFactory.new(nil, persister)
    @nodeFactory.setConfiguration(configuration) unless configuration.nil?
    @root = @nodeFactory.newRoot()
    @rootDataSource = nil
    @location = Location.new(@root)
    @startOffset = 0
    # nothing added yet: keeps the offset arithmetic in addDataSource valid
    # when the first data source has no values
    @lastOffsetAdded = @startOffset - 1
    @suffixLinker = SuffixLinker.new
    @terminalValue = terminalValue
  end

  #
  # Set the data source, but do not add any values from the data source
  #
  def setDataSource(dataSource)
    @rootDataSource = dataSource if @rootDataSource.nil?
    @nodeFactory.extendDataSource(dataSource, @startOffset)
  end

  #
  # Add all values in a given dataSource, followed by the terminal value when
  # there is one.  Returns the offset where the next data source starts.
  #
  def addDataSource(dataSource)
    setDataSource(dataSource)
    dataSource.each_with_index(@startOffset) do |value, offset|
      addValue(value, offset)
    end
    unless @terminalValue.nil?
      @lastOffsetAdded += 1
      addValue(@terminalValue, @lastOffsetAdded)
    end
    @startOffset = @lastOffsetAdded + 1
  end

  #
  # Adding one value at a time, rootDataSource must be set for this to work
  #
  def addValue(value, offset)
    while extend(value, offset)
      @suffixLinker.update(@location)
    end
    @lastOffsetAdded = offset
  end

  #
  # Finish building the tree by adding a value that is not part of the data source.
  # Does nothing when no data source was set or it has no terminator.
  #
  def finish()
    return if @rootDataSource.nil?

    if @rootDataSource.has_terminator?
      addValue(@rootDataSource.terminator, @startOffset)
    end
  end

  #
  # Extend a single suffix at the current location, returns true if there is another
  # suffix to extend.  (Note that this replaces Object#extend for instances of this class.)
  #
  # Handles these cases:
  #
  #   On a node:
  #     if there is a child starting with the extension value, traverse down that one value, return FALSE
  #     if no child has the extension value, add a leaf,
  #        if we are at root, return FALSE,
  #        otherwise traverse to the next suffix and return TRUE
  #
  #   On an edge:
  #     if next character has the value, traverse past it, return FALSE
  #     if next character is not the value, split edge at that location, locate at the new node, and return TRUE
  #
  def extend(value,offset)
    if @location.onNode
      if @location.node.children.has_key?(value)
        @location.traverseDownChildValue(value)
        false   # rule 3
      else
        @nodeFactory.addLeaf(@location.node, value, offset)
        @location.traverseToNextSuffix(@rootDataSource)   # rule 1, traverse returns false when at root
      end
    elsif @rootDataSource.valueAt(@location.incomingEdgeOffset) == value
      @location.traverseDownEdgeValue()
      false     # found value on edge, rule 3
    else
      newNode = @nodeFactory.splitEdgeAt(@location.node, @location.incomingEdgeOffset)
      @suffixLinker.nodeNeedingSuffixLink(newNode)
      @location.jumpToNode(newNode)
      true      # rule 2
    end
  end

end
@@ -0,0 +1,17 @@
1
#
# Visitor that only counts how often preVisit and postVisit are called.
#
class BaseVisitor
  attr_accessor :preCounter, :postCounter

  def initialize
    @preCounter = @postCounter = 0
  end

  # count the call, true tells the traversal to continue below node
  def preVisit(node)
    @preCounter += 1
    true
  end

  # count the call
  def postVisit(node)
    @postCounter += 1
  end
end
@@ -0,0 +1,22 @@
1
#
# Breadth first traversal, calls preVisit on the visitor for every node reached.
#
class BFS
  def initialize(visitor)
    @visitor = visitor
    @q = []
  end

  #
  # Visit node and everything below it level by level.
  # The children of a node are skipped when preVisit does not return a true value.
  #
  def traverse(node)
    @q.push(node)

    until @q.empty?
      current = @q.shift
      next unless @visitor.preVisit(current)

      offspring = current.children
      offspring.values.each { |child| @q.push(child) } unless offspring.nil?
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'base_visitor'
2
+
3
#
# Combines the dataSourceBit values of the children into their parent.
#
class DataSourceVisitor < BaseVisitor
  # OR the dataSourceBit of every child into node (nothing to do without children)
  def postVisit(node)
    offspring = node.children
    return if offspring.nil?

    offspring.values.each { |child| node.dataSourceBit |= child.dataSourceBit }
  end
end
@@ -0,0 +1,34 @@
1
#
# Depth first traversal: preVisit before the children of a node, postVisit after them.
#
class DFS
  def initialize(visitor)
    @visitor = visitor
  end

  # traverse every child, in the order the children hash yields them
  def traverseChildren(children)
    return if children.nil?

    children.each { |_key, child| traverse(child) }
  end

  #
  # Visit node and everything below it.  When preVisit does not return a true
  # value the children are skipped and postVisit is not called for node.
  #
  def traverse(node)
    return unless @visitor.preVisit(node)

    traverseChildren(node.children)
    @visitor.postVisit(node)
  end
end
21
+
22
#
# Depth first traversal that visits the children in sorted key order.
#
class OrderedDFS < DFS
  # traverse every child, smallest key first
  def traverseChildren(children)
    return if children.nil?

    children.keys.sort.each { |key| traverse(children[key]) }
  end
end
@@ -0,0 +1,71 @@
1
#
# A range of offsets, both ends included.
#
class ValueRange

  attr_accessor :startOffset, :endOffset

  def initialize(startOffset, endOffset)
    @startOffset, @endOffset = startOffset, endOffset
  end

  # number of offsets in the range, 0 when endOffset is startOffset - 1
  def length
    span = @endOffset - @startOffset
    span + 1
  end

end
15
+
16
#
# Finds, for every k, the longest value sequence seen on a node that is
# common to k data sources.
#
# Needs nodes that carry dataSourceBit (one bit per data source below the node)
# and valueDepth.
#
class KCommonVisitor < BaseVisitor

  # dataSource must respond to valueSequence(startOffset, endOffset)
  def initialize(dataSource)
    @dataSource = dataSource

    #
    # key = common to at least this many (2, 3, ...)
    # value = range [ startOffset, endOffset ] of the value sequence
    #
    # entries start out as the empty range and are created when first asked for,
    # so any number of data sources is supported
    #
    @commonTo = Hash.new do |ranges, numberInCommon|
      ranges[numberInCommon] = ValueRange.new(0, -1)
    end
    super()
  end

  #
  # Record the value sequence ending at node when it is longer than the longest
  # one known so far for the number of data sources below node.
  #
  def postVisit(node)
    nCommon = countCommon(node.dataSourceBit)
    longest = @commonTo[nCommon]
    return unless node.valueDepth > longest.length

    longest.startOffset = node.incomingEdgeEndOffset - node.valueDepth + 1
    longest.endOffset = node.incomingEdgeEndOffset
    return unless nCommon > 2

    # a sequence common to nCommon sources is common to fewer sources as well
    (1..(nCommon - 1)).each do |fewer|
      range = @commonTo[fewer]
      next unless range.length < node.valueDepth

      range.startOffset = longest.startOffset
      range.endOffset = longest.endOffset
    end
  end

  # returns the length and the values of the longest sequence recorded for numberInCommon
  def longestStringCommonTo(numberInCommon)
    range = @commonTo[numberInCommon]
    return range.length, @dataSource.valueSequence(range.startOffset, range.endOffset)
  end

  #
  # number of bits set in bits (nil counts as 0), bits is expected not to be negative.
  # Every bit is counted: scanning a fixed 32 bits ignores data sources beyond the 32nd.
  #
  def countCommon(bits)
    bits.to_i.to_s(2).count('1')
  end
end
@@ -0,0 +1,15 @@
1
+ require_relative 'base_visitor'
2
+
3
#
# Adds the leaf counts of the children to their parent.
#
class LeafCountVisitor < BaseVisitor
  # add the leafCount of every child to node (nothing to do without children)
  def postVisit(node)
    offspring = node.children
    return if offspring.nil?

    offspring.values.each { |child| node.leafCount += child.leafCount }
  end
end