suffix_tree 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/persist/suffix_tree_db.rb +1 -1
- data/lib/suffix_tree.rb +7 -5
- data/lib/{location.rb → support/location.rb} +0 -0
- data/lib/{node.rb → support/node.rb} +0 -0
- data/lib/{node_factory.rb → support/node_factory.rb} +0 -0
- data/lib/{suffix_linker.rb → support/suffix_linker.rb} +0 -0
- data/lib/visitor/numbering_visitor.rb +1 -1
- data/lib/visitor/value_depth_visitor.rb +1 -1
- metadata +6 -7
- data/lib/data/line_state_machine.rb +0 -86
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 487602d8f23057546423dd8d71b42863369eb153
|
4
|
+
data.tar.gz: cf4c5658cf7b78dc4ea3254d20006bfebab36e14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe25f9462e7b6c311744647c9ae387f56103b94096071d76d100cd0cfc5af4197d19220a161a6162f2bee1676ebd5d1a528be9f55543afb204239cc1c0d9370d
|
7
|
+
data.tar.gz: bb058a657e287d9298f09762737194f7dd10a29a850afd32c0e7fc6f2b457dcf217f81c1a8c4c9bc700044b0e415a553b235263b146b6f0e02eabf02e46ad3d6
|
data/lib/suffix_tree.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
require_relative 'location'
|
2
|
-
require_relative 'node_factory'
|
3
|
-
require_relative 'suffix_linker'
|
4
|
-
|
5
1
|
#
|
6
2
|
# Builds a suffix tree from one or more DataSource instances
|
7
3
|
#
|
@@ -119,4 +115,10 @@ class SuffixTree
|
|
119
115
|
end
|
120
116
|
end
|
121
117
|
|
122
|
-
end
|
118
|
+
end
|
119
|
+
|
120
|
+
Gem.find_files("visitor/*.rb").each { |path| require path }
|
121
|
+
Gem.find_files("support/*.rb").each { |path| require path }
|
122
|
+
Gem.find_files("search/*.rb").each { |path| require path }
|
123
|
+
Gem.find_files("persist/*.rb").each { |path| require path }
|
124
|
+
Gem.find_files("data/*.rb").each { |path| require path }
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suffix_tree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Johannes Johannsen
|
@@ -19,16 +19,15 @@ files:
|
|
19
19
|
- lib/data/base_data_source.rb
|
20
20
|
- lib/data/data_source_factory.rb
|
21
21
|
- lib/data/file_data_source.rb
|
22
|
-
- lib/data/line_state_machine.rb
|
23
22
|
- lib/data/string_data_source.rb
|
24
23
|
- lib/data/word_data_source.rb
|
25
|
-
- lib/location.rb
|
26
|
-
- lib/node.rb
|
27
|
-
- lib/node_factory.rb
|
28
24
|
- lib/persist/suffix_tree_db.rb
|
29
25
|
- lib/search/searcher.rb
|
30
|
-
- lib/suffix_linker.rb
|
31
26
|
- lib/suffix_tree.rb
|
27
|
+
- lib/support/location.rb
|
28
|
+
- lib/support/node.rb
|
29
|
+
- lib/support/node_factory.rb
|
30
|
+
- lib/support/suffix_linker.rb
|
32
31
|
- lib/visitor/base_visitor.rb
|
33
32
|
- lib/visitor/bfs.rb
|
34
33
|
- lib/visitor/data_source_visitor.rb
|
@@ -80,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
79
|
version: '0'
|
81
80
|
requirements: []
|
82
81
|
rubyforge_project:
|
83
|
-
rubygems_version: 2.4.
|
82
|
+
rubygems_version: 2.4.6
|
84
83
|
signing_key:
|
85
84
|
specification_version: 4
|
86
85
|
summary: Ukkonen suffix tree
|
@@ -1,86 +0,0 @@
|
|
1
|
-
require 'state_machine'
|
2
|
-
|
3
|
-
#
|
4
|
-
# First pass at state machine for converting sequence of formatted lines into a different
|
5
|
-
# set of word values, in this case "<N>, blank, |, blank, <footer title>" get converted
|
6
|
-
# into [ "END_OF_PAGE", "<page number>", "<title as a single word>"]
|
7
|
-
#
|
8
|
-
class LineStateMachine
|
9
|
-
attr_accessor :bucket, :pages
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@bucket = "Page 0"
|
13
|
-
@pages = {}
|
14
|
-
@dataQueue = []
|
15
|
-
super
|
16
|
-
end
|
17
|
-
|
18
|
-
def resetState(data)
|
19
|
-
self.reset
|
20
|
-
result = []
|
21
|
-
result << @dataQueue
|
22
|
-
result << data
|
23
|
-
@dataQueue = []
|
24
|
-
return result.flatten
|
25
|
-
end
|
26
|
-
|
27
|
-
def process(line, wordIndex)
|
28
|
-
data = line.split
|
29
|
-
|
30
|
-
# we are looking for a blank, a pipe, or a page number
|
31
|
-
if (data.length == 0) then
|
32
|
-
if (self.foundBlank) then
|
33
|
-
return []
|
34
|
-
end
|
35
|
-
end
|
36
|
-
if (data.length == 1) then
|
37
|
-
if (data[0] == "|") then
|
38
|
-
if (self.foundPipe) then
|
39
|
-
return []
|
40
|
-
end
|
41
|
-
end
|
42
|
-
ival = data[0].to_i
|
43
|
-
if (ival > 0) then
|
44
|
-
if (self.foundN) then
|
45
|
-
@potentialPageNumber = ival
|
46
|
-
@dataQueue << data # in case this really isn't it
|
47
|
-
return []
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
# if we are looking for the title, the entire line is the title
|
53
|
-
if (data.length > 0) then
|
54
|
-
if (self.foundTitle) then
|
55
|
-
@dataQueue = []
|
56
|
-
@bucket = "Page #{@potentialPageNumber}"
|
57
|
-
@pages[@bucket] = wordIndex
|
58
|
-
return []
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
resetState(data)
|
63
|
-
end
|
64
|
-
|
65
|
-
state_machine :state, :initial => :lookingForN do
|
66
|
-
event :foundN do
|
67
|
-
transition :lookingForN => :lookingForFirstBlank
|
68
|
-
end
|
69
|
-
|
70
|
-
event :foundBlank do
|
71
|
-
transition :lookingForFirstBlank => :lookingForPipe, :lookingForSecondBlank => :lookingForTitle
|
72
|
-
end
|
73
|
-
|
74
|
-
event :foundPipe do
|
75
|
-
transition :lookingForPipe => :lookingForSecondBlank
|
76
|
-
end
|
77
|
-
|
78
|
-
event :foundTitle do
|
79
|
-
transition :lookingForTitle => :lookingForN
|
80
|
-
end
|
81
|
-
|
82
|
-
event :reset do
|
83
|
-
transition all => :lookingForN
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|