dawg 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/dawg.rb +161 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 71bacf98485657d1f57fbf1c71ee76b613a2ea69
|
4
|
+
data.tar.gz: 7c10810752805ab89deac3e6edd6f0e259a2c17d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9540d8dff79f43f854271140b210f8f472c0dd4472808c9cd8e93fdb60ad40d4b8c64975d68f0e751cc456cbd00c21664a6477c20e30fcd8129a9d173803a1ad
|
7
|
+
data.tar.gz: 317ca1ae25bfbf511d3bb0395cf19f0505d94a0b2427b7bc7c973a15817182bdc734b80d309c8a86b8fdd47cceec25b214fb44a09817521ac8f01cd8bde0cde6
|
data/lib/dawg.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
# A single state (node) of the word graph.
#
# Each node carries:
# * +id+    - a number taken from a class-wide counter at construction time,
# * +final+ - true when a word ends at this node,
# * +edges+ - a Hash mapping an edge label to the child DawgNode.
#
# Nodes are used as Hash keys by Dawg, so +hash+ and +eql?+ are defined in
# terms of the node's signature string (see #to_s): two nodes with the same
# finality and the same labelled edges to the same child ids are treated as
# interchangeable.
class DawgNode
  # Class-wide counter that hands out ids in creation order.
  @@next_id = 0

  attr_accessor :edges, :final, :id

  # Creates a non-final node with no outgoing edges and the next free id.
  def initialize
    @id = @@next_id
    @@next_id += 1
    @final = false
    @edges = {}
  end

  # Builds the signature string of this node: "1" or "0" for the final flag,
  # followed by each edge label and the id of the node it points to, all
  # joined with "_" (for example "0_a_3_b_7").
  #
  # @return [String]
  def to_s
    parts = [@final ? "1" : "0"]

    @edges.each do |label, node|
      parts << label
      parts << node.id.to_s
    end

    parts.join("_")
  end

  # Hash code derived from the signature so that equivalent nodes land in the
  # same Hash bucket.
  #
  # @return [Integer]
  def hash
    to_s.hash
  end

  # Two nodes are interchangeable when their signatures match. The type guard
  # keeps unrelated objects whose +to_s+ happens to look like a signature
  # (e.g. the String "0") from being treated as an equal node.
  #
  # @param other [Object]
  # @return [Boolean]
  def eql?(other)
    other.is_a?(DawgNode) && to_s == other.to_s
  end
end
|
35
|
+
|
36
|
+
# Directed acyclic word graph built incrementally from words that are
# inserted in ascending order.
#
# Usage: call #insert for every word (sorted), then #finish once, after which
# #lookup and #find_similar can be used. The whole object can be persisted
# with #save and restored with Dawg.load.
#
# NOTE: instance variable names are kept as-is on purpose; Marshal stores
# them by name, so renaming would make previously saved files unreadable.
class Dawg
  def initialize
    @previousWord = ""
    @root = DawgNode.new

    # [parent, letter, child] triples that have not been checked for
    # duplication yet.
    @uncheckedNodes = []

    # Unique nodes that have already been checked for duplication
    # (node => node, relying on DawgNode#hash / #eql?).
    @minimizedNodes = {}
  end

  # Serializes the whole graph with Marshal and writes it to +filename+.
  # The file is opened in binary mode because Marshal output is raw bytes.
  #
  # @param filename [String] path of the file to write
  # @return [Integer] number of bytes written
  def save(filename)
    data = Marshal.dump(self)
    File.open(filename, 'wb') { |file| file.write(data) }
  end

  # Restores a graph previously written by #save.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects. Only call this
  # on files you created yourself or otherwise trust.
  #
  # @param filename [String] path of the file to read
  # @return [Object] the unmarshalled object (a Dawg when written by #save)
  def self.load(filename)
    # Block form closes the handle; binary mode matches how #save writes.
    File.open(filename, 'rb') { |file| Marshal.load(file.read) }
  end

  # Adds +word+ to the graph. Words must arrive in ascending String order.
  #
  # @param word [String]
  # @raise [RuntimeError] if +word+ sorts before the previously inserted word
  def insert(word)
    if word < @previousWord
      raise "Error: Words must be inserted in alphabetical order."
    end

    # Length of the common prefix between word and the previous word.
    limit = [word.length, @previousWord.length].min
    common_prefix = 0
    common_prefix += 1 while common_prefix < limit &&
                             word[common_prefix] == @previousWord[common_prefix]

    # Check the unchecked nodes for redundant nodes, proceeding from the last
    # one down to the common prefix size, truncating the list at that point.
    _minimize(common_prefix)

    # Add the suffix, starting from the correct node mid-way through the
    # graph: the deepest still-unchecked child, or the root if none is left.
    node = @uncheckedNodes.empty? ? @root : @uncheckedNodes.last[2]

    word.split("").drop(common_prefix).each do |letter|
      next_node = DawgNode.new
      node.edges[letter] = next_node
      @uncheckedNodes << [node, letter, next_node]
      node = next_node
    end

    node.final = true
    @previousWord = word
  end

  # Minimizes all remaining unchecked nodes. Call once after the last #insert.
  def finish
    _minimize(0)
  end

  # Walks the unchecked triples from the end of the list down to index
  # +downTo+, replacing each child that duplicates an already registered node
  # and registering the others, removing every processed triple.
  #
  # @param downTo [Integer] lowest index of @uncheckedNodes to process
  def _minimize(downTo)
    (@uncheckedNodes.length - 1).downto(downTo) do |index|
      parent, letter, child = @uncheckedNodes[index]
      if @minimizedNodes.key?(child)
        # Replace the child with the previously encountered equivalent one.
        parent.edges[letter] = @minimizedNodes[child]
      else
        # First time this state is seen: register it as the canonical one.
        @minimizedNodes[child] = child
      end
      @uncheckedNodes.pop
    end
  end

  # Tells whether +word+ was inserted into the graph.
  #
  # @param word [String]
  # @return [Boolean]
  def lookup(word)
    node = @root
    word.split("").each do |letter|
      return false unless node.edges.key?(letter)
      node = node.edges[letter]
    end
    node.final
  end

  # Returns every stored word that starts with +word+ and is longer than it.
  # The prefix itself is not part of the result, even when it is a stored
  # word. Returns an empty Array when no stored word has that prefix.
  #
  # @param word [String] the prefix to search for
  # @return [Array<String>]
  def find_similar(word)
    node = @root
    word.split("").each do |letter|
      return [] unless node.edges.key?(letter)
      node = node.edges[letter]
    end

    get_recuirsively_all(node).map { |suffix| word + suffix }
  end

  # Collects every non-empty suffix that leads from +node+ to a final node.
  # (The misspelled method name is kept so existing callers keep working.)
  #
  # @param node [DawgNode] node to start from
  # @return [Array<String>]
  def get_recuirsively_all(node)
    suffixes = []

    node.edges.each do |letter, child|
      # A word may end at +child+ even when longer words continue through it
      # (e.g. "cat" and "cats"), so finality is checked independently of
      # whether the child has further suffixes.
      suffixes << letter if child.final

      get_recuirsively_all(child).each do |tail|
        suffixes << letter + tail
      end
    end

    suffixes
  end

  # Number of nodes registered as unique during minimization. The root is
  # never registered by #_minimize, so it is not part of this count.
  #
  # @return [Integer]
  def nodeCount
    @minimizedNodes.length
  end

  # Total number of outgoing edges of the nodes registered during
  # minimization (edges leaving the root are therefore not included).
  #
  # @return [Integer]
  def edgeCount
    @minimizedNodes.values.inject(0) { |count, node| count + node.edges.length }
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dawg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Maksatbek Manurov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Basic deterministic acyclic finite state automaton in ruby
|
14
|
+
email:
|
15
|
+
- maksat.mansurov@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/dawg.rb
|
21
|
+
homepage: https://github.com/baltavay/dawg
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.2.2
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Deterministic acyclic finite state automaton
|
45
|
+
test_files: []
|