dawg 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/dawg.rb +161 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 71bacf98485657d1f57fbf1c71ee76b613a2ea69
|
4
|
+
data.tar.gz: 7c10810752805ab89deac3e6edd6f0e259a2c17d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9540d8dff79f43f854271140b210f8f472c0dd4472808c9cd8e93fdb60ad40d4b8c64975d68f0e751cc456cbd00c21664a6477c20e30fcd8129a9d173803a1ad
|
7
|
+
data.tar.gz: 317ca1ae25bfbf511d3bb0395cf19f0505d94a0b2427b7bc7c973a15817182bdc734b80d309c8a86b8fdd47cceec25b214fb44a09817521ac8f01cd8bde0cde6
|
data/lib/dawg.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
# A single state of the word graph.
#
# Each node carries a numeric id, a flag telling whether a word ends at this
# node, and a hash of outgoing edges (edge label => child DawgNode).
#
# Nodes define #hash and #eql? in terms of #to_s, so two distinct node objects
# with the same final flag and the same (label, child id) edges behave as the
# same Hash key.
class DawgNode
  # Source of node ids: every new node takes the current value and bumps it.
  @@next_id = 0

  attr_accessor :edges, :final, :id

  # Creates a non-final node with no outgoing edges and the next free id.
  def initialize
    @id = @@next_id
    @@next_id += 1
    @final = false
    @edges = {}
  end

  # Builds the signature string used for hashing and equality:
  # "1" or "0" for the final flag, followed by each edge label and the id of
  # the node it points to, all joined with "_" (e.g. "1_a_4_b_7").
  def to_s
    parts = [@final ? "1" : "0"]

    @edges.each do |label, node|
      parts << label
      parts << node.id.to_s
    end

    parts.join("_")
  end

  # Hash code derived from the signature so equivalent nodes collide on purpose.
  def hash
    to_s.hash
  end

  # Two nodes are interchangeable when their signatures match.
  # The class check keeps unrelated objects whose #to_s happens to produce the
  # same text (for example the String "0") from being treated as a node.
  def eql?(other)
    other.is_a?(DawgNode) && to_s == other.to_s
  end
end
|
35
|
+
|
36
|
+
# Word graph built incrementally from words inserted in ascending order.
#
# Usage: call #insert for every word (sorted), then #finish, then query with
# #lookup / #find_similar. Nodes with identical signatures (see DawgNode#to_s)
# are merged while building, so common suffixes are shared.
class Dawg
  # NOTE: the camelCase instance variable names are kept on purpose; they are
  # part of the Marshal data written by #save.
  def initialize
    @previousWord = ""
    @root = DawgNode.new

    # [parent, letter, child] triples that have not been checked for
    # duplication yet.
    @uncheckedNodes = []

    # Unique nodes that have already been checked for duplication
    # (each node is stored as both key and value).
    @minimizedNodes = {}
  end

  # Serializes the whole graph with Marshal and writes it to +filename+.
  # The file is written in binary mode because Marshal output is raw bytes.
  # Returns the number of bytes written.
  def save(filename)
    File.binwrite(filename, Marshal.dump(self))
  end

  # Reads a graph previously written by #save and returns it.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects; only load files
  # that come from a trusted source.
  #
  # NOTE(review): the id counter lives in DawgNode and is not part of the
  # dump, so nodes created after loading in a fresh process may reuse ids of
  # loaded nodes. Inserting into a loaded graph looks unsafe - confirm.
  def self.load(filename)
    Marshal.load(File.binread(filename))
  end

  # Adds +word+ to the graph. Words must arrive in ascending order (compared
  # with String#<); otherwise a RuntimeError is raised.
  #
  # NOTE(review): after #finish the list of unchecked nodes is empty, so a
  # word sharing a prefix with the previous one would have its suffix attached
  # to the root. Do not insert after #finish.
  def insert(word)
    if word < @previousWord
      raise "Error: Words must be inserted in alphabetical order."
    end

    # Length of the common prefix between word and the previous word.
    limit = [word.length, @previousWord.length].min
    common_prefix = (0...limit).take_while { |i| word[i] == @previousWord[i] }.length

    # Check the unchecked nodes for redundant nodes, proceeding from the last
    # one down to the common prefix size, truncating the list at that point.
    _minimize(common_prefix)

    # Add the suffix, starting from the node reached by the common prefix.
    node = @uncheckedNodes.empty? ? @root : @uncheckedNodes.last[2]

    word.split("").drop(common_prefix).each do |letter|
      next_node = DawgNode.new
      node.edges[letter] = next_node
      @uncheckedNodes << [node, letter, next_node]
      node = next_node
    end

    node.final = true
    @previousWord = word
  end

  # Minimizes all nodes that are still unchecked. Call once after the last
  # #insert.
  def finish
    _minimize(0)
  end

  # Processes unchecked entries from the most recently added one backwards
  # until only +downTo+ entries remain. A child that is equivalent to an
  # already known node is replaced by that node in its parent; otherwise the
  # child itself is registered as a known node.
  def _minimize(downTo)
    (@uncheckedNodes.length - downTo).times do
      parent, letter, child = @uncheckedNodes.pop

      if @minimizedNodes.key?(child)
        # Replace the child with the previously encountered equivalent node.
        parent.edges[letter] = @minimizedNodes[child]
      else
        @minimizedNodes[child] = child
      end
    end
  end

  # Returns the final flag of the node reached by +word+, or false when the
  # path does not exist.
  def lookup(word)
    node = _node_for(word)
    node ? node.final : false
  end

  # Returns every stored word that starts with +word+ and is longer than it,
  # or [] when the prefix is not in the graph. The prefix itself is never
  # part of the result.
  def find_similar(word)
    node = _node_for(word)
    return [] unless node

    get_recuirsively_all(node).map { |suffix| word + suffix }
  end

  # Collects every non-empty suffix that leads from +node+ to a final node.
  # A child that is final contributes its own label even when longer words
  # continue below it (e.g. both "t" and "ts" for "cat" / "cats").
  def get_recuirsively_all(node)
    suffixes = []

    node.edges.each do |letter, child|
      suffixes << letter if child.final
      get_recuirsively_all(child).each { |rest| suffixes << letter + rest }
    end

    suffixes
  end

  # Number of unique nodes registered during minimization.
  def nodeCount
    @minimizedNodes.length
  end

  # Total number of outgoing edges over all registered unique nodes.
  def edgeCount
    @minimizedNodes.each_value.inject(0) { |count, node| count + node.edges.length }
  end

  private

  # Follows +prefix+ letter by letter from the root; returns the node reached,
  # or nil as soon as an edge is missing.
  def _node_for(prefix)
    node = @root
    prefix.each_char do |letter|
      node = node.edges.fetch(letter) { return nil }
    end
    node
  end
end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dawg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Maksatbek Manurov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Basic deterministic acyclic finite state automaton in ruby
|
14
|
+
email:
|
15
|
+
- maksat.mansurov@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/dawg.rb
|
21
|
+
homepage: https://github.com/baltavay/dawg
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.2.2
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Deterministic acyclic finite state automaton
|
45
|
+
test_files: []
|