dawg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/dawg.rb +161 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 71bacf98485657d1f57fbf1c71ee76b613a2ea69
4
+ data.tar.gz: 7c10810752805ab89deac3e6edd6f0e259a2c17d
5
+ SHA512:
6
+ metadata.gz: 9540d8dff79f43f854271140b210f8f472c0dd4472808c9cd8e93fdb60ad40d4b8c64975d68f0e751cc456cbd00c21664a6477c20e30fcd8129a9d173803a1ad
7
+ data.tar.gz: 317ca1ae25bfbf511d3bb0395cf19f0505d94a0b2427b7bc7c973a15817182bdc734b80d309c8a86b8fdd47cceec25b214fb44a09817521ac8f01cd8bde0cde6
data/lib/dawg.rb ADDED
@@ -0,0 +1,161 @@
1
# A single state of the word graph.
#
# Every node carries:
# * +id+    - a number taken from a counter shared by all DawgNode instances,
# * +final+ - true when a word ends at this node,
# * +edges+ - a Hash mapping an edge label to the child DawgNode.
#
# Nodes are compared through their signature string (see #to_s) rather than
# by identity: two nodes with the same final flag and the same labelled edges
# leading to the same child ids are interchangeable as Hash keys.
class DawgNode
  # Source of node ids; bumped once for every node constructed.
  @@next_id = 0

  attr_accessor :edges, :final, :id

  # Creates a non-final node with no outgoing edges and the next free id.
  def initialize
    @id = @@next_id
    @@next_id += 1
    @final = false
    @edges = {}
  end

  # Builds the node signature: "1" or "0" for the final flag, followed by
  # each edge label and the id of the child it points to, joined with "_".
  #
  # @return [String]
  def to_s
    parts = [@final ? "1" : "0"]
    @edges.each do |label, node|
      parts << label
      parts << node.id.to_s
    end
    parts.join("_")
  end

  # Hash code derived from the signature so that equivalent nodes land in
  # the same Hash bucket.
  #
  # @return [Integer]
  def hash
    to_s.hash
  end

  # Two nodes are equivalent when their signatures match.
  #
  # The type guard matters: without it any object whose +to_s+ happens to
  # equal the signature (for instance the String "0") would be treated as
  # the same Hash key as this node.
  #
  # @param other [Object]
  # @return [Boolean]
  def eql?(other)
    other.is_a?(DawgNode) && to_s == other.to_s
  end
end
35
+
36
# Word graph built incrementally from words supplied in ascending order.
#
# Usage: call #insert for every word (sorted), then #finish, then query with
# #lookup / #find_similar. Suffix nodes that turn out to be equivalent are
# shared, which is what #_minimize takes care of.
class Dawg
  def initialize
    # NOTE: the instance variable names below end up inside the Marshal
    # payload written by #save, so renaming them would make previously
    # saved files unreadable.
    @previousWord = ""
    @root = DawgNode.new

    # [parent, letter, child] triples along the path of the most recently
    # inserted word that have not been checked for duplication yet.
    @uncheckedNodes = []

    # Unique nodes that have already been checked for duplication
    # (node => node, so an equivalent node can be fetched by lookup).
    @minimizedNodes = {}
  end

  # Serializes the whole graph to +filename+ with Marshal.
  #
  # The file is opened in binary mode because Marshal output is raw bytes;
  # text mode can alter it on platforms that translate line endings.
  #
  # @param filename [String]
  # @return [Integer] number of bytes written
  def save(filename)
    data = Marshal.dump(self)
    File.open(filename, 'wb') { |file| file.write(data) }
  end

  # Restores a graph previously written by #save.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects; only use this
  # on files from a trusted source.
  #
  # @param filename [String]
  # @return [Dawg]
  def self.load(filename)
    # Block form closes the handle; binary mode mirrors #save.
    File.open(filename, 'rb') { |file| Marshal.load(file.read) }
  end

  # Adds +word+ to the graph. Words must arrive in ascending order.
  #
  # NOTE(review): the insertion point is taken from @uncheckedNodes, which
  # #finish empties; inserting a word that shares a prefix with the previous
  # word after #finish therefore looks unsupported - confirm before relying
  # on it.
  #
  # @param word [String]
  # @raise [RuntimeError] when +word+ sorts before the previously inserted word
  def insert(word)
    raise "Error: Words must be inserted in alphabetical order." if word < @previousWord

    # Length of the prefix shared with the previously inserted word.
    limit = [word.length, @previousWord.length].min
    common_prefix = (0...limit).take_while { |i| word[i] == @previousWord[i] }.length

    # Everything below the shared prefix belongs only to the previous word
    # and can be checked for duplicates now.
    _minimize(common_prefix)

    # Continue from the node reached by the shared prefix.
    node = @uncheckedNodes.empty? ? @root : @uncheckedNodes.last[2]

    word.split("")[common_prefix..-1].each do |letter|
      next_node = DawgNode.new
      node.edges[letter] = next_node
      @uncheckedNodes << [node, letter, next_node]
      node = next_node
    end

    node.final = true
    @previousWord = word
  end

  # Minimizes every node that is still unchecked. Call once after the last
  # #insert.
  def finish
    _minimize(0)
  end

  # Processes unchecked nodes from the leaf upwards until only +downTo+
  # entries remain. A child equivalent to an already known node is replaced
  # by that node in its parent; otherwise the child is registered as known.
  #
  # @param downTo [Integer] number of unchecked entries to keep
  def _minimize(downTo)
    while @uncheckedNodes.length > downTo
      parent, letter, child = @uncheckedNodes.pop
      if @minimizedNodes.key?(child)
        # replace the child with the previously encountered one
        parent.edges[letter] = @minimizedNodes[child]
      else
        # add the state to the minimized nodes
        @minimizedNodes[child] = child
      end
    end
  end

  # @param word [String]
  # @return [Boolean] true when +word+ was inserted into the graph
  def lookup(word)
    node = node_for_prefix(word)
    return false unless node

    node.final
  end

  # Lists the stored words that start with +word+ and are longer than it
  # (+word+ itself is not part of the result).
  #
  # @param word [String] the prefix to complete
  # @return [Array<String>] empty when the prefix is not in the graph
  def find_similar(word)
    node = node_for_prefix(word)
    return [] unless node

    get_recuirsively_all(node).map { |suffix| word + suffix }
  end

  # Collects every non-empty suffix that leads from +node+ to a final node.
  #
  # A suffix is emitted for each final child, including children that have
  # further edges of their own, so a stored word that is a prefix of another
  # stored word (e.g. "cat" next to "cats") is not lost.
  #
  # @param node [DawgNode]
  # @return [Array<String>]
  def get_recuirsively_all(node)
    suffixes = []

    node.edges.each do |letter, child|
      suffixes << letter if child.final
      get_recuirsively_all(child).each { |tail| suffixes << letter + tail }
    end

    suffixes
  end

  # @return [Integer] number of distinct minimized nodes; the root is never
  #   registered there and is therefore not counted
  def nodeCount
    @minimizedNodes.length
  end

  # @return [Integer] total number of outgoing edges of the minimized nodes
  #   (edges leaving the root are not included)
  def edgeCount
    @minimizedNodes.each_value.inject(0) { |total, node| total + node.edges.length }
  end

  private

  # Follows +word+ letter by letter from the root.
  #
  # @return [DawgNode, nil] the node reached, or nil when an edge is missing
  def node_for_prefix(word)
    node = @root
    word.split("").each do |letter|
      return nil unless node.edges.key?(letter)

      node = node.edges[letter]
    end
    node
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dawg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Maksatbek Manurov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Basic deterministic acyclic finite state automaton in ruby
14
+ email:
15
+ - maksat.mansurov@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/dawg.rb
21
+ homepage: https://github.com/baltavay/dawg
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.2.2
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: Deterministic acyclic finite state automaton
45
+ test_files: []