dictionary_map 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/dict_map.rb +172 -0
  2. metadata +46 -0
data/lib/dict_map.rb ADDED
@@ -0,0 +1,172 @@
1
+ #Represents a node in the dictionary graph. As the graph is built,
2
+ #each node is given links to possible letters that follow in this string.
3
+ #A node represents a character in a word so far - e.g. there might be a node
4
+ #for the character a in the string ca, which would have pointers to nodes
5
+ #containing t, n, etc for the words cat, can, and so on. There will be multiple
6
+ #a nodes in the entire graph for different string beginnings.
7
+ #The nodes have no notion of the string that came before them; this is managed
8
+ #by the calling program.
9
class WordNode
  # letter     - the single character this node stands for.
  # successors - Hash mapping the next character to the WordNode that
  #              continues the string from here.
  attr_reader :letter, :successors

  # Sets the exhausted flag (see #exhausted?).
  attr_writer :exhausted

  # Creates a node for +letter+ with no successors. The node starts out as
  # neither word-ending nor exhausted.
  def initialize(letter)
    @letter = letter
    @successors = {}
    @ends_word = false
    @exhausted = false
  end

  # True when the string leading up to and including this node is a complete
  # word in the dictionary.
  def ends_word?
    @ends_word
  end

  # Marks this node as the final letter of a dictionary word.
  def end_word!
    @ends_word = true
  end

  # A node is exhausted once the caller has flagged it as fully searched
  # (for example, every word through it has already been found in a grid).
  # Lets a search that only wants each word once cut off early.
  def exhausted?
    @exhausted
  end

  # True when every direct successor is exhausted; a node without successors
  # trivially qualifies. Only immediate successors are inspected, and this
  # node's own exhausted flag is deliberately independent of the answer:
  # a longer word may be used up while this one is not, or vice versa.
  def successors_exhausted?
    @successors.each_value.all?(&:exhausted?)
  end
end
62
+
63
+ #The DictionaryMap represents a directed graph of words found in a dictionary.
64
+ #There are 26 starting nodes (assuming there is at least one word in the
65
+ #dictionary beginning with each letter of the alphabet). Each of those nodes
66
+ #point to succeeding nodes that form the beginnings (and eventually the ends)
67
+ #of words.
68
class DictionaryMap
  # The entry point for navigating the map: a Hash of first character ->
  # WordNode. It is exposed on purpose so callers (this was built with Boggle
  # in mind) can walk the graph themselves, cut searches short and mark nodes
  # as exhausted without re-traversing from the top.
  attr_reader :start_nodes

  # Builds the map from the file at +file_path+, one word per line. Each line
  # is lower-cased and stripped of its line ending before being added.
  #
  # File.foreach is used so the file handle is closed once reading finishes,
  # and so the path is never interpreted as a command the way Kernel#open
  # treats a leading "|".
  def initialize(file_path)
    @start_nodes = {}
    File.foreach(file_path) do |line|
      update_map(@start_nodes, line.downcase.chomp)
    end
  end

  # Adds +word+ to the graph rooted at +init_nodes+ (a Hash of character ->
  # WordNode), creating any missing nodes along the way and flagging the node
  # for the final character as word-ending. An empty word adds nothing.
  def update_map(init_nodes, word)
    current_nodes = init_nodes
    last_node = nil
    word.each_char do |char|
      # Reuse the node when this prefix is already known, otherwise create it.
      last_node = (current_nodes[char] ||= WordNode.new(char))
      current_nodes = last_node.successors
    end
    last_node.end_word! if last_node
  end

  # Clears the exhausted status of +node+ and, recursively, of every node
  # reachable through its successors.
  def reset_node(node)
    node.exhausted = false
    node.successors.each_value { |successor| reset_node(successor) }
  end

  # Resets the exhausted status of all nodes in the map.
  def reset
    @start_nodes.each_value { |node| reset_node(node) }
  end

  # Returns true when +word+ is a complete word in the dictionary.
  #
  # nil and empty strings are never words. The lookup is an exact character
  # match; the dictionary itself is stored lower-cased by #initialize.
  #
  # A word only counts if every one of its characters can be followed through
  # the graph AND the node reached last is flagged as word-ending. Running out
  # of graph early (including an unknown first letter) or stopping on a mere
  # prefix both yield false.
  #
  # TODO: potentially add an exhaust option so that a word that should only be
  # found once gets exhausted when it is found.
  def include?(word)
    return false if word.nil? || word.empty?

    nodes = start_nodes
    node = nil
    word.each_char do |char|
      node = nodes[char]
      # No node for this character: the dictionary has no such string.
      return false unless node

      nodes = node.successors
    end
    node.ends_word?
  end
end
147
+
148
+ #Collapses the map back into words.
149
+ #Failure of a comment above. Amended:
150
+ #Returns a list of all words beginning with string.
151
+ #map should be the list of nodes succeeding in the Dictionary
152
+ #after the last character in string. For example,
153
+ #for all words in the dictionary, send dictionary.start_nodes only.
154
+ #For all words that start with x, call get_map (dictionary.start_nodes['x'].successors, 'x')
155
# Returns an Array of every word that can be formed by extending +string+
# through +map+.
#
# map    - Hash of character -> node; the nodes that may follow the last
#          character of +string+. Each node must respond to ends_word? and
#          successors.
# string - the prefix accumulated so far (default: '').
#
# Pass dictionary.start_nodes alone to list every word in the dictionary, or
# dictionary.start_nodes['x'].successors together with 'x' to list the words
# that extend "x". The prefix itself is never part of the result.
#
# Words appear in the map's iteration order, each one immediately followed by
# the longer words that extend it. When +map+ is empty a diagnostic line is
# printed to standard output and an empty Array is returned.
def get_map(map, string = '')
  if map.empty?
    puts "Error, made it to the end of string #{string} and found no end node."
    return []
  end

  map.each_with_object([]) do |(letter, node), words|
    candidate = string + letter
    words << candidate if node.ends_word?
    # Append in place rather than rebuilding the accumulator on every branch.
    words.concat(get_map(node.successors, candidate)) unless node.successors.empty?
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dictionary_map
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Brian Fults
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ - xclite@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/dict_map.rb
22
+ homepage: https://github.com/xclite/dictionary_map
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project: nowarning
42
+ rubygems_version: 1.8.23
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Quickly searchable string dictionary structure.
46
+ test_files: []