dictionary_map 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/dict_map.rb +172 -0
  2. metadata +46 -0
data/lib/dict_map.rb ADDED
@@ -0,0 +1,172 @@
1
# Represents a node in the dictionary graph. As the graph is built, each node
# is given links to the letters that may follow it in some word.
#
# A node stands for one character of a word-so-far: there might be a node for
# the character "a" in the string "ca", which would point to nodes containing
# "t", "n", etc. for the words "cat", "can", and so on. The same letter appears
# in many nodes across the graph, one per distinct string beginning.
#
# Nodes have no notion of the string that came before them; that is managed by
# the calling program.
class WordNode
  # The letter represented by this node.
  attr_reader :letter

  # Hash of letter => WordNode for the nodes that can follow the string so far.
  attr_reader :successors

  # Sets the exhausted flag (see #exhausted?).
  attr_writer :exhausted

  # letter - the character this node represents.
  def initialize(letter)
    @ends_word = false
    @successors = {}
    @letter = letter
    @exhausted = false
  end

  # If true, this letter is the end of a word in the dictionary.
  def ends_word?
    @ends_word
  end

  # Marks this node as the final letter of a dictionary word.
  def end_word!
    @ends_word = true
  end

  # A node is exhausted if all solutions of the string beyond its point have
  # been found in a grid. This is useful when a word should only be found
  # once, and allows the search to be cut off early.
  def exhausted?
    @exhausted
  end

  # Checks the exhausted status of the direct successor nodes. Returns true
  # when there are no successors or when every successor is exhausted.
  #
  # Ideally this would be private, but a longer word may be exhausted while
  # this one is not (or vice versa), so there is no implication between this
  # node being "used" and its successors being used.
  def successors_exhausted?
    @successors.each_value.all?(&:exhausted?)
  end
end
62
+
63
# The DictionaryMap represents a directed graph of the words found in a
# dictionary file. There is one starting node per distinct first letter seen
# in the dictionary (26 for a dictionary with at least one word beginning with
# each letter of the alphabet). Each of those nodes points to succeeding nodes
# that form the beginnings (and eventually the ends) of words.
class DictionaryMap
  # The beginning point for navigating the map: a Hash of first character =>
  # WordNode. It is exposed so that callers can walk the graph themselves.
  # A black box with include? would still be fast, but this was built with
  # Boggle in mind, so callers want to cut searches short and mark words as
  # used without having to traverse the tree again.
  attr_reader :start_nodes

  # Builds the map from the file at file_path, one word per line. Every line
  # is downcased and stripped of its line terminator before being added.
  def initialize(file_path)
    @start_nodes = {}
    # File.foreach closes the file once iteration finishes; the previous
    # open(...).readlines left the handle open until garbage collection.
    File.foreach(file_path) do |line|
      update_map(@start_nodes, line.downcase.chomp)
    end
  end

  # Adds word to the graph rooted at init_nodes (a Hash of letter => WordNode),
  # creating any missing nodes along the way. An empty word adds nothing.
  def update_map(init_nodes, word)
    current_nodes = init_nodes
    last_index = word.size - 1
    # follow the chain of characters in this word
    0.upto(last_index) do |index|
      char = word[index]
      # create the node the first time this letter is seen at this position
      node = (current_nodes[char] ||= WordNode.new(char))
      if index == last_index
        # the last character marks its node as word-ending
        node.end_word!
      else
        # advance position in the map
        current_nodes = node.successors
      end
    end
  end

  # Clears the exhausted status of the provided node and all nodes below it.
  def reset_node(node)
    node.exhausted = false
    node.successors.each_value { |successor| reset_node(successor) }
  end

  # Resets the exhausted status of all nodes in the map.
  def reset
    @start_nodes.each_value { |node| reset_node(node) }
  end

  # Searches the dictionary for word. Returns a truthy value only when every
  # character of word can be followed through the graph and the final node
  # ends a word; prefixes of longer words ("ca" for "cat") and words that run
  # past the graph ("nodenasdfasd") are rejected. nil and empty input return
  # false. The lookup is done on word exactly as given (no case folding here).
  #
  # TODO: potentially add an exhaust? option so that if we want to only
  # search for a word once, it gets exhausted if found.
  def include?(word)
    return false if word.nil? || word.empty?

    node = start_nodes[word[0]]
    # No dictionary word starts with this letter; previously this case fell
    # through and raised NoMethodError on nil.
    return false unless node

    1.upto(word.size - 1) do |index|
      node = node.successors[word[index]]
      # word has more characters but the dictionary has no matching path
      return false unless node
    end

    # every character was consumed, so this node must end a word to count
    node.ends_word?
  end
end
147
+
148
# Collapses the map back into words: returns a list of all words beginning
# with string.
#
# map should be the Hash of nodes (letter => node) that follow the last
# character of string in the dictionary. For example, to list every word in
# the dictionary pass dictionary.start_nodes only; for all words that start
# with "x", call get_map(dictionary.start_nodes['x'].successors, 'x').
#
# Words come back in depth-first order following the map's iteration order.
# If map is empty, a diagnostic is printed to standard output and an empty
# list is returned.
def get_map(map, string = '')
  words = []
  if map.empty?
    puts "Error, made it to the end of string #{string} and found no end node."
    return words
  end

  map.each do |letter, node|
    prefix = string + letter
    words << prefix if node.ends_word?
    # concat appends in place instead of rebuilding the accumulator each time;
    # leaf nodes are skipped so the empty-map diagnostic above is not triggered
    words.concat(get_map(node.successors, prefix)) unless node.successors.empty?
  end
  words
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dictionary_map
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Brian Fults
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email:
16
+ - xclite@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/dict_map.rb
22
+ homepage: https://github.com/xclite/dictionary_map
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project: nowarning
42
+ rubygems_version: 1.8.23
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Quickly searchable string dictionary structure.
46
+ test_files: []