words 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -4
- data/README.markdown +45 -14
- data/VERSION +1 -1
- data/bin/build_wordnet +75 -86
- data/examples.rb +44 -31
- data/lib/evocations.rb +85 -0
- data/lib/homographs.rb +106 -0
- data/lib/relation.rb +91 -0
- data/lib/synset.rb +199 -0
- data/lib/wordnet_connection.rb +187 -0
- data/lib/wordnet_connectors/pure_wordnet_connection.rb +142 -0
- data/lib/wordnet_connectors/tokyo_wordnet_connection.rb +85 -0
- data/lib/words.rb +79 -498
- data/spec/words_spec.rb +113 -0
- data/words.gemspec +11 -6
- metadata +11 -6
- data/test/helper.rb +0 -9
- data/test/test_words.rb +0 -7
data/lib/words.rb
CHANGED
@@ -1,505 +1,86 @@
|
|
1
|
-
# std includes
|
1
|
+
# std library includes
|
2
2
|
require 'pathname'
|
3
|
-
require 'set'
|
4
3
|
|
5
|
-
#
|
6
|
-
require '
|
7
|
-
require 'rufus-tokyo' if Gem.available?('rufus-tokyo')
|
4
|
+
# local includes
|
5
|
+
require File.join(File.dirname(__FILE__),'homographs.rb')
|
8
6
|
|
9
7
|
module Words
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
raise "Coulden't find the rufus-tokyo gem. Please ensure it's installed." unless Gem.available?('rufus-tokyo')
|
25
|
-
@connection = Rufus::Tokyo::Table.new(@data_path.to_s, :mode => 'r')
|
26
|
-
@connected = true
|
27
|
-
elsif @connection_type == :pure
|
28
|
-
# open the index is there
|
29
|
-
File.open(@data_path, 'r') do |file|
|
30
|
-
@connection = Marshal.load file.read
|
31
|
-
end
|
32
|
-
evocation_path = Pathname.new("#{File.dirname(__FILE__)}/../data/evocations.dmp")
|
33
|
-
File.open(evocation_path, 'r') do |file|
|
34
|
-
@evocations = Marshal.load file.read
|
35
|
-
end if evocation_path.exist?
|
36
|
-
# search for the wordnet files
|
37
|
-
if locate_wordnet?(wordnet_path)
|
38
|
-
@connected = true
|
39
|
-
else
|
40
|
-
@connected = false
|
41
|
-
raise "Failed to locate the wordnet database. Please ensure it is installed and that if it resides at a custom path that path is given as an argument when constructing the Words object."
|
42
|
-
end
|
43
|
-
else
|
44
|
-
@connected = false
|
45
|
-
end
|
46
|
-
else
|
47
|
-
@connected = false
|
48
|
-
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def close
|
54
|
-
@connected = false
|
55
|
-
if @connected && connection_type == :tokyo
|
56
|
-
connection.close
|
57
|
-
end
|
58
|
-
return true
|
59
|
-
end
|
60
|
-
|
61
|
-
def homographs(term)
|
62
|
-
if connection_type == :pure
|
63
|
-
raw_homographs = @connection[term]
|
64
|
-
{ 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]} unless raw_homographs.nil?
|
65
|
-
else
|
66
|
-
@connection[term]
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def evocations(senset_id)
|
71
|
-
if connection_type == :pure
|
72
|
-
if defined? @evocations
|
73
|
-
raw_evocations = @evocations[senset_id + "s"]
|
74
|
-
{ 'relations' => raw_evocations[0], 'means' => raw_evocations[1], 'medians' => raw_evocations[2]} unless raw_evocations.nil?
|
75
|
-
else
|
76
|
-
nil
|
77
|
-
end
|
78
|
-
else
|
79
|
-
@connection[senset_id + "s"]
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def synset(synset_id)
|
84
|
-
if connection_type == :pure
|
85
|
-
pos = synset_id[0,1]
|
86
|
-
File.open(@wordnet_dir + "data.#{SHORT_TO_POS_FILE_TYPE[pos]}","r") do |file|
|
87
|
-
file.seek(synset_id[1..-1].to_i)
|
88
|
-
data_line, gloss = file.readline.strip.split(" | ")
|
89
|
-
data_parts = data_line.split(" ")
|
90
|
-
synset_id, lexical_filenum, synset_type, word_count = pos + data_parts.shift, data_parts.shift, data_parts.shift, data_parts.shift.to_i(16)
|
91
|
-
words = Array.new(word_count).map { "#{data_parts.shift}.#{data_parts.shift}" }
|
92
|
-
relations = Array.new(data_parts.shift.to_i).map { "#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}" }
|
93
|
-
{ "synset_id" => synset_id, "lexical_filenum" => lexical_filenum, "synset_type" => synset_type, "words" => words.join('|'), "relations" => relations.join('|'), "gloss" => gloss.strip }
|
94
|
-
end
|
95
|
-
else
|
96
|
-
@connection[synset_id]
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def locate_wordnet?(base_dirs)
|
101
|
-
|
102
|
-
base_dirs = case base_dirs
|
103
|
-
when :search
|
104
|
-
['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0']
|
105
|
-
else
|
106
|
-
[ base_dirs ]
|
107
|
-
end
|
108
|
-
|
109
|
-
base_dirs.each do |dir|
|
110
|
-
["", "dict"].each do |sub_folder|
|
111
|
-
path = Pathname.new(dir + sub_folder)
|
112
|
-
@wordnet_dir = path if (path + "data.noun").exist?
|
113
|
-
break if !@wordnet_dir.nil?
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
return !@wordnet_dir.nil?
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
122
|
-
|
123
|
-
class Evocations
|
124
|
-
|
125
|
-
def initialize(evocation_construct, source_synset, wordnet_connection)
|
126
|
-
@wordnet_connection = wordnet_connection
|
127
|
-
@source = source_synset
|
128
|
-
@evocation_construct = evocation_construct
|
129
|
-
end
|
130
|
-
|
131
|
-
def means
|
132
|
-
@means = @evocation_construct["means"].split('|') unless defined? @means
|
133
|
-
@means
|
134
|
-
end
|
135
|
-
|
136
|
-
def medians
|
137
|
-
@medians = @evocation_construct["medians"].split('|') unless defined? @medians
|
138
|
-
@medians
|
139
|
-
end
|
140
|
-
|
141
|
-
def size
|
142
|
-
means.size
|
143
|
-
end
|
144
|
-
|
145
|
-
def first
|
146
|
-
self[0]
|
147
|
-
end
|
148
|
-
|
149
|
-
def last
|
150
|
-
self[size-1]
|
151
|
-
end
|
152
|
-
|
153
|
-
def [] (index)
|
154
|
-
{ :destination => destinations[index], :mean => means[index], :median => medians[index] }
|
155
|
-
end
|
156
|
-
|
157
|
-
def destinations(pos = :all)
|
158
|
-
destination_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, @source.homographs }
|
159
|
-
end
|
160
|
-
|
161
|
-
def destination_ids(pos = :all)
|
162
|
-
@destination_ids = @evocation_construct["relations"].split('|') unless defined? @destination_ids
|
163
|
-
case
|
164
|
-
when Homographs::SYMBOL_TO_POS.include?(pos.to_sym)
|
165
|
-
@destination_ids.select { |synset_id| synset_id[0,1] == Homographs::SYMBOL_TO_POS[pos.to_sym] }
|
166
|
-
when Homographs::POS_TO_SYMBOL.include?(pos.to_s)
|
167
|
-
@destination_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
|
168
|
-
else
|
169
|
-
@destination_ids
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_s
|
174
|
-
"#{size} evocations from #{@source}"
|
175
|
-
end
|
176
|
-
|
177
|
-
end
|
178
|
-
|
179
|
-
class Relation
|
180
|
-
|
181
|
-
RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
|
182
|
-
";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
|
183
|
-
"-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
|
184
|
-
"%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
|
185
|
-
"\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
|
186
|
-
SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
|
187
|
-
|
188
|
-
def initialize(relation_construct, source_synset, wordnet_connection)
|
189
|
-
@wordnet_connection = wordnet_connection
|
190
|
-
@symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
|
191
|
-
@dest_synset_id = @pos + @dest_synset_id
|
192
|
-
@symbol = RELATION_TO_SYMBOL[@symbol]
|
193
|
-
@source_synset = source_synset
|
194
|
-
end
|
195
|
-
|
196
|
-
def is_semantic?
|
197
|
-
@source_dest == "0000"
|
198
|
-
end
|
199
|
-
|
200
|
-
def source_word
|
201
|
-
is_semantic? ? @source_word = nil : @source_word = @source_synset.words[@source_dest[0..1].to_i(16)-1] unless defined? @source_word
|
202
|
-
@source_word
|
203
|
-
end
|
204
|
-
|
205
|
-
def destination_word
|
206
|
-
is_semantic? ? @destination_word = nil : @destination_word = destination.words[@source_dest[2..3].to_i(16)-1] unless defined? @destination_word
|
207
|
-
@destination_word
|
208
|
-
end
|
209
|
-
|
210
|
-
def relation_type?(type)
|
211
|
-
case
|
212
|
-
when SYMBOL_TO_RELATION.include?(type.to_sym)
|
213
|
-
type.to_sym == @symbol
|
214
|
-
when RELATION_TO_SYMBOL.include?(pos.to_s)
|
215
|
-
POINTER_TO_SYMBOL[type.to_sym] == @symbol
|
216
|
-
else
|
217
|
-
false
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
def relation_type
|
222
|
-
@symbol
|
223
|
-
end
|
224
|
-
|
225
|
-
def destination
|
226
|
-
@destination = Synset.new(@dest_synset_id, @wordnet_connection, nil) unless defined? @destination
|
227
|
-
@destination
|
228
|
-
end
|
229
|
-
|
230
|
-
def to_s
|
231
|
-
@to_s = "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\"" if !is_semantic? && !defined?(@to_s)
|
232
|
-
@to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
|
233
|
-
@to_s
|
234
|
-
end
|
235
|
-
|
236
|
-
def inspect
|
237
|
-
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
238
|
-
end
|
239
|
-
|
240
|
-
end
|
241
|
-
|
242
|
-
class Synset
|
243
|
-
|
244
|
-
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
245
|
-
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
246
|
-
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
247
|
-
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
248
|
-
{ :lex => :adv_all, :description => "all adverbs" },
|
249
|
-
{ :lex => :noun_Tops, :description => "unique beginner for nouns" },
|
250
|
-
{ :lex => :noun_act, :description => "nouns denoting acts or actions" },
|
251
|
-
{ :lex => :noun_animal, :description => "nouns denoting animals" },
|
252
|
-
{ :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
|
253
|
-
{ :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
|
254
|
-
{ :lex => :noun_body, :description => "nouns denoting body parts" },
|
255
|
-
{ :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
|
256
|
-
{ :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
|
257
|
-
{ :lex => :noun_event, :description => "nouns denoting natural events" },
|
258
|
-
{ :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
|
259
|
-
{ :lex => :noun_food, :description => "nouns denoting foods and drinks" },
|
260
|
-
{ :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
|
261
|
-
{ :lex => :noun_location, :description => "nouns denoting spatial position" },
|
262
|
-
{ :lex => :noun_motive, :description => "nouns denoting goals" },
|
263
|
-
{ :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
|
264
|
-
{ :lex => :noun_person, :description => "nouns denoting people" },
|
265
|
-
{ :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
|
266
|
-
{ :lex => :noun_plant, :description => "nouns denoting plants" },
|
267
|
-
{ :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
|
268
|
-
{ :lex => :noun_process, :description => "nouns denoting natural processes" },
|
269
|
-
{ :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
|
270
|
-
{ :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
|
271
|
-
{ :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
|
272
|
-
{ :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
|
273
|
-
{ :lex => :noun_substance, :description => "nouns denoting substances" },
|
274
|
-
{ :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
|
275
|
-
{ :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
|
276
|
-
{ :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
|
277
|
-
{ :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
|
278
|
-
{ :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
|
279
|
-
{ :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
|
280
|
-
{ :lex => :verb_consumption, :description => "verbs of eating and drinking" },
|
281
|
-
{ :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
|
282
|
-
{ :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
|
283
|
-
{ :lex => :verb_emotion, :description => "verbs of feeling" },
|
284
|
-
{ :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
|
285
|
-
{ :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
|
286
|
-
{ :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
|
287
|
-
{ :lex => :verb_social, :description => "verbs of political and social activities and events" },
|
288
|
-
{ :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
|
289
|
-
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
290
|
-
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
291
|
-
|
292
|
-
def initialize(synset_id, wordnet_connection, homographs)
|
293
|
-
@wordnet_connection = wordnet_connection
|
294
|
-
@synset_hash = wordnet_connection.synset(synset_id)
|
295
|
-
@homographs = homographs
|
296
|
-
# construct some conveniance menthods for relation type access
|
297
|
-
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
298
|
-
self.class.send(:define_method, "#{relation_type}s?") do
|
299
|
-
relations(relation_type).size > 0
|
300
|
-
end
|
301
|
-
self.class.send(:define_method, "#{relation_type}s") do
|
302
|
-
relations(relation_type)
|
303
|
-
end
|
304
|
-
end
|
305
|
-
end
|
306
|
-
|
307
|
-
def synset_type
|
308
|
-
SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
|
309
|
-
end
|
310
|
-
|
311
|
-
def words
|
312
|
-
@words = words_with_lexical_ids.map { |word_with_num| word_with_num[:word] } unless defined? @words
|
313
|
-
@words
|
314
|
-
end
|
315
|
-
|
316
|
-
def lexical_ids
|
317
|
-
@words = words_with_lexical_ids.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
|
318
|
-
@words
|
319
|
-
end
|
320
|
-
|
321
|
-
def size
|
322
|
-
words.size
|
323
|
-
end
|
324
|
-
|
325
|
-
def words_with_lexical_ids
|
326
|
-
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
|
327
|
-
@words_with_num
|
328
|
-
end
|
329
|
-
|
330
|
-
def lexical_filenum
|
331
|
-
@synset_hash["lexical_filenum"]
|
332
|
-
end
|
333
|
-
|
334
|
-
def lexical_catagory
|
335
|
-
lexical[:lex]
|
336
|
-
end
|
337
|
-
|
338
|
-
def lexical_description
|
339
|
-
lexical[:description]
|
340
|
-
end
|
341
|
-
|
342
|
-
def lexical
|
343
|
-
NUM_TO_LEX[lexical_filenum.to_i]
|
344
|
-
end
|
345
|
-
|
346
|
-
def synset_id
|
347
|
-
@synset_hash["synset_id"]
|
348
|
-
end
|
349
|
-
|
350
|
-
def gloss
|
351
|
-
@synset_hash["gloss"]
|
352
|
-
end
|
353
|
-
|
354
|
-
def lemma
|
355
|
-
@homographs.lemma
|
356
|
-
end
|
357
|
-
|
358
|
-
def homographs
|
359
|
-
@homographs
|
360
|
-
end
|
361
|
-
|
362
|
-
def inspect
|
363
|
-
@synset_hash.inspect
|
364
|
-
end
|
365
|
-
|
366
|
-
def relations(type = :all)
|
367
|
-
@relations = @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) } unless defined? @relations
|
368
|
-
case
|
369
|
-
when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
|
370
|
-
@relations.select { |relation| relation.relation_type == type.to_sym }
|
371
|
-
when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
|
372
|
-
@relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
|
373
|
-
else
|
374
|
-
@relations
|
375
|
-
end
|
376
|
-
end
|
377
|
-
|
378
|
-
def evocations
|
379
|
-
evocations_arr = @wordnet_connection.evocations(synset_id)
|
380
|
-
Evocations.new evocations_arr, self, @wordnet_connection unless evocations_arr.nil?
|
381
|
-
end
|
382
|
-
|
383
|
-
def to_s
|
384
|
-
@to_s = "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}" unless defined? @to_s
|
385
|
-
@to_s
|
386
|
-
end
|
387
|
-
|
388
|
-
alias word lemma
|
389
|
-
|
390
|
-
end
|
391
|
-
|
392
|
-
class Homographs
|
393
|
-
|
394
|
-
POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
|
395
|
-
SYMBOL_TO_POS = POS_TO_SYMBOL.invert
|
396
|
-
|
397
|
-
def initialize(raw_homographs, wordnet_connection)
|
398
|
-
@wordnet_connection = wordnet_connection
|
399
|
-
@raw_homographs = raw_homographs
|
400
|
-
# construct some conveniance menthods for relation type access
|
401
|
-
SYMBOL_TO_POS.keys.each do |pos|
|
402
|
-
self.class.send(:define_method, "#{pos}s?") do
|
403
|
-
size(pos) > 0
|
404
|
-
end
|
405
|
-
self.class.send(:define_method, "#{pos}s") do
|
406
|
-
synsets(pos)
|
407
|
-
end
|
408
|
-
self.class.send(:define_method, "#{pos}_count") do
|
409
|
-
size(pos)
|
410
|
-
end
|
411
|
-
self.class.send(:define_method, "#{pos}_ids") do
|
412
|
-
synset_ids(pos)
|
413
|
-
end
|
414
|
-
end
|
415
|
-
end
|
416
|
-
|
417
|
-
def tagsense_counts
|
418
|
-
@tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
|
419
|
-
@tagsense_counts
|
420
|
-
end
|
421
|
-
|
422
|
-
def lemma
|
423
|
-
@lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
|
424
|
-
@lemma
|
425
|
-
end
|
426
|
-
|
427
|
-
def available_pos
|
428
|
-
@available_pos = synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq unless defined? @available_pos
|
429
|
-
@available_pos
|
430
|
-
end
|
431
|
-
|
432
|
-
def to_s
|
433
|
-
@to_s = [lemma, " " + available_pos.join("/")].join(",") unless defined? @to_s
|
434
|
-
@to_s
|
435
|
-
end
|
436
|
-
|
437
|
-
def size(pos = :all)
|
438
|
-
synset_ids(pos).size
|
439
|
-
end
|
440
|
-
|
441
|
-
def synsets(pos = :all)
|
442
|
-
synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, self }
|
443
|
-
end
|
444
|
-
|
445
|
-
def synset_ids(pos = :all)
|
446
|
-
@synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
|
447
|
-
case
|
448
|
-
when SYMBOL_TO_POS.include?(pos.to_sym)
|
449
|
-
@synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
|
450
|
-
when POS_TO_SYMBOL.include?(pos.to_s)
|
451
|
-
@synset_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
|
452
|
-
else
|
453
|
-
@synset_ids
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
def inspect
|
458
|
-
@raw_homographs.inspect
|
459
|
-
end
|
460
|
-
|
461
|
-
alias word lemma
|
462
|
-
alias pos available_pos
|
463
|
-
alias senses synsets
|
464
|
-
alias sense_ids synset_ids
|
465
|
-
|
466
|
-
end
|
467
|
-
|
468
|
-
class Words
|
469
|
-
|
470
|
-
@wordnet_connection = nil
|
471
|
-
|
472
|
-
def initialize(type = :tokyo, path = :default, wordnet_path = :search)
|
473
|
-
@wordnet_connection = WordnetConnection.new(type, path, wordnet_path)
|
474
|
-
end
|
475
|
-
|
476
|
-
def find(word)
|
477
|
-
homographs = @wordnet_connection.homographs(word)
|
478
|
-
Homographs.new homographs, @wordnet_connection unless homographs.nil?
|
479
|
-
end
|
480
|
-
|
481
|
-
def connection_type
|
482
|
-
@wordnet_connection.connection_type
|
483
|
-
end
|
484
|
-
|
485
|
-
def wordnet_dir
|
486
|
-
@wordnet_connection.wordnet_dir
|
487
|
-
end
|
488
|
-
|
489
|
-
def close
|
490
|
-
@wordnet_connection.close
|
491
|
-
end
|
492
|
-
|
493
|
-
def connected
|
494
|
-
@wordnet_connection.connected
|
495
|
-
end
|
8
|
+
|
9
|
+
# we identify each wordnet connector installed and there paths
|
10
|
+
SUPPORTED_CONNECTIORS = Dir[File.join(File.dirname(__FILE__),'wordnet_connectors','*_wordnet_connection.rb')].inject(Hash.new) { |connectors, connection_file| connectors[ File.basename(connection_file).split('_').first.to_sym ] = connection_file; connectors }
|
11
|
+
DEFAULT_WORDNET_LOCATIONS = ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0', '/opt/WordNet-3.0', '/opt/wordnet', '/opt/local/share/WordNet-3.0/']
|
12
|
+
|
13
|
+
# specify some useful exception types
|
14
|
+
class BadWordnetConnector < RuntimeError; end
|
15
|
+
class BadWordnetDataset < RuntimeError; end
|
16
|
+
class NoWordnetConnection < RuntimeError; end
|
17
|
+
|
18
|
+
# specify the wordnet control object
|
19
|
+
class Wordnet
|
20
|
+
|
21
|
+
attr_reader :wordnet_connection
|
496
22
|
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
23
|
+
def initialize(connector_type = :pure, wordnet_path = :search, data_path = :default)
|
24
|
+
|
25
|
+
# check and specify useful paths
|
26
|
+
wordnet_path = Wordnet::locate_wordnet(wordnet_path)
|
27
|
+
data_path = (data_path == :default ? Pathname.new(File.join(File.dirname(__FILE__), '..', 'data')) : Pathname.new( data_path ))
|
28
|
+
|
29
|
+
# ensure we have a valid connector type
|
30
|
+
raise BadWordnetConnector, "You specified an unsupported wordnet connector type. Supported connectors are: #{SUPPORTED_CONNECTIORS}" unless SUPPORTED_CONNECTIORS.include? connector_type
|
31
|
+
|
32
|
+
# assuming we have a valid connection type we can import the relevant code (the reason we do this dynamically is to reduce loadtime)
|
33
|
+
require SUPPORTED_CONNECTIORS[connector_type]
|
34
|
+
|
35
|
+
# construct the connector object
|
36
|
+
@wordnet_connection = Words.const_get( File.basename(SUPPORTED_CONNECTIORS[connector_type], '.rb').gsub(/(^|_)(.)/) { $2.upcase } ).new(data_path, wordnet_path)
|
37
|
+
|
38
|
+
# construct some conveniance menthods for relation type access
|
39
|
+
[:connection_type, :wordnet_path, :data_path, :close!, :open!, :connected?, :evocations?].each do |method_name|
|
40
|
+
self.class.send(:define_method, method_name) do
|
41
|
+
@wordnet_connection.send method_name if defined? @wordnet_connection
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def find(term)
|
48
|
+
|
49
|
+
raise NoWordnetConnection, "There is presently no connection to wordnet. To attempt to reistablish a connection you should use the 'open!' command on the Wordnet object." unless connected?
|
50
|
+
homographs = @wordnet_connection.homographs(term)
|
51
|
+
Homographs.new(homographs, @wordnet_connection) unless homographs.nil?
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
def to_s
|
56
|
+
|
57
|
+
# return a description of the connector
|
58
|
+
!connected? ? "Words not connected" : @wordnet_connection.to_s
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def self.locate_wordnet(base_dirs)
|
65
|
+
|
66
|
+
base_dirs = case base_dirs
|
67
|
+
when :search
|
68
|
+
DEFAULT_WORDNET_LOCATIONS
|
69
|
+
else
|
70
|
+
[ base_dirs ]
|
71
|
+
end
|
72
|
+
|
73
|
+
base_dirs.each do |dir|
|
74
|
+
["", "dict"].each do |sub_folder|
|
75
|
+
path = Pathname.new(dir + sub_folder)
|
76
|
+
return path if (path + "data.noun").exist?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
return nil
|
81
|
+
|
82
|
+
end
|
83
|
+
|
501
84
|
end
|
502
|
-
|
503
|
-
end
|
504
85
|
|
505
|
-
end
|
86
|
+
end
|
data/spec/words_spec.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
require 'words'
|
3
|
+
|
4
|
+
describe "Words Constructer" do
|
5
|
+
|
6
|
+
it "should reject bad modes" do
|
7
|
+
lambda { Words::Wordnet.new(:rubbish) }.should raise_exception(Words::BadWordnetConnector)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should when in pure mode, when provided with a bad wordnet directory, return a BadWordnetDataset exception" do
|
11
|
+
lambda { Words::Wordnet.new(:pure, '/lib') }.should raise_exception(Words::BadWordnetDataset)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should when in tokyo mode, when provided with a bad dataset directory, return a BadWordnetDataset exception" do
|
15
|
+
lambda { Words::Wordnet.new(:tokyo, :search, '/lib') }.should raise_exception(Words::BadWordnetDataset)
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "Pure Words Constructor" do
|
21
|
+
|
22
|
+
before do
|
23
|
+
@words = Words::Wordnet.new(:pure)
|
24
|
+
end
|
25
|
+
|
26
|
+
after do
|
27
|
+
@words.close!
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should accept pure mode" do
|
31
|
+
@words.should_not be_nil
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should when given the request for a pure mode return a pure connection" do
|
35
|
+
@words.wordnet_connection.should be_kind_of Words::PureWordnetConnection
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should when given the request for a pure mode return an open pure connection" do
|
39
|
+
@words.connected?.should be_true
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should when in pure mode, report itself as to in to_s" do
|
43
|
+
@words.to_s.should match /Words running in pure mode using wordnet files found at .*/
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should when in pure mode, when the connection is closed, report itself as closed" do
|
47
|
+
@words.close!
|
48
|
+
@words.connected?.should be_false
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should when in pure mode, when the connection is closed, report itself as closed in to_s" do
|
52
|
+
@words.close!
|
53
|
+
@words.to_s.should match 'Words not connected'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should when in pure mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
|
57
|
+
@words.close!
|
58
|
+
lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should when checked report itself as a pure connection" do
|
62
|
+
@words.connection_type.should equal :pure
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "Tokyo Words Constructor" do
|
68
|
+
|
69
|
+
before do
|
70
|
+
@words = Words::Wordnet.new(:tokyo)
|
71
|
+
end
|
72
|
+
|
73
|
+
after do
|
74
|
+
@words.close!
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should accept tokyo mode" do
|
78
|
+
@words.should_not be_nil
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should when given the request for a tokyo mode return a tokyo connection" do
|
82
|
+
@words.wordnet_connection.should be_kind_of Words::TokyoWordnetConnection
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should when given the request for a tokyo mode return an open tokyo connection" do
|
86
|
+
@words.connected?.should be_true
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should when in tokyo mode should report itself as to in to_s" do
|
90
|
+
@words.to_s.should match /Words running in tokyo mode with dataset at .*/
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should when in tokyo mode should when the connection is closed report itself as closed" do
|
94
|
+
@words.close!
|
95
|
+
@words.connected?.should be_false
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should when in tokyo mode should when the connection is closed report itself as closed in to_s" do
|
99
|
+
@words.close!
|
100
|
+
@words.to_s.should match 'Words not connected'
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should when in tokyo mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
|
104
|
+
@words.close!
|
105
|
+
lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should when checked report itself as a tokyo connection" do
|
109
|
+
@words.connection_type.should equal :tokyo
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
|