words 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -4
- data/README.markdown +45 -14
- data/VERSION +1 -1
- data/bin/build_wordnet +75 -86
- data/examples.rb +44 -31
- data/lib/evocations.rb +85 -0
- data/lib/homographs.rb +106 -0
- data/lib/relation.rb +91 -0
- data/lib/synset.rb +199 -0
- data/lib/wordnet_connection.rb +187 -0
- data/lib/wordnet_connectors/pure_wordnet_connection.rb +142 -0
- data/lib/wordnet_connectors/tokyo_wordnet_connection.rb +85 -0
- data/lib/words.rb +79 -498
- data/spec/words_spec.rb +113 -0
- data/words.gemspec +11 -6
- metadata +11 -6
- data/test/helper.rb +0 -9
- data/test/test_words.rb +0 -7
data/lib/words.rb
CHANGED
@@ -1,505 +1,86 @@
|
|
1
|
-
# std includes
|
1
|
+
# std library includes
|
2
2
|
require 'pathname'
|
3
|
-
require 'set'
|
4
3
|
|
5
|
-
#
|
6
|
-
require '
|
7
|
-
require 'rufus-tokyo' if Gem.available?('rufus-tokyo')
|
4
|
+
# local includes
|
5
|
+
require File.join(File.dirname(__FILE__),'homographs.rb')
|
8
6
|
|
9
7
|
module Words
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
raise "Coulden't find the rufus-tokyo gem. Please ensure it's installed." unless Gem.available?('rufus-tokyo')
|
25
|
-
@connection = Rufus::Tokyo::Table.new(@data_path.to_s, :mode => 'r')
|
26
|
-
@connected = true
|
27
|
-
elsif @connection_type == :pure
|
28
|
-
# open the index is there
|
29
|
-
File.open(@data_path, 'r') do |file|
|
30
|
-
@connection = Marshal.load file.read
|
31
|
-
end
|
32
|
-
evocation_path = Pathname.new("#{File.dirname(__FILE__)}/../data/evocations.dmp")
|
33
|
-
File.open(evocation_path, 'r') do |file|
|
34
|
-
@evocations = Marshal.load file.read
|
35
|
-
end if evocation_path.exist?
|
36
|
-
# search for the wordnet files
|
37
|
-
if locate_wordnet?(wordnet_path)
|
38
|
-
@connected = true
|
39
|
-
else
|
40
|
-
@connected = false
|
41
|
-
raise "Failed to locate the wordnet database. Please ensure it is installed and that if it resides at a custom path that path is given as an argument when constructing the Words object."
|
42
|
-
end
|
43
|
-
else
|
44
|
-
@connected = false
|
45
|
-
end
|
46
|
-
else
|
47
|
-
@connected = false
|
48
|
-
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def close
|
54
|
-
@connected = false
|
55
|
-
if @connected && connection_type == :tokyo
|
56
|
-
connection.close
|
57
|
-
end
|
58
|
-
return true
|
59
|
-
end
|
60
|
-
|
61
|
-
def homographs(term)
|
62
|
-
if connection_type == :pure
|
63
|
-
raw_homographs = @connection[term]
|
64
|
-
{ 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]} unless raw_homographs.nil?
|
65
|
-
else
|
66
|
-
@connection[term]
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def evocations(senset_id)
|
71
|
-
if connection_type == :pure
|
72
|
-
if defined? @evocations
|
73
|
-
raw_evocations = @evocations[senset_id + "s"]
|
74
|
-
{ 'relations' => raw_evocations[0], 'means' => raw_evocations[1], 'medians' => raw_evocations[2]} unless raw_evocations.nil?
|
75
|
-
else
|
76
|
-
nil
|
77
|
-
end
|
78
|
-
else
|
79
|
-
@connection[senset_id + "s"]
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def synset(synset_id)
|
84
|
-
if connection_type == :pure
|
85
|
-
pos = synset_id[0,1]
|
86
|
-
File.open(@wordnet_dir + "data.#{SHORT_TO_POS_FILE_TYPE[pos]}","r") do |file|
|
87
|
-
file.seek(synset_id[1..-1].to_i)
|
88
|
-
data_line, gloss = file.readline.strip.split(" | ")
|
89
|
-
data_parts = data_line.split(" ")
|
90
|
-
synset_id, lexical_filenum, synset_type, word_count = pos + data_parts.shift, data_parts.shift, data_parts.shift, data_parts.shift.to_i(16)
|
91
|
-
words = Array.new(word_count).map { "#{data_parts.shift}.#{data_parts.shift}" }
|
92
|
-
relations = Array.new(data_parts.shift.to_i).map { "#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}" }
|
93
|
-
{ "synset_id" => synset_id, "lexical_filenum" => lexical_filenum, "synset_type" => synset_type, "words" => words.join('|'), "relations" => relations.join('|'), "gloss" => gloss.strip }
|
94
|
-
end
|
95
|
-
else
|
96
|
-
@connection[synset_id]
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def locate_wordnet?(base_dirs)
|
101
|
-
|
102
|
-
base_dirs = case base_dirs
|
103
|
-
when :search
|
104
|
-
['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0']
|
105
|
-
else
|
106
|
-
[ base_dirs ]
|
107
|
-
end
|
108
|
-
|
109
|
-
base_dirs.each do |dir|
|
110
|
-
["", "dict"].each do |sub_folder|
|
111
|
-
path = Pathname.new(dir + sub_folder)
|
112
|
-
@wordnet_dir = path if (path + "data.noun").exist?
|
113
|
-
break if !@wordnet_dir.nil?
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
return !@wordnet_dir.nil?
|
118
|
-
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
122
|
-
|
123
|
-
class Evocations
|
124
|
-
|
125
|
-
def initialize(evocation_construct, source_synset, wordnet_connection)
|
126
|
-
@wordnet_connection = wordnet_connection
|
127
|
-
@source = source_synset
|
128
|
-
@evocation_construct = evocation_construct
|
129
|
-
end
|
130
|
-
|
131
|
-
def means
|
132
|
-
@means = @evocation_construct["means"].split('|') unless defined? @means
|
133
|
-
@means
|
134
|
-
end
|
135
|
-
|
136
|
-
def medians
|
137
|
-
@medians = @evocation_construct["medians"].split('|') unless defined? @medians
|
138
|
-
@medians
|
139
|
-
end
|
140
|
-
|
141
|
-
def size
|
142
|
-
means.size
|
143
|
-
end
|
144
|
-
|
145
|
-
def first
|
146
|
-
self[0]
|
147
|
-
end
|
148
|
-
|
149
|
-
def last
|
150
|
-
self[size-1]
|
151
|
-
end
|
152
|
-
|
153
|
-
def [] (index)
|
154
|
-
{ :destination => destinations[index], :mean => means[index], :median => medians[index] }
|
155
|
-
end
|
156
|
-
|
157
|
-
def destinations(pos = :all)
|
158
|
-
destination_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, @source.homographs }
|
159
|
-
end
|
160
|
-
|
161
|
-
def destination_ids(pos = :all)
|
162
|
-
@destination_ids = @evocation_construct["relations"].split('|') unless defined? @destination_ids
|
163
|
-
case
|
164
|
-
when Homographs::SYMBOL_TO_POS.include?(pos.to_sym)
|
165
|
-
@destination_ids.select { |synset_id| synset_id[0,1] == Homographs::SYMBOL_TO_POS[pos.to_sym] }
|
166
|
-
when Homographs::POS_TO_SYMBOL.include?(pos.to_s)
|
167
|
-
@destination_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
|
168
|
-
else
|
169
|
-
@destination_ids
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_s
|
174
|
-
"#{size} evocations from #{@source}"
|
175
|
-
end
|
176
|
-
|
177
|
-
end
|
178
|
-
|
179
|
-
class Relation
|
180
|
-
|
181
|
-
RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
|
182
|
-
";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
|
183
|
-
"-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
|
184
|
-
"%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
|
185
|
-
"\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
|
186
|
-
SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
|
187
|
-
|
188
|
-
def initialize(relation_construct, source_synset, wordnet_connection)
|
189
|
-
@wordnet_connection = wordnet_connection
|
190
|
-
@symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
|
191
|
-
@dest_synset_id = @pos + @dest_synset_id
|
192
|
-
@symbol = RELATION_TO_SYMBOL[@symbol]
|
193
|
-
@source_synset = source_synset
|
194
|
-
end
|
195
|
-
|
196
|
-
def is_semantic?
|
197
|
-
@source_dest == "0000"
|
198
|
-
end
|
199
|
-
|
200
|
-
def source_word
|
201
|
-
is_semantic? ? @source_word = nil : @source_word = @source_synset.words[@source_dest[0..1].to_i(16)-1] unless defined? @source_word
|
202
|
-
@source_word
|
203
|
-
end
|
204
|
-
|
205
|
-
def destination_word
|
206
|
-
is_semantic? ? @destination_word = nil : @destination_word = destination.words[@source_dest[2..3].to_i(16)-1] unless defined? @destination_word
|
207
|
-
@destination_word
|
208
|
-
end
|
209
|
-
|
210
|
-
def relation_type?(type)
|
211
|
-
case
|
212
|
-
when SYMBOL_TO_RELATION.include?(type.to_sym)
|
213
|
-
type.to_sym == @symbol
|
214
|
-
when RELATION_TO_SYMBOL.include?(pos.to_s)
|
215
|
-
POINTER_TO_SYMBOL[type.to_sym] == @symbol
|
216
|
-
else
|
217
|
-
false
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
def relation_type
|
222
|
-
@symbol
|
223
|
-
end
|
224
|
-
|
225
|
-
def destination
|
226
|
-
@destination = Synset.new(@dest_synset_id, @wordnet_connection, nil) unless defined? @destination
|
227
|
-
@destination
|
228
|
-
end
|
229
|
-
|
230
|
-
def to_s
|
231
|
-
@to_s = "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\"" if !is_semantic? && !defined?(@to_s)
|
232
|
-
@to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
|
233
|
-
@to_s
|
234
|
-
end
|
235
|
-
|
236
|
-
def inspect
|
237
|
-
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
238
|
-
end
|
239
|
-
|
240
|
-
end
|
241
|
-
|
242
|
-
class Synset
|
243
|
-
|
244
|
-
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
245
|
-
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
246
|
-
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
247
|
-
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
248
|
-
{ :lex => :adv_all, :description => "all adverbs" },
|
249
|
-
{ :lex => :noun_Tops, :description => "unique beginner for nouns" },
|
250
|
-
{ :lex => :noun_act, :description => "nouns denoting acts or actions" },
|
251
|
-
{ :lex => :noun_animal, :description => "nouns denoting animals" },
|
252
|
-
{ :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
|
253
|
-
{ :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
|
254
|
-
{ :lex => :noun_body, :description => "nouns denoting body parts" },
|
255
|
-
{ :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
|
256
|
-
{ :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
|
257
|
-
{ :lex => :noun_event, :description => "nouns denoting natural events" },
|
258
|
-
{ :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
|
259
|
-
{ :lex => :noun_food, :description => "nouns denoting foods and drinks" },
|
260
|
-
{ :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
|
261
|
-
{ :lex => :noun_location, :description => "nouns denoting spatial position" },
|
262
|
-
{ :lex => :noun_motive, :description => "nouns denoting goals" },
|
263
|
-
{ :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
|
264
|
-
{ :lex => :noun_person, :description => "nouns denoting people" },
|
265
|
-
{ :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
|
266
|
-
{ :lex => :noun_plant, :description => "nouns denoting plants" },
|
267
|
-
{ :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
|
268
|
-
{ :lex => :noun_process, :description => "nouns denoting natural processes" },
|
269
|
-
{ :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
|
270
|
-
{ :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
|
271
|
-
{ :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
|
272
|
-
{ :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
|
273
|
-
{ :lex => :noun_substance, :description => "nouns denoting substances" },
|
274
|
-
{ :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
|
275
|
-
{ :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
|
276
|
-
{ :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
|
277
|
-
{ :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
|
278
|
-
{ :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
|
279
|
-
{ :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
|
280
|
-
{ :lex => :verb_consumption, :description => "verbs of eating and drinking" },
|
281
|
-
{ :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
|
282
|
-
{ :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
|
283
|
-
{ :lex => :verb_emotion, :description => "verbs of feeling" },
|
284
|
-
{ :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
|
285
|
-
{ :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
|
286
|
-
{ :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
|
287
|
-
{ :lex => :verb_social, :description => "verbs of political and social activities and events" },
|
288
|
-
{ :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
|
289
|
-
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
290
|
-
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
291
|
-
|
292
|
-
def initialize(synset_id, wordnet_connection, homographs)
|
293
|
-
@wordnet_connection = wordnet_connection
|
294
|
-
@synset_hash = wordnet_connection.synset(synset_id)
|
295
|
-
@homographs = homographs
|
296
|
-
# construct some convenience methods for relation type access
|
297
|
-
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
298
|
-
self.class.send(:define_method, "#{relation_type}s?") do
|
299
|
-
relations(relation_type).size > 0
|
300
|
-
end
|
301
|
-
self.class.send(:define_method, "#{relation_type}s") do
|
302
|
-
relations(relation_type)
|
303
|
-
end
|
304
|
-
end
|
305
|
-
end
|
306
|
-
|
307
|
-
def synset_type
|
308
|
-
SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
|
309
|
-
end
|
310
|
-
|
311
|
-
def words
|
312
|
-
@words = words_with_lexical_ids.map { |word_with_num| word_with_num[:word] } unless defined? @words
|
313
|
-
@words
|
314
|
-
end
|
315
|
-
|
316
|
-
def lexical_ids
|
317
|
-
@words = words_with_lexical_ids.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
|
318
|
-
@words
|
319
|
-
end
|
320
|
-
|
321
|
-
def size
|
322
|
-
words.size
|
323
|
-
end
|
324
|
-
|
325
|
-
def words_with_lexical_ids
|
326
|
-
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
|
327
|
-
@words_with_num
|
328
|
-
end
|
329
|
-
|
330
|
-
def lexical_filenum
|
331
|
-
@synset_hash["lexical_filenum"]
|
332
|
-
end
|
333
|
-
|
334
|
-
def lexical_catagory
|
335
|
-
lexical[:lex]
|
336
|
-
end
|
337
|
-
|
338
|
-
def lexical_description
|
339
|
-
lexical[:description]
|
340
|
-
end
|
341
|
-
|
342
|
-
def lexical
|
343
|
-
NUM_TO_LEX[lexical_filenum.to_i]
|
344
|
-
end
|
345
|
-
|
346
|
-
def synset_id
|
347
|
-
@synset_hash["synset_id"]
|
348
|
-
end
|
349
|
-
|
350
|
-
def gloss
|
351
|
-
@synset_hash["gloss"]
|
352
|
-
end
|
353
|
-
|
354
|
-
def lemma
|
355
|
-
@homographs.lemma
|
356
|
-
end
|
357
|
-
|
358
|
-
def homographs
|
359
|
-
@homographs
|
360
|
-
end
|
361
|
-
|
362
|
-
def inspect
|
363
|
-
@synset_hash.inspect
|
364
|
-
end
|
365
|
-
|
366
|
-
def relations(type = :all)
|
367
|
-
@relations = @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) } unless defined? @relations
|
368
|
-
case
|
369
|
-
when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
|
370
|
-
@relations.select { |relation| relation.relation_type == type.to_sym }
|
371
|
-
when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
|
372
|
-
@relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
|
373
|
-
else
|
374
|
-
@relations
|
375
|
-
end
|
376
|
-
end
|
377
|
-
|
378
|
-
def evocations
|
379
|
-
evocations_arr = @wordnet_connection.evocations(synset_id)
|
380
|
-
Evocations.new evocations_arr, self, @wordnet_connection unless evocations_arr.nil?
|
381
|
-
end
|
382
|
-
|
383
|
-
def to_s
|
384
|
-
@to_s = "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}" unless defined? @to_s
|
385
|
-
@to_s
|
386
|
-
end
|
387
|
-
|
388
|
-
alias word lemma
|
389
|
-
|
390
|
-
end
|
391
|
-
|
392
|
-
class Homographs
|
393
|
-
|
394
|
-
POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
|
395
|
-
SYMBOL_TO_POS = POS_TO_SYMBOL.invert
|
396
|
-
|
397
|
-
def initialize(raw_homographs, wordnet_connection)
|
398
|
-
@wordnet_connection = wordnet_connection
|
399
|
-
@raw_homographs = raw_homographs
|
400
|
-
# construct some convenience methods for relation type access
|
401
|
-
SYMBOL_TO_POS.keys.each do |pos|
|
402
|
-
self.class.send(:define_method, "#{pos}s?") do
|
403
|
-
size(pos) > 0
|
404
|
-
end
|
405
|
-
self.class.send(:define_method, "#{pos}s") do
|
406
|
-
synsets(pos)
|
407
|
-
end
|
408
|
-
self.class.send(:define_method, "#{pos}_count") do
|
409
|
-
size(pos)
|
410
|
-
end
|
411
|
-
self.class.send(:define_method, "#{pos}_ids") do
|
412
|
-
synset_ids(pos)
|
413
|
-
end
|
414
|
-
end
|
415
|
-
end
|
416
|
-
|
417
|
-
def tagsense_counts
|
418
|
-
@tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
|
419
|
-
@tagsense_counts
|
420
|
-
end
|
421
|
-
|
422
|
-
def lemma
|
423
|
-
@lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
|
424
|
-
@lemma
|
425
|
-
end
|
426
|
-
|
427
|
-
def available_pos
|
428
|
-
@available_pos = synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq unless defined? @available_pos
|
429
|
-
@available_pos
|
430
|
-
end
|
431
|
-
|
432
|
-
def to_s
|
433
|
-
@to_s = [lemma, " " + available_pos.join("/")].join(",") unless defined? @to_s
|
434
|
-
@to_s
|
435
|
-
end
|
436
|
-
|
437
|
-
def size(pos = :all)
|
438
|
-
synset_ids(pos).size
|
439
|
-
end
|
440
|
-
|
441
|
-
def synsets(pos = :all)
|
442
|
-
synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, self }
|
443
|
-
end
|
444
|
-
|
445
|
-
def synset_ids(pos = :all)
|
446
|
-
@synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
|
447
|
-
case
|
448
|
-
when SYMBOL_TO_POS.include?(pos.to_sym)
|
449
|
-
@synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
|
450
|
-
when POS_TO_SYMBOL.include?(pos.to_s)
|
451
|
-
@synset_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
|
452
|
-
else
|
453
|
-
@synset_ids
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
def inspect
|
458
|
-
@raw_homographs.inspect
|
459
|
-
end
|
460
|
-
|
461
|
-
alias word lemma
|
462
|
-
alias pos available_pos
|
463
|
-
alias senses synsets
|
464
|
-
alias sense_ids synset_ids
|
465
|
-
|
466
|
-
end
|
467
|
-
|
468
|
-
class Words
|
469
|
-
|
470
|
-
@wordnet_connection = nil
|
471
|
-
|
472
|
-
def initialize(type = :tokyo, path = :default, wordnet_path = :search)
|
473
|
-
@wordnet_connection = WordnetConnection.new(type, path, wordnet_path)
|
474
|
-
end
|
475
|
-
|
476
|
-
def find(word)
|
477
|
-
homographs = @wordnet_connection.homographs(word)
|
478
|
-
Homographs.new homographs, @wordnet_connection unless homographs.nil?
|
479
|
-
end
|
480
|
-
|
481
|
-
def connection_type
|
482
|
-
@wordnet_connection.connection_type
|
483
|
-
end
|
484
|
-
|
485
|
-
def wordnet_dir
|
486
|
-
@wordnet_connection.wordnet_dir
|
487
|
-
end
|
488
|
-
|
489
|
-
def close
|
490
|
-
@wordnet_connection.close
|
491
|
-
end
|
492
|
-
|
493
|
-
def connected
|
494
|
-
@wordnet_connection.connected
|
495
|
-
end
|
8
|
+
|
9
|
+
# we identify each wordnet connector installed and their paths
|
10
|
+
SUPPORTED_CONNECTIORS = Dir[File.join(File.dirname(__FILE__),'wordnet_connectors','*_wordnet_connection.rb')].inject(Hash.new) { |connectors, connection_file| connectors[ File.basename(connection_file).split('_').first.to_sym ] = connection_file; connectors }
|
11
|
+
DEFAULT_WORDNET_LOCATIONS = ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0', '/opt/WordNet-3.0', '/opt/wordnet', '/opt/local/share/WordNet-3.0/']
|
12
|
+
|
13
|
+
# specify some useful exception types
|
14
|
+
class BadWordnetConnector < RuntimeError; end
|
15
|
+
class BadWordnetDataset < RuntimeError; end
|
16
|
+
class NoWordnetConnection < RuntimeError; end
|
17
|
+
|
18
|
+
# specify the wordnet control object
|
19
|
+
class Wordnet
|
20
|
+
|
21
|
+
attr_reader :wordnet_connection
|
496
22
|
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
23
|
+
def initialize(connector_type = :pure, wordnet_path = :search, data_path = :default)
|
24
|
+
|
25
|
+
# check and specify useful paths
|
26
|
+
wordnet_path = Wordnet::locate_wordnet(wordnet_path)
|
27
|
+
data_path = (data_path == :default ? Pathname.new(File.join(File.dirname(__FILE__), '..', 'data')) : Pathname.new( data_path ))
|
28
|
+
|
29
|
+
# ensure we have a valid connector type
|
30
|
+
raise BadWordnetConnector, "You specified an unsupported wordnet connector type. Supported connectors are: #{SUPPORTED_CONNECTIORS}" unless SUPPORTED_CONNECTIORS.include? connector_type
|
31
|
+
|
32
|
+
# assuming we have a valid connection type we can import the relevant code (the reason we do this dynamically is to reduce loadtime)
|
33
|
+
require SUPPORTED_CONNECTIORS[connector_type]
|
34
|
+
|
35
|
+
# construct the connector object
|
36
|
+
@wordnet_connection = Words.const_get( File.basename(SUPPORTED_CONNECTIORS[connector_type], '.rb').gsub(/(^|_)(.)/) { $2.upcase } ).new(data_path, wordnet_path)
|
37
|
+
|
38
|
+
# construct some convenience methods that forward to the wordnet connection
|
39
|
+
[:connection_type, :wordnet_path, :data_path, :close!, :open!, :connected?, :evocations?].each do |method_name|
|
40
|
+
self.class.send(:define_method, method_name) do
|
41
|
+
@wordnet_connection.send method_name if defined? @wordnet_connection
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
def find(term)
|
48
|
+
|
49
|
+
raise NoWordnetConnection, "There is presently no connection to wordnet. To attempt to reistablish a connection you should use the 'open!' command on the Wordnet object." unless connected?
|
50
|
+
homographs = @wordnet_connection.homographs(term)
|
51
|
+
Homographs.new(homographs, @wordnet_connection) unless homographs.nil?
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
def to_s
|
56
|
+
|
57
|
+
# return a description of the connector
|
58
|
+
!connected? ? "Words not connected" : @wordnet_connection.to_s
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def self.locate_wordnet(base_dirs)
|
65
|
+
|
66
|
+
base_dirs = case base_dirs
|
67
|
+
when :search
|
68
|
+
DEFAULT_WORDNET_LOCATIONS
|
69
|
+
else
|
70
|
+
[ base_dirs ]
|
71
|
+
end
|
72
|
+
|
73
|
+
base_dirs.each do |dir|
|
74
|
+
["", "dict"].each do |sub_folder|
|
75
|
+
path = Pathname.new(dir + sub_folder)
|
76
|
+
return path if (path + "data.noun").exist?
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
return nil
|
81
|
+
|
82
|
+
end
|
83
|
+
|
501
84
|
end
|
502
|
-
|
503
|
-
end
|
504
85
|
|
505
|
-
end
|
86
|
+
end
|
data/spec/words_spec.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
require 'words'
|
3
|
+
|
4
|
+
describe "Words Constructer" do
|
5
|
+
|
6
|
+
it "should reject bad modes" do
|
7
|
+
lambda { Words::Wordnet.new(:rubbish) }.should raise_exception(Words::BadWordnetConnector)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should when in pure mode, when provided with a bad wordnet directory, return a BadWordnetDataset exception" do
|
11
|
+
lambda { Words::Wordnet.new(:pure, '/lib') }.should raise_exception(Words::BadWordnetDataset)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should when in tokyo mode, when provided with a bad dataset directory, return a BadWordnetDataset exception" do
|
15
|
+
lambda { Words::Wordnet.new(:tokyo, :search, '/lib') }.should raise_exception(Words::BadWordnetDataset)
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "Pure Words Constructor" do
|
21
|
+
|
22
|
+
before do
|
23
|
+
@words = Words::Wordnet.new(:pure)
|
24
|
+
end
|
25
|
+
|
26
|
+
after do
|
27
|
+
@words.close!
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should accept pure mode" do
|
31
|
+
@words.should_not be_nil
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should when given the request for a pure mode return a pure connection" do
|
35
|
+
@words.wordnet_connection.should be_kind_of Words::PureWordnetConnection
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should when given the request for a pure mode return an open pure connection" do
|
39
|
+
@words.connected?.should be_true
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should when in pure mode, report itself as to in to_s" do
|
43
|
+
@words.to_s.should match /Words running in pure mode using wordnet files found at .*/
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should when in pure mode, when the connection is closed, report itself as closed" do
|
47
|
+
@words.close!
|
48
|
+
@words.connected?.should be_false
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should when in pure mode, when the connection is closed, report itself as closed in to_s" do
|
52
|
+
@words.close!
|
53
|
+
@words.to_s.should match 'Words not connected'
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should when in pure mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
|
57
|
+
@words.close!
|
58
|
+
lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should when checked report itself as a pure connection" do
|
62
|
+
@words.connection_type.should equal :pure
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "Tokyo Words Constructor" do
|
68
|
+
|
69
|
+
before do
|
70
|
+
@words = Words::Wordnet.new(:tokyo)
|
71
|
+
end
|
72
|
+
|
73
|
+
after do
|
74
|
+
@words.close!
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should accept tokyo mode" do
|
78
|
+
@words.should_not be_nil
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should when given the request for a tokyo mode return a tokyo connection" do
|
82
|
+
@words.wordnet_connection.should be_kind_of Words::TokyoWordnetConnection
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should when given the request for a tokyo mode return an open tokyo connection" do
|
86
|
+
@words.connected?.should be_true
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should when in tokyo mode should report itself as to in to_s" do
|
90
|
+
@words.to_s.should match /Words running in tokyo mode with dataset at .*/
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should when in tokyo mode should when the connection is closed report itself as closed" do
|
94
|
+
@words.close!
|
95
|
+
@words.connected?.should be_false
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should when in tokyo mode should when the connection is closed report itself as closed in to_s" do
|
99
|
+
@words.close!
|
100
|
+
@words.to_s.should match 'Words not connected'
|
101
|
+
end
|
102
|
+
|
103
|
+
it "should when in tokyo mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
|
104
|
+
@words.close!
|
105
|
+
lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should when checked report itself as a tokyo connection" do
|
109
|
+
@words.connection_type.should equal :tokyo
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
|