words 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/words.rb CHANGED
@@ -1,505 +1,86 @@
1
- # std includes
1
+ # std library includes
2
2
  require 'pathname'
3
- require 'set'
4
3
 
5
- # gem includes
6
- require 'rubygems'
7
- require 'rufus-tokyo' if Gem.available?('rufus-tokyo')
4
+ # local includes
5
+ require File.join(File.dirname(__FILE__),'homographs.rb')
8
6
 
9
7
  module Words
10
-
11
- class WordnetConnection
12
-
13
- SHORT_TO_POS_FILE_TYPE = { 'a' => 'adj', 'r' => 'adv', 'n' => 'noun', 'v' => 'verb' }
14
-
15
- attr_reader :connected, :connection_type, :data_path, :wordnet_dir
16
-
17
- def initialize(type, path, wordnet_path)
18
- @data_path = Pathname.new("#{File.dirname(__FILE__)}/../data/wordnet.tct") if type == :tokyo && path == :default
19
- @data_path = Pathname.new("#{File.dirname(__FILE__)}/../data/index.dmp") if type == :pure && path == :default
20
- @connection_type = type
21
-
22
- if @data_path.exist?
23
- if @connection_type == :tokyo
24
- raise "Coulden't find the rufus-tokyo gem. Please ensure it's installed." unless Gem.available?('rufus-tokyo')
25
- @connection = Rufus::Tokyo::Table.new(@data_path.to_s, :mode => 'r')
26
- @connected = true
27
- elsif @connection_type == :pure
28
- # open the index is there
29
- File.open(@data_path, 'r') do |file|
30
- @connection = Marshal.load file.read
31
- end
32
- evocation_path = Pathname.new("#{File.dirname(__FILE__)}/../data/evocations.dmp")
33
- File.open(evocation_path, 'r') do |file|
34
- @evocations = Marshal.load file.read
35
- end if evocation_path.exist?
36
- # search for the wordnet files
37
- if locate_wordnet?(wordnet_path)
38
- @connected = true
39
- else
40
- @connected = false
41
- raise "Failed to locate the wordnet database. Please ensure it is installed and that if it resides at a custom path that path is given as an argument when constructing the Words object."
42
- end
43
- else
44
- @connected = false
45
- end
46
- else
47
- @connected = false
48
- raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
49
- end
50
-
51
- end
52
-
53
- def close
54
- @connected = false
55
- if @connected && connection_type == :tokyo
56
- connection.close
57
- end
58
- return true
59
- end
60
-
61
- def homographs(term)
62
- if connection_type == :pure
63
- raw_homographs = @connection[term]
64
- { 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]} unless raw_homographs.nil?
65
- else
66
- @connection[term]
67
- end
68
- end
69
-
70
- def evocations(senset_id)
71
- if connection_type == :pure
72
- if defined? @evocations
73
- raw_evocations = @evocations[senset_id + "s"]
74
- { 'relations' => raw_evocations[0], 'means' => raw_evocations[1], 'medians' => raw_evocations[2]} unless raw_evocations.nil?
75
- else
76
- nil
77
- end
78
- else
79
- @connection[senset_id + "s"]
80
- end
81
- end
82
-
83
- def synset(synset_id)
84
- if connection_type == :pure
85
- pos = synset_id[0,1]
86
- File.open(@wordnet_dir + "data.#{SHORT_TO_POS_FILE_TYPE[pos]}","r") do |file|
87
- file.seek(synset_id[1..-1].to_i)
88
- data_line, gloss = file.readline.strip.split(" | ")
89
- data_parts = data_line.split(" ")
90
- synset_id, lexical_filenum, synset_type, word_count = pos + data_parts.shift, data_parts.shift, data_parts.shift, data_parts.shift.to_i(16)
91
- words = Array.new(word_count).map { "#{data_parts.shift}.#{data_parts.shift}" }
92
- relations = Array.new(data_parts.shift.to_i).map { "#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}" }
93
- { "synset_id" => synset_id, "lexical_filenum" => lexical_filenum, "synset_type" => synset_type, "words" => words.join('|'), "relations" => relations.join('|'), "gloss" => gloss.strip }
94
- end
95
- else
96
- @connection[synset_id]
97
- end
98
- end
99
-
100
- def locate_wordnet?(base_dirs)
101
-
102
- base_dirs = case base_dirs
103
- when :search
104
- ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0']
105
- else
106
- [ base_dirs ]
107
- end
108
-
109
- base_dirs.each do |dir|
110
- ["", "dict"].each do |sub_folder|
111
- path = Pathname.new(dir + sub_folder)
112
- @wordnet_dir = path if (path + "data.noun").exist?
113
- break if !@wordnet_dir.nil?
114
- end
115
- end
116
-
117
- return !@wordnet_dir.nil?
118
-
119
- end
120
-
121
- end
122
-
123
- class Evocations
124
-
125
- def initialize(evocation_construct, source_synset, wordnet_connection)
126
- @wordnet_connection = wordnet_connection
127
- @source = source_synset
128
- @evocation_construct = evocation_construct
129
- end
130
-
131
- def means
132
- @means = @evocation_construct["means"].split('|') unless defined? @means
133
- @means
134
- end
135
-
136
- def medians
137
- @medians = @evocation_construct["medians"].split('|') unless defined? @medians
138
- @medians
139
- end
140
-
141
- def size
142
- means.size
143
- end
144
-
145
- def first
146
- self[0]
147
- end
148
-
149
- def last
150
- self[size-1]
151
- end
152
-
153
- def [] (index)
154
- { :destination => destinations[index], :mean => means[index], :median => medians[index] }
155
- end
156
-
157
- def destinations(pos = :all)
158
- destination_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, @source.homographs }
159
- end
160
-
161
- def destination_ids(pos = :all)
162
- @destination_ids = @evocation_construct["relations"].split('|') unless defined? @destination_ids
163
- case
164
- when Homographs::SYMBOL_TO_POS.include?(pos.to_sym)
165
- @destination_ids.select { |synset_id| synset_id[0,1] == Homographs::SYMBOL_TO_POS[pos.to_sym] }
166
- when Homographs::POS_TO_SYMBOL.include?(pos.to_s)
167
- @destination_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
168
- else
169
- @destination_ids
170
- end
171
- end
172
-
173
- def to_s
174
- "#{size} evocations from #{@source}"
175
- end
176
-
177
- end
178
-
179
- class Relation
180
-
181
- RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
182
- ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
183
- "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
184
- "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
185
- "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
186
- SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
187
-
188
- def initialize(relation_construct, source_synset, wordnet_connection)
189
- @wordnet_connection = wordnet_connection
190
- @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
191
- @dest_synset_id = @pos + @dest_synset_id
192
- @symbol = RELATION_TO_SYMBOL[@symbol]
193
- @source_synset = source_synset
194
- end
195
-
196
- def is_semantic?
197
- @source_dest == "0000"
198
- end
199
-
200
- def source_word
201
- is_semantic? ? @source_word = nil : @source_word = @source_synset.words[@source_dest[0..1].to_i(16)-1] unless defined? @source_word
202
- @source_word
203
- end
204
-
205
- def destination_word
206
- is_semantic? ? @destination_word = nil : @destination_word = destination.words[@source_dest[2..3].to_i(16)-1] unless defined? @destination_word
207
- @destination_word
208
- end
209
-
210
- def relation_type?(type)
211
- case
212
- when SYMBOL_TO_RELATION.include?(type.to_sym)
213
- type.to_sym == @symbol
214
- when RELATION_TO_SYMBOL.include?(pos.to_s)
215
- POINTER_TO_SYMBOL[type.to_sym] == @symbol
216
- else
217
- false
218
- end
219
- end
220
-
221
- def relation_type
222
- @symbol
223
- end
224
-
225
- def destination
226
- @destination = Synset.new(@dest_synset_id, @wordnet_connection, nil) unless defined? @destination
227
- @destination
228
- end
229
-
230
- def to_s
231
- @to_s = "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\"" if !is_semantic? && !defined?(@to_s)
232
- @to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
233
- @to_s
234
- end
235
-
236
- def inspect
237
- { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
238
- end
239
-
240
- end
241
-
242
- class Synset
243
-
244
- SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
245
- SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
246
- NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
247
- { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
248
- { :lex => :adv_all, :description => "all adverbs" },
249
- { :lex => :noun_Tops, :description => "unique beginner for nouns" },
250
- { :lex => :noun_act, :description => "nouns denoting acts or actions" },
251
- { :lex => :noun_animal, :description => "nouns denoting animals" },
252
- { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
253
- { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
254
- { :lex => :noun_body, :description => "nouns denoting body parts" },
255
- { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
256
- { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
257
- { :lex => :noun_event, :description => "nouns denoting natural events" },
258
- { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
259
- { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
260
- { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
261
- { :lex => :noun_location, :description => "nouns denoting spatial position" },
262
- { :lex => :noun_motive, :description => "nouns denoting goals" },
263
- { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
264
- { :lex => :noun_person, :description => "nouns denoting people" },
265
- { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
266
- { :lex => :noun_plant, :description => "nouns denoting plants" },
267
- { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
268
- { :lex => :noun_process, :description => "nouns denoting natural processes" },
269
- { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
270
- { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
271
- { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
272
- { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
273
- { :lex => :noun_substance, :description => "nouns denoting substances" },
274
- { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
275
- { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
276
- { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
277
- { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
278
- { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
279
- { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
280
- { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
281
- { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
282
- { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
283
- { :lex => :verb_emotion, :description => "verbs of feeling" },
284
- { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
285
- { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
286
- { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
287
- { :lex => :verb_social, :description => "verbs of political and social activities and events" },
288
- { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
289
- { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
290
- { :lex => :adj_ppl, :description => "participial adjectives" } ]
291
-
292
- def initialize(synset_id, wordnet_connection, homographs)
293
- @wordnet_connection = wordnet_connection
294
- @synset_hash = wordnet_connection.synset(synset_id)
295
- @homographs = homographs
296
- # construct some conveniance menthods for relation type access
297
- Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
298
- self.class.send(:define_method, "#{relation_type}s?") do
299
- relations(relation_type).size > 0
300
- end
301
- self.class.send(:define_method, "#{relation_type}s") do
302
- relations(relation_type)
303
- end
304
- end
305
- end
306
-
307
- def synset_type
308
- SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
309
- end
310
-
311
- def words
312
- @words = words_with_lexical_ids.map { |word_with_num| word_with_num[:word] } unless defined? @words
313
- @words
314
- end
315
-
316
- def lexical_ids
317
- @words = words_with_lexical_ids.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
318
- @words
319
- end
320
-
321
- def size
322
- words.size
323
- end
324
-
325
- def words_with_lexical_ids
326
- @words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
327
- @words_with_num
328
- end
329
-
330
- def lexical_filenum
331
- @synset_hash["lexical_filenum"]
332
- end
333
-
334
- def lexical_catagory
335
- lexical[:lex]
336
- end
337
-
338
- def lexical_description
339
- lexical[:description]
340
- end
341
-
342
- def lexical
343
- NUM_TO_LEX[lexical_filenum.to_i]
344
- end
345
-
346
- def synset_id
347
- @synset_hash["synset_id"]
348
- end
349
-
350
- def gloss
351
- @synset_hash["gloss"]
352
- end
353
-
354
- def lemma
355
- @homographs.lemma
356
- end
357
-
358
- def homographs
359
- @homographs
360
- end
361
-
362
- def inspect
363
- @synset_hash.inspect
364
- end
365
-
366
- def relations(type = :all)
367
- @relations = @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) } unless defined? @relations
368
- case
369
- when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
370
- @relations.select { |relation| relation.relation_type == type.to_sym }
371
- when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
372
- @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
373
- else
374
- @relations
375
- end
376
- end
377
-
378
- def evocations
379
- evocations_arr = @wordnet_connection.evocations(synset_id)
380
- Evocations.new evocations_arr, self, @wordnet_connection unless evocations_arr.nil?
381
- end
382
-
383
- def to_s
384
- @to_s = "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}" unless defined? @to_s
385
- @to_s
386
- end
387
-
388
- alias word lemma
389
-
390
- end
391
-
392
- class Homographs
393
-
394
- POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
395
- SYMBOL_TO_POS = POS_TO_SYMBOL.invert
396
-
397
- def initialize(raw_homographs, wordnet_connection)
398
- @wordnet_connection = wordnet_connection
399
- @raw_homographs = raw_homographs
400
- # construct some conveniance menthods for relation type access
401
- SYMBOL_TO_POS.keys.each do |pos|
402
- self.class.send(:define_method, "#{pos}s?") do
403
- size(pos) > 0
404
- end
405
- self.class.send(:define_method, "#{pos}s") do
406
- synsets(pos)
407
- end
408
- self.class.send(:define_method, "#{pos}_count") do
409
- size(pos)
410
- end
411
- self.class.send(:define_method, "#{pos}_ids") do
412
- synset_ids(pos)
413
- end
414
- end
415
- end
416
-
417
- def tagsense_counts
418
- @tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
419
- @tagsense_counts
420
- end
421
-
422
- def lemma
423
- @lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
424
- @lemma
425
- end
426
-
427
- def available_pos
428
- @available_pos = synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq unless defined? @available_pos
429
- @available_pos
430
- end
431
-
432
- def to_s
433
- @to_s = [lemma, " " + available_pos.join("/")].join(",") unless defined? @to_s
434
- @to_s
435
- end
436
-
437
- def size(pos = :all)
438
- synset_ids(pos).size
439
- end
440
-
441
- def synsets(pos = :all)
442
- synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, self }
443
- end
444
-
445
- def synset_ids(pos = :all)
446
- @synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
447
- case
448
- when SYMBOL_TO_POS.include?(pos.to_sym)
449
- @synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
450
- when POS_TO_SYMBOL.include?(pos.to_s)
451
- @synset_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
452
- else
453
- @synset_ids
454
- end
455
- end
456
-
457
- def inspect
458
- @raw_homographs.inspect
459
- end
460
-
461
- alias word lemma
462
- alias pos available_pos
463
- alias senses synsets
464
- alias sense_ids synset_ids
465
-
466
- end
467
-
468
- class Words
469
-
470
- @wordnet_connection = nil
471
-
472
- def initialize(type = :tokyo, path = :default, wordnet_path = :search)
473
- @wordnet_connection = WordnetConnection.new(type, path, wordnet_path)
474
- end
475
-
476
- def find(word)
477
- homographs = @wordnet_connection.homographs(word)
478
- Homographs.new homographs, @wordnet_connection unless homographs.nil?
479
- end
480
-
481
- def connection_type
482
- @wordnet_connection.connection_type
483
- end
484
-
485
- def wordnet_dir
486
- @wordnet_connection.wordnet_dir
487
- end
488
-
489
- def close
490
- @wordnet_connection.close
491
- end
492
-
493
- def connected
494
- @wordnet_connection.connected
495
- end
8
+
9
+ # we identify each installed wordnet connector and their paths
10
+ SUPPORTED_CONNECTIORS = Dir[File.join(File.dirname(__FILE__),'wordnet_connectors','*_wordnet_connection.rb')].inject(Hash.new) { |connectors, connection_file| connectors[ File.basename(connection_file).split('_').first.to_sym ] = connection_file; connectors }
11
+ DEFAULT_WORDNET_LOCATIONS = ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0', '/opt/WordNet-3.0', '/opt/wordnet', '/opt/local/share/WordNet-3.0/']
12
+
13
+ # specify some useful exception types
14
+ class BadWordnetConnector < RuntimeError; end
15
+ class BadWordnetDataset < RuntimeError; end
16
+ class NoWordnetConnection < RuntimeError; end
17
+
18
+ # specify the wordnet control object
19
+ class Wordnet
20
+
21
+ attr_reader :wordnet_connection
496
22
 
497
- def to_s
498
- return "Words not connected" if !connected
499
- return "Words running in pure mode using wordnet files found at #{wordnet_dir} and index at #{@wordnet_connection.data_path}" if connection_type == :pure
500
- return "Words running in tokyo mode with dataset at #{@wordnet_connection.data_path}" if connection_type == :tokyo
23
+ def initialize(connector_type = :pure, wordnet_path = :search, data_path = :default)
24
+
25
+ # check and specify useful paths
26
+ wordnet_path = Wordnet::locate_wordnet(wordnet_path)
27
+ data_path = (data_path == :default ? Pathname.new(File.join(File.dirname(__FILE__), '..', 'data')) : Pathname.new( data_path ))
28
+
29
+ # ensure we have a valid connector type
30
+ raise BadWordnetConnector, "You specified an unsupported wordnet connector type. Supported connectors are: #{SUPPORTED_CONNECTIORS}" unless SUPPORTED_CONNECTIORS.include? connector_type
31
+
32
+ # assuming we have a valid connector type, we can import the relevant code (we do this dynamically to reduce load time)
33
+ require SUPPORTED_CONNECTIORS[connector_type]
34
+
35
+ # construct the connector object
36
+ @wordnet_connection = Words.const_get( File.basename(SUPPORTED_CONNECTIORS[connector_type], '.rb').gsub(/(^|_)(.)/) { $2.upcase } ).new(data_path, wordnet_path)
37
+
38
+ # construct some convenience methods that delegate to the wordnet connection
39
+ [:connection_type, :wordnet_path, :data_path, :close!, :open!, :connected?, :evocations?].each do |method_name|
40
+ self.class.send(:define_method, method_name) do
41
+ @wordnet_connection.send method_name if defined? @wordnet_connection
42
+ end
43
+ end
44
+
45
+ end
46
+
47
+ def find(term)
48
+
49
+ raise NoWordnetConnection, "There is presently no connection to wordnet. To attempt to reistablish a connection you should use the 'open!' command on the Wordnet object." unless connected?
50
+ homographs = @wordnet_connection.homographs(term)
51
+ Homographs.new(homographs, @wordnet_connection) unless homographs.nil?
52
+
53
+ end
54
+
55
+ def to_s
56
+
57
+ # return a description of the connector
58
+ !connected? ? "Words not connected" : @wordnet_connection.to_s
59
+
60
+ end
61
+
62
+ private
63
+
64
+ def self.locate_wordnet(base_dirs)
65
+
66
+ base_dirs = case base_dirs
67
+ when :search
68
+ DEFAULT_WORDNET_LOCATIONS
69
+ else
70
+ [ base_dirs ]
71
+ end
72
+
73
+ base_dirs.each do |dir|
74
+ ["", "dict"].each do |sub_folder|
75
+ path = Pathname.new(dir + sub_folder)
76
+ return path if (path + "data.noun").exist?
77
+ end
78
+ end
79
+
80
+ return nil
81
+
82
+ end
83
+
501
84
  end
502
-
503
- end
504
85
 
505
- end
86
+ end
@@ -0,0 +1,113 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ require 'words'
3
+
4
+ describe "Words Constructer" do
5
+
6
+ it "should reject bad modes" do
7
+ lambda { Words::Wordnet.new(:rubbish) }.should raise_exception(Words::BadWordnetConnector)
8
+ end
9
+
10
+ it "should when in pure mode, when provided with a bad wordnet directory, return a BadWordnetDataset exception" do
11
+ lambda { Words::Wordnet.new(:pure, '/lib') }.should raise_exception(Words::BadWordnetDataset)
12
+ end
13
+
14
+ it "should when in tokyo mode, when provided with a bad dataset directory, return a BadWordnetDataset exception" do
15
+ lambda { Words::Wordnet.new(:tokyo, :search, '/lib') }.should raise_exception(Words::BadWordnetDataset)
16
+ end
17
+
18
+ end
19
+
20
+ describe "Pure Words Constructor" do
21
+
22
+ before do
23
+ @words = Words::Wordnet.new(:pure)
24
+ end
25
+
26
+ after do
27
+ @words.close!
28
+ end
29
+
30
+ it "should accept pure mode" do
31
+ @words.should_not be_nil
32
+ end
33
+
34
+ it "should when given the request for a pure mode return a pure connection" do
35
+ @words.wordnet_connection.should be_kind_of Words::PureWordnetConnection
36
+ end
37
+
38
+ it "should when given the request for a pure mode return an open pure connection" do
39
+ @words.connected?.should be_true
40
+ end
41
+
42
+ it "should when in pure mode, report itself as to in to_s" do
43
+ @words.to_s.should match /Words running in pure mode using wordnet files found at .*/
44
+ end
45
+
46
+ it "should when in pure mode, when the connection is closed, report itself as closed" do
47
+ @words.close!
48
+ @words.connected?.should be_false
49
+ end
50
+
51
+ it "should when in pure mode, when the connection is closed, report itself as closed in to_s" do
52
+ @words.close!
53
+ @words.to_s.should match 'Words not connected'
54
+ end
55
+
56
+ it "should when in pure mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
57
+ @words.close!
58
+ lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
59
+ end
60
+
61
+ it "should when checked report itself as a pure connection" do
62
+ @words.connection_type.should equal :pure
63
+ end
64
+
65
+ end
66
+
67
+ describe "Tokyo Words Constructor" do
68
+
69
+ before do
70
+ @words = Words::Wordnet.new(:tokyo)
71
+ end
72
+
73
+ after do
74
+ @words.close!
75
+ end
76
+
77
+ it "should accept tokyo mode" do
78
+ @words.should_not be_nil
79
+ end
80
+
81
+ it "should when given the request for a tokyo mode return a tokyo connection" do
82
+ @words.wordnet_connection.should be_kind_of Words::TokyoWordnetConnection
83
+ end
84
+
85
+ it "should when given the request for a tokyo mode return an open tokyo connection" do
86
+ @words.connected?.should be_true
87
+ end
88
+
89
+ it "should when in tokyo mode should report itself as to in to_s" do
90
+ @words.to_s.should match /Words running in tokyo mode with dataset at .*/
91
+ end
92
+
93
+ it "should when in tokyo mode should when the connection is closed report itself as closed" do
94
+ @words.close!
95
+ @words.connected?.should be_false
96
+ end
97
+
98
+ it "should when in tokyo mode should when the connection is closed report itself as closed in to_s" do
99
+ @words.close!
100
+ @words.to_s.should match 'Words not connected'
101
+ end
102
+
103
+ it "should when in tokyo mode, when the connection is closed, raise NoWordnetConnection exception if a find is attempted" do
104
+ @words.close!
105
+ lambda { @words.find('test') }.should raise_exception(Words::NoWordnetConnection)
106
+ end
107
+
108
+ it "should when checked report itself as a tokyo connection" do
109
+ @words.connection_type.should equal :tokyo
110
+ end
111
+
112
+ end
113
+