words 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4):
  1. data/VERSION +1 -1
  2. data/lib/words.rb +43 -19
  3. data/words.gemspec +2 -2
  4. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/lib/words.rb CHANGED
@@ -40,7 +40,7 @@ module Words
40
40
  end
41
41
  else
42
42
  @connected = false
43
- raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_dataset.rb' command."
43
+ raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
44
44
  end
45
45
 
46
46
  end
@@ -53,10 +53,10 @@ module Words
53
53
  return true
54
54
  end
55
55
 
56
- def lemma(term)
56
+ def homographs(term)
57
57
  if connection_type == :pure
58
- raw_lemma = @connection[term]
59
- { 'lemma' => raw_lemma[0], 'tagsense_counts' => raw_lemma[1], 'synset_ids' => raw_lemma[2]}
58
+ raw_homographs = @connection[term]
59
+ { 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]}
60
60
  else
61
61
  @connection[term]
62
62
  end
@@ -168,6 +168,7 @@ module Words
168
168
  class Synset
169
169
 
170
170
  SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
171
+ SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
171
172
  NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
172
173
  { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
173
174
  { :lex => :adv_all, :description => "all adverbs" },
@@ -214,9 +215,10 @@ module Words
214
215
  { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
215
216
  { :lex => :adj_ppl, :description => "participial adjectives" } ]
216
217
 
217
- def initialize(synset_id, wordnet_connection)
218
+ def initialize(synset_id, wordnet_connection, homographs)
218
219
  @wordnet_connection = wordnet_connection
219
220
  @synset_hash = wordnet_connection.synset(synset_id)
221
+ @homographs = homographs
220
222
  # construct some conveniance menthods for relation type access
221
223
  Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
222
224
  self.class.send(:define_method, "#{relation_type}s?") do
@@ -237,17 +239,22 @@ module Words
237
239
  @words
238
240
  end
239
241
 
242
+ def lexical_ids
243
+ @words = words_with_num.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
244
+ @words
245
+ end
246
+
240
247
  def size
241
248
  words.size
242
249
  end
243
250
 
244
- def words_with_num
245
- @words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :num => word_parts[1] } } unless defined? @words_with_num
251
+ def words_with_lexical_ids
252
+ @words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
246
253
  @words_with_num
247
254
  end
248
255
 
249
256
  def lexical_filenum
250
- @synset_hash["lexical_filenum"].to_i
257
+ @synset_hash["lexical_filenum"]
251
258
  end
252
259
 
253
260
  def lexical_catagory
@@ -259,7 +266,7 @@ module Words
259
266
  end
260
267
 
261
268
  def lexical
262
- NUM_TO_LEX[@synset_hash["lexical_filenum"].to_i]
269
+ NUM_TO_LEX[lexical_filenum.to_i]
263
270
  end
264
271
 
265
272
  def synset_id
@@ -270,6 +277,14 @@ module Words
270
277
  @synset_hash["gloss"]
271
278
  end
272
279
 
280
+ def lemma
281
+ @homographs.lemma
282
+ end
283
+
284
+ def homographs
285
+ @homographs
286
+ end
287
+
273
288
  def inspect
274
289
  @synset_hash.inspect
275
290
  end
@@ -293,22 +308,25 @@ module Words
293
308
 
294
309
  end
295
310
 
296
- class Lemma
311
+ class Homographs
297
312
 
298
313
  POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
299
314
  SYMBOL_TO_POS = POS_TO_SYMBOL.invert
300
315
 
301
- def initialize(raw_lemma, wordnet_connection)
316
+ def initialize(raw_homographs, wordnet_connection)
302
317
  @wordnet_connection = wordnet_connection
303
- @lemma_hash = raw_lemma
318
+ @lemma_hash = raw_homographs
304
319
  # construct some conveniance menthods for relation type access
305
320
  SYMBOL_TO_POS.keys.each do |pos|
306
321
  self.class.send(:define_method, "#{pos}s?") do
307
- synsets(pos).size > 0
322
+ size(pos) > 0
308
323
  end
309
324
  self.class.send(:define_method, "#{pos}s") do
310
325
  synsets(pos)
311
326
  end
327
+ self.class.send(:define_method, "#{pos}_count") do
328
+ size(pos)
329
+ end
312
330
  self.class.send(:define_method, "#{pos}_ids") do
313
331
  synset_ids(pos)
314
332
  end
@@ -316,12 +334,12 @@ module Words
316
334
  end
317
335
 
318
336
  def tagsense_counts
319
- @tagsense_counts = @lemma_hash["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
337
+ @tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
320
338
  @tagsense_counts
321
339
  end
322
340
 
323
341
  def lemma
324
- @lemma = @lemma_hash["lemma"].gsub('_', ' ') unless defined? @lemma
342
+ @lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
325
343
  @lemma
326
344
  end
327
345
 
@@ -335,12 +353,16 @@ module Words
335
353
  @to_s
336
354
  end
337
355
 
356
+ def size(pos = :all)
357
+ synset_ids(pos).size
358
+ end
359
+
338
360
  def synsets(pos = :all)
339
- synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection }
361
+ synset_ids(pos).map { |synset_id| Synset.new synset_id, self, @wordnet_connection }
340
362
  end
341
363
 
342
364
  def synset_ids(pos = :all)
343
- @synset_ids = @lemma_hash["synset_ids"].split('|') unless defined? @synset_ids
365
+ @synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
344
366
  case
345
367
  when SYMBOL_TO_POS.include?(pos.to_sym)
346
368
  @synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
@@ -352,11 +374,13 @@ module Words
352
374
  end
353
375
 
354
376
  def inspect
355
- @lemma_hash.inspect
377
+ @raw_homographs.inspect
356
378
  end
357
379
 
358
380
  alias word lemma
359
381
  alias pos available_pos
382
+ alias senses synsets
383
+ alias sense_ids synset_ids
360
384
 
361
385
  end
362
386
 
@@ -369,7 +393,7 @@ module Words
369
393
  end
370
394
 
371
395
  def find(word)
372
- Lemma.new @wordnet_connection.lemma(word), @wordnet_connection
396
+ Homographs.new @wordnet_connection.homographs(word), @wordnet_connection
373
397
  end
374
398
 
375
399
  def connection_type
data/words.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{words}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roja Buck"]
12
- s.date = %q{2010-01-16}
12
+ s.date = %q{2010-01-17}
13
13
  s.default_executable = %q{build_wordnet}
14
14
  s.description = %q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
15
15
  s.email = %q{roja@arbia.co.uk}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: words
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roja Buck
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-16 00:00:00 +00:00
12
+ date: 2010-01-17 00:00:00 +00:00
13
13
  default_executable: build_wordnet
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency