words 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/VERSION +1 -1
  2. data/lib/words.rb +43 -19
  3. data/words.gemspec +2 -2
  4. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
data/lib/words.rb CHANGED
@@ -40,7 +40,7 @@ module Words
40
40
  end
41
41
  else
42
42
  @connected = false
43
- raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_dataset.rb' command."
43
+ raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
44
44
  end
45
45
 
46
46
  end
@@ -53,10 +53,10 @@ module Words
53
53
  return true
54
54
  end
55
55
 
56
- def lemma(term)
56
+ def homographs(term)
57
57
  if connection_type == :pure
58
- raw_lemma = @connection[term]
59
- { 'lemma' => raw_lemma[0], 'tagsense_counts' => raw_lemma[1], 'synset_ids' => raw_lemma[2]}
58
+ raw_homographs = @connection[term]
59
+ { 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]}
60
60
  else
61
61
  @connection[term]
62
62
  end
@@ -168,6 +168,7 @@ module Words
168
168
  class Synset
169
169
 
170
170
  SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
171
+ SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
171
172
  NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
172
173
  { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
173
174
  { :lex => :adv_all, :description => "all adverbs" },
@@ -214,9 +215,10 @@ module Words
214
215
  { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
215
216
  { :lex => :adj_ppl, :description => "participial adjectives" } ]
216
217
 
217
- def initialize(synset_id, wordnet_connection)
218
+ def initialize(synset_id, wordnet_connection, homographs)
218
219
  @wordnet_connection = wordnet_connection
219
220
  @synset_hash = wordnet_connection.synset(synset_id)
221
+ @homographs = homographs
220
222
  # construct some conveniance menthods for relation type access
221
223
  Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
222
224
  self.class.send(:define_method, "#{relation_type}s?") do
@@ -237,17 +239,22 @@ module Words
237
239
  @words
238
240
  end
239
241
 
242
+ def lexical_ids
243
+ @words = words_with_num.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
244
+ @words
245
+ end
246
+
240
247
  def size
241
248
  words.size
242
249
  end
243
250
 
244
- def words_with_num
245
- @words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :num => word_parts[1] } } unless defined? @words_with_num
251
+ def words_with_lexical_ids
252
+ @words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
246
253
  @words_with_num
247
254
  end
248
255
 
249
256
  def lexical_filenum
250
- @synset_hash["lexical_filenum"].to_i
257
+ @synset_hash["lexical_filenum"]
251
258
  end
252
259
 
253
260
  def lexical_catagory
@@ -259,7 +266,7 @@ module Words
259
266
  end
260
267
 
261
268
  def lexical
262
- NUM_TO_LEX[@synset_hash["lexical_filenum"].to_i]
269
+ NUM_TO_LEX[lexical_filenum.to_i]
263
270
  end
264
271
 
265
272
  def synset_id
@@ -270,6 +277,14 @@ module Words
270
277
  @synset_hash["gloss"]
271
278
  end
272
279
 
280
+ def lemma
281
+ @homographs.lemma
282
+ end
283
+
284
+ def homographs
285
+ @homographs
286
+ end
287
+
273
288
  def inspect
274
289
  @synset_hash.inspect
275
290
  end
@@ -293,22 +308,25 @@ module Words
293
308
 
294
309
  end
295
310
 
296
- class Lemma
311
+ class Homographs
297
312
 
298
313
  POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
299
314
  SYMBOL_TO_POS = POS_TO_SYMBOL.invert
300
315
 
301
- def initialize(raw_lemma, wordnet_connection)
316
+ def initialize(raw_homographs, wordnet_connection)
302
317
  @wordnet_connection = wordnet_connection
303
- @lemma_hash = raw_lemma
318
+ @lemma_hash = raw_homographs
304
319
  # construct some conveniance menthods for relation type access
305
320
  SYMBOL_TO_POS.keys.each do |pos|
306
321
  self.class.send(:define_method, "#{pos}s?") do
307
- synsets(pos).size > 0
322
+ size(pos) > 0
308
323
  end
309
324
  self.class.send(:define_method, "#{pos}s") do
310
325
  synsets(pos)
311
326
  end
327
+ self.class.send(:define_method, "#{pos}_count") do
328
+ size(pos)
329
+ end
312
330
  self.class.send(:define_method, "#{pos}_ids") do
313
331
  synset_ids(pos)
314
332
  end
@@ -316,12 +334,12 @@ module Words
316
334
  end
317
335
 
318
336
  def tagsense_counts
319
- @tagsense_counts = @lemma_hash["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
337
+ @tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
320
338
  @tagsense_counts
321
339
  end
322
340
 
323
341
  def lemma
324
- @lemma = @lemma_hash["lemma"].gsub('_', ' ') unless defined? @lemma
342
+ @lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
325
343
  @lemma
326
344
  end
327
345
 
@@ -335,12 +353,16 @@ module Words
335
353
  @to_s
336
354
  end
337
355
 
356
+ def size(pos = :all)
357
+ synset_ids(pos).size
358
+ end
359
+
338
360
  def synsets(pos = :all)
339
- synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection }
361
+ synset_ids(pos).map { |synset_id| Synset.new synset_id, self, @wordnet_connection }
340
362
  end
341
363
 
342
364
  def synset_ids(pos = :all)
343
- @synset_ids = @lemma_hash["synset_ids"].split('|') unless defined? @synset_ids
365
+ @synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
344
366
  case
345
367
  when SYMBOL_TO_POS.include?(pos.to_sym)
346
368
  @synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
@@ -352,11 +374,13 @@ module Words
352
374
  end
353
375
 
354
376
  def inspect
355
- @lemma_hash.inspect
377
+ @raw_homographs.inspect
356
378
  end
357
379
 
358
380
  alias word lemma
359
381
  alias pos available_pos
382
+ alias senses synsets
383
+ alias sense_ids synset_ids
360
384
 
361
385
  end
362
386
 
@@ -369,7 +393,7 @@ module Words
369
393
  end
370
394
 
371
395
  def find(word)
372
- Lemma.new @wordnet_connection.lemma(word), @wordnet_connection
396
+ Homographs.new @wordnet_connection.homographs(word), @wordnet_connection
373
397
  end
374
398
 
375
399
  def connection_type
data/words.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{words}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roja Buck"]
12
- s.date = %q{2010-01-16}
12
+ s.date = %q{2010-01-17}
13
13
  s.default_executable = %q{build_wordnet}
14
14
  s.description = %q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
15
15
  s.email = %q{roja@arbia.co.uk}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: words
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roja Buck
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-16 00:00:00 +00:00
12
+ date: 2010-01-17 00:00:00 +00:00
13
13
  default_executable: build_wordnet
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency