words 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/words.rb +43 -19
- data/words.gemspec +2 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.2
|
data/lib/words.rb
CHANGED
@@ -40,7 +40,7 @@ module Words
|
|
40
40
|
end
|
41
41
|
else
|
42
42
|
@connected = false
|
43
|
-
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided '
|
43
|
+
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
@@ -53,10 +53,10 @@ module Words
|
|
53
53
|
return true
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
56
|
+
def homographs(term)
|
57
57
|
if connection_type == :pure
|
58
|
-
|
59
|
-
{ 'lemma' =>
|
58
|
+
raw_homographs = @connection[term]
|
59
|
+
{ 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]}
|
60
60
|
else
|
61
61
|
@connection[term]
|
62
62
|
end
|
@@ -168,6 +168,7 @@ module Words
|
|
168
168
|
class Synset
|
169
169
|
|
170
170
|
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
171
|
+
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
171
172
|
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
172
173
|
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
173
174
|
{ :lex => :adv_all, :description => "all adverbs" },
|
@@ -214,9 +215,10 @@ module Words
|
|
214
215
|
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
215
216
|
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
216
217
|
|
217
|
-
def initialize(synset_id, wordnet_connection)
|
218
|
+
def initialize(synset_id, wordnet_connection, homographs)
|
218
219
|
@wordnet_connection = wordnet_connection
|
219
220
|
@synset_hash = wordnet_connection.synset(synset_id)
|
221
|
+
@homographs = homographs
|
220
222
|
# construct some conveniance menthods for relation type access
|
221
223
|
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
222
224
|
self.class.send(:define_method, "#{relation_type}s?") do
|
@@ -237,17 +239,22 @@ module Words
|
|
237
239
|
@words
|
238
240
|
end
|
239
241
|
|
242
|
+
def lexical_ids
|
243
|
+
@words = words_with_num.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
|
244
|
+
@words
|
245
|
+
end
|
246
|
+
|
240
247
|
def size
|
241
248
|
words.size
|
242
249
|
end
|
243
250
|
|
244
|
-
def
|
245
|
-
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :
|
251
|
+
def words_with_lexical_ids
|
252
|
+
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
|
246
253
|
@words_with_num
|
247
254
|
end
|
248
255
|
|
249
256
|
def lexical_filenum
|
250
|
-
@synset_hash["lexical_filenum"]
|
257
|
+
@synset_hash["lexical_filenum"]
|
251
258
|
end
|
252
259
|
|
253
260
|
def lexical_catagory
|
@@ -259,7 +266,7 @@ module Words
|
|
259
266
|
end
|
260
267
|
|
261
268
|
def lexical
|
262
|
-
NUM_TO_LEX[
|
269
|
+
NUM_TO_LEX[lexical_filenum.to_i]
|
263
270
|
end
|
264
271
|
|
265
272
|
def synset_id
|
@@ -270,6 +277,14 @@ module Words
|
|
270
277
|
@synset_hash["gloss"]
|
271
278
|
end
|
272
279
|
|
280
|
+
def lemma
|
281
|
+
@homographs.lemma
|
282
|
+
end
|
283
|
+
|
284
|
+
def homographs
|
285
|
+
@homographs
|
286
|
+
end
|
287
|
+
|
273
288
|
def inspect
|
274
289
|
@synset_hash.inspect
|
275
290
|
end
|
@@ -293,22 +308,25 @@ module Words
|
|
293
308
|
|
294
309
|
end
|
295
310
|
|
296
|
-
class
|
311
|
+
class Homographs
|
297
312
|
|
298
313
|
POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
|
299
314
|
SYMBOL_TO_POS = POS_TO_SYMBOL.invert
|
300
315
|
|
301
|
-
def initialize(
|
316
|
+
def initialize(raw_homographs, wordnet_connection)
|
302
317
|
@wordnet_connection = wordnet_connection
|
303
|
-
@lemma_hash =
|
318
|
+
@lemma_hash = raw_homographs
|
304
319
|
# construct some conveniance menthods for relation type access
|
305
320
|
SYMBOL_TO_POS.keys.each do |pos|
|
306
321
|
self.class.send(:define_method, "#{pos}s?") do
|
307
|
-
|
322
|
+
size(pos) > 0
|
308
323
|
end
|
309
324
|
self.class.send(:define_method, "#{pos}s") do
|
310
325
|
synsets(pos)
|
311
326
|
end
|
327
|
+
self.class.send(:define_method, "#{pos}_count") do
|
328
|
+
size(pos)
|
329
|
+
end
|
312
330
|
self.class.send(:define_method, "#{pos}_ids") do
|
313
331
|
synset_ids(pos)
|
314
332
|
end
|
@@ -316,12 +334,12 @@ module Words
|
|
316
334
|
end
|
317
335
|
|
318
336
|
def tagsense_counts
|
319
|
-
@tagsense_counts = @
|
337
|
+
@tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
|
320
338
|
@tagsense_counts
|
321
339
|
end
|
322
340
|
|
323
341
|
def lemma
|
324
|
-
@lemma = @
|
342
|
+
@lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
|
325
343
|
@lemma
|
326
344
|
end
|
327
345
|
|
@@ -335,12 +353,16 @@ module Words
|
|
335
353
|
@to_s
|
336
354
|
end
|
337
355
|
|
356
|
+
def size(pos = :all)
|
357
|
+
synset_ids(pos).size
|
358
|
+
end
|
359
|
+
|
338
360
|
def synsets(pos = :all)
|
339
|
-
synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection }
|
361
|
+
synset_ids(pos).map { |synset_id| Synset.new synset_id, self, @wordnet_connection }
|
340
362
|
end
|
341
363
|
|
342
364
|
def synset_ids(pos = :all)
|
343
|
-
@synset_ids = @
|
365
|
+
@synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
|
344
366
|
case
|
345
367
|
when SYMBOL_TO_POS.include?(pos.to_sym)
|
346
368
|
@synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
|
@@ -352,11 +374,13 @@ module Words
|
|
352
374
|
end
|
353
375
|
|
354
376
|
def inspect
|
355
|
-
@
|
377
|
+
@raw_homographs.inspect
|
356
378
|
end
|
357
379
|
|
358
380
|
alias word lemma
|
359
381
|
alias pos available_pos
|
382
|
+
alias senses synsets
|
383
|
+
alias sense_ids synset_ids
|
360
384
|
|
361
385
|
end
|
362
386
|
|
@@ -369,7 +393,7 @@ module Words
|
|
369
393
|
end
|
370
394
|
|
371
395
|
def find(word)
|
372
|
-
|
396
|
+
Homographs.new @wordnet_connection.homographs(word), @wordnet_connection
|
373
397
|
end
|
374
398
|
|
375
399
|
def connection_type
|
data/words.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{words}
|
8
|
-
s.version = "0.2.
|
8
|
+
s.version = "0.2.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roja Buck"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-17}
|
13
13
|
s.default_executable = %q{build_wordnet}
|
14
14
|
s.description = %q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
|
15
15
|
s.email = %q{roja@arbia.co.uk}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roja Buck
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-17 00:00:00 +00:00
|
13
13
|
default_executable: build_wordnet
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|