words 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/words.rb +43 -19
- data/words.gemspec +2 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.1
|
1
|
+
0.2.2
|
data/lib/words.rb
CHANGED
@@ -40,7 +40,7 @@ module Words
|
|
40
40
|
end
|
41
41
|
else
|
42
42
|
@connected = false
|
43
|
-
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided '
|
43
|
+
raise "Failed to locate the words #{ @connection_type == :pure ? 'index' : 'dataset' } at #{@data_path}. Please insure you have created it using the words gems provided 'build_wordnet' command."
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
@@ -53,10 +53,10 @@ module Words
|
|
53
53
|
return true
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
56
|
+
def homographs(term)
|
57
57
|
if connection_type == :pure
|
58
|
-
|
59
|
-
{ 'lemma' =>
|
58
|
+
raw_homographs = @connection[term]
|
59
|
+
{ 'lemma' => raw_homographs[0], 'tagsense_counts' => raw_homographs[1], 'synset_ids' => raw_homographs[2]}
|
60
60
|
else
|
61
61
|
@connection[term]
|
62
62
|
end
|
@@ -168,6 +168,7 @@ module Words
|
|
168
168
|
class Synset
|
169
169
|
|
170
170
|
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
171
|
+
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
171
172
|
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
172
173
|
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
173
174
|
{ :lex => :adv_all, :description => "all adverbs" },
|
@@ -214,9 +215,10 @@ module Words
|
|
214
215
|
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
215
216
|
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
216
217
|
|
217
|
-
def initialize(synset_id, wordnet_connection)
|
218
|
+
def initialize(synset_id, wordnet_connection, homographs)
|
218
219
|
@wordnet_connection = wordnet_connection
|
219
220
|
@synset_hash = wordnet_connection.synset(synset_id)
|
221
|
+
@homographs = homographs
|
220
222
|
# construct some conveniance menthods for relation type access
|
221
223
|
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
222
224
|
self.class.send(:define_method, "#{relation_type}s?") do
|
@@ -237,17 +239,22 @@ module Words
|
|
237
239
|
@words
|
238
240
|
end
|
239
241
|
|
242
|
+
def lexical_ids
|
243
|
+
@words = words_with_num.map { |word_with_num| word_with_num[:lexical_id] } unless defined? @words
|
244
|
+
@words
|
245
|
+
end
|
246
|
+
|
240
247
|
def size
|
241
248
|
words.size
|
242
249
|
end
|
243
250
|
|
244
|
-
def
|
245
|
-
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :
|
251
|
+
def words_with_lexical_ids
|
252
|
+
@words_with_num = @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } } unless defined? @words_with_num
|
246
253
|
@words_with_num
|
247
254
|
end
|
248
255
|
|
249
256
|
def lexical_filenum
|
250
|
-
@synset_hash["lexical_filenum"]
|
257
|
+
@synset_hash["lexical_filenum"]
|
251
258
|
end
|
252
259
|
|
253
260
|
def lexical_catagory
|
@@ -259,7 +266,7 @@ module Words
|
|
259
266
|
end
|
260
267
|
|
261
268
|
def lexical
|
262
|
-
NUM_TO_LEX[
|
269
|
+
NUM_TO_LEX[lexical_filenum.to_i]
|
263
270
|
end
|
264
271
|
|
265
272
|
def synset_id
|
@@ -270,6 +277,14 @@ module Words
|
|
270
277
|
@synset_hash["gloss"]
|
271
278
|
end
|
272
279
|
|
280
|
+
def lemma
|
281
|
+
@homographs.lemma
|
282
|
+
end
|
283
|
+
|
284
|
+
def homographs
|
285
|
+
@homographs
|
286
|
+
end
|
287
|
+
|
273
288
|
def inspect
|
274
289
|
@synset_hash.inspect
|
275
290
|
end
|
@@ -293,22 +308,25 @@ module Words
|
|
293
308
|
|
294
309
|
end
|
295
310
|
|
296
|
-
class
|
311
|
+
class Homographs
|
297
312
|
|
298
313
|
POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
|
299
314
|
SYMBOL_TO_POS = POS_TO_SYMBOL.invert
|
300
315
|
|
301
|
-
def initialize(
|
316
|
+
def initialize(raw_homographs, wordnet_connection)
|
302
317
|
@wordnet_connection = wordnet_connection
|
303
|
-
@lemma_hash =
|
318
|
+
@lemma_hash = raw_homographs
|
304
319
|
# construct some conveniance menthods for relation type access
|
305
320
|
SYMBOL_TO_POS.keys.each do |pos|
|
306
321
|
self.class.send(:define_method, "#{pos}s?") do
|
307
|
-
|
322
|
+
size(pos) > 0
|
308
323
|
end
|
309
324
|
self.class.send(:define_method, "#{pos}s") do
|
310
325
|
synsets(pos)
|
311
326
|
end
|
327
|
+
self.class.send(:define_method, "#{pos}_count") do
|
328
|
+
size(pos)
|
329
|
+
end
|
312
330
|
self.class.send(:define_method, "#{pos}_ids") do
|
313
331
|
synset_ids(pos)
|
314
332
|
end
|
@@ -316,12 +334,12 @@ module Words
|
|
316
334
|
end
|
317
335
|
|
318
336
|
def tagsense_counts
|
319
|
-
@tagsense_counts = @
|
337
|
+
@tagsense_counts = @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } } unless defined? @tagsense_counts
|
320
338
|
@tagsense_counts
|
321
339
|
end
|
322
340
|
|
323
341
|
def lemma
|
324
|
-
@lemma = @
|
342
|
+
@lemma = @raw_homographs["lemma"].gsub('_', ' ') unless defined? @lemma
|
325
343
|
@lemma
|
326
344
|
end
|
327
345
|
|
@@ -335,12 +353,16 @@ module Words
|
|
335
353
|
@to_s
|
336
354
|
end
|
337
355
|
|
356
|
+
def size(pos = :all)
|
357
|
+
synset_ids(pos).size
|
358
|
+
end
|
359
|
+
|
338
360
|
def synsets(pos = :all)
|
339
|
-
synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection }
|
361
|
+
synset_ids(pos).map { |synset_id| Synset.new synset_id, self, @wordnet_connection }
|
340
362
|
end
|
341
363
|
|
342
364
|
def synset_ids(pos = :all)
|
343
|
-
@synset_ids = @
|
365
|
+
@synset_ids = @raw_homographs["synset_ids"].split('|') unless defined? @synset_ids
|
344
366
|
case
|
345
367
|
when SYMBOL_TO_POS.include?(pos.to_sym)
|
346
368
|
@synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
|
@@ -352,11 +374,13 @@ module Words
|
|
352
374
|
end
|
353
375
|
|
354
376
|
def inspect
|
355
|
-
@
|
377
|
+
@raw_homographs.inspect
|
356
378
|
end
|
357
379
|
|
358
380
|
alias word lemma
|
359
381
|
alias pos available_pos
|
382
|
+
alias senses synsets
|
383
|
+
alias sense_ids synset_ids
|
360
384
|
|
361
385
|
end
|
362
386
|
|
@@ -369,7 +393,7 @@ module Words
|
|
369
393
|
end
|
370
394
|
|
371
395
|
def find(word)
|
372
|
-
|
396
|
+
Homographs.new @wordnet_connection.homographs(word), @wordnet_connection
|
373
397
|
end
|
374
398
|
|
375
399
|
def connection_type
|
data/words.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{words}
|
8
|
-
s.version = "0.2.1"
|
8
|
+
s.version = "0.2.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Roja Buck"]
|
12
|
-
s.date = %q{2010-01-
|
12
|
+
s.date = %q{2010-01-17}
|
13
13
|
s.default_executable = %q{build_wordnet}
|
14
14
|
s.description = %q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
|
15
15
|
s.email = %q{roja@arbia.co.uk}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Roja Buck
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-17 00:00:00 +00:00
|
13
13
|
default_executable: build_wordnet
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|