wordnet 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +720 -0
- data/LICENSE +27 -0
- data/README +93 -0
- data/Rakefile +291 -0
- data/Rakefile.local +46 -0
- data/convertdb.rb +417 -0
- data/examples/addLacedBoots.rb +27 -0
- data/examples/clothesWithCollars.rb +36 -0
- data/examples/clothesWithTongues.rb +28 -0
- data/examples/distance.rb +37 -0
- data/examples/domainTree.rb +27 -0
- data/examples/gcs.rb +54 -0
- data/examples/holonymTree.rb +27 -0
- data/examples/hypernymTree.rb +28 -0
- data/examples/hyponymTree.rb +28 -0
- data/examples/memberTree.rb +27 -0
- data/examples/meronymTree.rb +29 -0
- data/lib/wordnet.rb +87 -0
- data/lib/wordnet/constants.rb +301 -0
- data/lib/wordnet/lexicon.rb +430 -0
- data/lib/wordnet/synset.rb +908 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +384 -0
- data/rake/manual.rb +755 -0
- data/rake/packaging.rb +112 -0
- data/rake/publishing.rb +303 -0
- data/rake/rdoc.rb +35 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +469 -0
- data/rake/testing.rb +192 -0
- data/rake/verifytask.rb +64 -0
- data/spec/lib/helpers.rb +155 -0
- data/spec/wordnet/lexicon_spec.rb +248 -0
- data/spec/wordnet/synset_spec.rb +288 -0
- data/utils.rb +838 -0
- metadata +216 -0
@@ -0,0 +1,430 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# WordNet Lexicon object class
|
4
|
+
#
|
5
|
+
# == Synopsis
|
6
|
+
#
|
7
|
+
# lexicon = WordNet::Lexicon.new( dictpath )
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# Instances of this class abstract access to the various databases of the
|
12
|
+
# WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
|
13
|
+
#
|
14
|
+
# == Author
|
15
|
+
#
|
16
|
+
# Michael Granger <ged@FaerieMUD.org>
|
17
|
+
#
|
18
|
+
# Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
|
19
|
+
#
|
20
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
21
|
+
# software under the terms of the Perl Artistic License. (See
|
22
|
+
# http://language.perl.com/misc/Artistic.html)
|
23
|
+
#
|
24
|
+
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
25
|
+
# by Dan Brian.
|
26
|
+
#
|
27
|
+
# == Version
|
28
|
+
#
|
29
|
+
# $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
30
|
+
#
|
31
|
+
|
32
|
+
require 'rbconfig'
|
33
|
+
require 'pathname'
|
34
|
+
require 'bdb'
|
35
|
+
require 'sync'
|
36
|
+
|
37
|
+
require 'wordnet/constants'
|
38
|
+
require 'wordnet/synset'
|
39
|
+
|
40
|
+
### Lexicon exception - something has gone wrong in the internals of the
|
41
|
+
### lexicon.
|
42
|
+
class WordNet::LexiconError < StandardError ; end
|
43
|
+
|
44
|
+
### Lookup error - the object being looked up either doesn't exist or is
|
45
|
+
### malformed
|
46
|
+
class WordNet::LookupError < StandardError ; end
|
47
|
+
|
48
|
+
### WordNet lexicon class - abstracts access to the WordNet lexical
|
49
|
+
### databases, and provides factory methods for looking up and creating new
|
50
|
+
### WordNet::Synset objects.
|
51
|
+
class WordNet::Lexicon
|
52
|
+
include WordNet::Constants
|
53
|
+
include CrossCase if defined?( CrossCase )
|
54
|
+
|
55
|
+
# Subversion Id
|
56
|
+
SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
57
|
+
|
58
|
+
# Subversion revision
|
59
|
+
SvnRev = %q$Rev: 93 $
|
60
|
+
|
61
|
+
|
62
|
+
#############################################################
|
63
|
+
### B E R K E L E Y D B C O N F I G U R A T I O N
|
64
|
+
#############################################################
|
65
|
+
|
66
|
+
# The default path to the WordNet BerkeleyDB Env: a "ruby-wordnet"
# directory under the 'datadir' Ruby was configured with. RbConfig is used
# because the bare Config alias is deprecated and absent from newer Rubies.
DEFAULT_DB_ENV = File.join( RbConfig::CONFIG['datadir'], "ruby-wordnet" )
|
69
|
+
|
70
|
+
# Options for the creation of the Env object
|
71
|
+
ENV_OPTIONS = {
|
72
|
+
:set_timeout => 50,
|
73
|
+
:set_lk_detect => 1,
|
74
|
+
:set_verbose => false,
|
75
|
+
:set_lk_max => 3000,
|
76
|
+
}
|
77
|
+
|
78
|
+
# Flags for the creation of the Env object (read-write and read-only)
|
79
|
+
ENV_FLAGS_RW = BDB::CREATE|BDB::INIT_TRANSACTION|BDB::RECOVER|BDB::INIT_MPOOL
|
80
|
+
ENV_FLAGS_RO = BDB::INIT_MPOOL
|
81
|
+
|
82
|
+
|
83
|
+
#############################################################
|
84
|
+
### I N S T A N C E M E T H O D S
|
85
|
+
#############################################################
|
86
|
+
|
87
|
+
### Create a new WordNet::Lexicon object that will read its data from
### the given +dbenv+ (a BerkeleyDB env directory). The database will be
### opened with the specified +mode+, which can either be a numeric
### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
### Any StandardError raised while opening the environment or one of its
### three tables ("index", "data", "morph") is re-raised as the same class
### with a message naming +dbenv+.
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
  @mode = normalize_mode( mode )

  # Format the normalized numeric mode; the raw +mode+ argument may be a
  # Symbol, which "%04o" cannot format.
  debug_msg "Mode is: %04o" % [ @mode ]

  if readonly?
    debug_msg "Using readonly flags"
    envflags = ENV_FLAGS_RO
    dbflags = 0
  else
    debug_msg "Using read/write flags"
    envflags = ENV_FLAGS_RW
    dbflags = BDB::CREATE
  end

  debug_msg "Env flags are: %0s, dbflags are %0s" %
    [ envflags.to_s(2), dbflags.to_s(2) ]

  begin
    @env = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
    @index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
    @data_db = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
    @morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
  rescue StandardError => err
    # Interpolate rather than format, so a '%' in the path can't be
    # mistaken for a format directive.
    msg = "Error while opening Ruby-WordNet data files: #{dbenv}: #{err.message}"
    raise err, msg, err.backtrace
  end
end
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
######
|
126
|
+
public
|
127
|
+
######
|
128
|
+
|
129
|
+
# The BDB::Env object which contains the wordnet lexicon's databases.
|
130
|
+
attr_reader :env
|
131
|
+
|
132
|
+
# The handle to the index table
|
133
|
+
attr_reader :index_db
|
134
|
+
|
135
|
+
# The handle to the synset data table
|
136
|
+
attr_reader :data_db
|
137
|
+
|
138
|
+
# The handle to the morph table
|
139
|
+
attr_reader :morph_db
|
140
|
+
|
141
|
+
|
142
|
+
### Returns +true+ if the lexicon was opened in read-only mode, i.e., the
### 0200 bit is not set in the lexicon's mode.
def readonly?
  write_bit = @mode & 0200
  write_bit.zero?
end
|
146
|
+
|
147
|
+
|
148
|
+
### Returns +true+ if the lexicon was opened in read-write mode; always
### the negation of #readonly?.
def readwrite?
  return false if readonly?
  true
end
|
152
|
+
|
153
|
+
|
154
|
+
### Close the lexicon's database environment. Does nothing (and returns
### +nil+) if no environment is set.
def close
  return unless @env
  @env.close
end
|
158
|
+
|
159
|
+
|
160
|
+
### Checkpoint the database. (BerkeleyDB-specific)
### The +bytes+ and +minutes+ thresholds are handed straight to the
### environment's #checkpoint; previously they were accepted but ignored.
### NOTE(review): BerkeleyDB documents the first threshold in kilobytes --
### confirm the unit callers expect.
def checkpoint( bytes=0, minutes=0 )
  @env.checkpoint( bytes, minutes )
end
|
164
|
+
|
165
|
+
|
166
|
+
### Remove any archival logfiles for the lexicon's database
### environment. (BerkeleyDB-specific). Does nothing unless the lexicon
### is read-write. Each file reported by #archlogs is made writable and
### then deleted.
def clean_logs
  if readwrite?
    archlogs.each do |stale_log|
      File.chmod( 0777, stale_log )
      File.unlink( stale_log )
    end
  end
end
|
175
|
+
|
176
|
+
|
177
|
+
### Returns the familiarity/polysemy count for +word+ as a
### +part_of_speech+: the number of offsets in the word's index entry,
### or +nil+ if the index has no such entry. The +polyCount+ argument is
### accepted but not used.
def familiarity( word, part_of_speech, polyCount=nil )
  wordkey = make_word_key( word, part_of_speech )
  if @index_db.key?( wordkey )
    senses = @index_db[ wordkey ].split( WordNet::SUB_DELIM_RE )
    senses.length
  end
end
|
185
|
+
|
186
|
+
|
187
|
+
### Look up synsets (WordNet::Synset objects) matching +word+ as a
### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
### +sense+, #lookup_synsets will return an Array of all matches that are
### a +part_of_speech+. If +sense+ (counted from 1) is specified, only the
### synset object that matches that particular +part_of_speech+ and
### +sense+ is returned. Returns +nil+ if the word isn't in the index,
### even after morphological conversion. Raises a WordNet::LookupError if
### +sense+ is not between 1 and the number of senses.
def lookup_synsets( word, part_of_speech, sense=nil )
  pos = make_pos( part_of_speech )

  # Look up the index entry, trying first the word as given, and if
  # that fails, trying morphological conversion.
  entry = @index_db[ make_word_key(word, part_of_speech) ]
  if entry.nil? && (base_form = morph( word, part_of_speech ))
    entry = @index_db[ make_word_key(base_form, part_of_speech) ]
  end

  # If the lookup failed both ways, just abort
  return nil unless entry

  synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }
  return [ lookup_synsets_by_key(*synkeys) ].flatten unless sense

  # A sense of 0 or below would silently index from the end of the list
  # instead of failing, so reject anything outside 1..count.
  unless sense.between?( 1, synkeys.length )
    raise WordNet::LookupError, "No sense %d for '%s' (it has %d)" %
      [ sense, word, synkeys.length ]
  end

  # Normalize through an Array so exactly one synset is returned whether
  # the by-key lookup hands back a bare object or a one-element Array.
  [ lookup_synsets_by_key(synkeys[sense - 1]) ].flatten.first
end
|
219
|
+
|
220
|
+
|
221
|
+
### Returns the WordNet::Synset objects corresponding to the +keys+
### specified. The +keys+ are made up of the target synset's "offset"
### and syntactic category catenated together with a '%' character.
### Raises a WordNet::LookupError for the first key that isn't in the
### data table.
### NOTE(review): the result is returned through a splat, so its shape for
### a single key may differ between Ruby versions -- confirm against the
### interpreter in use.
def lookup_synsets_by_key( *keys )
  found = keys.collect do |key|
    unless @data_db.key?( key )
      raise WordNet::LookupError, "Failed lookup of synset '#{key}':"\
        "No such synset"
    end

    record = @data_db[ key ]
    offset, part_of_speech = key.split( /%/, 2 )
    WordNet::Synset::new( self, offset, part_of_speech, nil, record )
  end

  return *found
end
|
238
|
+
alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
|
239
|
+
|
240
|
+
|
241
|
+
### Returns a form of +word+ as a part of speech +part_of_speech+, as
### found in the WordNet morph files. The #lookup_synsets method performs
### morphological conversion automatically, so a call to #morph is not
### required.
def morph( word, part_of_speech )
  morph_key = make_word_key( word, part_of_speech )
  @morph_db[ morph_key ]
end
|
248
|
+
|
249
|
+
|
250
|
+
### Returns the result of looking up +word+ in the inverse of the WordNet
### morph files. _(This is undocumented in Lingua::Wordnet)_
def reverse_morph( word )
  inverted = @morph_db.invert
  inverted[ word ]
end
|
255
|
+
|
256
|
+
|
257
|
+
### Returns an Array of the index keys that begin with +text+ (an empty
### Array if +text+ is empty or nothing matches). The +text+ is matched
### literally, not as a regular expression.
def grep( text )
  return [] if text.empty?

  words = []

  # Grab a cursor into the database and fetch while the key starts with
  # the target text
  cursor = @index_db.cursor
  begin
    rec = cursor.set_range( text )

    # Guard against a nil record so that a match running up to the last
    # key in the table ends the loop instead of raising.
    while rec && rec[0][ 0, text.length ] == text
      words.push rec[0]
      rec = cursor.next
    end
  ensure
    # Release the cursor even if fetching fails
    cursor.close
  end

  return words
end
|
275
|
+
|
276
|
+
|
277
|
+
### Factory method: Creates and returns a new WordNet::Synset object in
### this lexicon for the specified +word+ and +part_of_speech+. The new
### synset is constructed with an empty offset.
def create_synset( word, part_of_speech )
  blank_offset = ''
  WordNet::Synset::new( self, blank_offset, part_of_speech, word )
end
|
282
|
+
alias_method :new_synset, :create_synset
|
283
|
+
|
284
|
+
|
285
|
+
### Store the specified +synset+ (a WordNet::Synset object) in the
### lexicon. Returns the offset of the stored synset. A synset whose
### offset is still 1 is treated as new and is given the next value of the
### data table's 'offsetcount' counter first.
def store_synset( synset )

  # Start a transaction
  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn, datadb|

    # If this is a new synset, generate an offset for it
    if synset.offset == 1
      next_offset = datadb['offsetcount'].to_i + 1
      datadb['offsetcount'] = next_offset.to_s
      synset.offset = next_offset
    end

    # Write the data entry
    datadb[ synset.key ] = synset.serialize

    # Index entries are strings of offsets joined by SUB_DELIM, so work
    # with the offset's String form throughout.
    offset = synset.offset.to_s

    # Write the index entries
    txn.begin( BDB::TXN_COMMIT, @index_db ) do |_subtxn, indexdb|
      synset.words.each do |word|

        # Build the key the same way lookups do, so multi-word entries
        # are found again.
        wordkey = make_word_key( word, synset.pos )
        existing = indexdb[ wordkey ]

        if existing.nil?
          indexdb[ wordkey ] = offset
        else
          # Compare whole offsets (not substrings) and write the new
          # list back; appending to the fetched copy stores nothing.
          offsets = existing.split( SUB_DELIM_RE )
          unless offsets.include?( offset )
            indexdb[ wordkey ] = ( offsets << offset ).join( SUB_DELIM )
          end
        end
      end
    end # transaction on @index_db
  end # transaction on @data_db

  return synset.offset
end
|
323
|
+
|
324
|
+
|
325
|
+
### Remove the specified +synset+ (a WordNet::Synset object) from the
### lexicon. Returns +true+ once the synset's index and data entries have
### been removed, or +nil+ if the synset was never stored (its offset is
### still 1).
def remove_synset( synset )
  # If it's not in the database (ie., doesn't have a real offset),
  # just return.
  return nil if synset.offset == 1

  # Index entries hold offsets as strings, so compare in String form
  target = synset.offset.to_s

  # Start a transaction on the data table
  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn, datadb|

    # First remove the index entries for this synset by iterating
    # over each of its words
    txn.begin( BDB::TXN_COMMIT, @index_db ) do |_subtxn, indexdb|
      synset.words.each do |word|
        wordkey = make_word_key( word, synset.pos )
        entry = indexdb[ wordkey ]
        next unless entry

        # Splice this synset's offset out of the entry, or delete the
        # whole entry if it was the only one. All writes go through
        # the transactional handle.
        remaining = entry.split( SUB_DELIM_RE ).reject {|off| off == target }
        if remaining.empty?
          indexdb.delete( wordkey )
        else
          indexdb[ wordkey ] = remaining.join( SUB_DELIM )
        end
      end
    end

    # :TODO: Delete synset from pointers of related synsets

    # Delete the synset from the main db, under the same key
    # #store_synset wrote it with.
    datadb.delete( synset.key )
  end

  return true
end
|
366
|
+
|
367
|
+
|
368
|
+
#########
|
369
|
+
protected
|
370
|
+
#########
|
371
|
+
|
372
|
+
### Normalize various ways of specifying a part of speech into the
### WordNet part of speech indicator from the +original+ representation,
### which may be the name (e.g., "noun"); +nil+, in which case it
### defaults to the indicator for a noun; or the indicator character
### itself, in which case it is returned unmodified. Anything else
### yields +nil+.
def make_pos( original )
  return WordNet::Noun if original.nil?

  category = original.to_s.intern
  if WordNet::SYNTACTIC_CATEGORIES.key?( category )
    WordNet::SYNTACTIC_CATEGORIES[ category ]
  elsif SYNTACTIC_SYMBOLS.key?( original )
    original
  end
end
|
385
|
+
|
386
|
+
|
387
|
+
### Make a lexicon key out of the given +word+ and part of speech
### (+pos+): the word with each run of whitespace replaced by an
### underscore, a '%', and the normalized part-of-speech indicator.
def make_word_key( word, pos )
  indicator = make_pos( pos )
  underscored = word.gsub( /\s+/, '_' )
  [ underscored, indicator ].join( '%' )
end
|
394
|
+
|
395
|
+
|
396
|
+
### Return a list of archival logfiles that can be removed
### safely. (BerkeleyDB-specific). The list is whatever the environment's
### #log_archive returns for the BDB::ARCH_ABS flag.
def archlogs
  flag = BDB::ARCH_ABS
  @env.log_archive( flag )
end
|
401
|
+
|
402
|
+
|
403
|
+
#######
|
404
|
+
private
|
405
|
+
#######
|
406
|
+
|
407
|
+
### Turn the given +origmode+ into an octal file mode such as that
### given to File.open. :readonly becomes 0444 and :readwrite/:writable
### become 0666, each masked by the process umask; an Integer is returned
### unchanged. Anything else raises an ArgumentError.
def normalize_mode( origmode )
  case origmode
  when :readonly
    0444 & ~File.umask
  when :readwrite, :writable
    0666 & ~File.umask
  when Integer
    # Integer rather than Fixnum: the Fixnum constant no longer exists
    # in current Ruby, and Integer matches the same values.
    origmode
  else
    raise ArgumentError, "unrecognized mode %p" % [origmode]
  end
end
|
421
|
+
|
422
|
+
### Output the given +msg+ to STDERR if $DEBUG is turned on. Each
### argument is written on its own line.
def debug_msg( *msg )
  return unless $DEBUG

  # $stderr rather than $deferr: the latter is not defined in current Ruby
  $stderr.puts msg
end
|
427
|
+
|
428
|
+
|
429
|
+
end # class WordNet::Lexicon
|
430
|
+
|
@@ -0,0 +1,908 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# WordNet synonym-set object class
|
4
|
+
#
|
5
|
+
# == Synopsis
|
6
|
+
#
|
7
|
+
# ss = lexicon.lookup_synsets( "word", WordNet::Noun, 1 )
|
8
|
+
# puts "Definition: %s" % ss.gloss
|
9
|
+
# coords = ss.coordinates
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
#
|
13
|
+
# Instances of this class encapsulate the data for a synonym set ('synset') in a
|
14
|
+
# Wordnet lexical database. A synonym set is a set of words that are
|
15
|
+
# interchangeable in some context.
|
16
|
+
#
|
17
|
+
# == Author
|
18
|
+
#
|
19
|
+
# Michael Granger <ged@FaerieMUD.org>
|
20
|
+
#
|
21
|
+
# Copyright (c) 2002-2008 The FaerieMUD Consortium. All rights reserved.
|
22
|
+
#
|
23
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
24
|
+
# software under the terms of the Perl Artistic License. (See
|
25
|
+
# http://language.perl.com/misc/Artistic.html)
|
26
|
+
#
|
27
|
+
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
28
|
+
# by Dan Brian.
|
29
|
+
#
|
30
|
+
# == Version
|
31
|
+
#
|
32
|
+
# $Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
|
33
|
+
#
|
34
|
+
|
35
|
+
require 'sync'
|
36
|
+
require 'wordnet/constants'
|
37
|
+
|
38
|
+
module WordNet
|
39
|
+
|
40
|
+
### Synset internal error class
|
41
|
+
class SynsetError < StandardError ; end
|
42
|
+
|
43
|
+
### "Synonym set" class - encapsulates the data for a set of words in the
|
44
|
+
### lexical database that are interchangeable in some context, and provides
|
45
|
+
### methods for accessing its relationships.
|
46
|
+
class Synset
|
47
|
+
include WordNet::Constants
|
48
|
+
include CrossCase if defined?( CrossCase )
|
49
|
+
|
50
|
+
# Subversion ID
|
51
|
+
SVNId = %q$Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
|
52
|
+
|
53
|
+
# Subversion Rev
|
54
|
+
SVNRev = %q$Rev: 90 $
|
55
|
+
|
56
|
+
# The "pointer" type that encapsulates relationships between one synset
|
57
|
+
# and another.
|
58
|
+
class Pointer
|
59
|
+
include WordNet::Constants
|
60
|
+
include CrossCase if defined?( CrossCase )
|
61
|
+
|
62
|
+
#########################################################
|
63
|
+
### C L A S S M E T H O D S
|
64
|
+
#########################################################
|
65
|
+
|
66
|
+
### Make a WordNet::Synset::Pointer object out of the given
### +pointerString+, which describes a single pointer in the form:
### "<pointer_symbol> <synset_offset>%<pos> <source/target>"
### The first two characters of the last field become the source word
### number and the next two the target word number (both as Strings).
def self::parse( pointerString )
  fields = pointerString.split( /\s+/ )
  type, target, word_numbers = fields[0], fields[1], fields[2]
  offset, pos = target.split( /%/, 2 )
  source_wn = word_numbers[ 0, 2 ]
  target_wn = word_numbers[ 2, 2 ]
  new( type, offset, pos, source_wn, target_wn )
end
|
75
|
+
|
76
|
+
|
77
|
+
#########################################################
|
78
|
+
### I N S T A N C E M E T H O D S
|
79
|
+
#########################################################
|
80
|
+
|
81
|
+
### Create a new synset pointer with the given arguments. The
### +type+ is the type of the link between synsets, and must be
### either a key or a value of WordNet::Constants::POINTER_TYPES (or the
### symbol of one of its subtypes, e.g., '%m'). The +offset+ is the unique
### identifier of the target synset, and +pos+ is its part-of-speech,
### which must be either a key or value of
### WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
### +target_wn+ are numerical values which distinguish lexical and
### semantic pointers. +source_wn+ indicates the word number in the
### current (source) synset, and +target_wn+ indicates the word
### number in the target synset. If both are 0 (the default) it
### means that the pointer type of the pointer represents a semantic
### relation between the current (source) synset and the target
### synset indicated by +offset+. Raises an ArgumentError for an
### unknown type or part of speech.
def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )

  # Allow type = '!', 'antonym', or :antonym. Also handle
  # splitting of compound pointers (e.g., :memberMeronym / '%m')
  # into their correct type/subtype parts. Work from the String form so
  # Symbols and Strings are treated alike.
  typestr = type.to_s
  @type = @subtype = nil

  case typestr.length
  when 1
    @type = POINTER_SYMBOLS[ typestr ]

  when 2
    @type = POINTER_SYMBOLS[ typestr[0,1] ]
    raise "No known subtypes for '%s'" % [@type] unless
      POINTER_SUBTYPES.key?( @type )

    # Reverse lookup of the subtype name by its symbol, written with
    # #find because Hash#index is not available in current Ruby.
    @subtype, = POINTER_SUBTYPES[ @type ].find {|_, symbol| symbol == typestr }
    raise "Unknown subtype '%s' for '%s'" % [ type, @type ] unless @subtype

  else
    if POINTER_TYPES.key?( typestr.to_sym )
      @type = typestr.to_sym
    elsif /([a-z]+)([A-Z][a-z]+)/ =~ typestr
      subtype, maintype = $1, $2.downcase
      @type = maintype.to_sym if
        POINTER_TYPES.key?( maintype.to_sym )
      @subtype = subtype.to_sym
    end
  end

  raise ArgumentError, "No such pointer type %p" % [ type ] if @type.nil?

  # Allow pos = 'n', 'noun', or :noun
  posstr = pos.to_s
  @part_of_speech =
    if posstr.length == 1
      SYNTACTIC_SYMBOLS[ posstr ]
    elsif SYNTACTIC_CATEGORIES.key?( posstr.to_sym )
      posstr.to_sym
    end
  raise ArgumentError, "No such part of speech %p" % [ pos ] if
    @part_of_speech.nil?

  # Other attributes
  @offset = offset
  @source_wn = source_wn
  @target_wn = target_wn
end
|
141
|
+
|
142
|
+
|
143
|
+
######
|
144
|
+
public
|
145
|
+
######
|
146
|
+
|
147
|
+
# The type of the pointer. Will be one of the keys of
|
148
|
+
# WordNet::POINTER_TYPES (e.g., :meronym).
|
149
|
+
attr_accessor :type
|
150
|
+
|
151
|
+
# The subtype of the pointer, if any. Will be one of the keys of one
|
152
|
+
# of the hashes in POINTER_SUBTYPES (e.g., :portion).
|
153
|
+
attr_accessor :subtype
|
154
|
+
|
155
|
+
# The offset of the target synset
|
156
|
+
attr_accessor :offset
|
157
|
+
|
158
|
+
# The part-of-speech of the target synset. Will be one of the keys
|
159
|
+
# of WordNet::SYNTACTIC_CATEGORIES.
|
160
|
+
attr_accessor :part_of_speech
|
161
|
+
|
162
|
+
# The word number in the source synset
|
163
|
+
attr_accessor :source_wn
|
164
|
+
|
165
|
+
# The word number in the target synset
|
166
|
+
attr_accessor :target_wn
|
167
|
+
|
168
|
+
|
169
|
+
### Return the Pointer as a human-readable String suitable for
### debugging: its class, object id, type (with the subtype in
### parentheses, if any), and the key of the target synset.
def inspect
  kind = @subtype ? "#@type(#@subtype)" : @type
  parts = [ self.class.name, self.object_id, kind, synset ]
  "#<%s:0x%08x %s %s>" % parts
end
|
179
|
+
|
180
|
+
|
181
|
+
### Return the synset key of the target synset (i.e.,
### <offset>%<pos symbol>). Built by interpolation so that it works
### whether the offset is held as a String or as an Integer.
def synset
  "#{offset}%#{pos}"
end
|
186
|
+
|
187
|
+
|
188
|
+
### Return the syntactic category indicator for this pointer: the value
### stored in SYNTACTIC_CATEGORIES under the pointer's part of speech.
def pos
  category = @part_of_speech
  SYNTACTIC_CATEGORIES[ category ]
end
|
192
|
+
|
193
|
+
|
194
|
+
### Return the pointer type symbol for this pointer: the subtype's symbol
### from POINTER_SUBTYPES if the pointer has a subtype, or the main type's
### symbol from POINTER_TYPES otherwise.
def type_symbol
  return POINTER_SUBTYPES[ @type ][ @subtype ] if @subtype
  POINTER_TYPES[ @type ]
end
|
202
|
+
|
203
|
+
|
204
|
+
### Comparison operator. Pointer are equivalent if they point at the
|
205
|
+
### same synset and are of the same type.
|
206
|
+
def ==( other )
|
207
|
+
return false unless other.is_a?( self.class )
|
208
|
+
other.offset == self.offset &&
|
209
|
+
other.type == self.type
|
210
|
+
end
|
211
|
+
|
212
|
+
|
213
|
+
### Return the pointer in its stringified form:
### "<type symbol> <offset>%<pos indicator> <source_wn><target_wn>", with
### the offset in decimal and each word number as two hex digits.
def to_s
  # Word numbers held as Strings are hex digits; Integers pass through.
  to_number = lambda {|wn| wn.is_a?( String ) ? wn.to_i( 16 ) : wn.to_i }

  "%s %d%%%s %02x%02x" % [
    type_symbol,
    # #to_i so a zero-padded String offset is read as decimal
    offset.to_i,
    pos,
    to_number.call( source_wn ),
    to_number.call( target_wn ),
  ]
end
|
223
|
+
end # class Pointer
|
224
|
+
|
225
|
+
|
226
|
+
#############################################################
|
227
|
+
### C L A S S M E T H O D S
|
228
|
+
#############################################################
|
229
|
+
|
230
|
+
### Define a group of pointer methods based on +symbol+ that will fetch,
### add, and delete pointer synsets of the type indicated. If no pointer
### type corresponding to the given +symbol+ is found, a variant without
### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
### create methods called #antonyms and #antonyms=, but will fetch
### pointers of type :antonym). If the pointer type has subtypes
### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
### subtypes will be generated as well, named <subtype>_<symbol>. Raises
### an ArgumentError if neither form names a pointer type.
def self::def_pointer_methods( symbol ) # :nodoc:
  name = symbol.to_s
  $stderr.puts '-' * 50,
    ">>> defining pointer methods for %p" % [symbol] if $DEBUG

  singular = name.sub( /s$/, '' ).to_sym
  type =
    if POINTER_TYPES.key?( symbol )
      symbol
    elsif POINTER_TYPES.key?( singular )
      singular
    else
      raise ArgumentError, "Unknown pointer type %p" % symbol
    end

  # Define the accessor
  $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
  define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
  define_method( "#{name}=".to_sym ) do |*synsets|
    self.set_synset_pointers( type, synsets, nil )
  end

  # If the pointer is one that has subtypes, make the variants list
  # out of the subtypes. If it doesn't have subtypes, make the only
  # variant nil, which will cause the mutators to be defined for the
  # main pointer type.
  variants = POINTER_SUBTYPES.key?( type ) ? POINTER_SUBTYPES[ type ].keys : [nil]

  # Define a set of methods for each variant, or for the main method
  # if the variant is nil.
  variants.each do |subtype|
    varname = subtype ? [subtype, name].join('_') : name

    if subtype.nil?
      $stderr.puts "No subtype for %s (subtype = %p)" %
        [ varname, subtype ] if $DEBUG
    else
      $stderr.puts "Defining reader for #{varname}" if $DEBUG
      define_method( varname ) do
        self.fetch_synset_pointers( type, subtype )
      end
    end

    $stderr.puts "Defining mutator for #{varname}" if $DEBUG
    define_method( "#{varname}=" ) do |*synsets|
      self.set_synset_pointers( type, synsets, subtype )
    end
  end
end
|
292
|
+
|
293
|
+
|
294
|
+
#############################################################
|
295
|
+
### I N S T A N C E M E T H O D S
|
296
|
+
#############################################################
|
297
|
+
|
298
|
+
### Create a new Synset object in the specified +lexicon+ for the
### specified +word+ and +part_of_speech+. If +data+ is specified,
### initialize the synset's other object data from it. This method
### shouldn't be called directly: you should use one of the Lexicon
### class's factory methods: #create_synset, #lookup_synsets, or
### #lookup_synsetsByOffset. Raises an ArgumentError if +lexicon+ is
### missing or +pos+ is not a known part-of-speech indicator. Without
### +data+ the offset is set to the placeholder value 1.
def initialize( lexicon, offset, pos, word=nil, data=nil )
  @lexicon = lexicon or
    raise ArgumentError, "%p is not a WordNet::Lexicon" % lexicon
  @part_of_speech = SYNTACTIC_SYMBOLS[ pos ] or
    raise ArgumentError, "No such part of speech %p" % pos
  @mutex = Sync::new
  @pointers = []

  if data
    @offset = offset.to_i
    @filenum, @wordlist, @pointerlist,
      @frameslist, @gloss = data.split( DELIM_RE )
  else
    @offset = 1
    @wordlist = word || ''

    # One fresh String per attribute: replicating a single '' would make
    # all four share one object, so appending to one would change them all.
    @filenum, @pointerlist, @frameslist, @gloss = Array.new( 4 ) { '' }
  end
end
|
322
|
+
|
323
|
+
|
324
|
+
######
|
325
|
+
public
|
326
|
+
######
|
327
|
+
|
328
|
+
# The WordNet::Lexicon that was used to look up this synset
|
329
|
+
attr_reader :lexicon
|
330
|
+
|
331
|
+
# The syntactic category of this Synset. Will be one of the keys of
|
332
|
+
# SYNTACTIC_CATEGORIES (e.g., :noun); #pos returns the matching indicator.
|
333
|
+
attr_accessor :part_of_speech
|
334
|
+
|
335
|
+
# The original byte offset of the synset in the data file; acts as the
|
336
|
+
# unique identifier (when combined with #part_of_speech) of this Synset in
|
337
|
+
# the database.
|
338
|
+
attr_accessor :offset
|
339
|
+
|
340
|
+
# The number corresponding to the lexicographer file name containing the
|
341
|
+
# synset. Calling #lexInfo will return the actual filename. See the
|
342
|
+
# "System Description" of wngloss(7WN) for more info about this.
|
343
|
+
attr_accessor :filenum
|
344
|
+
|
345
|
+
# The raw list of word/lex_id pairs associated with this synset. Each
|
346
|
+
# word and lex_id is separated by a '%' character, and each pair is
|
347
|
+
# delimited with a '|'. E.g., the wordlist for "animal" is:
|
348
|
+
# "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
|
349
|
+
attr_accessor :wordlist
|
350
|
+
|
351
|
+
# The list of raw pointers to related synsets. E.g., the pointerlist for
|
352
|
+
# "mourning dove" is:
|
353
|
+
# "@ 01731700%n 0000|#m 01733452%n 0000"
|
354
|
+
attr_accessor :pointerlist
|
355
|
+
|
356
|
+
# The list of raw verb sentence frames for this synset.
|
357
|
+
attr_accessor :frameslist
|
358
|
+
|
359
|
+
# Definition and/or example sentences for the Synset.
|
360
|
+
attr_accessor :gloss
|
361
|
+
|
362
|
+
|
363
|
+
### Return a human-readable representation of the Synset suitable for
### debugging: class, doubled object id, offset, words, part of speech,
### gloss, and a count of pointers per type taken from #pointer_map.
def inspect
  counts = self.pointer_map.collect do |type, ptrs|
    "#{type}s: #{ptrs.length}"
  end

  fields = [
    self.class.name,
    self.object_id * 2,
    self.offset,
    self.words.join(", "),
    self.part_of_speech,
    self.gloss,
    counts.join( ", " ),
  ]
  %q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % fields
end
|
380
|
+
|
381
|
+
|
382
|
+
### Returns the Synset's unique identifier, made up of its offset (as a
### decimal number) and syntactic category indicator catenated together
### with a '%' symbol.
def key
  format( "%d%%%s", self.offset, self.pos )
end
|
387
|
+
|
388
|
+
|
389
|
+
### The symbol which represents this synset's syntactic category. Will
|
390
|
+
### be one of :noun, :verb, :adjective, :adverb, or :other.
|
391
|
+
def pos
  # Map the stored part of speech through the SYNTACTIC_CATEGORIES table;
  # the result is nil when the table has no entry for it.
  category = SYNTACTIC_CATEGORIES[ @part_of_speech ]
  category
end
|
394
|
+
|
395
|
+
|
396
|
+
### Return each of the sentences of the gloss for this synset as an
|
397
|
+
### array. The gloss is a definition of the synset, and optionally one
|
398
|
+
### or more example sentences.
|
399
|
+
def glosses
  # Sentences are separated by semicolons; whitespace around each
  # separator is dropped along with it.
  separator = /\s*;\s*/
  self.gloss.split( separator )
end
|
402
|
+
|
403
|
+
|
404
|
+
### Returns true if the receiver and otherSyn are identical according to
|
405
|
+
### their offsets.
|
406
|
+
def ==( otherSyn )
  # Anything that is not a Synset is never equal; two Synsets are equal
  # when their offsets match (the part of speech is not consulted).
  otherSyn.kind_of?( WordNet::Synset ) && self.offset == otherSyn.offset
end
|
410
|
+
|
411
|
+
|
412
|
+
|
413
|
+
### Returns an Array of words and/or collocations associated with this
|
414
|
+
### synset.
|
415
|
+
def words
  @mutex.synchronize( Sync::SH ) do
    self.wordlist.split( SUB_DELIM_RE ).map do |entry|
      # Underscores stand for spaces in collocations; everything from the
      # first '%' onward (the lex_id) is stripped.
      entry.tr( '_', ' ' ).sub( /%.*$/, '' )
    end
  end
end
|
422
|
+
alias_method :synonyms, :words
|
423
|
+
|
424
|
+
|
425
|
+
### Set the words in this synset's wordlist to +newWords+
|
426
|
+
def words=( *newWords )
  # Array#join flattens nested arrays, so this works both for
  # `syn.words = array` and for a list of separate arguments.
  @mutex.synchronize( Sync::EX ) do
    @wordlist = newWords.join( SUB_DELIM )
  end
end
|
431
|
+
|
432
|
+
|
433
|
+
### Add the specified +newWords+ to this synset's wordlist.
|
435
|
+
def add_words( *newWords )
  @mutex.synchronize( Sync::EX ) do
    # Set union: words already present are not added a second time.
    self.words = self.words | newWords
  end
end
|
440
|
+
|
441
|
+
|
442
|
+
### Delete the specified +oldWords+ from this synset's wordlist.
|
444
|
+
def delete_words( *oldWords )
  @mutex.synchronize( Sync::EX ) do
    # Array difference: every occurrence of each listed word is removed.
    self.words = self.words - oldWords
  end
end
|
449
|
+
|
450
|
+
|
451
|
+
### Return the synset as a string. Alias: +overview+.
|
452
|
+
def to_s
  @mutex.synchronize( Sync::SH ) do
    # Strip any leftover "%<digit>" lex_id markers and underscores from
    # the comma-separated word list before building the overview line.
    names = self.words.join(", ").gsub( /%\d/, '' ).gsub( /_/, ' ' )
    "#{names} [#{self.part_of_speech}] -- (#{self.gloss})"
  end
end
|
458
|
+
alias_method :overview, :to_s
|
459
|
+
|
460
|
+
|
461
|
+
### Writes any changes made to the object to the database and updates all
|
462
|
+
### affected synset data and indexes. If the object passes out of scope
|
463
|
+
### before #write is called, the changes are lost.
|
464
|
+
def store
  # Hand this synset to the owning lexicon while holding the exclusive lock.
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.store_synset( self )
  end
end
|
469
|
+
alias_method :write, :store
|
470
|
+
|
471
|
+
|
472
|
+
### Removes this synset from the database.
|
473
|
+
def remove
  # Ask the owning lexicon to drop this synset while holding the
  # exclusive lock.
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.remove_synset( self )
  end
end
|
478
|
+
|
479
|
+
|
480
|
+
### Returns the synset's data in a form suitable for storage in the
|
481
|
+
### lexicon's database.
|
482
|
+
def serialize
  @mutex.synchronize( Sync::SH ) do
    # Field order is part of the stored record layout:
    # filenum, wordlist, pointerlist, frameslist, gloss.
    fields = [ @filenum, @wordlist, @pointerlist, @frameslist, @gloss ]
    fields.join( WordNet::DELIM )
  end
end
|
493
|
+
|
494
|
+
|
495
|
+
### Auto-generate synset pointer methods for the various types
|
496
|
+
|
497
|
+
# The synsets for the receiver's antonyms (opposites). E.g.,
|
498
|
+
# $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
|
499
|
+
# ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
|
500
|
+
# from cloudiness; allowing light to pass through; "clear water";
|
501
|
+
# "clear plastic bags"; "clear glass"; "the air is clear and
|
502
|
+
# clean"" (similar_tos: 6, attributes: 1, derivations: 2,
|
503
|
+
# antonyms: 1, see_alsos: 1)>]
|
504
|
+
def_pointer_methods :antonyms
|
505
|
+
|
506
|
+
# Synsets for the receiver's entailments (a verb X entails Y if X cannot
|
507
|
+
# be done unless Y is or has been done). E.g.,
|
508
|
+
# $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
|
509
|
+
# ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
|
510
|
+
# with pressure; "rub my hands"; "rub oil into her skin""
|
511
|
+
# (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
|
512
|
+
# see_alsos: 4)>]
|
513
|
+
def_pointer_methods :entailment
|
514
|
+
|
515
|
+
# Get/set synsets for the receiver's cause pointers (a verb X causes Y
|
516
|
+
# to happen).
|
517
|
+
def_pointer_methods :causes
|
518
|
+
|
519
|
+
# Get/set synsets for the receiver's verb groups. Verb groups link verbs
|
520
|
+
# with similar senses together.
|
521
|
+
def_pointer_methods :verb_groups
|
522
|
+
|
523
|
+
# Get/set list of synsets for the receiver's "similar to" pointers. This
|
524
|
+
# type of pointer links together head adjective synsets with its
|
525
|
+
# satellite adjective synsets.
|
526
|
+
def_pointer_methods :similar_to
|
527
|
+
|
528
|
+
# Get/set synsets for the receiver's participles. Participles are
|
529
|
+
# non-finite forms of a verb; used adjectivally and to form compound
|
530
|
+
# tenses. For example, the first participle for "working" is:
|
531
|
+
# "function, work, operate, go, run (verb)"
|
532
|
+
def_pointer_methods :participles
|
533
|
+
|
534
|
+
# Get/set synsets for the receiver's pertainyms. Pertainyms are
|
535
|
+
# relational adjectives. Adjectives that are pertainyms are usually
|
536
|
+
# defined by such phrases as "of or pertaining to" and do not have
|
537
|
+
# antonyms. A pertainym can point to a noun or another pertainym.
|
538
|
+
def_pointer_methods :pertainyms
|
539
|
+
|
540
|
+
# Get/set synsets for the receiver's attributes.
|
541
|
+
def_pointer_methods :attributes
|
542
|
+
|
543
|
+
# Get/set synsets for the receiver's derived_from.
|
544
|
+
def_pointer_methods :derived_from
|
545
|
+
|
546
|
+
# Get/set synsets for the receiver's derivations.
|
547
|
+
def_pointer_methods :derivations
|
548
|
+
|
549
|
+
# Get/set synsets for the receiver's see_also.
|
550
|
+
def_pointer_methods :see_also
|
551
|
+
|
552
|
+
|
553
|
+
# Auto-generate types with subtypes
|
554
|
+
|
555
|
+
# Synsets for the receiver's hypernyms (more-general terms). E.g.,
|
556
|
+
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
|
557
|
+
# ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
|
558
|
+
# stick that is larger at one end; "he carried a club in self
|
559
|
+
# defense"; "he felt as if he had been hit with a club""
|
560
|
+
# (derivations: 1, hypernyms: 1, hyponyms: 7)>]
|
561
|
+
#
|
562
|
+
# Also generates accessors for subtypes:
|
563
|
+
#
|
564
|
+
# [instance_hypernyms]
|
565
|
+
# A proper noun that refers to a particular, unique referent (as
|
566
|
+
# distinguished from nouns that refer to classes).
|
567
|
+
def_pointer_methods :hypernyms
|
568
|
+
|
569
|
+
|
570
|
+
# :TODO: Generate an example for this
|
571
|
+
|
572
|
+
# Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
|
573
|
+
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
|
574
|
+
# ==> [...]
|
575
|
+
#
|
576
|
+
# The specific term used to designate a member of a class. X is a
|
577
|
+
# hyponym of Y if X is a (kind of) Y.
|
578
|
+
# Also generates accessors for subtypes:
|
579
|
+
#
|
580
|
+
# [instance_hyponyms]
|
581
|
+
# A proper noun that refers to a particular, unique referent (as
|
582
|
+
# distinguished from nouns that refer to classes).
|
583
|
+
def_pointer_methods :hyponyms
|
584
|
+
|
585
|
+
|
586
|
+
# Get/set synsets for the receiver's meronyms. In addition to the
|
587
|
+
# general accessors for all meronyms, there are also accessors for
|
588
|
+
# subtypes as well:
|
589
|
+
#
|
590
|
+
# [member_meronyms]
|
591
|
+
# Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
|
592
|
+
# relation).
|
593
|
+
# [stuff_meronyms]
|
594
|
+
# Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
|
595
|
+
# relation).
|
596
|
+
# [portion_meronyms]
|
597
|
+
# Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
|
598
|
+
# relation).
|
599
|
+
# [component_meronyms]
|
600
|
+
# Get/set synsets for the receiver's "component" meronyms (HAS
|
601
|
+
# COMPONENT relation).
|
602
|
+
# [feature_meronyms]
|
603
|
+
# Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
|
604
|
+
# relation).
|
605
|
+
# [phase_meronyms]
|
606
|
+
# Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
|
607
|
+
# relation).
|
608
|
+
# [place_meronyms]
|
609
|
+
# Get/set synsets for the receiver's "place" meronyms (HAS PLACE
|
610
|
+
# relation).
|
611
|
+
def_pointer_methods :meronyms
|
612
|
+
|
613
|
+
# Get/set synsets for the receiver's holonyms. In addition to the
|
614
|
+
# general accessors for all holonyms, there are also accessors for
|
615
|
+
# subtypes as well:
|
616
|
+
#
|
617
|
+
# [member_holonyms]
|
618
|
+
# Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
|
619
|
+
# relation).
|
620
|
+
# [stuff_holonyms]
|
621
|
+
# Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
|
622
|
+
# relation).
|
623
|
+
# [portion_holonyms]
|
624
|
+
# Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
|
625
|
+
# OF relation).
|
626
|
+
# [component_holonyms]
|
627
|
+
# Get/set synsets for the receiver's "component" holonyms (IS A
|
628
|
+
# COMPONENT OF relation).
|
629
|
+
# [feature_holonyms]
|
630
|
+
# Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
|
631
|
+
# OF relation).
|
632
|
+
# [phase_holonyms]
|
633
|
+
# Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
|
634
|
+
# relation).
|
635
|
+
# [place_holonyms]
|
636
|
+
# Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
|
637
|
+
# relation).
|
638
|
+
def_pointer_methods :holonyms
|
639
|
+
|
640
|
+
# Get/set synsets for the receiver's topical domain members. In addition
|
641
|
+
# to the general members accessor, there are also accessors for
|
642
|
+
# membership subtypes:
|
643
|
+
#
|
644
|
+
# [category_members]
|
645
|
+
# Get/set synsets for the receiver's
|
646
|
+
# "category" topical domain members.
|
647
|
+
# [region_members]
|
648
|
+
# Get/set synsets for the receiver's "region"
|
649
|
+
# topical domain members.
|
650
|
+
# [usage_members]
|
651
|
+
# Get/set synsets for the receiver's "usage"
|
652
|
+
# topical domain members.
|
653
|
+
def_pointer_methods :members
|
654
|
+
|
655
|
+
# Get/set synsets for the receiver's topical domain domains. In addition
|
656
|
+
# to the general domains accessor, there are also accessors for
|
657
|
+
# domainship subtypes:
|
658
|
+
#
|
659
|
+
# [category_domains]
|
660
|
+
# Get/set synsets for the receiver's
|
661
|
+
# "category" topical domain domains.
|
662
|
+
# [region_domains]
|
663
|
+
# Get/set synsets for the receiver's "region"
|
664
|
+
# topical domain domains.
|
665
|
+
# [usage_domains]
|
666
|
+
# Get/set synsets for the receiver's "usage"
|
667
|
+
# topical domain domains.
|
668
|
+
def_pointer_methods :domains
|
669
|
+
|
670
|
+
|
671
|
+
### Returns an Array of the coordinate sisters of the receiver.
|
672
|
+
def coordinates
  # Gather the hyponyms of every hypernym of the receiver. The receiver
  # itself is among them, since it is a hyponym of its own hypernyms.
  sisters = []
  self.hypernyms.each do |hyper|
    sisters << hyper.hyponyms
  end
  sisters.flatten
end
|
677
|
+
|
678
|
+
|
679
|
+
### Return the name of the "lexicographer's file" associated with this
|
680
|
+
### synset.
|
681
|
+
def lex_info
  @mutex.synchronize( Sync::SH ) do
    # filenum is converted with #to_i before indexing LEXFILES.
    LEXFILES[ self.filenum.to_i ]
  end
end
|
686
|
+
|
687
|
+
|
688
|
+
### Sets the "lexicographer's file" association for this synset to
|
689
|
+
### +id+. The value in +id+ should correspond to one of the values in
|
690
|
+
### #WordNet::LEXFILES
|
691
|
+
def lexInfo=( id )
  # Reject ids that have no entry in LEXFILES before touching any state.
  unless LEXFILES[id]
    raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES"
  end

  @mutex.synchronize( Sync::EX ) do
    self.filenum = id
  end
end
|
698
|
+
|
699
|
+
|
700
|
+
### Returns an +Array+ of verb frame +String+s for the synset.
|
701
|
+
def frames
  # Each entry of the raw frames list is a "<frame number> <word number>"
  # pair. A word number of 0 (or a missing one) yields the bare frame
  # sentence; a non-zero one appends the word it applies to in parentheses.
  verbFrames = []

  @mutex.synchronize( Sync::SH ) {
    self.frameslist.split( WordNet::SUB_DELIM_RE ).each {|fr|
      # String#split returns Strings; they must be converted before being
      # compared with 0 or used as an index (same approach as #lex_info,
      # which calls #to_i on filenum). nil.to_i covers a missing word number.
      # NOTE(review): assumes both numbers are stored in decimal and that
      # VERB_SENTS is indexed by integer frame number -- confirm against
      # the database converter and WordNet::Constants.
      fnum, wnum = fr.split
      fnum = fnum.to_i
      wnum = wnum.to_i

      if wnum > 0
        # NOTE(review): WordNet word numbers are documented as 1-based,
        # while this index is used as-is (0-based) -- verify.
        wordtext = " (" + self.words[wnum] + ")"
        verbFrames.push VERB_SENTS[ fnum ] + wordtext
      else
        verbFrames.push VERB_SENTS[ fnum ]
      end
    }
  }

  return verbFrames
end
|
719
|
+
|
720
|
+
|
721
|
+
### Traversal iterator: Iterates depth-first over a particular
|
722
|
+
### +type+ of the receiver, and all of the pointed-to synset's
|
723
|
+
### pointers. If called with a block, the block is called once for each
|
724
|
+
### synset with the +foundSyn+ and its +depth+ in relation to the
|
725
|
+
### originating synset as arguments. The first call will be the
|
726
|
+
### originating synset with a depth of +0+ unless +includeOrigin+ is
|
727
|
+
### +false+. If the +callback+ returns +true+, the traversal is halted,
|
728
|
+
### and the method returns immediately. This method returns an Array of
|
729
|
+
### the synsets which were traversed if no block is given, or a flag
|
730
|
+
### which indicates whether or not the traversal was interrupted if a
|
731
|
+
### block is given.
|
732
|
+
def traverse( type, includeOrigin=true )
  unless type.kind_of?( String ) || type.kind_of?( Symbol )
    raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol"
  end
  unless self.respond_to?( type )
    raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type."
  end

  # Recursive depth-first walker. Each call returns a pair: the synsets
  # collected at and below +node+, and a flag saying whether the caller's
  # block asked for the traversal to stop.
  # NOTE(review): there is no visited-set, so a pointer type that forms a
  # cycle would presumably recurse without end -- confirm.
  walker = nil
  walker = Proc.new do |node, level|
    at_origin = ( level == 0 )
    halted = false

    # The block is skipped for the origin when includeOrigin is false.
    # Only a literal +true+ from the block halts the traversal.
    if block_given? && ( !at_origin || includeOrigin )
      halted = true if yield( node, level ).is_a?( TrueClass )
    end

    collected = []
    collected.push( node ) if !at_origin || includeOrigin

    # Descend into each pointed-to synset, stopping as soon as the block
    # has asked to halt by returning true.
    unless halted
      node.send( type ).each do |child|
        descendants, halted = walker.call( child, level + 1 )
        collected.push( *descendants ) unless descendants.empty?
        break if halted
      end
    end

    [ collected, halted ]
  end

  visited, interrupted = walker.call( self, 0 )

  # With a block the caller learns whether it halted the walk; without one
  # it gets the list of traversed synsets.
  block_given? ? interrupted : visited
end
|
789
|
+
|
790
|
+
|
791
|
+
### Returns the distance in pointers between the receiver and +otherSynset+
|
792
|
+
### using +type+ as the search path.
|
793
|
+
def distance( type, otherSynset )
  # Walk the +type+ pointers and remember the depth at which otherSynset
  # first shows up; returning true from the block stops the walk. The
  # result stays nil when otherSynset is never reached.
  found_at = nil

  self.traverse( type ) do |candidate, depth|
    next unless candidate == otherSynset
    found_at = depth
    true
  end

  found_at
end
|
804
|
+
|
805
|
+
|
806
|
+
### Recursively searches all of the receiver's pointers of the specified
|
807
|
+
### +type+ for +otherSynset+, returning +true+ if it is found.
|
808
|
+
def search( type, otherSynset )
  # The block's true/false result doubles as the halt signal, so the
  # traversal's "was halted" flag is exactly "was found".
  self.traverse( type ) do |candidate, _depth|
    candidate == otherSynset
  end
end
|
813
|
+
|
814
|
+
|
815
|
+
### Union: Return the least general synset that the receiver and
|
816
|
+
### +otherSynset+ have in common as a hypernym, or nil if it doesn't share
|
817
|
+
### any.
|
818
|
+
def |( otherSyn )
  # Everything reachable from the receiver through :hypernyms (the
  # receiver itself included).
  own_ancestors = self.traverse( :hypernyms )
  shared = nil

  # Walk the other synset's hypernyms and stop at the first one that is
  # also one of ours.
  otherSyn.traverse( :hypernyms ) do |candidate, _depth|
    if own_ancestors.include?( candidate )
      shared = candidate
      true
    end
  end

  shared
end
|
835
|
+
|
836
|
+
|
837
|
+
### Returns the pointers in this synset's pointerlist as an +Array+
|
838
|
+
def pointers
  @mutex.synchronize( Sync::SH ) do
    # Parse the raw pointerlist only while the cached list is empty; the
    # parse happens under the exclusive lock because it writes @pointers.
    if @pointers.empty?
      @mutex.synchronize( Sync::EX ) do
        @pointers = @pointerlist.split(SUB_DELIM_RE).map do |raw|
          Pointer::parse( raw )
        end
      end
    end

    @pointers
  end
end
|
848
|
+
|
849
|
+
|
850
|
+
### Set the pointers in this synset's pointerlist to +newPointers+
|
851
|
+
def pointers=( *newPointers )
  # With assignment syntax (`syn.pointers = list`) the splat always
  # receives a single argument, so +newPointers+ arrives as [ list ].
  # Flatten it so that both @pointers and the serialized pointerlist hold
  # the individual pointers rather than a nested array.
  flat = newPointers.flatten

  @mutex.synchronize( Sync::EX ) {
    @pointerlist = flat.collect {|ptr| ptr.to_s }.join( SUB_DELIM )
    @pointers = flat
  }
end
|
857
|
+
|
858
|
+
|
859
|
+
### Returns the synset's pointers in a Hash keyed by their type.
|
860
|
+
def pointer_map
  # Group the pointers by their #type, keeping the order in which they
  # appear in #pointers.
  by_type = {}
  self.pointers.each do |ptr|
    ( by_type[ ptr.type ] ||= [] ) << ptr
  end
  by_type
end
|
867
|
+
|
868
|
+
|
869
|
+
|
870
|
+
#########
|
871
|
+
protected
|
872
|
+
#########
|
873
|
+
|
874
|
+
### Returns an Array of synset objects for the receiver's pointers of the
|
875
|
+
### specified +type+.
|
876
|
+
def fetch_synset_pointers( type, subtype=nil )
  found = nil

  @mutex.synchronize( Sync::SH ) do
    # Keep the pointers of the requested type; when a subtype is given it
    # must match as well.
    matching = self.pointers.select do |ptr|
      ptr.type == type && ( subtype.nil? || ptr.subtype == subtype )
    end

    # Each pointer names its target synset by key; resolve every key
    # through the lexicon.
    keys = matching.map {|ptr| ptr.synset }
    found = keys.map {|key| @lexicon.lookup_synsets_by_key( key ) }
  end

  # A lookup may return a list, so flatten into one Array of synsets.
  found.flatten
end
|
894
|
+
|
895
|
+
|
896
|
+
### Sets the receiver's synset pointers for the specified +type+ to
|
897
|
+
### the specified +synsets+.
|
898
|
+
def set_synset_pointers( type, synsets, subtype=nil )
  # Replace the entries stored under +type+ with +synsets+ and write the
  # combined list back through #pointers=.
  # NOTE(review): +subtype+ is accepted but not used, and the +synsets+
  # are stored as given rather than converted to pointer objects -- both
  # look unfinished; confirm the intended behaviour before relying on this.
  synsets = [ synsets ] unless synsets.is_a?( Array )
  pmap = self.pointer_map
  pmap[ type ] = synsets

  # pmap.values is an Array of per-type Arrays. Flatten it and pass the
  # entries as individual arguments so the splat in #pointers= receives a
  # flat list instead of a nested one.
  flat = pmap.values.flatten
  self.send( :pointers=, *flat )
  flat
end
|
904
|
+
|
905
|
+
|
906
|
+
end # class Synset
|
907
|
+
end # module WordNet
|
908
|
+
|