wordnet 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +720 -0
- data/LICENSE +27 -0
- data/README +93 -0
- data/Rakefile +291 -0
- data/Rakefile.local +46 -0
- data/convertdb.rb +417 -0
- data/examples/addLacedBoots.rb +27 -0
- data/examples/clothesWithCollars.rb +36 -0
- data/examples/clothesWithTongues.rb +28 -0
- data/examples/distance.rb +37 -0
- data/examples/domainTree.rb +27 -0
- data/examples/gcs.rb +54 -0
- data/examples/holonymTree.rb +27 -0
- data/examples/hypernymTree.rb +28 -0
- data/examples/hyponymTree.rb +28 -0
- data/examples/memberTree.rb +27 -0
- data/examples/meronymTree.rb +29 -0
- data/lib/wordnet.rb +87 -0
- data/lib/wordnet/constants.rb +301 -0
- data/lib/wordnet/lexicon.rb +430 -0
- data/lib/wordnet/synset.rb +908 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +384 -0
- data/rake/manual.rb +755 -0
- data/rake/packaging.rb +112 -0
- data/rake/publishing.rb +303 -0
- data/rake/rdoc.rb +35 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +469 -0
- data/rake/testing.rb +192 -0
- data/rake/verifytask.rb +64 -0
- data/spec/lib/helpers.rb +155 -0
- data/spec/wordnet/lexicon_spec.rb +248 -0
- data/spec/wordnet/synset_spec.rb +288 -0
- data/utils.rb +838 -0
- metadata +216 -0
@@ -0,0 +1,430 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# WordNet Lexicon object class
|
4
|
+
#
|
5
|
+
# == Synopsis
|
6
|
+
#
|
7
|
+
# lexicon = WordNet::Lexicon.new( dictpath )
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# Instances of this class abstract access to the various databases of the
|
12
|
+
# WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
|
13
|
+
#
|
14
|
+
# == Author
|
15
|
+
#
|
16
|
+
# Michael Granger <ged@FaerieMUD.org>
|
17
|
+
#
|
18
|
+
# Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
|
19
|
+
#
|
20
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
21
|
+
# software under the terms of the Perl Artistic License. (See
|
22
|
+
# http://language.perl.com/misc/Artistic.html)
|
23
|
+
#
|
24
|
+
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
25
|
+
# by Dan Brian.
|
26
|
+
#
|
27
|
+
# == Version
|
28
|
+
#
|
29
|
+
# $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
30
|
+
#
|
31
|
+
|
32
|
+
require 'rbconfig'
|
33
|
+
require 'pathname'
|
34
|
+
require 'bdb'
|
35
|
+
require 'sync'
|
36
|
+
|
37
|
+
require 'wordnet/constants'
|
38
|
+
require 'wordnet/synset'
|
39
|
+
|
40
|
+
### Lexicon exception - something has gone wrong in the internals of the
|
41
|
+
### lexicon.
|
42
|
+
class WordNet::LexiconError < StandardError ; end
|
43
|
+
|
44
|
+
### Lookup error - the object being looked up either doesn't exist or is
|
45
|
+
### malformed
|
46
|
+
class WordNet::LookupError < StandardError ; end
|
47
|
+
|
48
|
+
### WordNet lexicon class - abstracts access to the WordNet lexical
|
49
|
+
### databases, and provides factory methods for looking up and creating new
|
50
|
+
### WordNet::Synset objects.
|
51
|
+
class WordNet::Lexicon
|
52
|
+
include WordNet::Constants
|
53
|
+
include CrossCase if defined?( CrossCase )
|
54
|
+
|
55
|
+
# Subversion Id
|
56
|
+
SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
57
|
+
|
58
|
+
# Subversion revision
|
59
|
+
SvnRev = %q$Rev: 93 $
|
60
|
+
|
61
|
+
|
62
|
+
#############################################################
|
63
|
+
### B E R K E L E Y D B C O N F I G U R A T I O N
|
64
|
+
#############################################################
|
65
|
+
|
66
|
+
# The default path to the WordNet BerkeleyDB Env: a "ruby-wordnet"
# directory underneath Ruby's configured data directory. RbConfig is used
# because the legacy top-level ::Config alias no longer exists on current
# Ruby versions.
DEFAULT_DB_ENV = File::join( RbConfig::CONFIG['datadir'], "ruby-wordnet" )
|
69
|
+
|
70
|
+
# Options for the creation of the Env object
|
71
|
+
ENV_OPTIONS = {
|
72
|
+
:set_timeout => 50,
|
73
|
+
:set_lk_detect => 1,
|
74
|
+
:set_verbose => false,
|
75
|
+
:set_lk_max => 3000,
|
76
|
+
}
|
77
|
+
|
78
|
+
# Flags for the creation of the Env object (read-write and read-only)
|
79
|
+
ENV_FLAGS_RW = BDB::CREATE|BDB::INIT_TRANSACTION|BDB::RECOVER|BDB::INIT_MPOOL
|
80
|
+
ENV_FLAGS_RO = BDB::INIT_MPOOL
|
81
|
+
|
82
|
+
|
83
|
+
#############################################################
|
84
|
+
### I N S T A N C E M E T H O D S
|
85
|
+
#############################################################
|
86
|
+
|
87
|
+
### Create a new WordNet::Lexicon object that will read its data from
|
88
|
+
### the given +dbenv+ (a BerkeleyDB env directory). The database will be
|
89
|
+
### opened with the specified +mode+, which can either be a numeric
|
90
|
+
### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
|
91
|
+
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
  @mode = normalize_mode( mode )

  # Report the normalized numeric mode: the raw +mode+ argument may be a
  # Symbol (:readonly/:readwrite), which is not a valid operand for "%o".
  debug_msg "Mode is: %04o" % [ @mode ]

  # Pick the environment and database flags that match the access mode.
  if readonly?
    debug_msg "Using readonly flags"
    envflags = ENV_FLAGS_RO
    dbflags  = 0
  else
    debug_msg "Using read/write flags"
    envflags = ENV_FLAGS_RW
    dbflags  = BDB::CREATE
  end

  debug_msg "Env flags are: %0s, dbflags are %0s" %
    [ envflags.to_s(2), dbflags.to_s(2) ]

  begin
    @env      = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
    @index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
    @data_db  = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
    @morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
  rescue StandardError => err
    # Re-raise the same exception class with the env path prepended. Plain
    # interpolation is used so a '%' in the path cannot be mistaken for a
    # format directive.
    msg = "Error while opening Ruby-WordNet data files: #{dbenv}: #{err.message}"
    raise err, msg, err.backtrace
  end
end
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
######
|
126
|
+
public
|
127
|
+
######
|
128
|
+
|
129
|
+
# The BDB::Env object which contains the wordnet lexicon's databases.
|
130
|
+
attr_reader :env
|
131
|
+
|
132
|
+
# The handle to the index table
|
133
|
+
attr_reader :index_db
|
134
|
+
|
135
|
+
# The handle to the synset data table
|
136
|
+
attr_reader :data_db
|
137
|
+
|
138
|
+
# The handle to the morph table
|
139
|
+
attr_reader :morph_db
|
140
|
+
|
141
|
+
|
142
|
+
### Returns +true+ if the lexicon was opened in read-only mode.
|
143
|
+
def readonly?
  # The lexicon counts as read-only when the owner-write permission bit
  # (0200) is absent from the normalized mode.
  owner_write = @mode & 0200
  owner_write.zero?
end
|
146
|
+
|
147
|
+
|
148
|
+
### Returns +true+ if the lexicon was opened in read-write mode.
|
149
|
+
def readwrite?
  # Simply the logical inverse of #readonly?
  return false if readonly?
  true
end
|
152
|
+
|
153
|
+
|
154
|
+
### Close the lexicon's database environment
|
155
|
+
def close
  # Nothing to do if the environment was never opened.
  return unless @env
  @env.close
end
|
158
|
+
|
159
|
+
|
160
|
+
### Checkpoint the database. (BerkeleyDB-specific)
|
161
|
+
def checkpoint( bytes=0, minutes=0 )
  # Forward the thresholds to the environment instead of silently dropping
  # them. With the defaults (0, 0) the checkpoint is requested
  # unconditionally.
  # NOTE(review): BDB::Env#checkpoint documents its first argument as
  # kilobytes of log data -- confirm the unit callers expect for +bytes+.
  @env.checkpoint( bytes, minutes )
end
|
164
|
+
|
165
|
+
|
166
|
+
### Remove any archival logfiles for the lexicon's database
|
167
|
+
### environment. (BerkeleyDB-specific).
|
168
|
+
def clean_logs
  # Log files may only be pruned when the lexicon is writable.
  if readwrite?
    archlogs.each do |path|
      # Make the file writable first so the delete cannot be refused
      File::chmod( 0777, path )
      File::delete( path )
    end
  end
end
|
175
|
+
|
176
|
+
|
177
|
+
### Returns an integer of the familiarity/polysemy count for +word+ as a
|
178
|
+
### +part_of_speech+. Note that polysemy can be identified for a given
|
179
|
+
### word by counting the synsets returned by #lookup_synsets.
|
180
|
+
def familiarity( word, part_of_speech, polyCount=nil )
  key = make_word_key( word, part_of_speech )

  # The index entry is a delimited list of synset offsets; the polysemy
  # count is simply the number of entries. Unknown words yield nil.
  if @index_db.key?( key )
    senses = @index_db[ key ].split( WordNet::SUB_DELIM_RE )
    senses.length
  end
end
|
185
|
+
|
186
|
+
|
187
|
+
### Look up synsets (WordNet::Synset objects) matching +word+ as a
|
188
|
+
### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
|
189
|
+
### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
|
190
|
+
### +sense+, #lookup_synsets will return all matches that are a
|
191
|
+
### +part_of_speech+. If +sense+ is specified, only the synset object that
|
192
|
+
### matches that particular +part_of_speech+ and +sense+ is returned.
|
193
|
+
def lookup_synsets( word, part_of_speech, sense=nil )
  pos   = make_pos( part_of_speech )
  entry = @index_db[ make_word_key(word, part_of_speech) ]

  # If the word as given isn't indexed, retry with its morphological
  # base form (if the morph table knows one).
  if entry.nil? && (base_form = morph( word, part_of_speech ))
    entry = @index_db[ make_word_key(base_form, part_of_speech) ]
  end

  # If the lookup failed both ways, just abort
  return nil unless entry

  # Each offset in the index entry becomes an "<offset>%<pos>" synset key.
  synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }

  # No particular sense requested: return every match as a flat Array.
  return [ lookup_synsets_by_key(*synkeys) ].flatten unless sense

  # Senses are numbered from 1. Reject anything outside the known range
  # rather than wrapping around to the end of the list (0 or negative) or
  # handing a nil key to the data table (too large).
  unless (1..synkeys.length).include?( sense )
    raise WordNet::LookupError, "No sense %p for '%s' (%d known)" %
      [ sense, word, synkeys.length ]
  end

  # Normalize to a single synset whether the key lookup hands back a bare
  # object or a one-element Array.
  return [ lookup_synsets_by_key(synkeys[sense - 1]) ].flatten.first
end
|
219
|
+
|
220
|
+
|
221
|
+
### Returns the WordNet::Synset objects corresponding to the +keys+
|
222
|
+
### specified. The +keys+ are made up of the target synset's "offset"
|
223
|
+
### and syntactic category catenated together with a '%' character.
|
224
|
+
def lookup_synsets_by_key( *keys )
  synsets = keys.collect do |key|
    raise WordNet::LookupError, "Failed lookup of synset '#{key}':"\
      "No such synset" unless @data_db.key?( key )

    # A key is "<offset>%<pos>"; the stored record is the raw synset data.
    offset, part_of_speech = key.split( /%/, 2 )
    WordNet::Synset::new( self, offset, part_of_speech, nil, @data_db[key] )
  end

  # The result shape used to come from `return *synsets`, whose meaning
  # differs between Ruby versions. Spell out the Ruby 1.8 behavior, which
  # #lookup_synsets relies on: nothing => nil, one key => the Synset itself,
  # several keys => an Array of Synsets.
  case synsets.length
  when 0 then nil
  when 1 then synsets.first
  else synsets
  end
end
|
238
|
+
alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
|
239
|
+
|
240
|
+
|
241
|
+
### Returns a form of +word+ as a part of speech +part_of_speech+, as
|
242
|
+
### found in the WordNet morph files. The #lookup_synsets method performs
|
243
|
+
### morphological conversion automatically, so a call to #morph is not
|
244
|
+
### required.
|
245
|
+
def morph( word, part_of_speech )
  # The morph table is keyed the same way as the index: "<word>%<pos>".
  morph_key = make_word_key( word, part_of_speech )
  @morph_db[ morph_key ]
end
|
248
|
+
|
249
|
+
|
250
|
+
### Returns the result of looking up +word+ in the inverse of the WordNet
|
251
|
+
### morph files. _(This is undocumented in Lingua::Wordnet)_
|
252
|
+
def reverse_morph( word )
  # Swap keys and values of the morph table, then look the word up in it.
  inverted = @morph_db.invert
  inverted[ word ]
end
|
255
|
+
|
256
|
+
|
257
|
+
### Returns an array of compound words matching +text+.
|
258
|
+
def grep( text )
  return [] if text.empty?

  # Match +text+ literally as a key prefix: escape it so characters such
  # as '.' or '(' in a word are not treated as regexp syntax.
  prefix = /\A#{Regexp.escape( text )}/
  words  = []

  # Grab a cursor into the database and fetch while the key matches
  # the target text.
  cursor = @index_db.cursor
  begin
    rec = cursor.set_range( text )

    # +rec+ becomes nil once the cursor runs off the end of the table.
    while rec && prefix =~ rec[0]
      words.push rec[0]
      rec = cursor.next
    end
  ensure
    # Always release the cursor, even if iteration raised.
    cursor.close
  end

  # Always an Array, as documented (and as the empty-text case returns).
  return words
end
|
275
|
+
|
276
|
+
|
277
|
+
### Factory method: Creates and returns a new WordNet::Synset object in
|
278
|
+
### this lexicon for the specified +word+ and +part_of_speech+.
|
279
|
+
def create_synset( word, part_of_speech )
  # A new synset has no stored record yet, so it is built with an empty
  # offset and no data.
  blank_offset = ''
  WordNet::Synset::new( self, blank_offset, part_of_speech, word )
end
|
282
|
+
alias_method :new_synset, :create_synset
|
283
|
+
|
284
|
+
|
285
|
+
### Store the specified +synset+ (a WordNet::Synset object) in the
|
286
|
+
### lexicon. Returns the offset of the stored synset.
|
287
|
+
def store_synset( synset )

  # Start a transaction on the data table
  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|

    # A synset still carrying the placeholder offset (1) is new: allocate
    # the next offset from the running counter kept in the data table.
    if synset.offset == 1
      next_offset = datadb['offsetcount'].to_i + 1
      datadb['offsetcount'] = next_offset.to_s
      synset.offset = next_offset
    end

    # Write the data entry
    datadb[ synset.key ] = synset.serialize

    # Write the index entries in a nested transaction. Index offsets are
    # stored as strings, so compare and store the string form.
    offset = synset.offset.to_s
    txn.begin( BDB::TXN_COMMIT, @index_db ) do |_subtxn,indexdb|
      synset.words.each do |word|

        # Use the same key format lookups use (whitespace becomes '_').
        wordkey = make_word_key( word, synset.pos )
        offsets = indexdb[ wordkey ].to_s.split( SUB_DELIM_RE )

        # Already indexed under this word: nothing to do.
        next if offsets.include?( offset )

        # Write the extended list back; appending to the fetched string
        # alone would never reach the database.
        indexdb[ wordkey ] = ( offsets << offset ).join( SUB_DELIM )
      end
    end # transaction on @index_db
  end # transaction on @data_db

  return synset.offset
end
|
323
|
+
|
324
|
+
|
325
|
+
### Remove the specified +synset+ (a WordNet::Synset object) in the
|
326
|
+
### lexicon. Returns +true+ once removed, or +nil+ if it was never stored.
|
327
|
+
def remove_synset( synset )
  # If it's not in the database (ie., still has the placeholder offset),
  # just return.
  return nil if synset.offset == 1

  # Index offsets are stored as strings, so compare against the string form.
  offset = synset.offset.to_s

  # Start a transaction on the data table
  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|

    # First remove the index entries for this synset by iterating
    # over each of its words
    txn.begin( BDB::TXN_COMMIT, @index_db ) do |_subtxn,indexdb|
      synset.words.each do |word|
        wordkey = make_word_key( word, synset.pos )
        next unless indexdb.key?( wordkey )

        # Splice this synset's offset out of the word's entry; drop the
        # entry altogether if no other synset is left. All writes go
        # through the transactional handle.
        remaining = indexdb[ wordkey ].split( SUB_DELIM_RE ).
          reject {|off| off == offset }

        if remaining.empty?
          indexdb.delete( wordkey )
        else
          indexdb[ wordkey ] = remaining.join( SUB_DELIM )
        end
      end
    end

    # :TODO: Delete synset from pointers of related synsets

    # Delete the synset from the main db, under the same key
    # ("<offset>%<pos>") that #store_synset writes it with.
    datadb.delete( synset.key )
  end

  return true
end
|
366
|
+
|
367
|
+
|
368
|
+
#########
|
369
|
+
protected
|
370
|
+
#########
|
371
|
+
|
372
|
+
### Normalize various ways of specifying a part of speech into the
|
373
|
+
### WordNet part of speech indicator from the +original+ representation,
|
374
|
+
### which may be the name (e.g., "noun"); +nil+, in which case it
|
375
|
+
### defaults to the indicator for a noun; or the indicator character
|
376
|
+
### itself, in which case it is returned unmodified.
|
377
|
+
def make_pos( original )
  # No part of speech given: default to the noun indicator.
  return WordNet::Noun if original.nil?

  category = original.to_s.intern
  if WordNet::SYNTACTIC_CATEGORIES.key?( category )
    # A category name: translate it to its indicator
    WordNet::SYNTACTIC_CATEGORIES[ category ]
  elsif SYNTACTIC_SYMBOLS.key?( original )
    # Already an indicator: hand it back untouched
    original
  end
end
|
385
|
+
|
386
|
+
|
387
|
+
### Make a lexicon key out of the given +word+ and part of speech
|
388
|
+
### (+pos+).
|
389
|
+
def make_word_key( word, pos )
  category   = make_pos( pos )
  # Collocations are stored with underscores in place of whitespace
  underscored = word.gsub( /\s+/, '_' )
  "%s%%%s" % [ underscored, category ]
end
|
394
|
+
|
395
|
+
|
396
|
+
### Return a list of archival logfiles that can be removed
|
397
|
+
### safely. (BerkeleyDB-specific).
|
398
|
+
def archlogs
  # Ask the environment for its archivable log files, passing the
  # BDB::ARCH_ABS flag.
  flag = BDB::ARCH_ABS
  @env.log_archive( flag )
end
|
401
|
+
|
402
|
+
|
403
|
+
#######
|
404
|
+
private
|
405
|
+
#######
|
406
|
+
|
407
|
+
### Turn the given +origmode+ into an octal file mode such as that
|
408
|
+
### given to File.open.
|
409
|
+
def normalize_mode( origmode )
  case origmode
  when :readonly
    # Read permission for everyone, minus whatever the umask removes
    0444 & ~File.umask
  when :readwrite, :writable
    # Read/write permission for everyone, minus the umask
    0666 & ~File.umask
  when Integer
    # A numeric mode is taken as-is. Integer (rather than Fixnum) is
    # matched so this branch also works on Ruby versions without Fixnum.
    origmode
  else
    raise ArgumentError, "unrecognized mode %p" % [origmode]
  end
end
|
421
|
+
|
422
|
+
### Output the given +msg+ to STDERR if $DEBUG is turned on.
|
423
|
+
def debug_msg( *msg )
  return unless $DEBUG

  # Write to $stderr: the old $deferr global is nil on current Ruby
  # versions, which made every debug message raise NoMethodError.
  $stderr.puts msg
end
|
427
|
+
|
428
|
+
|
429
|
+
end # class WordNet::Lexicon
|
430
|
+
|
@@ -0,0 +1,908 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# WordNet synonym-set object class
|
4
|
+
#
|
5
|
+
# == Synopsis
|
6
|
+
#
|
7
|
+
# ss = lexicon.lookup_synsets( "word", WordNet::Noun, 1 )
|
8
|
+
# puts "Definition: %s" % ss.gloss
|
9
|
+
# coords = ss.coordinates
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
#
|
13
|
+
# Instances of this class encapsulate the data for a synonym set ('synset') in a
|
14
|
+
# Wordnet lexical database. A synonym set is a set of words that are
|
15
|
+
# interchangeable in some context.
|
16
|
+
#
|
17
|
+
# == Author
|
18
|
+
#
|
19
|
+
# Michael Granger <ged@FaerieMUD.org>
|
20
|
+
#
|
21
|
+
# Copyright (c) 2002-2008 The FaerieMUD Consortium. All rights reserved.
|
22
|
+
#
|
23
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
24
|
+
# software under the terms of the Perl Artistic License. (See
|
25
|
+
# http://language.perl.com/misc/Artistic.html)
|
26
|
+
#
|
27
|
+
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
28
|
+
# by Dan Brian.
|
29
|
+
#
|
30
|
+
# == Version
|
31
|
+
#
|
32
|
+
# $Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
|
33
|
+
#
|
34
|
+
|
35
|
+
require 'sync'
|
36
|
+
require 'wordnet/constants'
|
37
|
+
|
38
|
+
module WordNet
|
39
|
+
|
40
|
+
### Synset internal error class
|
41
|
+
class SynsetError < StandardError ; end
|
42
|
+
|
43
|
+
### "Synonym set" class - encapsulates the data for a set of words in the
|
44
|
+
### lexical database that are interchangeable in some context, and provides
|
45
|
+
### methods for accessing its relationships.
|
46
|
+
class Synset
|
47
|
+
include WordNet::Constants
|
48
|
+
include CrossCase if defined?( CrossCase )
|
49
|
+
|
50
|
+
# Subversion ID
|
51
|
+
SVNId = %q$Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
|
52
|
+
|
53
|
+
# Subversion Rev
|
54
|
+
SVNRev = %q$Rev: 90 $
|
55
|
+
|
56
|
+
# The "pointer" type that encapsulates relationships between one synset
|
57
|
+
# and another.
|
58
|
+
class Pointer
|
59
|
+
include WordNet::Constants
|
60
|
+
include CrossCase if defined?( CrossCase )
|
61
|
+
|
62
|
+
#########################################################
|
63
|
+
### C L A S S M E T H O D S
|
64
|
+
#########################################################
|
65
|
+
|
66
|
+
### Make an Array of WordNet::Synset::Pointer objects out of the
|
67
|
+
### given +pointerList+. The pointerlist is a string of pointers
|
68
|
+
### delimited by Constants::SUB_DELIM. Pointers are in the form:
|
69
|
+
### "<pointer_symbol> <synset_offset>%<pos> <source/target>"
|
70
|
+
def self::parse( pointerString )
  # A pointer is "<pointer_symbol> <synset_offset>%<pos> <source/target>".
  type, offsetPos, ptrNums = pointerString.strip.split(/\s+/)
  offset, pos = offsetPos.to_s.split( /%/, 2 )

  # Fail with a clear error if any piece is missing, instead of a
  # NoMethodError on nil further down.
  if [ type, offset, pos, ptrNums ].any? {|part| part.nil? || part.empty? }
    raise ArgumentError, "Malformed pointer %p" % [ pointerString ]
  end

  # The last field packs the source and target word numbers as two
  # two-character halves; they are passed on as strings, unchanged.
  new( type, offset, pos, ptrNums[0,2], ptrNums[2,2] )
end
|
75
|
+
|
76
|
+
|
77
|
+
#########################################################
|
78
|
+
### I N S T A N C E M E T H O D S
|
79
|
+
#########################################################
|
80
|
+
|
81
|
+
### Create a new synset pointer with the given arguments. The
|
82
|
+
### +ptrType+ is the type of the link between synsets, and must be
|
83
|
+
### either a key or a value of WordNet::Constants::POINTER_TYPES. The
|
84
|
+
### +offset+ is the unique identifier of the target synset, and
|
85
|
+
### +pos+ is its part-of-speech, which must be either a key or value
|
86
|
+
### of WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
|
87
|
+
### +target_wn+ are numerical values which distinguish lexical and
|
88
|
+
### semantic pointers. +source_wn+ indicates the word number in the
|
89
|
+
### current (source) synset, and +target_wn+ indicates the word
|
90
|
+
### number in the target synset. If both are 0 (the default) it
|
91
|
+
### means that the pointer type of the pointer represents a semantic
|
92
|
+
### relation between the current (source) synset and the target
|
93
|
+
### synset indicated by +offset+.
|
94
|
+
def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )

  # Allow type = '!', 'antonym', or :antonym. Also handle splitting of
  # compound pointers (e.g., :memberMeronym / '%m') into their correct
  # type/subtype parts. Everything is decided on the string form so that
  # Strings and Symbols behave alike.
  typename = type.to_s
  @type = @subtype = nil

  case typename.length
  when 1
    # A bare pointer symbol
    @type = POINTER_SYMBOLS[ typename ]

  when 2
    # A compound symbol: the first character names the main type, the
    # whole symbol names the subtype.
    @type = POINTER_SYMBOLS[ typename[0,1] ]
    raise "No known subtypes for '%s'" % [@type] unless
      POINTER_SUBTYPES.key?( @type )

    # Reverse lookup (subtype symbol -> subtype name). Done with #find
    # because Hash#index is deprecated and missing from current Ruby.
    match = POINTER_SUBTYPES[ @type ].find {|_, symbol| symbol == typename }
    ( @subtype = match && match.first ) or
      raise "Unknown subtype '%s' for '%s'" % [ type, @type ]

  else
    if POINTER_TYPES.key?( typename.to_sym )
      # A plain type name
      @type = typename.to_sym
    elsif /([a-z]+)([A-Z][a-z]+)/ =~ typename
      # A camelCased "<subtype><Type>" name
      subtype, maintype = $1.to_sym, $2.downcase.to_sym
      @type, @subtype = maintype, subtype if POINTER_TYPES.key?( maintype )
    end
  end

  raise ArgumentError, "No such pointer type %p" % [ type ] if @type.nil?

  # Allow pos = 'n', 'noun', or :noun
  posname = pos.to_s
  @part_of_speech =
    if posname.length == 1
      SYNTACTIC_SYMBOLS[ posname ]
    elsif SYNTACTIC_CATEGORIES.key?( posname.to_sym )
      posname.to_sym
    end
  raise ArgumentError, "No such part of speech %p" % [ pos ] if
    @part_of_speech.nil?

  # Other attributes
  @offset    = offset
  @source_wn = source_wn
  @target_wn = target_wn
end
|
141
|
+
|
142
|
+
|
143
|
+
######
|
144
|
+
public
|
145
|
+
######
|
146
|
+
|
147
|
+
# The type of the pointer. Will be one of the keys of
|
148
|
+
# WordNet::POINTER_TYPES (e.g., :meronym).
|
149
|
+
attr_accessor :type
|
150
|
+
|
151
|
+
# The subtype of the pointer, if any. Will be one of the keys of one
|
152
|
+
# of the hashes in POINTER_SUBTYPES (e.g., :portion).
|
153
|
+
attr_accessor :subtype
|
154
|
+
|
155
|
+
# The offset of the target synset
|
156
|
+
attr_accessor :offset
|
157
|
+
|
158
|
+
# The part-of-speech of the target synset. Will be one of the keys
|
159
|
+
# of WordNet::SYNTACTIC_CATEGORIES.
|
160
|
+
attr_accessor :part_of_speech
|
161
|
+
|
162
|
+
# The word number in the source synset
|
163
|
+
attr_accessor :source_wn
|
164
|
+
|
165
|
+
# The word number in the target synset
|
166
|
+
attr_accessor :target_wn
|
167
|
+
|
168
|
+
|
169
|
+
### Return the Pointer as a human-readable String suitable for
|
170
|
+
### debugging.
|
171
|
+
def inspect
|
172
|
+
"#<%s:0x%08x %s %s>" % [
|
173
|
+
self.class.name,
|
174
|
+
self.object_id,
|
175
|
+
@subtype ? "#@type(#@subtype)" : @type,
|
176
|
+
self.synset,
|
177
|
+
]
|
178
|
+
end
|
179
|
+
|
180
|
+
|
181
|
+
### Return the synset key of the target synset (i.e.,
|
182
|
+
### <offset>%<pos symbol>).
|
183
|
+
def synset
  # Key format is "<offset>%<pos symbol>"
  target_offset = self.offset
  target_offset + "%" + self.pos
end
|
186
|
+
|
187
|
+
|
188
|
+
### Return the syntactic category symbol for this pointer
|
189
|
+
def pos
  # Translate the stored part of speech through SYNTACTIC_CATEGORIES
  category = @part_of_speech
  SYNTACTIC_CATEGORIES[ category ]
end
|
192
|
+
|
193
|
+
|
194
|
+
### Return the pointer type symbol for this pointer
|
195
|
+
def type_symbol
  # A subtyped pointer has its own symbol; otherwise use the main type's.
  return POINTER_SUBTYPES[ @type ][ @subtype ] if @subtype
  POINTER_TYPES[ @type ]
end
|
202
|
+
|
203
|
+
|
204
|
+
### Comparison operator. Pointers are equivalent if they point at the
|
205
|
+
### same synset and are of the same type.
|
206
|
+
def ==( other )
|
207
|
+
return false unless other.is_a?( self.class )
|
208
|
+
other.offset == self.offset &&
|
209
|
+
other.type == self.type
|
210
|
+
end
|
211
|
+
|
212
|
+
|
213
|
+
### Return the pointer in its stringified form.
|
214
|
+
def to_s
|
215
|
+
"%s %d%%%s %02x%02x" % [
|
216
|
+
ptr.type_symbol,
|
217
|
+
ptr.offset,
|
218
|
+
ptr.posSymbol,
|
219
|
+
ptr.source_wn,
|
220
|
+
ptr.target_wn,
|
221
|
+
]
|
222
|
+
end
|
223
|
+
end # class Pointer
|
224
|
+
|
225
|
+
|
226
|
+
#############################################################
|
227
|
+
### C L A S S M E T H O D S
|
228
|
+
#############################################################
|
229
|
+
|
230
|
+
### Define a group of pointer methods based on +symbol+ that will fetch,
|
231
|
+
### add, and delete pointer synsets of the type indicated. If no pointer
|
232
|
+
### type corresponding to the given +symbol+ is found, a variant without
|
233
|
+
### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
|
234
|
+
### create methods called #antonyms and #antonyms=, but will fetch
|
235
|
+
### pointers of type :antonym). If the pointer type has subtypes
|
236
|
+
### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
|
237
|
+
### subtypes will be generated as well.
|
238
|
+
def self::def_pointer_methods( symbol ) # :nodoc:
  name = symbol.to_s
  $stderr.puts '-' * 50,
    ">>> defining pointer methods for %p" % [symbol] if $DEBUG

  # Resolve the pointer type: either the symbol itself, or the symbol
  # with a trailing 's' removed (e.g. :antonyms => :antonym).
  singular = name.sub( /s$/, '' ).to_sym
  type =
    if POINTER_TYPES.key?( symbol )
      symbol
    elsif POINTER_TYPES.key?( singular )
      singular
    else
      raise ArgumentError, "Unknown pointer type %p" % symbol
    end

  # Define the accessor and mutator for the main pointer type
  $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
  define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
  define_method( "#{name}=".to_sym ) do |*synsets|
    self.set_synset_pointers( type, synsets, nil )
  end

  # If the pointer is one that has subtypes, make the variants list
  # out of the subtypes. If it doesn't have subtypes, make the only
  # variant nil, which will cause the mutators to be defined for the
  # main pointer type.
  variants =
    if POINTER_SUBTYPES.key?( type )
      POINTER_SUBTYPES[ type ].keys
    else
      [nil]
    end

  # Define a set of methods for each variant, or for the main method
  # if the variant is nil.
  variants.each do |subtype|
    varname = subtype ? [subtype, name].join('_') : name

    if subtype.nil?
      $stderr.puts "No subtype for %s (subtype = %p)" %
        [ varname, subtype ] if $DEBUG
    else
      $stderr.puts "Defining reader for #{varname}" if $DEBUG
      define_method( varname ) do
        self.fetch_synset_pointers( type, subtype )
      end
    end

    $stderr.puts "Defining mutator for #{varname}" if $DEBUG
    define_method( "#{varname}=" ) do |*synsets|
      self.set_synset_pointers( type, synsets, subtype )
    end
  end
end
|
292
|
+
|
293
|
+
|
294
|
+
#############################################################
|
295
|
+
### I N S T A N C E M E T H O D S
|
296
|
+
#############################################################
|
297
|
+
|
298
|
+
### Create a new Synset object in the specified +lexicon+ for the
|
299
|
+
### specified +word+ and +part_of_speech+. If +data+ is specified,
|
300
|
+
### initialize the synset's other object data from it. This method
|
301
|
+
### shouldn't be called directly: you should use one of the Lexicon
|
302
|
+
### class's factory methods: #create_synset, #lookup_synsets, or
|
303
|
+
### #lookup_synsetsByOffset.
|
304
|
+
def initialize( lexicon, offset, pos, word=nil, data=nil )
  @lexicon = lexicon or
    raise ArgumentError, "%p is not a WordNet::Lexicon" % lexicon
  @part_of_speech = SYNTACTIC_SYMBOLS[ pos ] or
    raise ArgumentError, "No such part of speech %p" % pos
  @mutex = Sync::new
  @pointers = []

  if data
    # An existing synset: unpack the delimited record from the data table
    @offset = offset.to_i
    @filenum, @wordlist, @pointerlist,
      @frameslist, @gloss = data.split( DELIM_RE )
  else
    # A new synset: placeholder offset of 1, and empty fields. Every field
    # gets its own String so that appending to one in place cannot change
    # the others.
    @offset = 1
    @wordlist = word ? word : ''
    @filenum, @pointerlist, @frameslist, @gloss = Array.new( 4 ) { '' }
  end
end
|
322
|
+
|
323
|
+
|
324
|
+
######
|
325
|
+
public
|
326
|
+
######
|
327
|
+
|
328
|
+
# The WordNet::Lexicon that was used to look up this synset
|
329
|
+
attr_reader :lexicon
|
330
|
+
|
331
|
+
# The syntactic category of this Synset. Will be one of "n" (noun), "v"
|
332
|
+
# (verb), "a" (adjective), "r" (adverb), or "s" (other).
|
333
|
+
attr_accessor :part_of_speech
|
334
|
+
|
335
|
+
# The original byte offset of the synset in the data file; acts as the
|
336
|
+
# unique identifier (when combined with #part_of_speech) of this Synset in
|
337
|
+
# the database.
|
338
|
+
attr_accessor :offset
|
339
|
+
|
340
|
+
# The number corresponding to the lexicographer file name containing the
|
341
|
+
# synset. Calling #lexInfo will return the actual filename. See the
|
342
|
+
# "System Description" of wngloss(7WN) for more info about this.
|
343
|
+
attr_accessor :filenum
|
344
|
+
|
345
|
+
# The raw list of word/lex_id pairs associated with this synset. Each
|
346
|
+
# word and lex_id is separated by a '%' character, and each pair is
|
347
|
+
# delimited with a '|'. E.g., the wordlist for "animal" is:
|
348
|
+
# "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
|
349
|
+
attr_accessor :wordlist
|
350
|
+
|
351
|
+
# The list of raw pointers to related synsets. E.g., the pointerlist for
|
352
|
+
# "mourning dove" is:
|
353
|
+
# "@ 01731700%n 0000|#m 01733452%n 0000"
|
354
|
+
attr_accessor :pointerlist
|
355
|
+
|
356
|
+
# The list of raw verb sentence frames for this synset.
|
357
|
+
attr_accessor :frameslist
|
358
|
+
|
359
|
+
# Definition and/or example sentences for the Synset.
|
360
|
+
attr_accessor :gloss
|
361
|
+
|
362
|
+
|
363
|
+
### Return a human-readable representation of the Synset suitable for
|
364
|
+
### debugging.
|
365
|
+
def inspect
|
366
|
+
pointer_counts = self.pointer_map.collect {|type,ptrs|
|
367
|
+
"#{type}s: #{ptrs.length}"
|
368
|
+
}.join( ", " )
|
369
|
+
|
370
|
+
%q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % [
|
371
|
+
self.class.name,
|
372
|
+
self.object_id * 2,
|
373
|
+
self.offset,
|
374
|
+
self.words.join(", "),
|
375
|
+
self.part_of_speech,
|
376
|
+
self.gloss,
|
377
|
+
pointer_counts,
|
378
|
+
]
|
379
|
+
end
|
380
|
+
|
381
|
+
|
382
|
+
### Returns the Synset's unique identifier, made up of its offset and
|
383
|
+
### syntactic category catenated together with a '%' symbol.
|
384
|
+
def key
  # "<offset>%<pos>": '%%' in the format string emits the literal separator.
  format( "%d%%%s", self.offset, self.pos )
end
|
387
|
+
|
388
|
+
|
389
|
+
### The symbol which represents this synset's syntactic category. Will
|
390
|
+
### be one of :noun, :verb, :adjective, :adverb, or :other.
|
391
|
+
def pos
  # Look the stored part of speech up in the SYNTACTIC_CATEGORIES table.
  SYNTACTIC_CATEGORIES[ @part_of_speech ]
end
|
394
|
+
|
395
|
+
|
396
|
+
### Return each of the sentences of the gloss for this synset as an
|
397
|
+
### array. The gloss is a definition of the synset, and optionally one
|
398
|
+
### or more example sentences.
|
399
|
+
def glosses
  # Gloss parts are separated by semicolons; surrounding whitespace is dropped.
  separator = /\s*;\s*/
  self.gloss.split( separator )
end
|
402
|
+
|
403
|
+
|
404
|
+
### Returns true if the receiver and otherSyn are identical according to
|
405
|
+
### their offsets.
|
406
|
+
def ==( otherSyn )
  # Only another Synset can be equal; equality is then decided by offset alone.
  otherSyn.kind_of?( WordNet::Synset ) && self.offset == otherSyn.offset
end
|
410
|
+
|
411
|
+
|
412
|
+
|
413
|
+
### Returns an Array of words and/or collocations associated with this
|
414
|
+
### synset.
|
415
|
+
def words
  @mutex.synchronize( Sync::SH ) do
    self.wordlist.split( SUB_DELIM_RE ).map do |entry|
      # Underscores stand for spaces; everything from '%' on is the lex_id.
      entry.gsub( /_/, ' ' ).sub( /%.*$/, '' )
    end
  end
end
|
422
|
+
alias_method :synonyms, :words
|
423
|
+
|
424
|
+
|
425
|
+
### Set the words in this synset's wordlist to +newWords+
|
426
|
+
def words=( *newWords )
  @mutex.synchronize( Sync::EX ) do
    # Array#join also flattens a nested Array, so both `syn.words = list`
    # and an explicit multi-argument call produce the same wordlist.
    joined = newWords.join( SUB_DELIM )
    @wordlist = joined
  end
end
|
431
|
+
|
432
|
+
|
433
|
+
### Add the specified +newWords+ to this synset's wordlist. Alias:
|
434
|
+
### +add_words+.
|
435
|
+
def add_words( *newWords )
  @mutex.synchronize( Sync::EX ) do
    # Set union: keeps the existing order and appends only unseen words.
    self.words = self.words | newWords
  end
end
|
440
|
+
|
441
|
+
|
442
|
+
### Delete the specified +oldWords+ from this synset's wordlist. Alias:
|
443
|
+
### +delete_words+.
|
444
|
+
def delete_words( *oldWords )
  @mutex.synchronize( Sync::EX ) do
    # Array difference: drops every occurrence of the given words.
    self.words = self.words - oldWords
  end
end
|
449
|
+
|
450
|
+
|
451
|
+
### Return the synset as a string. Alias: +overview+.
|
452
|
+
def to_s
  @mutex.synchronize( Sync::SH ) do
    # Comma-separated words with any lex_id markers and underscores removed.
    names = self.words.join(", ").gsub( /%\d/, '' ).gsub( /_/, ' ' )
    "#{names} [#{self.part_of_speech}] -- (#{self.gloss})"
  end
end
|
458
|
+
alias_method :overview, :to_s
|
459
|
+
|
460
|
+
|
461
|
+
### Writes any changes made to the object to the database and updates all
|
462
|
+
### affected synset data and indexes. If the object passes out of scope
|
463
|
+
### before #write is called, the changes are lost.
|
464
|
+
def store
  # Hold the exclusive lock while the lexicon persists this synset.
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.store_synset( self )
  end
end
|
469
|
+
alias_method :write, :store
|
470
|
+
|
471
|
+
|
472
|
+
### Removes this synset from the database.
|
473
|
+
def remove
  # Hold the exclusive lock while the lexicon deletes this synset.
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.remove_synset( self )
  end
end
|
478
|
+
|
479
|
+
|
480
|
+
### Returns the synset's data in a form suitable for storage in the
|
481
|
+
### lexicon's database.
|
482
|
+
def serialize
  @mutex.synchronize( Sync::SH ) do
    # Field order is the storage format: filenum, words, pointers, frames, gloss.
    fields = [ @filenum, @wordlist, @pointerlist, @frameslist, @gloss ]
    fields.join( WordNet::DELIM )
  end
end
|
493
|
+
|
494
|
+
|
495
|
+
### Auto-generate synset pointer methods for the various types
|
496
|
+
|
497
|
+
# The synsets for the receiver's antonyms (opposites). E.g.,
|
498
|
+
# $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
|
499
|
+
# ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
|
500
|
+
# from cloudiness; allowing light to pass through; "clear water";
|
501
|
+
# "clear plastic bags"; "clear glass"; "the air is clear and
|
502
|
+
# clean"" (similar_tos: 6, attributes: 1, derivations: 2,
|
503
|
+
# antonyms: 1, see_alsos: 1)>]
|
504
|
+
def_pointer_methods :antonyms
|
505
|
+
|
506
|
+
# Synsets for the receiver's entailments (a verb X entails Y if X cannot
|
507
|
+
# be done unless Y is or has been done). E.g.,
|
508
|
+
# $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
|
509
|
+
# ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
|
510
|
+
# with pressure; "rub my hands"; "rub oil into her skin""
|
511
|
+
# (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
|
512
|
+
# see_alsos: 4)>]
|
513
|
+
def_pointer_methods :entailment
|
514
|
+
|
515
|
+
# Get/set synsets for the receiver's cause pointers (a verb X causes Y
|
516
|
+
# to happen).
|
517
|
+
def_pointer_methods :causes
|
518
|
+
|
519
|
+
# Get/set synsets for the receiver's verb groups. Verb groups link verbs
|
520
|
+
# with similar senses together.
|
521
|
+
def_pointer_methods :verb_groups
|
522
|
+
|
523
|
+
# Get/set list of synsets for the receiver's "similar to" pointers. This
|
524
|
+
# type of pointer links together head adjective synsets with its
|
525
|
+
# satellite adjective synsets.
|
526
|
+
def_pointer_methods :similar_to
|
527
|
+
|
528
|
+
# Get/set synsets for the receiver's participles. Participles are
|
529
|
+
# non-finite forms of a verb; used adjectivally and to form compound
|
530
|
+
# tenses. For example, the first participle for "working" is:
|
531
|
+
# "function, work, operate, go, run (verb)"
|
532
|
+
def_pointer_methods :participles
|
533
|
+
|
534
|
+
# Get/set synsets for the receiver's pertainyms. Pertainyms are
|
535
|
+
# relational adjectives. Adjectives that are pertainyms are usually
|
536
|
+
# defined by such phrases as "of or pertaining to" and do not have
|
537
|
+
# antonyms. A pertainym can point to a noun or another pertainym.
|
538
|
+
def_pointer_methods :pertainyms
|
539
|
+
|
540
|
+
# Get/set synsets for the receiver's attributes.
|
541
|
+
def_pointer_methods :attributes
|
542
|
+
|
543
|
+
# Get/set synsets for the receiver's derived_from.
|
544
|
+
def_pointer_methods :derived_from
|
545
|
+
|
546
|
+
# Get/set synsets for the receiver's derivations.
|
547
|
+
def_pointer_methods :derivations
|
548
|
+
|
549
|
+
# Get/set synsets for the receiver's see_also.
|
550
|
+
def_pointer_methods :see_also
|
551
|
+
|
552
|
+
|
553
|
+
# Auto-generate types with subtypes
|
554
|
+
|
555
|
+
# Synsets for the receiver's hypernyms (more-general terms). E.g.,
|
556
|
+
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
|
557
|
+
# ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
|
558
|
+
# stick that is larger at one end; "he carried a club in self
|
559
|
+
# defense"; "he felt as if he had been hit with a club""
|
560
|
+
# (derivations: 1, hypernyms: 1, hyponyms: 7)>]
|
561
|
+
#
|
562
|
+
# Also generates accessors for subtypes:
|
563
|
+
#
|
564
|
+
# [instance_hypernyms]
|
565
|
+
# A proper noun that refers to a particular, unique referent (as
|
566
|
+
# distinguished from nouns that refer to classes).
|
567
|
+
def_pointer_methods :hypernyms
|
568
|
+
|
569
|
+
|
570
|
+
# :TODO: Generate an example for this
|
571
|
+
|
572
|
+
# Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
|
573
|
+
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
|
574
|
+
# ==> [...]
|
575
|
+
# [instance_hyponyms]
|
576
|
+
# The specific term used to designate a member of a class. X is a
|
577
|
+
# hyponym of Y if X is a (kind of) Y.
|
578
|
+
# Also generates accessors for subtypes:
|
579
|
+
#
|
580
|
+
# [instance_hyponyms]
|
581
|
+
# A proper noun that refers to a particular, unique referent (as
|
582
|
+
# distinguished from nouns that refer to classes).
|
583
|
+
def_pointer_methods :hyponyms
|
584
|
+
|
585
|
+
|
586
|
+
# Get/set synsets for the receiver's meronyms. In addition to the
|
587
|
+
# general accessors for all meronyms, there are also accessors for
|
588
|
+
# subtypes as well:
|
589
|
+
#
|
590
|
+
# [member_meronyms]
|
591
|
+
# Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
|
592
|
+
# relation).
|
593
|
+
# [stuff_meronyms]
|
594
|
+
# Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
|
595
|
+
# relation).
|
596
|
+
# [portion_meronyms]
|
597
|
+
# Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
|
598
|
+
# relation).
|
599
|
+
# [component_meronyms]
|
600
|
+
# Get/set synsets for the receiver's "component" meronyms (HAS
|
601
|
+
# COMPONENT relation).
|
602
|
+
# [feature_meronyms]
|
603
|
+
# Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
|
604
|
+
# relation).
|
605
|
+
# [phase_meronyms]
|
606
|
+
# Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
|
607
|
+
# relation).
|
608
|
+
# [place_meronyms]
|
609
|
+
# Get/set synsets for the receiver's "place" meronyms (HAS PLACE
|
610
|
+
# relation).
|
611
|
+
def_pointer_methods :meronyms
|
612
|
+
|
613
|
+
# Get/set synsets for the receiver's holonyms. In addition to the
|
614
|
+
# general accessors for all holonyms, there are also accessors for
|
615
|
+
# subtypes as well:
|
616
|
+
#
|
617
|
+
# [member_holonyms]
|
618
|
+
# Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
|
619
|
+
# relation).
|
620
|
+
# [stuff_holonyms]
|
621
|
+
# Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
|
622
|
+
# relation).
|
623
|
+
# [portion_holonyms]
|
624
|
+
# Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
|
625
|
+
# OF relation).
|
626
|
+
# [component_holonyms]
|
627
|
+
# Get/set synsets for the receiver's "component" holonyms (IS A
|
628
|
+
# COMPONENT OF relation).
|
629
|
+
# [feature_holonyms]
|
630
|
+
# Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
|
631
|
+
# OF relation).
|
632
|
+
# [phase_holonyms]
|
633
|
+
# Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
|
634
|
+
# relation).
|
635
|
+
# [place_holonyms]
|
636
|
+
# Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
|
637
|
+
# relation).
|
638
|
+
def_pointer_methods :holonyms
|
639
|
+
|
640
|
+
# Get/set synsets for the receiver's topical domain members. In addition
|
641
|
+
# to the general members accessor, there are also accessors for
|
642
|
+
# membership subtypes:
|
643
|
+
#
|
644
|
+
# [category_members]
|
645
|
+
# Get/set synsets for the receiver's
|
646
|
+
# "category" topical domain members.
|
647
|
+
# [region_members]
|
648
|
+
# Get/set synsets for the receiver's "region"
|
649
|
+
# topical domain members.
|
650
|
+
# [usage_members]
|
651
|
+
# Get/set synsets for the receiver's "usage"
|
652
|
+
# topical domain members.
|
653
|
+
def_pointer_methods :members
|
654
|
+
|
655
|
+
# Get/set synsets for the receiver's topical domain domains. In addition
|
656
|
+
# to the general domains accessor, there are also accessors for
|
657
|
+
# domainship subtypes:
|
658
|
+
#
|
659
|
+
# [category_domains]
|
660
|
+
# Get/set synsets for the receiver's
|
661
|
+
# "category" topical domain domains.
|
662
|
+
# [region_domains]
|
663
|
+
# Get/set synsets for the receiver's "region"
|
664
|
+
# topical domain domains.
|
665
|
+
# [usage_domains]
|
666
|
+
# Get/set synsets for the receiver's "usage"
|
667
|
+
# topical domain domains.
|
668
|
+
def_pointer_methods :domains
|
669
|
+
|
670
|
+
|
671
|
+
### Returns an Array of the coordinate sisters of the receiver.
|
672
|
+
def coordinates
  # Gather the hyponyms of every hypernym, then flatten into one list.
  sisters = []
  self.hypernyms.each do |parent|
    sisters << parent.hyponyms
  end
  sisters.flatten
end
|
677
|
+
|
678
|
+
|
679
|
+
### Return the name of the "lexicographer's file" associated with this
|
680
|
+
### synset.
|
681
|
+
def lex_info
  @mutex.synchronize( Sync::SH ) do
    # filenum is converted with #to_i before indexing LEXFILES.
    index = self.filenum.to_i
    LEXFILES[ index ]
  end
end
|
686
|
+
|
687
|
+
|
688
|
+
### Sets the "lexicographer's file" association for this synset to
|
689
|
+
### +id+. The value in +id+ should correspond to one of the values in
|
690
|
+
### #WordNet::LEXFILES
|
691
|
+
def lexInfo=( id )
  # Reject ids that have no entry in LEXFILES before touching any state.
  unless LEXFILES[id]
    raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES"
  end

  @mutex.synchronize( Sync::EX ) do
    self.filenum = id
  end
end
|
698
|
+
|
699
|
+
|
700
|
+
### Returns an +Array+ of verb frame +String+s for the synset.
|
701
|
+
def frames
  verbFrames = []

  @mutex.synchronize( Sync::SH ) {
    # Each fragment of the raw frames list is "<frame number> <word number>".
    self.frameslist.to_s.split( WordNet::SUB_DELIM_RE ).each {|fr|
      next if fr.strip.empty?

      # String#split yields Strings; convert them before comparing with 0
      # or using them as indexes (the unconverted values raised errors).
      fnum, wnum = fr.split.collect {|num| num.to_i }
      sentence = VERB_SENTS[ fnum ]

      # A word number of 0 (or a missing one) means the frame is not tied
      # to a particular word of the synset.
      # NOTE(review): the word number indexes #words directly, as before;
      # WordNet's own w_num is documented as 1-based hexadecimal -- confirm
      # how the database converter stores it.
      word = (wnum && wnum > 0) ? self.words[ wnum ] : nil

      verbFrames.push( word ? "#{sentence} (#{word})" : sentence )
    }
  }

  return verbFrames
end
|
719
|
+
|
720
|
+
|
721
|
+
### Traversal iterator: Iterates depth-first over a particular
|
722
|
+
### +type+ of the receiver, and all of the pointed-to synset's
|
723
|
+
### pointers. If called with a block, the block is called once for each
|
724
|
+
### synset with the +foundSyn+ and its +depth+ in relation to the
|
725
|
+
### originating synset as arguments. The first call will be the
|
726
|
+
### originating synset with a depth of +0+ unless +includeOrigin+ is
|
727
|
+
### +false+. If the +callback+ returns +true+, the traversal is halted,
|
728
|
+
### and the method returns immediately. This method returns an Array of
|
729
|
+
### the synsets which were traversed if no block is given, or a flag
|
730
|
+
### which indicates whether or not the traversal was interrupted if a
|
731
|
+
### block is given.
|
732
|
+
def traverse( type, includeOrigin=true )
  raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol" unless
    type.kind_of?( String ) || type.kind_of?( Symbol )

  raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type." unless
    self.respond_to?( type )

  traversalFunc = nil

  # Recursive depth-first worker. +path+ holds the keys of the synsets
  # between the origin and +syn+; it is used to refuse to re-enter a synset
  # that is already being traversed, so cyclic pointer types (antonyms,
  # similar_to, ...) terminate instead of recursing until the stack
  # overflows. Synsets reachable by several distinct paths are still
  # visited once per path, exactly as before.
  # Returns [ traversed synsets, halt flag ].
  traversalFunc = Proc.new {|syn, newDepth, path|

    # Set once the caller's block asks for the traversal to stop
    haltFlag = false

    # Call the block if it exists and we're either past the origin or
    # including it. Only a literal +true+ halts the traversal.
    if block_given? && (newDepth > 0 || includeOrigin)
      res = yield( syn, newDepth )
      haltFlag = true if res.is_a? TrueClass
    end

    # Synsets seen in this branch, starting with the current one
    subSyns = []
    subSyns.push( syn ) unless newDepth == 0 && !includeOrigin

    # Recurse into each pointed-to synset, stopping as soon as the block
    # has indicated we should abort by returning +true+.
    unless haltFlag
      branch = path + [ syn.key ]

      syn.send( type ).each {|subSyn|
        # Already on the current path: following it again would loop
        next if branch.include?( subSyn.key )

        subSubSyns, haltFlag = traversalFunc.call( subSyn, newDepth + 1, branch )
        subSyns.concat( subSubSyns )
        break if haltFlag
      }
    end

    [ subSyns, haltFlag ]
  }

  traversedSets, haltFlag = traversalFunc.call( self, 0, [] )

  # With a block, report whether it halted the traversal; without one,
  # return every synset that was traversed.
  if block_given?
    return haltFlag
  else
    return traversedSets
  end
end
|
789
|
+
|
790
|
+
|
791
|
+
### Returns the distance in pointers between the receiver and +otherSynset+
|
792
|
+
### using +type+ as the search path.
|
793
|
+
def distance( type, otherSynset )
  found_at = nil

  self.traverse( type ) do |candidate, level|
    # Keep walking until the target shows up; returning true halts the walk.
    next unless candidate == otherSynset
    found_at = level
    true
  end

  # Still nil when the target was never reached
  found_at
end
|
804
|
+
|
805
|
+
|
806
|
+
### Recursively searches all of the receiver's pointers of the specified
|
807
|
+
### +type+ for +otherSynset+, returning +true+ if it is found.
|
808
|
+
def search( type, otherSynset )
  # The block's true/false result doubles as the traversal's halt signal,
  # so the value returned here says whether the target was found.
  self.traverse( type ) do |candidate, _depth|
    candidate == otherSynset
  end
end
|
813
|
+
|
814
|
+
|
815
|
+
### Union: Return the least general synset that the receiver and
|
816
|
+
### +otherSynset+ have in common as a hypernym, or nil if it doesn't share
|
817
|
+
### any.
|
818
|
+
def |( otherSyn )
  # Everything reachable from the receiver through hypernym pointers
  ancestors = self.traverse( :hypernyms )
  shared = nil

  # Walk the other synset's hypernyms and stop at the first one that is
  # also among the receiver's.
  otherSyn.traverse( :hypernyms ) do |candidate, _depth|
    next unless ancestors.include?( candidate )
    shared = candidate
    true
  end

  shared
end
|
835
|
+
|
836
|
+
|
837
|
+
### Returns the pointers in this synset's pointerlist as an +Array+
|
838
|
+
def pointers
  @mutex.synchronize( Sync::SH ) do
    # Parse the raw pointer list only while no parsed pointers are held;
    # the write happens under the exclusive lock.
    if @pointers.empty?
      @mutex.synchronize( Sync::EX ) do
        @pointers = @pointerlist.split( SUB_DELIM_RE ).collect do |raw|
          Pointer::parse( raw )
        end
      end
    end

    @pointers
  end
end
|
848
|
+
|
849
|
+
|
850
|
+
### Set the pointers in this synset's pointerlist to +newPointers+
|
851
|
+
def pointers=( *newPointers )
  # Assignment syntax (`syn.pointers = list`) delivers the list wrapped in
  # the splat Array, and callers may hand over per-type sub-lists; flatten
  # so that both the cached pointers and the raw pointer list are built from
  # the individual pointers rather than from nested Arrays.
  ptrs = newPointers.flatten

  @mutex.synchronize( Sync::EX ) {
    @pointerlist = ptrs.collect {|ptr| ptr.to_s }.join( SUB_DELIM )
    @pointers = ptrs
  }
end
|
857
|
+
|
858
|
+
|
859
|
+
### Returns the synset's pointers in a Hash keyed by their type.
|
860
|
+
def pointer_map
  # Bucket the pointers by type, keeping them in their original order.
  by_type = {}
  self.pointers.each do |ptr|
    bucket = (by_type[ ptr.type ] ||= [])
    bucket << ptr
  end
  by_type
end
|
867
|
+
|
868
|
+
|
869
|
+
|
870
|
+
#########
|
871
|
+
protected
|
872
|
+
#########
|
873
|
+
|
874
|
+
### Returns an Array of synset objects for the receiver's pointers of the
|
875
|
+
### specified +type+.
|
876
|
+
def fetch_synset_pointers( type, subtype=nil )
  found = nil

  @mutex.synchronize( Sync::SH ) do
    # Keep the pointers of the requested type (and subtype, when given) ...
    matching = self.pointers.select do |ptr|
      ptr.type == type && (subtype.nil? || ptr.subtype == subtype)
    end

    # ... then resolve each pointer's synset key through the lexicon.
    keys = matching.map {|ptr| ptr.synset }
    found = keys.map {|key| @lexicon.lookup_synsets_by_key( key ) }
  end

  found.flatten
end
|
894
|
+
|
895
|
+
|
896
|
+
### Sets the receiver's synset pointers for the specified +type+ to
|
897
|
+
### the specified +synsets+.
|
898
|
+
def set_synset_pointers( type, synsets, subtype=nil )
# Replaces the pointer-map entry for +type+ with +synsets+ and writes the
# resulting map back through #pointers=.
# NOTE(review): +subtype+ is accepted but never used in this method, so a
# subtype-specific call replaces every entry filed under +type+.

# Accept a single object as well as an Array of them
synsets = [ synsets ] unless synsets.is_a?( Array )
pmap = self.pointer_map
# NOTE(review): the given objects are stored as-is; nothing here converts
# them to the pointer objects that the other map entries hold -- confirm
# that #pointers= can serialize them.
pmap[ type ] = synsets
# pmap.values is an Array of per-type Arrays, not a flat list of pointers
self.pointers = pmap.values
end
|
904
|
+
|
905
|
+
|
906
|
+
end # class Synset
|
907
|
+
end # module WordNet
|
908
|
+
|