wordnet 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,430 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # WordNet Lexicon object class
4
+ #
5
+ # == Synopsis
6
+ #
7
+ # lexicon = WordNet::Lexicon.new( dictpath )
8
+ #
9
+ # == Description
10
+ #
11
+ # Instances of this class abstract access to the various databases of the
12
+ # WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
13
+ #
14
+ # == Author
15
+ #
16
+ # Michael Granger <ged@FaerieMUD.org>
17
+ #
18
+ # Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
19
+ #
20
+ # This module is free software. You may use, modify, and/or redistribute this
21
+ # software under the terms of the Perl Artistic License. (See
22
+ # http://language.perl.com/misc/Artistic.html)
23
+ #
24
+ # Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
25
+ # by Dan Brian.
26
+ #
27
+ # == Version
28
+ #
29
+ # $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
30
+ #
31
+
32
+ require 'rbconfig'
33
+ require 'pathname'
34
+ require 'bdb'
35
+ require 'sync'
36
+
37
+ require 'wordnet/constants'
38
+ require 'wordnet/synset'
39
+
40
### Raised when something has gone wrong in the internals of the lexicon.
class WordNet::LexiconError < StandardError
end
43
+
44
### Raised when the object being looked up either doesn't exist or is
### malformed.
class WordNet::LookupError < StandardError
end
47
+
48
### WordNet lexicon class - abstracts access to the WordNet lexical
### databases (the "index", "data" and "morph" BerkeleyDB tables), and
### provides factory methods for looking up and creating new
### WordNet::Synset objects.
class WordNet::Lexicon
  include WordNet::Constants
  include CrossCase if defined?( CrossCase )

  # Subversion Id
  SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $

  # Subversion revision
  SvnRev = %q$Rev: 93 $


  #############################################################
  ### B E R K E L E Y D B C O N F I G U R A T I O N
  #############################################################

  # The default path to the WordNet BerkeleyDB Env: a "ruby-wordnet"
  # directory under Ruby's configured data directory. (RbConfig is used
  # rather than its old Config alias, which later Rubies removed.)
  DEFAULT_DB_ENV = File::join( RbConfig::CONFIG['datadir'], "ruby-wordnet" )

  # Options for the creation of the Env object
  ENV_OPTIONS = {
    :set_timeout   => 50,
    :set_lk_detect => 1,
    :set_verbose   => false,
    :set_lk_max    => 3000,
  }

  # Flags for the creation of the Env object (read-write and read-only)
  ENV_FLAGS_RW = BDB::CREATE|BDB::INIT_TRANSACTION|BDB::RECOVER|BDB::INIT_MPOOL
  ENV_FLAGS_RO = BDB::INIT_MPOOL


  #############################################################
  ### I N S T A N C E M E T H O D S
  #############################################################

  ### Create a new WordNet::Lexicon object that will read its data from
  ### the given +dbenv+ (a BerkeleyDB env directory). The database will be
  ### opened with the specified +mode+, which can either be a numeric
  ### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
  ### Re-raises any StandardError from opening the environment or its
  ### tables with the env path prepended to the message.
  def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
    @mode = normalize_mode( mode )

    # Format the normalized (numeric) mode; the raw +mode+ may be a Symbol,
    # which "%04o" cannot format.
    debug_msg "Mode is: %04o" % [ @mode ]

    if self.readonly?
      debug_msg "Using readonly flags"
      envflags = ENV_FLAGS_RO
      dbflags  = 0
    else
      debug_msg "Using read/write flags"
      envflags = ENV_FLAGS_RW
      dbflags  = BDB::CREATE
    end

    debug_msg "Env flags are: %0s, dbflags are %0s" %
      [ envflags.to_s(2), dbflags.to_s(2) ]

    begin
      @env      = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
      @index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
      @data_db  = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
      @morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
    rescue StandardError => err
      # Interpolate rather than use a format string so a '%' in the path
      # can't break the message.
      msg = "Error while opening Ruby-WordNet data files: #{dbenv}: #{err.message}"
      raise err, msg, err.backtrace
    end
  end



  ######
  public
  ######

  # The BDB::Env object which contains the wordnet lexicon's databases.
  attr_reader :env

  # The handle to the index table
  attr_reader :index_db

  # The handle to the synset data table
  attr_reader :data_db

  # The handle to the morph table
  attr_reader :morph_db


  ### Returns +true+ if the lexicon was opened in read-only mode, i.e. the
  ### owner-write bit (0200) of the mode is not set.
  def readonly?
    return ( @mode & 0200 ).zero?
  end


  ### Returns +true+ if the lexicon was opened in read-write mode.
  def readwrite?
    return !self.readonly?
  end


  ### Close the lexicon's database environment
  def close
    @env.close if @env
  end


  ### Checkpoint the database. (BerkeleyDB-specific). The +bytes+ and
  ### +minutes+ thresholds are forwarded to BDB::Env#checkpoint; the
  ### defaults of 0 request an unconditional checkpoint.
  def checkpoint( bytes=0, minutes=0 )
    @env.checkpoint( bytes, minutes )
  end


  ### Remove any archival logfiles for the lexicon's database
  ### environment. (BerkeleyDB-specific). Does nothing unless the lexicon
  ### was opened read-write.
  def clean_logs
    return unless self.readwrite?
    self.archlogs.each do |logfile|
      File::chmod( 0777, logfile )
      File::delete( logfile )
    end
  end


  ### Returns an integer of the familiarity/polysemy count for +word+ as a
  ### +part_of_speech+, or +nil+ if the word isn't in the index. Note that
  ### polysemy can be identified for a given word by counting the synsets
  ### returned by #lookup_synsets. The +polyCount+ argument is accepted for
  ### compatibility but is not used.
  def familiarity( word, part_of_speech, polyCount=nil )
    entry = @index_db[ self.make_word_key(word, part_of_speech) ]
    return nil unless entry
    return entry.split( WordNet::SUB_DELIM_RE ).length
  end


  ### Look up synsets (WordNet::Synset objects) matching +word+ as a
  ### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
  ### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
  ### +sense+, returns an Array of all matches that are a +part_of_speech+.
  ### If +sense+ (1-based) is specified, only the single synset object for
  ### that sense is returned. Returns +nil+ if the word can't be found
  ### (even after morphological conversion) or has no such sense.
  def lookup_synsets( word, part_of_speech, sense=nil )
    pos = self.make_pos( part_of_speech )

    # Look up the index entry, trying first the word as given, and if
    # that fails, trying morphological conversion.
    entry = @index_db[ self.make_word_key(word, part_of_speech) ]
    if entry.nil? && (base = self.morph( word, part_of_speech ))
      entry = @index_db[ self.make_word_key(base, part_of_speech) ]
    end

    # If the lookup failed both ways, just abort
    return nil unless entry

    # Make synset keys from the entry, narrowing it to just the sense
    # requested if one was specified.
    synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }

    if sense
      # A sense of 0 would otherwise wrap around to the last entry.
      key = sense > 0 ? synkeys[ sense - 1 ] : nil
      return nil unless key

      # Normalize through an Array so a single Synset comes back whether
      # or not the splat-return in #lookup_synsets_by_key collapsed it.
      return [ lookup_synsets_by_key(key) ].flatten.first
    else
      return [ lookup_synsets_by_key(*synkeys) ].flatten
    end
  end


  ### Returns the WordNet::Synset objects corresponding to the +keys+
  ### specified. The +keys+ are made up of the target synset's "offset"
  ### and syntactic category catenated together with a '%' character.
  ### Raises a WordNet::LookupError if any key isn't in the data table.
  def lookup_synsets_by_key( *keys )
    synsets = keys.collect do |key|
      data = @data_db[ key ] or
        raise WordNet::LookupError,
          "Failed lookup of synset '#{key}': No such synset"

      offset, part_of_speech = key.split( /%/, 2 )
      WordNet::Synset::new( self, offset, part_of_speech, nil, data )
    end

    # NOTE: the splat is kept from the original. Ruby 1.8 collapses a
    # one-element result to the bare object (and an empty one to nil);
    # later Rubies return the Array. #lookup_synsets normalizes the result.
    return *synsets
  end
  alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key


  ### Returns a form of +word+ as a part of speech +part_of_speech+, as
  ### found in the WordNet morph files. The #lookup_synsets method performs
  ### morphological conversion automatically, so a call to #morph is not
  ### required.
  def morph( word, part_of_speech )
    return @morph_db[ self.make_word_key(word, part_of_speech) ]
  end


  ### Returns the result of looking up +word+ in the inverse of the WordNet
  ### morph files. _(This is undocumented in Lingua::Wordnet)_
  def reverse_morph( word )
    @morph_db.invert[ word ]
  end


  ### Returns an Array of the index keys that start with +text+ (empty if
  ### +text+ is empty or nothing matches).
  def grep( text )
    return [] if text.empty?

    words = []

    # Grab a cursor into the database and fetch while the key starts with
    # the target text. The comparison is a literal prefix test, so regexp
    # metacharacters in +text+ have no special meaning, and a nil record
    # (the end of the table) ends the scan.
    cursor = @index_db.cursor
    begin
      rec = cursor.set_range( text )
      while rec && rec[0][ 0, text.length ] == text
        words.push rec[0]
        rec = cursor.next
      end
    ensure
      cursor.close
    end

    return words
  end


  ### Factory method: Creates and returns a new WordNet::Synset object in
  ### this lexicon for the specified +word+ and +part_of_speech+.
  def create_synset( word, part_of_speech )
    return WordNet::Synset::new( self, '', part_of_speech, word )
  end
  alias_method :new_synset, :create_synset


  ### Store the specified +synset+ (a WordNet::Synset object) in the
  ### lexicon, assigning it a new offset first if it doesn't have a real
  ### one yet (offset == 1). Returns the offset of the stored synset.
  def store_synset( synset )

    # Start a transaction
    @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|

      # If this is a new synset, generate an offset for it
      if synset.offset == 1
        synset.offset =
          (datadb['offsetcount'] = datadb['offsetcount'].to_i + 1)
      end

      # Write the data entry
      datadb[ synset.key ] = synset.serialize

      # Index entries hold offsets as delimited strings
      offset = synset.offset.to_s

      # Write the index entries
      txn.begin( BDB::TXN_COMMIT, @index_db ) do |subtxn,indexdb|

        # Make a word/part-of-speech index key for each word in the
        # synset, the same way lookups do.
        synset.words.each do |word|
          wordkey = self.make_word_key( word, synset.pos )
          entry = indexdb[ wordkey ]

          # If the index already has this word, but not this synset,
          # add it. The entry is rebuilt and assigned back because
          # changing the fetched String would not update the table.
          if entry
            offsets = entry.split( SUB_DELIM_RE )
            unless offsets.include?( offset )
              indexdb[ wordkey ] = ( offsets << offset ).join( SUB_DELIM )
            end
          else
            indexdb[ wordkey ] = offset
          end
        end
      end # transaction on @index_db
    end # transaction on @data_db

    return synset.offset
  end


  ### Remove the specified +synset+ (a WordNet::Synset object) from the
  ### lexicon. Returns +nil+ if the synset was never stored (offset == 1),
  ### and +true+ otherwise.
  def remove_synset( synset )
    # If it's not in the database (ie., doesn't have a real offset),
    # just return.
    return nil if synset.offset == 1

    # Index entries hold offsets as strings, so compare as a string
    offset = synset.offset.to_s

    # Start a transaction on the data table
    @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|

      # First remove the index entries for this synset by iterating
      # over each of its words
      txn.begin( BDB::TXN_COMMIT, @index_db ) do |subtxn,indexdb|
        synset.words.each do |word|
          wordkey = self.make_word_key( word, synset.pos )
          entry = indexdb[ wordkey ]

          # If the index contains an entry for this word, either
          # splice out the offset for the synset being deleted if
          # there are more than one, or just delete the whole
          # entry if it's the only one.
          if entry
            remaining = entry.split( SUB_DELIM_RE ).
              reject {|off| off == offset }

            if remaining.empty?
              indexdb.delete( wordkey )
            else
              indexdb[ wordkey ] = remaining.join( SUB_DELIM )
            end
          end
        end
      end

      # :TODO: Delete synset from pointers of related synsets

      # Delete the synset from the main db, under the same key
      # #store_synset wrote it with.
      datadb.delete( synset.key )
    end

    return true
  end


  #########
  protected
  #########

  ### Normalize various ways of specifying a part of speech into the
  ### WordNet part of speech indicator from the +original+ representation,
  ### which may be the name (e.g., "noun"); +nil+, in which case it
  ### defaults to the indicator for a noun; or the indicator character
  ### itself, in which case it is returned unmodified. Returns +nil+ for
  ### anything else.
  def make_pos( original )
    return WordNet::Noun if original.nil?

    osym = original.to_s.intern
    if WordNet::SYNTACTIC_CATEGORIES.key?( osym )
      return WordNet::SYNTACTIC_CATEGORIES[ osym ]
    end

    return original if SYNTACTIC_SYMBOLS.key?( original )
    return nil
  end


  ### Make a lexicon key out of the given +word+ and part of speech
  ### (+pos+): whitespace runs in the word become underscores, and the
  ### normalized part of speech is appended after a '%'.
  def make_word_key( word, pos )
    indicator = self.make_pos( pos )
    return "#{word.gsub( /\s+/, '_' )}%#{indicator}"
  end


  ### Return a list of archival logfiles that can be removed
  ### safely. (BerkeleyDB-specific).
  def archlogs
    return @env.log_archive( BDB::ARCH_ABS )
  end


  #######
  private
  #######

  ### Turn the given +origmode+ into an octal file mode such as that
  ### given to File.open. Symbolic modes are masked with the process
  ### umask; integer modes are returned as given. Raises an ArgumentError
  ### for anything else.
  def normalize_mode( origmode )
    case origmode
    when :readonly
      0444 & ~File.umask
    when :readwrite, :writable
      0666 & ~File.umask
    when Integer
      origmode
    else
      raise ArgumentError, "unrecognized mode %p" % [origmode]
    end
  end


  ### Output the given +msg+ to STDERR if $DEBUG is turned on.
  def debug_msg( *msg )
    return unless $DEBUG
    $stderr.puts msg
  end


end # class WordNet::Lexicon
430
+
@@ -0,0 +1,908 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # WordNet synonym-set object class
4
+ #
5
+ # == Synopsis
6
+ #
7
+ # ss = lexicon.lookup_synsets( "word", WordNet::Noun, 1 )
8
+ # puts "Definition: %s" % ss.gloss
9
+ # coords = ss.coordinates
10
+ #
11
+ # == Description
12
+ #
13
+ # Instances of this class encapsulate the data for a synonym set ('synset') in a
14
+ # Wordnet lexical database. A synonym set is a set of words that are
15
+ # interchangeable in some context.
16
+ #
17
+ # == Author
18
+ #
19
+ # Michael Granger <ged@FaerieMUD.org>
20
+ #
21
+ # Copyright (c) 2002-2008 The FaerieMUD Consortium. All rights reserved.
22
+ #
23
+ # This module is free software. You may use, modify, and/or redistribute this
24
+ # software under the terms of the Perl Artistic License. (See
25
+ # http://language.perl.com/misc/Artistic.html)
26
+ #
27
+ # Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
28
+ # by Dan Brian.
29
+ #
30
+ # == Version
31
+ #
32
+ # $Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
33
+ #
34
+
35
+ require 'sync'
36
+ require 'wordnet/constants'
37
+
38
+ module WordNet
39
+
40
### Raised for errors internal to a Synset.
class SynsetError < StandardError
end
42
+
43
+ ### "Synonym set" class - encapsulates the data for a set of words in the
44
+ ### lexical database that are interchangeable in some context, and provides
45
+ ### methods for accessing its relationships.
46
+ class Synset
47
+ include WordNet::Constants
48
+ include CrossCase if defined?( CrossCase )
49
+
50
+ # Subversion ID
51
+ SVNId = %q$Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
52
+
53
+ # Subversion Rev
54
+ SVNRev = %q$Rev: 90 $
55
+
56
# The "pointer" type that encapsulates relationships between one synset
# and another.
class Pointer
  include WordNet::Constants
  include CrossCase if defined?( CrossCase )

  #########################################################
  ### C L A S S M E T H O D S
  #########################################################

  ### Make a single WordNet::Synset::Pointer object out of the given
  ### +pointerString+, which is in the form:
  ###   "<pointer_symbol> <synset_offset>%<pos> <source/target>"
  ### The first two characters of the last field become the source word
  ### number and the next two the target word number.
  def self::parse( pointerString )
    type, offsetPos, ptrNums = pointerString.split( /\s+/ )
    offset, pos = offsetPos.split( /%/, 2 )
    new( type, offset, pos, ptrNums[0,2], ptrNums[2,2] )
  end


  #########################################################
  ### I N S T A N C E M E T H O D S
  #########################################################

  ### Create a new synset pointer with the given arguments. The
  ### +type+ is the type of the link between synsets, and must be
  ### either a key or a value of WordNet::Constants::POINTER_TYPES. The
  ### +offset+ is the unique identifier of the target synset, and
  ### +pos+ is its part-of-speech, which must be either a key or value
  ### of WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
  ### +target_wn+ are numerical values which distinguish lexical and
  ### semantic pointers. +source_wn+ indicates the word number in the
  ### current (source) synset, and +target_wn+ indicates the word
  ### number in the target synset. If both are 0 (the default) it
  ### means that the pointer type of the pointer represents a semantic
  ### relation between the current (source) synset and the target
  ### synset indicated by +offset+.
  ###
  ### Raises an ArgumentError for an unknown pointer type or part of
  ### speech, and a RuntimeError for an unknown two-character subtype.
  def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )

    # Allow type = '!', 'antonym', or :antonym. Also handle
    # splitting of compound pointers (e.g., :memberMeronym / '%m')
    # into their correct type/subtype parts.
    @type = @subtype = nil
    typestr = type.to_s

    if typestr.length == 1
      @type = POINTER_SYMBOLS[ typestr ]

    elsif typestr.length == 2
      @type = POINTER_SYMBOLS[ typestr[0,1] ]
      raise "No known subtypes for '%s'" % [@type] unless
        POINTER_SUBTYPES.key?( @type )

      # Reverse lookup of the subtype name by its symbol. Done with
      # #find rather than Hash#index, which newer Rubies removed.
      found = POINTER_SUBTYPES[ @type ].find {|_name, sym| sym == type }
      @subtype = found && found.first
      raise "Unknown subtype '%s' for '%s'" % [ type, @type ] unless
        @subtype

    else
      if POINTER_TYPES.key?( typestr.to_sym )
        @type = typestr.to_sym
      elsif /([a-z]+)([A-Z][a-z]+)/ =~ typestr
        subtype, maintype = $1, $2.downcase
        @type = maintype.to_sym if
          POINTER_TYPES.key?( maintype.to_sym )
        @subtype = subtype.to_sym
      end
    end

    raise ArgumentError, "No such pointer type %p" % [ type ] if
      @type.nil?

    # Allow pos = 'n', 'noun', or :noun
    @part_of_speech = nil
    if pos.to_s.length == 1
      @part_of_speech = SYNTACTIC_SYMBOLS[ pos ]
    else
      @part_of_speech = pos.to_sym if
        SYNTACTIC_CATEGORIES.key?( pos.to_sym )
    end
    raise ArgumentError, "No such part of speech %p" % [ pos ] if
      @part_of_speech.nil?

    # Other attributes
    @offset = offset
    @source_wn = source_wn
    @target_wn = target_wn
  end


  ######
  public
  ######

  # The type of the pointer. Will be one of the keys of
  # WordNet::POINTER_TYPES (e.g., :meronym).
  attr_accessor :type

  # The subtype of the pointer, if any. Will be one of the keys of one
  # of the hashes in POINTER_SUBTYPES (e.g., :portion).
  attr_accessor :subtype

  # The offset of the target synset
  attr_accessor :offset

  # The part-of-speech of the target synset. Will be one of the keys
  # of WordNet::SYNTACTIC_CATEGORIES.
  attr_accessor :part_of_speech

  # The word number in the source synset
  attr_accessor :source_wn

  # The word number in the target synset
  attr_accessor :target_wn


  ### Return the Pointer as a human-readable String suitable for
  ### debugging.
  def inspect
    kind = @subtype ? "#@type(#@subtype)" : @type
    return "#<%s:0x%08x %s %s>" %
      [ self.class.name, self.object_id, kind, self.synset ]
  end


  ### Return the synset key of the target synset (i.e.,
  ### <offset>%<pos symbol>).
  def synset
    self.offset + "%" + self.pos
  end


  ### Return the syntactic category symbol for this pointer
  def pos
    return SYNTACTIC_CATEGORIES[ @part_of_speech ]
  end


  ### Return the pointer type symbol for this pointer
  def type_symbol
    return POINTER_SUBTYPES[ @type ][ @subtype ] if @subtype
    return POINTER_TYPES[ @type ]
  end


  ### Comparison operator. Pointers are considered equivalent if they
  ### have the same offset and are of the same type.
  def ==( other )
    return false unless other.is_a?( self.class )
    other.offset == self.offset &&
      other.type == self.type
  end


  ### Return the pointer in its stringified form (the same form .parse
  ### accepts). The offset is written as-is rather than through "%d",
  ### which would read a zero-padded String offset as octal and drop
  ### its padding.
  def to_s
    "%s %s%%%s %02x%02x" % [
      self.type_symbol,
      self.offset,
      self.pos,
      word_number( self.source_wn ),
      word_number( self.target_wn ),
    ]
  end


  #######
  private
  #######

  ### Return the word number +value+ as an Integer: Integers pass
  ### through, anything else (e.g., the two-character strings .parse
  ### passes in) is read as hexadecimal text.
  def word_number( value )
    return value if value.is_a?( Integer )
    return value.to_s.hex
  end

end # class Pointer
224
+
225
+
226
+ #############################################################
227
+ ### C L A S S M E T H O D S
228
+ #############################################################
229
+
230
### Define a group of pointer methods based on +symbol+ that will fetch,
### add, and delete pointer synsets of the type indicated. If no pointer
### type corresponding to the given +symbol+ is found, a variant without
### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
### create methods called #antonyms and #antonyms=, but will fetch
### pointers of type :antonym). If the pointer type has subtypes
### (according to WordNet::POINTER_SUBTYPES), accessors/mutators named
### "<subtype>_<symbol>" will be generated for the subtypes as well.
### Raises an ArgumentError if neither form is a known pointer type.
def self::def_pointer_methods( symbol ) # :nodoc:
  name = symbol.to_s
  singular = name.sub( /s$/, '' ).to_sym
  $stderr.puts '-' * 50,
    ">>> defining pointer methods for %p" % [symbol] if $DEBUG

  if POINTER_TYPES.key?( symbol )
    type = symbol
  elsif POINTER_TYPES.key?( singular )
    type = singular
  else
    raise ArgumentError, "Unknown pointer type %p" % [ symbol ]
  end

  # Define the accessor and mutator for the main pointer type. These are
  # defined exactly once; the mutator always uses a nil subtype.
  $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
  define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
  define_method( "#{name}=".to_sym ) do |*synsets|
    self.set_synset_pointers( type, synsets, nil )
  end

  # Types without subtypes are done at this point.
  unless POINTER_SUBTYPES.key?( type )
    $stderr.puts "No subtype for %s (subtype = %p)" %
      [ name, nil ] if $DEBUG
    return
  end

  # Define a reader and a mutator for each subtype of the pointer type.
  POINTER_SUBTYPES[ type ].keys.each do |subtype|
    varname = [ subtype, name ].join( '_' )

    $stderr.puts "Defining reader for #{varname}" if $DEBUG
    define_method( varname ) do
      self.fetch_synset_pointers( type, subtype )
    end

    $stderr.puts "Defining mutator for #{varname}" if $DEBUG
    define_method( "#{varname}=" ) do |*synsets|
      self.set_synset_pointers( type, synsets, subtype )
    end
  end
end
292
+
293
+
294
+ #############################################################
295
+ ### I N S T A N C E M E T H O D S
296
+ #############################################################
297
+
298
### Create a new Synset object in the specified +lexicon+ for the
### specified +word+ and part of speech (+pos+, which must be a key of
### SYNTACTIC_SYMBOLS). If +data+ is specified, the synset takes the given
### +offset+ and its other object data is split out of +data+; otherwise
### the synset is a new one with the placeholder offset 1, +word+ as its
### wordlist, and empty strings for everything else. This method
### shouldn't be called directly: you should use one of the Lexicon
### class's factory methods: #create_synset, #lookup_synsets, or
### #lookup_synsetsByOffset.
###
### Raises an ArgumentError if +lexicon+ is nil/false or +pos+ is unknown.
def initialize( lexicon, offset, pos, word=nil, data=nil )
  @lexicon = lexicon or
    raise ArgumentError, "%p is not a WordNet::Lexicon" % [ lexicon ]
  @part_of_speech = SYNTACTIC_SYMBOLS[ pos ] or
    raise ArgumentError, "No such part of speech %p" % [ pos ]
  @mutex = Sync::new
  @pointers = []

  if data
    @offset = offset.to_i

    # Fields missing from the end of the record default to (distinct)
    # empty strings, the same as for a new synset.
    fields = data.split( DELIM_RE )
    @filenum, @wordlist, @pointerlist, @frameslist, @gloss =
      (0..4).collect {|i| fields[i] || '' }
  else
    @offset = 1
    @wordlist = word || ''

    # One String literal per attribute: [''] * 4 would make all four
    # share a single String, so an in-place change to one (e.g., with
    # <<) would show up in the others.
    @filenum, @pointerlist, @frameslist, @gloss = '', '', '', ''
  end
end
322
+
323
+
324
+ ######
325
+ public
326
+ ######
327
+
328
# The WordNet::Lexicon this synset was created with.
attr_reader :lexicon

# The syntactic category of this Synset: the value the constructor looked
# up in SYNTACTIC_SYMBOLS, which #pos in turn maps through
# SYNTACTIC_CATEGORIES.
attr_accessor :part_of_speech

# The original byte offset of the synset in the data file; when combined
# with the part of speech it acts as the unique identifier of this Synset
# in the database (see #key).
attr_accessor :offset

# The number corresponding to the lexicographer file name containing the
# synset. Calling #lexInfo will return the actual filename. See the
# "System Description" of wngloss(7WN) for more info about this.
attr_accessor :filenum

# The raw list of word/lex_id pairs associated with this synset. Each
# word and lex_id is separated by a '%' character, and each pair is
# delimited with a '|'. E.g., the wordlist for "animal" is:
#   "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
attr_accessor :wordlist

# The raw list of pointers to related synsets. E.g., the pointerlist for
# "mourning dove" is:
#   "@ 01731700%n 0000|#m 01733452%n 0000"
attr_accessor :pointerlist

# The raw list of verb sentence frames for this synset.
attr_accessor :frameslist

# The definition and/or example sentences for the Synset.
attr_accessor :gloss
361
+
362
+
363
### Return a human-readable representation of the Synset suitable for
### debugging: class, id, offset, words, part of speech, gloss, and a
### count of pointers per pointer type.
def inspect
  counts = self.pointer_map.collect do |ptrtype, ptrs|
    "#{ptrtype}s: #{ptrs.length}"
  end
  pointer_counts = counts.join( ", " )

  details = [
    self.class.name,
    self.object_id * 2,
    self.offset,
    self.words.join(", "),
    self.part_of_speech,
    self.gloss,
    pointer_counts,
  ]

  return %q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % details
end
380
+
381
+
382
### Returns the Synset's unique identifier: its offset (as a decimal
### integer) and the result of #pos, joined with a '%' symbol.
def key
  return format( "%d%%%s", self.offset, self.pos )
end
387
+
388
+
389
### The SYNTACTIC_CATEGORIES entry for this synset's part of speech; this
### is the part-of-speech component used in the synset's #key.
def pos
  SYNTACTIC_CATEGORIES[ @part_of_speech ]
end
394
+
395
+
396
### Return the parts of this synset's gloss as an Array, splitting it on
### semicolons (and any whitespace around them). The gloss is a
### definition of the synset, and optionally one or more example
### sentences.
def glosses
  separator = /\s*;\s*/
  self.gloss.split( separator )
end
402
+
403
+
404
### Returns true if +otherSyn+ is a WordNet::Synset with the same offset
### as the receiver.
def ==( otherSyn )
  otherSyn.kind_of?( WordNet::Synset ) && self.offset == otherSyn.offset
end
410
+
411
+
412
+
413
### Returns an Array of the words and/or collocations in this synset's
### wordlist, with underscores turned into spaces and everything from the
### '%' on removed from each. Alias: +synonyms+.
def words
  @mutex.synchronize( Sync::SH ) do
    entries = self.wordlist.split( SUB_DELIM_RE )
    entries.collect {|entry| entry.tr( '_', ' ' ).sub( /%.*$/, '' ) }
  end
end
alias synonyms words
423
+
424
+
425
### Replace this synset's wordlist with +newWords+, joined together with
### the sub-delimiter.
def words=( *newWords )
  @mutex.synchronize( Sync::EX ) do
    @wordlist = newWords.join( SUB_DELIM )
  end
end
431
+
432
+
433
### Add the specified +newWords+ to this synset's wordlist, skipping any
### that #words already returns.
def add_words( *newWords )
  @mutex.synchronize( Sync::EX ) do
    self.words = self.words | newWords
  end
end
440
+
441
+
442
### Remove each of the specified +oldWords+ from this synset's wordlist.
def delete_words( *oldWords )
  @mutex.synchronize( Sync::EX ) do
    self.words = self.words - oldWords
  end
end
449
+
450
+
451
### Return the synset as a string made up of its words, its part of
### speech, and its gloss. Alias: +overview+.
def to_s
  @mutex.synchronize( Sync::SH ) do
    names = self.words.join(", ").gsub( /%\d/, '' ).gsub( /_/, ' ' )
    "#{names} [#{self.part_of_speech}] -- (#{self.gloss})"
  end
end
alias overview to_s
459
+
460
+
461
### Write the synset to the database by handing it to its lexicon's
### #store_synset. If the object passes out of scope before this is
### called, any changes made to it are lost. Alias: +write+.
def store
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.store_synset( self )
  end
end
alias write store
470
+
471
+
472
### Remove this synset from the database by handing it to its lexicon's
### #remove_synset.
def remove
  @mutex.synchronize( Sync::EX ) do
    self.lexicon.remove_synset( self )
  end
end
478
+
479
+
480
### Returns the synset's data in a form suitable for storage in the
### lexicon's database: the filenum, wordlist, pointerlist, frameslist
### and gloss, in that order, joined with WordNet::DELIM.
def serialize
  @mutex.synchronize( Sync::SH ) do
    fields = [ @filenum, @wordlist, @pointerlist, @frameslist, @gloss ]
    fields.join( WordNet::DELIM )
  end
end
493
+
494
+
495
+ ### Auto-generate synset pointer methods for the various types
496
+
497
+ # The synsets for the receiver's antonyms (opposites). E.g.,
498
+ # $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
499
+ # ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
500
+ # from cloudiness; allowing light to pass through; "clear water";
501
+ # "clear plastic bags"; "clear glass"; "the air is clear and
502
+ # clean"" (similar_tos: 6, attributes: 1, derivations: 2,
503
+ # antonyms: 1, see_alsos: 1)>]
504
+ def_pointer_methods :antonyms
505
+
506
+ # Synsets for the receiver's entailments (a verb X entails Y if X cannot
507
+ # be done unless Y is or has been done). E.g.,
508
+ # $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
509
+ # ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
510
+ # with pressure; "rub my hands"; "rub oil into her skin""
511
+ # (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
512
+ # see_alsos: 4)>]
513
+ def_pointer_methods :entailment
514
+
515
+ # Get/set synsets for the receiver's cause pointers (a verb X causes Y
516
+ # to happen).
517
+ def_pointer_methods :causes
518
+
519
+ # Get/set synsets for the receiver's verb groups. Verb groups link verbs
520
+ # with similar senses together.
521
+ def_pointer_methods :verb_groups
522
+
523
+ # Get/set list of synsets for the receiver's "similar to" pointers. This
524
+ # type of pointer links together head adjective synsets with its
525
+ # satellite adjective synsets.
526
+ def_pointer_methods :similar_to
527
+
528
+ # Get/set synsets for the receiver's participles. Participles are
529
+ # non-finite forms of a verb; used adjectivally and to form compound
530
+ # tenses. For example, the first participle for "working" is:
531
+ # "function, work, operate, go, run (verb)"
532
+ def_pointer_methods :participles
533
+
534
+ # Get/set synsets for the receiver's pertainyms. Pertainyms are
535
+ # relational adjectives. Adjectives that are pertainyms are usually
536
+ # defined by such phrases as "of or pertaining to" and do not have
537
+ # antonyms. A pertainym can point to a noun or another pertainym.
538
+ def_pointer_methods :pertainyms
539
+
540
+ # Get/set synsets for the receiver's attributes.
541
+ def_pointer_methods :attributes
542
+
543
+ # Get/set synsets for the receiver's derived_from.
544
+ def_pointer_methods :derived_from
545
+
546
+ # Get/set synsets for the receiver's derivations.
547
+ def_pointer_methods :derivations
548
+
549
+ # Get/set synsets for the receiver's see_also.
550
+ def_pointer_methods :see_also
551
+
552
+
553
+ # Auto-generate types with subtypes
554
+
555
+ # Synsets for the receiver's hypernyms (more-general terms). E.g.,
556
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
557
+ # ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
558
+ # stick that is larger at one end; "he carried a club in self
559
+ # defense"; "he felt as if he had been hit with a club""
560
+ # (derivations: 1, hypernyms: 1, hyponyms: 7)>]
561
+ #
562
+ # Also generates accessors for subtypes:
563
+ #
564
+ # [instance_hypernyms]
565
+ # A proper noun that refers to a particular, unique referent (as
566
+ # distinguished from nouns that refer to classes).
567
+ def_pointer_methods :hypernyms
568
+
569
+
570
+ # :TODO: Generate an example for this
571
+
572
+ # Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
573
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
574
+ # ==> [...]
575
+ #
576
+ # The specific term used to designate a member of a class. X is a
577
+ # hyponym of Y if X is a (kind of) Y.
578
+ # Also generates accessors for subtypes:
579
+ #
580
+ # [instance_hyponyms]
581
+ # A proper noun that refers to a particular, unique referent (as
582
+ # distinguished from nouns that refer to classes).
583
+ def_pointer_methods :hyponyms
584
+
585
+
586
+ # Get/set synsets for the receiver's meronyms. In addition to the
587
+ # general accessors for all meronyms, there are also accessors for
588
+ # subtypes as well:
589
+ #
590
+ # [member_meronyms]
591
+ # Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
592
+ # relation).
593
+ # [stuff_meronyms]
594
+ # Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
595
+ # relation).
596
+ # [portion_meronyms]
597
+ # Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
598
+ # relation).
599
+ # [component_meronyms]
600
+ # Get/set synsets for the receiver's "component" meronyms (HAS
601
+ # COMPONENT relation).
602
+ # [feature_meronyms]
603
+ # Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
604
+ # relation).
605
+ # [phase_meronyms]
606
+ # Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
607
+ # relation).
608
+ # [place_meronyms]
609
+ # Get/set synsets for the receiver's "place" meronyms (HAS PLACE
610
+ # relation).
611
+ def_pointer_methods :meronyms
612
+
613
+ # Get/set synsets for the receiver's holonyms. In addition to the
614
+ # general accessors for all holonyms, there are also accessors for
615
+ # subtypes as well:
616
+ #
617
+ # [member_holonyms]
618
+ # Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
619
+ # relation).
620
+ # [stuff_holonyms]
621
+ # Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
622
+ # relation).
623
+ # [portion_holonyms]
624
+ # Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
625
+ # OF relation).
626
+ # [component_holonyms]
627
+ # Get/set synsets for the receiver's "component" holonyms (IS A
628
+ # COMPONENT OF relation).
629
+ # [feature_holonyms]
630
+ # Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
631
+ # OF relation).
632
+ # [phase_holonyms]
633
+ # Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
634
+ # relation).
635
+ # [place_holonyms]
636
+ # Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
637
+ # relation).
638
+ def_pointer_methods :holonyms
639
+
640
+ # Get/set synsets for the receiver's topical domain members. In addition
641
+ # to the general members accessor, there are also accessors for
642
+ # membership subtypes:
643
+ #
644
+ # [category_members]
645
+ # Get/set synsets for the receiver's
646
+ # "category" topical domain members.
647
+ # [region_members]
648
+ # Get/set synsets for the receiver's "region"
649
+ # topical domain members.
650
+ # [usage_members]
651
+ # Get/set synsets for the receiver's "usage"
652
+ # topical domain members.
653
+ def_pointer_methods :members
654
+
655
+ # Get/set synsets for the receiver's topical domains. In addition to
656
+ # the general domains accessor, there are also accessors for domain
657
+ # subtypes:
658
+ #
659
+ # [category_domains]
660
+ # Get/set synsets for the receiver's
661
+ # "category" topical domain domains.
662
+ # [region_domains]
663
+ # Get/set synsets for the receiver's "region"
664
+ # topical domain domains.
665
+ # [usage_domains]
666
+ # Get/set synsets for the receiver's "usage"
667
+ # topical domain domains.
668
+ def_pointer_methods :domains
669
+
670
+
671
+ ### Returns an Array of the coordinate sisters of the receiver.
672
def coordinates
  # Collect the hyponym list of each hypernym, then merge the per-hypernym
  # lists into a single flat Array.
  sister_lists = self.hypernyms.map do |parent|
    parent.hyponyms
  end

  return sister_lists.flatten
end
677
+
678
+
679
+ ### Return the name of the "lexicographer's file" associated with this
680
+ ### synset.
681
def lex_info
  @mutex.synchronize( Sync::SH ) do
    # The file number is coerced with #to_i before it is used as the
    # LEXFILES index.
    file_index = self.filenum.to_i
    return LEXFILES[ file_index ]
  end
end
686
+
687
+
688
+ ### Sets the "lexicographer's file" association for this synset to
689
+ ### +id+. The value in +id+ should correspond to one of the values in
690
+ ### #WordNet::LEXFILES
691
def lexInfo=( id )
  # A negative Integer counts back from the end when LEXFILES is indexed like
  # an Array, so it would pass a plain truthiness check even though it is not
  # a real lexicographer file number. Reject it explicitly.
  negative = id.is_a?( Integer ) && id < 0
  raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES" if
    negative || !LEXFILES[ id ]

  # Store the new file number while holding the lock in exclusive mode.
  @mutex.synchronize( Sync::EX ) {
    self.filenum = id
  }
end
698
+
699
+
700
+ ### Returns an +Array+ of verb frame +String+s for the synset.
701
def frames
  frarray = self.frameslist.split( WordNet::SUB_DELIM_RE )
  verbFrames = []

  @mutex.synchronize( Sync::SH ) {
    frarray.each {|fr|
      # Each entry is a whitespace-separated "frame-number word-number"
      # pair. String#split returns Strings, so convert both before they
      # are compared with 0 or used as indexes. A missing word number
      # becomes 0 (nil.to_i), i.e. no word-specific suffix.
      fnum, wnum = fr.split
      fnum = fnum.to_i
      wnum = wnum.to_i

      if wnum > 0
        # NOTE(review): 0 appears to be reserved for "no particular word",
        # which suggests word numbers may be 1-based while Array indexes
        # are 0-based -- confirm against the stored frame format.
        wordtext = " (" + self.words[ wnum ] + ")"
        verbFrames.push VERB_SENTS[ fnum ] + wordtext
      else
        verbFrames.push VERB_SENTS[ fnum ]
      end
    }
  }

  return verbFrames
end
719
+
720
+
721
+ ### Traversal iterator: Iterates depth-first over a particular
722
+ ### +type+ of the receiver, and all of the pointed-to synset's
723
+ ### pointers. If called with a block, the block is called once for each
724
+ ### synset with the +foundSyn+ and its +depth+ in relation to the
725
+ ### originating synset as arguments. The first call will be the
726
+ ### originating synset with a depth of +0+ unless +includeOrigin+ is
727
+ ### +false+. If the +callback+ returns +true+, the traversal is halted,
728
+ ### and the method returns immediately. This method returns an Array of
729
+ ### the synsets which were traversed if no block is given, or a flag
730
+ ### which indicates whether or not the traversal was interrupted if a
731
+ ### block is given.
732
def traverse( type, includeOrigin=true )
  raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol" unless
    type.kind_of?( String ) || type.kind_of?( Symbol )

  raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type." unless
    self.respond_to?( type )

  # Synsets on the path from the origin down to the synset currently being
  # visited. A pointed-to synset that is already on this path would send the
  # recursion around a cycle without end (e.g. two synsets that point at each
  # other), so such synsets are skipped. Traversals of acyclic pointer graphs
  # are unaffected: a synset reachable by several distinct paths is still
  # visited once per path.
  path = []

  # Recursive walker. Returns a two-element Array: the synsets visited at
  # and below +syn+, and a flag telling whether the caller's block asked for
  # the traversal to stop.
  traversalFunc = lambda {|syn, newDepth|
    haltFlag = false
    subSyns  = []

    # The origin is only reported/collected if +includeOrigin+ is set;
    # everything below it always is.
    if newDepth > 0 || includeOrigin
      if block_given?
        res = yield( syn, newDepth )
        # Only a literal +true+ halts the traversal; any other return value
        # (including other truthy objects) lets it continue.
        haltFlag = true if res.is_a?( TrueClass )
      end
      subSyns.push( syn )
    end

    # Descend into each synset the current one points to, stopping as soon
    # as a nested call reports that the block returned +true+.
    unless haltFlag
      path.push( syn )
      syn.send( type ).each {|subSyn|
        next if path.include?( subSyn )

        subSubSyns, haltFlag = traversalFunc.call( subSyn, newDepth + 1 )
        subSyns.concat( subSubSyns )
        break if haltFlag
      }
      path.pop
    end

    [ subSyns, haltFlag ]
  }

  traversedSets, haltFlag = traversalFunc.call( self, 0 )

  # With a block: report whether the block halted the traversal.
  # Without a block: return every synset that was traversed.
  return block_given? ? haltFlag : traversedSets
end
789
+
790
+
791
+ ### Returns the distance in pointers between the receiver and +otherSynset+
792
+ ### using +type+ as the search path.
793
def distance( type, otherSynset )
  found_at = nil

  # Walk the +type+ pointers; at the first synset equal to +otherSynset+,
  # remember its depth and return true from the block to stop the traversal.
  self.traverse( type ) do |candidate, hops|
    next unless candidate == otherSynset
    found_at = hops
    true
  end

  # Still nil if no traversed synset matched.
  found_at
end
804
+
805
+
806
+ ### Recursively searches all of the receiver's pointers of the specified
807
+ ### +type+ for +otherSynset+, returning +true+ if it is found.
808
def search( type, otherSynset )
  # The block's value is the result of the equality test, so the traversal
  # stops at the first match; traverse's return value is passed through.
  self.traverse( type ) do |candidate, _depth|
    candidate == otherSynset
  end
end
813
+
814
+
815
+ ### Union: Return the least general synset that the receiver and
816
+ ### +otherSyn+ have in common as a hypernym, or nil if they don't share
817
+ ### any.
818
def |( otherSyn )
  # Everything reachable from the receiver via :hypernyms (called without a
  # block, traverse returns the traversed synsets).
  own_chain = self.traverse( :hypernyms )
  shared = nil

  # Walk the other synset's hypernyms and stop at the first one that also
  # appears in the receiver's chain.
  otherSyn.traverse( :hypernyms ) do |candidate, _depth|
    next unless own_chain.include?( candidate )
    shared = candidate
    true
  end

  shared
end
835
+
836
+
837
+ ### Returns the pointers in this synset's pointerlist as an +Array+
838
def pointers
  @mutex.synchronize( Sync::SH ) do
    # Parse the raw pointer list only while the cached Array is empty; the
    # cache is replaced while holding the lock in exclusive mode.
    if @pointers.empty?
      @mutex.synchronize( Sync::EX ) do
        raw_entries = @pointerlist.split( SUB_DELIM_RE )
        @pointers = raw_entries.map {|pstr| Pointer::parse( pstr ) }
      end
    end

    @pointers
  end
end
848
+
849
+
850
+ ### Set the pointers in this synset's pointerlist to +newPointers+
851
def pointers=( *newPointers )
  # An assignment-style call (obj.pointers = list) always delivers its
  # right-hand side as ONE argument, so the splat wraps a passed Array in
  # another Array. Flatten the arguments so that both the serialized list
  # and the cache are built from the individual pointers, whether they
  # arrive as an Array, as nested Arrays, or as separate arguments.
  flattened = newPointers.flatten

  @mutex.synchronize( Sync::EX ) {
    @pointerlist = flattened.collect {|ptr| ptr.to_s }.join( SUB_DELIM )
    @pointers = flattened
  }
end
857
+
858
+
859
+ ### Returns the synset's pointers in a Hash keyed by their type.
860
def pointer_map
  by_type = {}

  # Append each pointer to the list kept under its type, creating the list
  # the first time a type is seen.
  self.pointers.each do |ptr|
    ( by_type[ ptr.type ] ||= [] ) << ptr
  end

  return by_type
end
867
+
868
+
869
+
870
+ #########
871
+ protected
872
+ #########
873
+
874
+ ### Returns an Array of synset objects for the receiver's pointers of the
875
+ ### specified +type+.
876
def fetch_synset_pointers( type, subtype=nil )
  found = nil

  @mutex.synchronize( Sync::SH ) do
    # Keep the pointers of the requested type; when a subtype was given it
    # has to match as well.
    wanted = self.pointers.select do |ptr|
      ptr.type == type && ( subtype.nil? || ptr.subtype == subtype )
    end

    # Each matching pointer carries a synset key, which is looked up via the
    # lexicon.
    keys = wanted.map {|ptr| ptr.synset }
    found = keys.map {|key| @lexicon.lookup_synsets_by_key( key ) }
  end

  return found.flatten
end
894
+
895
+
896
+ ### Sets the receiver's synset pointers for the specified +type+ to
897
+ ### the specified +synsets+.
898
def set_synset_pointers( type, synsets, subtype=nil )
  # NOTE(review): +subtype+ is accepted but never consulted below.
  # A lone object is wrapped so the map always holds an Array per type.
  replacements = synsets.is_a?( Array ) ? synsets : [ synsets ]

  # Swap in the replacement list for +type+, leaving the entries of every
  # other type as they are.
  # NOTE(review): the given objects are stored as-is, with no conversion to
  # pointer objects here, and Hash#values hands the setter an Array of
  # Arrays -- confirm that is what the pointers= setter expects.
  by_type = self.pointer_map
  by_type[ type ] = replacements
  self.pointers = by_type.values
end
904
+
905
+
906
+ end # class Synset
907
+ end # module WordNet
908
+