wordnet 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,430 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # WordNet Lexicon object class
4
+ #
5
+ # == Synopsis
6
+ #
7
+ # lexicon = WordNet::Lexicon.new( dictpath )
8
+ #
9
+ # == Description
10
+ #
11
+ # Instances of this class abstract access to the various databases of the
12
+ # WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
13
+ #
14
+ # == Author
15
+ #
16
+ # Michael Granger <ged@FaerieMUD.org>
17
+ #
18
+ # Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
19
+ #
20
+ # This module is free software. You may use, modify, and/or redistribute this
21
+ # software under the terms of the Perl Artistic License. (See
22
+ # http://language.perl.com/misc/Artistic.html)
23
+ #
24
+ # Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
25
+ # by Dan Brian.
26
+ #
27
+ # == Version
28
+ #
29
+ # $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
30
+ #
31
+
32
+ require 'rbconfig'
33
+ require 'pathname'
34
+ require 'bdb'
35
+ require 'sync'
36
+
37
+ require 'wordnet/constants'
38
+ require 'wordnet/synset'
39
+
40
+ ### Lexicon exception - something has gone wrong in the internals of the
41
+ ### lexicon.
42
# Raised when something has gone wrong in the internals of the lexicon.
class WordNet::LexiconError < StandardError
end
43
+
44
+ ### Lookup error - the object being looked up either doesn't exist or is
45
+ ### malformed
46
# Raised when the object being looked up does not exist or is malformed.
class WordNet::LookupError < StandardError
end
47
+
48
+ ### WordNet lexicon class - abstracts access to the WordNet lexical
49
+ ### databases, and provides factory methods for looking up and creating new
50
+ ### WordNet::Synset objects.
51
+ class WordNet::Lexicon
52
+ include WordNet::Constants
53
+ include CrossCase if defined?( CrossCase )
54
+
55
+ # Subversion Id
56
+ SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
57
+
58
+ # Subversion revision
59
+ SvnRev = %q$Rev: 93 $
60
+
61
+
62
+ #############################################################
63
+ ### B E R K E L E Y D B C O N F I G U R A T I O N
64
+ #############################################################
65
+
66
+ # The path to the WordNet BerkeleyDB Env. It lives in the directory that
67
+ # this module is in.
68
# Default BerkeleyDB environment directory: "ruby-wordnet" under Ruby's
# configured data directory. RbConfig is used instead of the legacy
# top-level Config alias, which is not available on current Ruby releases.
DEFAULT_DB_ENV = File.join( RbConfig::CONFIG['datadir'], "ruby-wordnet" )
69
+
70
+ # Options for the creation of the Env object
71
# Option hash handed to BDB::Env.new as its third argument when the
# lexicon is opened.
ENV_OPTIONS = {
  :set_timeout   => 50,
  :set_lk_detect => 1,
  :set_verbose   => false,
  :set_lk_max    => 3000
}
77
+
78
+ # Flags for the creation of the Env object (read-write and read-only)
79
+ ENV_FLAGS_RW = BDB::CREATE|BDB::INIT_TRANSACTION|BDB::RECOVER|BDB::INIT_MPOOL
80
+ ENV_FLAGS_RO = BDB::INIT_MPOOL
81
+
82
+
83
+ #############################################################
84
+ ### I N S T A N C E M E T H O D S
85
+ #############################################################
86
+
87
+ ### Create a new WordNet::Lexicon object that will read its data from
88
+ ### the given +dbenv+ (a BerkeleyDB env directory). The database will be
89
+ ### opened with the specified +mode+, which can either be a numeric
90
+ ### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
91
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
  # Opens the BerkeleyDB environment at +dbenv+ and the "index", "data" and
  # "morph" BTREE databases inside it. +mode+ may be a numeric file mode or
  # one of the symbols accepted by #normalize_mode.
  # Raises the original error class, with the data path prepended to its
  # message, if the environment or any database cannot be opened.
  @mode = normalize_mode( mode )

  # Format the normalized numeric mode: the raw +mode+ argument may be a
  # Symbol, which "%o" cannot format.
  debug_msg "Mode is: %04o" % [ @mode ]

  if readonly?
    debug_msg "Using readonly flags"
    envflags = ENV_FLAGS_RO
    dbflags = 0
  else
    debug_msg "Using read/write flags"
    envflags = ENV_FLAGS_RW
    dbflags = BDB::CREATE
  end

  debug_msg "Env flags are: %0s, dbflags are %0s" %
    [ envflags.to_s(2), dbflags.to_s(2) ]

  begin
    @env = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
    @index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
    @data_db = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
    @morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
  rescue StandardError => err
    # Plain interpolation, so a '%' in the path cannot be misread as a
    # format directive.
    msg = "Error while opening Ruby-WordNet data files: #{dbenv}: #{err.message}"
    raise err, msg, err.backtrace
  end
end
122
+
123
+
124
+
125
+ ######
126
+ public
127
+ ######
128
+
129
+ # The BDB::Env object which contains the wordnet lexicon's databases.
130
+ attr_reader :env
131
+
132
+ # The handle to the index table
133
+ attr_reader :index_db
134
+
135
+ # The handle to the synset data table
136
+ attr_reader :data_db
137
+
138
+ # The handle to the morph table
139
+ attr_reader :morph_db
140
+
141
+
142
+ ### Returns +true+ if the lexicon was opened in read-only mode.
143
def readonly?
  # Read-only means the owner-write bit (0200) is clear in the stored mode.
  owner_write_bit = @mode & 0200
  owner_write_bit.zero?
end
146
+
147
+
148
+ ### Returns +true+ if the lexicon was opened in read-write mode.
149
def readwrite?
  # Exact logical inverse of #readonly?.
  return false if readonly?
  true
end
152
+
153
+
154
+ ### Close the lexicon's database environment
155
def close
  # Nothing to do if the environment was never opened.
  return unless @env
  @env.close
end
158
+
159
+
160
+ ### Checkpoint the database. (BerkeleyDB-specific)
161
def checkpoint( bytes=0, minutes=0 )
  # Forward the thresholds to the environment; they used to be accepted
  # but silently ignored.
  # NOTE(review): BerkeleyDB's checkpoint call is believed to take its size
  # threshold in kilobytes -- confirm the unit expected for +bytes+.
  @env.checkpoint( bytes, minutes )
end
164
+
165
+
166
+ ### Remove any archival logfiles for the lexicon's database
167
+ ### environment. (BerkeleyDB-specific).
168
def clean_logs
  # Only a read-write lexicon removes log files. Each file returned by
  # #archlogs is chmod'ed to 0777 and then deleted.
  if readwrite?
    archlogs.each do |path|
      File.chmod( 0777, path )
      File.delete( path )
    end
  end
end
175
+
176
+
177
+ ### Returns an integer of the familiarity/polysemy count for +word+ as a
178
+ ### +part_of_speech+. Note that polysemy can be identified for a given
179
+ ### word by counting the synsets returned by #lookup_synsets.
180
def familiarity( word, part_of_speech, polyCount=nil )
  # The count is the number of delimiter-separated offsets in the index
  # entry for the word; nil when the index has no such entry.
  # +polyCount+ is accepted but not used.
  key = make_word_key( word, part_of_speech )
  if @index_db.key?( key )
    offsets = @index_db[ key ].split( WordNet::SUB_DELIM_RE )
    offsets.length
  else
    nil
  end
end
185
+
186
+
187
+ ### Look up synsets (WordNet::Synset objects) matching +word+ as a
188
+ ### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
189
+ ### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
190
+ ### +sense+, #lookup_synsets will return all matches that are a
191
+ ### +part_of_speech+. If +sense+ is specified, only the synset object that
192
+ ### matches that particular +part_of_speech+ and +sense+ is returned.
193
def lookup_synsets( word, part_of_speech, sense=nil )
  # Looks +word+ up in the index, first as given and then via #morph.
  # Returns nil when neither form has an index entry, or when +sense+
  # (1-based) is outside the range of senses in the entry. Otherwise
  # returns the result of #lookup_synsets_by_key for the requested sense,
  # or a flat Array of all senses when +sense+ is nil.
  pos = make_pos( part_of_speech )
  entry = @index_db[ make_word_key(word, part_of_speech) ]

  # Fall back to morphological conversion without clobbering +word+.
  if entry.nil?
    base_form = morph( word, part_of_speech )
    entry = @index_db[ make_word_key(base_form, part_of_speech) ] if base_form
  end

  # If the lookup failed both ways, just abort
  return nil unless entry

  synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }
  if sense
    # Guard the index: a too-large sense used to pass nil to the key
    # lookup, and 0 or a negative sense silently picked a sense counted
    # from the end of the list.
    return nil unless sense.between?( 1, synkeys.length )
    return lookup_synsets_by_key( synkeys[sense - 1] )
  else
    return [ lookup_synsets_by_key(*synkeys) ].flatten
  end
end
219
+
220
+
221
+ ### Returns the WordNet::Synset objects corresponding to the +keys+
222
+ ### specified. The +keys+ are made up of the target synset's "offset"
223
+ ### and syntactic category catenated together with a '%' character.
224
def lookup_synsets_by_key( *keys )
  # Builds one WordNet::Synset per key from the data table, in the order
  # given. Raises WordNet::LookupError at the first key that is absent.
  found = keys.collect do |key|
    unless @data_db.key?( key )
      raise WordNet::LookupError,
        "Failed lookup of synset '#{key}':No such synset"
    end

    record = @data_db[ key ]
    offset, part_of_speech = key.split( /%/, 2 )
    WordNet::Synset::new( self, offset, part_of_speech, nil, record )
  end

  return *found
end
238
+ alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
239
+
240
+
241
+ ### Returns a form of +word+ as a part of speech +part_of_speech+, as
242
+ ### found in the WordNet morph files. The #lookup_synsets method performs
243
+ ### morphological conversion automatically, so a call to #morph is not
244
+ ### required.
245
def morph( word, part_of_speech )
  # Single lookup in the morph table under the word's lexicon key.
  key = make_word_key( word, part_of_speech )
  @morph_db[ key ]
end
248
+
249
+
250
+ ### Returns the result of looking up +word+ in the inverse of the WordNet
251
+ ### morph files. _(This is undocumented in Lingua::Wordnet)_
252
def reverse_morph( word )
  # Invert the morph table (values become keys) and look +word+ up in it.
  inverted = @morph_db.invert
  inverted[ word ]
end
255
+
256
+
257
+ ### Returns an array of compound words matching +text+.
258
def grep( text )
  # Returns an Array of the index keys that start with +text+, in cursor
  # order. An empty +text+ yields an empty Array.
  return [] if text.empty?

  # Match +text+ literally; it is also used verbatim to position the cursor.
  prefix = /\A#{Regexp.escape( text )}/
  words = []

  cursor = @index_db.cursor
  begin
    rec = cursor.set_range( text )
    # +rec+ is nil when nothing sorts at or after +text+, or once the
    # cursor runs off the end of the table.
    while rec && prefix =~ rec[0]
      words.push rec[0]
      rec = cursor.next
    end
  ensure
    # Release the cursor even if iteration raises.
    cursor.close
  end

  return words
end
275
+
276
+
277
+ ### Factory method: Creates and returns a new WordNet::Synset object in
278
+ ### this lexicon for the specified +word+ and +part_of_speech+.
279
def create_synset( word, part_of_speech )
  # A new synset is built with an empty offset string and no stored data.
  WordNet::Synset.new( self, '', part_of_speech, word )
end
282
+ alias_method :new_synset, :create_synset
283
+
284
+
285
+ ### Store the specified +synset+ (a WordNet::Synset object) in the
286
+ ### lexicon. Returns the offset of the stored synset.
287
def store_synset( synset )
  # Writes +synset+ to the data table and adds its offset to the index
  # entry of each of its words, inside nested transactions on the data and
  # index tables. Returns the synset's offset.
  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn, datadb|

    # An offset of 1 marks a synset that has not been stored yet; give it
    # the next value of the 'offsetcount' counter.
    if synset.offset == 1
      synset.offset =
        (datadb['offsetcount'] = datadb['offsetcount'].to_i + 1)
    end

    # Write the data entry
    datadb[ synset.key ] = synset.serialize

    # Index entries hold offsets as delimiter-separated strings.
    offset = synset.offset.to_s

    txn.begin( BDB::TXN_COMMIT, @index_db ) do |subtxn, indexdb|
      synset.words.each do |word|
        # Use the same key format that lookups use, so words containing
        # whitespace are indexed where #lookup_synsets will find them.
        wordkey = make_word_key( word, synset.pos )

        if indexdb.key?( wordkey )
          offsets = indexdb[ wordkey ].split( SUB_DELIM_RE )
          # Compare whole offsets and assign the result back: appending
          # to the fetched string would never reach the database.
          unless offsets.include?( offset )
            indexdb[ wordkey ] = ( offsets << offset ).join( SUB_DELIM )
          end
        else
          indexdb[ wordkey ] = offset
        end
      end
    end # transaction on @index_db
  end # transaction on @data_db

  return synset.offset
end
323
+
324
+
325
+ ### Remove the specified +synset+ (a WordNet::Synset object) in the
326
+ ### lexicon. Returns +true+, or +nil+ if the synset has no real offset yet.
327
def remove_synset( synset )
  # Removes +synset+ from the index entries of each of its words and then
  # from the data table, inside nested transactions. Returns true, or nil
  # without touching the database when the synset's offset is 1 (the
  # marker for a synset that was never stored).
  return nil if synset.offset == 1

  # Index entries hold offsets as strings, so compare in string form.
  offset = synset.offset.to_s

  @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn, datadb|

    txn.begin( BDB::TXN_COMMIT, @index_db ) do |subtxn, indexdb|
      synset.words.each do |word|
        wordkey = make_word_key( word, synset.pos )
        next unless indexdb.key?( wordkey )

        # Splice this synset's offset out of the entry, or drop the whole
        # entry if it was the only offset. All writes go through the
        # transactional handle +indexdb+.
        remaining = indexdb[ wordkey ].
          split( SUB_DELIM_RE ).
          reject {|off| off == offset }

        if remaining.empty?
          indexdb.delete( wordkey )
        else
          indexdb[ wordkey ] = remaining.join( SUB_DELIM )
        end
      end
    end

    # :TODO: Delete synset from pointers of related synsets

    # Data entries are stored under the synset's key, not its bare offset.
    datadb.delete( synset.key )
  end

  return true
end
366
+
367
+
368
+ #########
369
+ protected
370
+ #########
371
+
372
+ ### Normalize various ways of specifying a part of speech into the
373
+ ### WordNet part of speech indicator from the +original+ representation,
374
+ ### which may be the name (e.g., "noun"); +nil+, in which case it
375
+ ### defaults to the indicator for a noun; or the indicator character
376
+ ### itself, in which case it is returned unmodified.
377
def make_pos( original )
  # nil defaults to the noun indicator. A category name is mapped through
  # SYNTACTIC_CATEGORIES. A value that is already a key of
  # SYNTACTIC_SYMBOLS is returned unchanged. Anything else yields nil.
  return WordNet::Noun if original.nil?

  name = original.to_s.intern
  if WordNet::SYNTACTIC_CATEGORIES.key?( name )
    WordNet::SYNTACTIC_CATEGORIES[ name ]
  elsif SYNTACTIC_SYMBOLS.key?( original )
    original
  else
    nil
  end
end
385
+
386
+
387
+ ### Make a lexicon key out of the given +word+ and part of speech
388
+ ### (+pos+).
389
def make_word_key( word, pos )
  # Key format is "<word>%<pos indicator>", with each run of whitespace in
  # the word replaced by a single underscore.
  indicator = make_pos( pos )
  normalized = word.gsub( /\s+/, '_' )
  "#{normalized}%#{indicator}"
end
394
+
395
+
396
+ ### Return a list of archival logfiles that can be removed
397
+ ### safely. (BerkeleyDB-specific).
398
def archlogs
  # Asks the environment for its archivable log files, passing the
  # BDB::ARCH_ABS flag.
  @env.log_archive( BDB::ARCH_ABS )
end
401
+
402
+
403
+ #######
404
+ private
405
+ #######
406
+
407
+ ### Turn the given +origmode+ into an octal file mode such as that
408
+ ### given to File.open.
409
def normalize_mode( origmode )
  # :readonly becomes 0444 and :readwrite/:writable become 0666, each
  # masked by the process umask. An Integer mode is returned unchanged.
  # Raises ArgumentError for anything else.
  case origmode
  when :readonly
    0444 & ~File.umask
  when :readwrite, :writable
    0666 & ~File.umask
  when Integer
    # Integer instead of Fixnum: it matches every integer value and is
    # available on all Ruby versions.
    origmode
  else
    raise ArgumentError, "unrecognized mode %p" % [origmode]
  end
end
421
+
422
+ ### Output the given +msg+ to STDERR if $DEBUG is turned on.
423
def debug_msg( *msg )
  # Writes each element of +msg+ on its own line to standard error, but
  # only when $DEBUG is set. $stderr is used because the old $deferr
  # global is not defined on current Ruby versions.
  return unless $DEBUG
  $stderr.puts msg
end
427
+
428
+
429
+ end # class WordNet::Lexicon
430
+
@@ -0,0 +1,908 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # WordNet synonym-set object class
4
+ #
5
+ # == Synopsis
6
+ #
7
+ # ss = lexicon.lookup_synsets( "word", WordNet::Noun, 1 )
8
+ # puts "Definition: %s" % ss.gloss
9
+ # coords = ss.coordinates
10
+ #
11
+ # == Description
12
+ #
13
+ # Instances of this class encapsulate the data for a synonym set ('synset') in a
14
+ # Wordnet lexical database. A synonym set is a set of words that are
15
+ # interchangeable in some context.
16
+ #
17
+ # == Author
18
+ #
19
+ # Michael Granger <ged@FaerieMUD.org>
20
+ #
21
+ # Copyright (c) 2002-2008 The FaerieMUD Consortium. All rights reserved.
22
+ #
23
+ # This module is free software. You may use, modify, and/or redistribute this
24
+ # software under the terms of the Perl Artistic License. (See
25
+ # http://language.perl.com/misc/Artistic.html)
26
+ #
27
+ # Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
28
+ # by Dan Brian.
29
+ #
30
+ # == Version
31
+ #
32
+ # $Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
33
+ #
34
+
35
+ require 'sync'
36
+ require 'wordnet/constants'
37
+
38
+ module WordNet
39
+
40
+ ### Synset internal error class
41
+ class SynsetError < StandardError ; end
42
+
43
+ ### "Synonym set" class - encapsulates the data for a set of words in the
44
+ ### lexical database that are interchangeable in some context, and provides
45
+ ### methods for accessing its relationships.
46
+ class Synset
47
+ include WordNet::Constants
48
+ include CrossCase if defined?( CrossCase )
49
+
50
+ # Subversion ID
51
+ SVNId = %q$Id: synset.rb 90 2008-07-09 23:02:53Z deveiant $
52
+
53
+ # Subversion Rev
54
+ SVNRev = %q$Rev: 90 $
55
+
56
+ # The "pointer" type that encapsulates relationships between one synset
57
+ # and another.
58
+ class Pointer
59
+ include WordNet::Constants
60
+ include CrossCase if defined?( CrossCase )
61
+
62
+ #########################################################
63
+ ### C L A S S M E T H O D S
64
+ #########################################################
65
+
66
+ ### Make an Array of WordNet::Synset::Pointer objects out of the
67
+ ### given +pointerList+. The pointerlist is a string of pointers
68
+ ### delimited by Constants::SUB_DELIM. Pointers are in the form:
69
+ ### "<pointer_symbol> <synset_offset>%<pos> <source/target>"
70
def self::parse( pointerString )
  # Splits "<type> <offset>%<pos> <sstt>" on whitespace and builds a new
  # pointer. The last field is cut into two 2-character slices, which are
  # passed on as strings.
  fields = pointerString.split( /\s+/ )
  type, target, word_numbers = fields[0], fields[1], fields[2]
  offset, pos = target.split( /%/, 2 )

  source_wn = word_numbers[ 0, 2 ]
  target_wn = word_numbers[ 2, 2 ]
  new( type, offset, pos, source_wn, target_wn )
end
75
+
76
+
77
+ #########################################################
78
+ ### I N S T A N C E M E T H O D S
79
+ #########################################################
80
+
81
+ ### Create a new synset pointer with the given arguments. The
82
+ ### +ptrType+ is the type of the link between synsets, and must be
83
+ ### either a key or a value of WordNet::Constants::POINTER_TYPES. The
84
+ ### +offset+ is the unique identifier of the target synset, and
85
+ ### +pos+ is its part-of-speech, which must be either a key or value
86
+ ### of WordNet::Constants::SYNTACTIC_CATEGORIES. The +source_wn+ and
87
+ ### +target_wn+ are numerical values which distinguish lexical and
88
+ ### semantic pointers. +source_wn+ indicates the word number in the
89
+ ### current (source) synset, and +target_wn+ indicates the word
90
+ ### number in the target synset. If both are 0 (the default) it
91
+ ### means that the pointer type of the pointer represents a semantic
92
+ ### relation between the current (source) synset and the target
93
+ ### synset indicated by +offset+.
94
def initialize( type, offset, pos=Noun, source_wn=0, target_wn=0 )
  # Resolves +type+ into @type/@subtype and +pos+ into @part_of_speech,
  # then stores +offset+, +source_wn+ and +target_wn+ as given.
  # Raises ArgumentError for an unknown pointer type or part of speech,
  # and RuntimeError for a two-character symbol with no matching subtype.

  # Allow type = '!', 'antonym', or :antonym. Compare in String form so a
  # Symbol and a String spelling behave the same.
  @type = @subtype = nil
  symbol = type.to_s

  case symbol.length
  when 1
    @type = POINTER_SYMBOLS[ symbol ]

  when 2
    # Compound pointer symbol (e.g. '%m'): the first character selects
    # the main type, the full symbol selects the subtype.
    @type = POINTER_SYMBOLS[ symbol[0,1] ]
    raise "No known subtypes for '%s'" % [@type] unless
      POINTER_SUBTYPES.key?( @type )
    # Reverse lookup through #invert: Hash#index is not available on
    # current Ruby versions.
    @subtype = POINTER_SUBTYPES[ @type ].invert[ symbol ]
    raise "Unknown subtype '%s' for '%s'" % [ type, @type ] unless @subtype

  else
    if POINTER_TYPES.key?( type.to_sym )
      @type = type.to_sym
    elsif /([a-z]+)([A-Z][a-z]+)/ =~ symbol
      # camelCase compound name, e.g. :memberMeronym
      subtype, maintype = $1, $2.downcase
      @type = maintype.to_sym if POINTER_TYPES.key?( maintype.to_sym )
      @subtype = subtype.to_sym
    end
  end

  raise ArgumentError, "No such pointer type %p" % [type] if @type.nil?

  # Allow pos = 'n', 'noun', or :noun
  @part_of_speech = nil
  if pos.to_s.length == 1
    @part_of_speech = SYNTACTIC_SYMBOLS[ pos.to_s ]
  elsif SYNTACTIC_CATEGORIES.key?( pos.to_sym )
    @part_of_speech = pos.to_sym
  end
  raise ArgumentError, "No such part of speech %p" % [pos] if
    @part_of_speech.nil?

  # Other attributes
  @offset = offset
  @source_wn = source_wn
  @target_wn = target_wn
end
141
+
142
+
143
+ ######
144
+ public
145
+ ######
146
+
147
+ # The type of the pointer. Will be one of the keys of
148
+ # WordNet::POINTER_TYPES (e.g., :meronym).
149
+ attr_accessor :type
150
+
151
+ # The subtype of the pointer, if any. Will be one of the keys of one
152
+ # of the hashes in POINTER_SUBTYPES (e.g., :portion).
153
+ attr_accessor :subtype
154
+
155
+ # The offset of the target synset
156
+ attr_accessor :offset
157
+
158
+ # The part-of-speech of the target synset. Will be one of the keys
159
+ # of WordNet::SYNTACTIC_CATEGORIES.
160
+ attr_accessor :part_of_speech
161
+
162
+ # The word number in the source synset
163
+ attr_accessor :source_wn
164
+
165
+ # The word number in the target synset
166
+ attr_accessor :target_wn
167
+
168
+
169
+ ### Return the Pointer as a human-readable String suitable for
170
+ ### debugging.
171
+ def inspect
172
+ "#<%s:0x%08x %s %s>" % [
173
+ self.class.name,
174
+ self.object_id,
175
+ @subtype ? "#@type(#@subtype)" : @type,
176
+ self.synset,
177
+ ]
178
+ end
179
+
180
+
181
+ ### Return the synset key of the target synset (i.e.,
182
+ ### <offset>%<pos symbol>).
183
def synset
  # Target key is "<offset>%<pos>", built by String concatenation.
  prefix = offset + "%"
  prefix + pos
end
186
+
187
+
188
+ ### Return the syntactic category symbol for this pointer
189
def pos
  # Looks the stored part of speech up in SYNTACTIC_CATEGORIES.
  SYNTACTIC_CATEGORIES[ @part_of_speech ]
end
192
+
193
+
194
+ ### Return the pointer type symbol for this pointer
195
def type_symbol
  # A pointer with a subtype uses the subtype's symbol; otherwise the
  # symbol of the main pointer type.
  return POINTER_SUBTYPES[ @type ][ @subtype ] if @subtype
  POINTER_TYPES[ @type ]
end
202
+
203
+
204
+ ### Comparison operator. Pointer are equivalent if they point at the
205
+ ### same synset and are of the same type.
206
def ==( other )
  # Equal only to another object of this class (or a subclass) with the
  # same offset and the same pointer type.
  if other.is_a?( self.class )
    other.offset == self.offset && other.type == self.type
  else
    false
  end
end
211
+
212
+
213
+ ### Return the pointer in its stringified form.
214
+ def to_s
215
+ "%s %d%%%s %02x%02x" % [
216
+ ptr.type_symbol,
217
+ ptr.offset,
218
+ ptr.posSymbol,
219
+ ptr.source_wn,
220
+ ptr.target_wn,
221
+ ]
222
+ end
223
+ end # class Pointer
224
+
225
+
226
+ #############################################################
227
+ ### C L A S S M E T H O D S
228
+ #############################################################
229
+
230
+ ### Define a group of pointer methods based on +symbol+ that will fetch,
231
+ ### add, and delete pointer synsets of the type indicated. If no pointer
232
+ ### type corresponding to the given +symbol+ is found, a variant without
233
+ ### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
234
+ ### create methods called #antonyms and #antonyms=, but will fetch
235
+ ### pointers of type :antonym). If the pointer type has subtypes
236
+ ### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
237
+ ### subtypes will be generated as well.
238
def self::def_pointer_methods( symbol ) # :nodoc:
  # Defines a reader and a mutator named after +symbol+ for the matching
  # pointer type (trying +symbol+ itself, then +symbol+ without a trailing
  # 's'), plus "<subtype>_<name>" reader/mutator pairs for every subtype
  # listed in POINTER_SUBTYPES. Raises ArgumentError for an unknown type.
  name = symbol.to_s
  singular = name.sub( /s$/, '' ).to_sym
  $stderr.puts '-' * 50,
    ">>> defining pointer methods for %p" % [symbol] if $DEBUG

  if POINTER_TYPES.key?( symbol )
    type = symbol
  elsif POINTER_TYPES.key?( singular )
    type = singular
  else
    raise ArgumentError, "Unknown pointer type %p" % [symbol]
  end

  # Accessors for the main pointer type (defined once).
  $stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
  define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
  define_method( "#{name}=".to_sym ) do |*synsets|
    self.set_synset_pointers( type, synsets, nil )
  end

  # Accessors for each subtype, if the type has any.
  subtypes = POINTER_SUBTYPES.key?( type ) ? POINTER_SUBTYPES[ type ].keys : []
  subtypes.each do |subtype|
    varname = [ subtype, name ].join( '_' )

    $stderr.puts "Defining reader for #{varname}" if $DEBUG
    define_method( varname ) do
      self.fetch_synset_pointers( type, subtype )
    end

    $stderr.puts "Defining mutator for #{varname}" if $DEBUG
    define_method( "#{varname}=" ) do |*synsets|
      self.set_synset_pointers( type, synsets, subtype )
    end
  end
end
292
+
293
+
294
+ #############################################################
295
+ ### I N S T A N C E M E T H O D S
296
+ #############################################################
297
+
298
+ ### Create a new Synset object in the specified +lexicon+ for the
299
+ ### specified +word+ and +part_of_speech+. If +data+ is specified,
300
+ ### initialize the synset's other object data from it. This method
301
+ ### shouldn't be called directly: you should use one of the Lexicon
302
+ ### class's factory methods: #create_synset, #lookup_synsets, or
303
+ ### #lookup_synsetsByOffset.
304
def initialize( lexicon, offset, pos, word=nil, data=nil )
  # With +data+, the synset's fields are split out of it and +offset+ is
  # converted to an Integer. Without it, the synset gets the placeholder
  # offset 1, +word+ (or an empty string) as its wordlist, and empty
  # strings for the remaining fields.
  # Raises ArgumentError if +lexicon+ is nil/false or +pos+ is not a key
  # of SYNTACTIC_SYMBOLS.
  @lexicon = lexicon
  raise ArgumentError, "%p is not a WordNet::Lexicon" % [lexicon] unless @lexicon
  @part_of_speech = SYNTACTIC_SYMBOLS[ pos ]
  raise ArgumentError, "No such part of speech %p" % [pos] unless @part_of_speech
  @mutex = Sync::new
  @pointers = []

  if data
    @offset = offset.to_i
    @filenum, @wordlist, @pointerlist,
      @frameslist, @gloss = data.split( DELIM_RE )
  else
    @offset = 1
    @wordlist = word || ''
    # Give every field its own String: sharing one object between them
    # would let an in-place edit of one field change the others.
    @filenum = ''
    @pointerlist = ''
    @frameslist = ''
    @gloss = ''
  end
end
322
+
323
+
324
+ ######
325
+ public
326
+ ######
327
+
328
+ # The WordNet::Lexicon that was used to look up this synset
329
+ attr_reader :lexicon
330
+
331
+ # The syntactic category of this Synset. Will be one of "n" (noun), "v"
332
+ # (verb), "a" (adjective), "r" (adverb), or "s" (other).
333
+ attr_accessor :part_of_speech
334
+
335
+ # The original byte offset of the synset in the data file; acts as the
336
+ # unique identifier (when combined with #part_of_speech) of this Synset in
337
+ # the database.
338
+ attr_accessor :offset
339
+
340
+ # The number corresponding to the lexicographer file name containing the
341
+ # synset. Calling #lexInfo will return the actual filename. See the
342
+ # "System Description" of wngloss(7WN) for more info about this.
343
+ attr_accessor :filenum
344
+
345
+ # The raw list of word/lex_id pairs associated with this synset. Each
346
+ # word and lex_id is separated by a '%' character, and each pair is
347
+ # delimited with a '|'. E.g., the wordlist for "animal" is:
348
+ # "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
349
+ attr_accessor :wordlist
350
+
351
+ # The list of raw pointers to related synsets. E.g., the pointerlist for
352
+ # "mourning dove" is:
353
+ # "@ 01731700%n 0000|#m 01733452%n 0000"
354
+ attr_accessor :pointerlist
355
+
356
+ # The list of raw verb sentence frames for this synset.
357
+ attr_accessor :frameslist
358
+
359
+ # Definition and/or example sentences for the Synset.
360
+ attr_accessor :gloss
361
+
362
+
363
+ ### Return a human-readable representation of the Synset suitable for
364
+ ### debugging.
365
+ def inspect
366
+ pointer_counts = self.pointer_map.collect {|type,ptrs|
367
+ "#{type}s: #{ptrs.length}"
368
+ }.join( ", " )
369
+
370
+ %q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % [
371
+ self.class.name,
372
+ self.object_id * 2,
373
+ self.offset,
374
+ self.words.join(", "),
375
+ self.part_of_speech,
376
+ self.gloss,
377
+ pointer_counts,
378
+ ]
379
+ end
380
+
381
+
382
+ ### Returns the Synset's unique identifier, made up of its offset and
383
+ ### syntactic category catenated together with a '%' symbol.
384
def key
  # "<offset as decimal integer>%<pos>"
  format( "%d%%%s", offset, pos )
end
387
+
388
+
389
+ ### The symbol which represents this synset's syntactic category. Will
390
+ ### be one of "n", "v", "a", "r", or "s".
391
def pos
  # Maps the stored part of speech through SYNTACTIC_CATEGORIES.
  SYNTACTIC_CATEGORIES[ @part_of_speech ]
end
394
+
395
+
396
+ ### Return each of the sentences of the gloss for this synset as an
397
+ ### array. The gloss is a definition of the synset, and optionally one
398
+ ### or more example sentences.
399
def glosses
  # Pieces of the gloss are separated by semicolons, with any surrounding
  # whitespace treated as part of the separator.
  separator = /\s*;\s*/
  gloss.split( separator )
end
402
+
403
+
404
+ ### Returns true if the receiver and otherSyn are identical according to
405
+ ### their offsets.
406
def ==( otherSyn )
  # Only another WordNet::Synset can be equal, and only the offsets are
  # compared.
  otherSyn.kind_of?( WordNet::Synset ) && self.offset == otherSyn.offset
end
410
+
411
+
412
+
413
+ ### Returns an Array of words and/or collocations associated with this
414
+ ### synset.
415
def words
  # Under a shared lock, split the raw wordlist and clean each entry:
  # underscores become spaces and everything from the first '%' on (the
  # lex_id suffix) is dropped.
  @mutex.synchronize( Sync::SH ) do
    wordlist.split( SUB_DELIM_RE ).collect do |entry|
      entry.tr( '_', ' ' ).sub( /%.*$/, '' )
    end
  end
end
422
+ alias_method :synonyms, :words
423
+
424
+
425
+ ### Set the words in this synset's wordlist to +newWords+
426
def words=( *newWords )
  # Under an exclusive lock, replace the raw wordlist with +newWords+
  # joined by the sub-delimiter.
  @mutex.synchronize( Sync::EX ) do
    joined = newWords.join( SUB_DELIM )
    @wordlist = joined
  end
end
431
+
432
+
433
+ ### Add the specified +newWords+ to this synset's wordlist. Alias:
434
+ ### +add_words+.
435
def add_words( *newWords )
  # Under an exclusive lock, assign the union of the current words and
  # +newWords+ back through the words= setter.
  @mutex.synchronize( Sync::EX ) do
    self.words = words | newWords
  end
end
440
+
441
+
442
+ ### Delete the specified +oldWords+ from this synset's wordlist. Alias:
443
+ ### +delete_words+.
444
def delete_words( *oldWords )
  # Under an exclusive lock, assign the current words minus +oldWords+
  # back through the words= setter.
  @mutex.synchronize( Sync::EX ) do
    self.words = words - oldWords
  end
end
449
+
450
+
451
+ ### Return the synset as a string. Alias: +overview+.
452
+ def to_s
453
+ @mutex.synchronize( Sync::SH ) {
454
+ wordlist = self.words.join(", ").gsub( /%\d/, '' ).gsub( /_/, ' ' )
455
+ return "#{wordlist} [#{self.part_of_speech}] -- (#{self.gloss})"
456
+ }
457
+ end
458
+ alias_method :overview, :to_s
459
+
460
+
461
+ ### Writes any changes made to the object to the database and updates all
462
+ ### affected synset data and indexes. If the object passes out of scope
463
+ ### before #write is called, the changes are lost.
464
def store
  # Under an exclusive lock, hand this synset to its lexicon for storage.
  @mutex.synchronize( Sync::EX ) do
    lexicon.store_synset( self )
  end
end
469
+ alias_method :write, :store
470
+
471
+
472
+ ### Removes this synset from the database.
473
def remove
  # Under an exclusive lock, ask the lexicon to remove this synset.
  @mutex.synchronize( Sync::EX ) do
    lexicon.remove_synset( self )
  end
end
478
+
479
+
480
+ ### Returns the synset's data in a form suitable for storage in the
481
+ ### lexicon's database.
482
def serialize
  # Under a shared lock, join the five raw fields with WordNet::DELIM, in
  # the same order that #initialize splits them.
  @mutex.synchronize( Sync::SH ) do
    fields = [ @filenum, @wordlist, @pointerlist, @frameslist, @gloss ]
    return fields.join( WordNet::DELIM )
  end
end
493
+
494
+
495
+ ### Auto-generate synset pointer methods for the various types
496
+
497
+ # The synsets for the receiver's antonyms (opposites). E.g.,
498
+ # $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
499
+ # ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
500
+ # from cloudiness; allowing light to pass through; "clear water";
501
+ # "clear plastic bags"; "clear glass"; "the air is clear and
502
+ # clean"" (similar_tos: 6, attributes: 1, derivations: 2,
503
+ # antonyms: 1, see_alsos: 1)>]
504
+ def_pointer_methods :antonyms
505
+
506
+ # Synsets for the receiver's entailments (a verb X entails Y if X cannot
507
+ # be done unless Y is or has been done). E.g.,
508
+ # $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
509
+ # ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
510
+ # with pressure; "rub my hands"; "rub oil into her skin""
511
+ # (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
512
+ # see_alsos: 4)>]
513
+ def_pointer_methods :entailment
514
+
515
+ # Get/set synsets for the receiver's cause pointers (a verb X causes Y
516
+ # to happen).
517
+ def_pointer_methods :causes
518
+
519
+ # Get/set synsets for the receiver's verb groups. Verb groups link verbs
520
+ # with similar senses together.
521
+ def_pointer_methods :verb_groups
522
+
523
+ # Get/set list of synsets for the receiver's "similar to" pointers. This
524
+ # type of pointer links together head adjective synsets with its
525
+ # satellite adjective synsets.
526
+ def_pointer_methods :similar_to
527
+
528
+ # Get/set synsets for the receiver's participles. Participles are
529
+ # non-finite forms of a verb; used adjectivally and to form compound
530
+ # tenses. For example, the first participle for "working" is:
531
+ # "function, work, operate, go, run (verb)"
532
+ def_pointer_methods :participles
533
+
534
+ # Get/set synsets for the receiver's pertainyms. Pertainyms are
535
+ # relational adjectives. Adjectives that are pertainyms are usually
536
+ # defined by such phrases as "of or pertaining to" and do not have
537
+ # antonyms. A pertainym can point to a noun or another pertainym.
538
+ def_pointer_methods :pertainyms
539
+
540
+ # Get/set synsets for the receiver's attributes.
541
+ def_pointer_methods :attributes
542
+
543
+ # Get/set synsets for the receiver's derived_from.
544
+ def_pointer_methods :derived_from
545
+
546
+ # Get/set synsets for the receiver's derivations.
547
+ def_pointer_methods :derivations
548
+
549
+ # Get/set synsets for the receiver's see_also.
550
+ def_pointer_methods :see_also
551
+
552
+
553
+ # Auto-generate types with subtypes
554
+
555
+ # Synsets for the receiver's hypernyms (more-general terms). E.g.,
556
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
557
+ # ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
558
+ # stick that is larger at one end; "he carried a club in self
559
+ # defense"; "he felt as if he had been hit with a club""
560
+ # (derivations: 1, hypernyms: 1, hyponyms: 7)>]
561
+ #
562
+ # Also generates accessors for subtypes:
563
+ #
564
+ # [instance_hypernyms]
565
+ # A proper noun that refers to a particular, unique referent (as
566
+ # distinguished from nouns that refer to classes).
567
+ def_pointer_methods :hypernyms
568
+
569
+
570
+ # :TODO: Generate an example for this
571
+
572
+ # Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
573
+ # $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
574
+ # ==> [...]
575
+ #
576
+ # A hyponym is the specific term used to designate a member of a class: X
577
+ # is a hyponym of Y if X is a (kind of) Y.
578
+ # Also generates accessors for subtypes:
579
+ #
580
+ # [instance_hyponyms]
581
+ # A proper noun that refers to a particular, unique referent (as
582
+ # distinguished from nouns that refer to classes).
583
+ def_pointer_methods :hyponyms
584
+
585
+
586
+ # Get/set synsets for the receiver's meronyms. In addition to the
587
+ # general accessors for all meronyms, there are also accessors for
588
+ # subtypes as well:
589
+ #
590
+ # [member_meronyms]
591
+ # Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
592
+ # relation).
593
+ # [stuff_meronyms]
594
+ # Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
595
+ # relation).
596
+ # [portion_meronyms]
597
+ # Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
598
+ # relation).
599
+ # [component_meronyms]
600
+ # Get/set synsets for the receiver's "component" meronyms (HAS
601
+ # COMPONENT relation).
602
+ # [feature_meronyms]
603
+ # Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
604
+ # relation).
605
+ # [phase_meronyms]
606
+ # Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
607
+ # relation).
608
+ # [place_meronyms]
609
+ # Get/set synsets for the receiver's "place" meronyms (HAS PLACE
610
+ # relation).
611
+ def_pointer_methods :meronyms
612
+
613
+ # Get/set synsets for the receiver's holonyms. In addition to the
614
+ # general accessors for all holonyms, there are also accessors for
615
+ # subtypes as well:
616
+ #
617
+ # [member_holonyms]
618
+ # Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
619
+ # relation).
620
+ # [stuff_holonyms]
621
+ # Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
622
+ # relation).
623
+ # [portion_holonyms]
624
+ # Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
625
+ # OF relation).
626
+ # [component_holonyms]
627
+ # Get/set synsets for the receiver's "component" holonyms (IS A
628
+ # COMPONENT OF relation).
629
+ # [feature_holonyms]
630
+ # Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
631
+ # OF relation).
632
+ # [phase_holonyms]
633
+ # Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
634
+ # relation).
635
+ # [place_holonyms]
636
+ # Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
637
+ # relation).
638
+ def_pointer_methods :holonyms
639
+
640
+ # Get/set synsets for the receiver's topical domain members. In addition
641
+ # to the general members accessor, there are also accessors for
642
+ # membership subtypes:
643
+ #
644
+ # [category_members]
645
+ # Get/set synsets for the receiver's
646
+ # "category" topical domain members.
647
+ # [region_members]
648
+ # Get/set synsets for the receiver's "region"
649
+ # topical domain members.
650
+ # [usage_members]
651
+ # Get/set synsets for the receiver's "usage"
652
+ # topical domain members.
653
+ def_pointer_methods :members
654
+
655
+ # Get/set synsets for the receiver's topical domains. In addition
656
+ # to the general domains accessor, there are also accessors for
657
+ # domain subtypes:
658
+ #
659
+ # [category_domains]
660
+ # Get/set synsets for the receiver's
661
+ # "category" topical domains.
662
+ # [region_domains]
663
+ # Get/set synsets for the receiver's "region"
664
+ # topical domains.
665
+ # [usage_domains]
666
+ # Get/set synsets for the receiver's "usage"
667
+ # topical domains.
668
+ def_pointer_methods :domains
669
+
670
+
671
### Returns an Array of the coordinate sisters of the receiver: the
### hyponyms of each of the receiver's hypernyms, gathered into a single
### flattened Array.
def coordinates
  sisters = self.hypernyms.map do |parent|
    parent.hyponyms
  end

  return sisters.flatten
end
677
+
678
+
679
### Return the name of the "lexicographer's file" associated with this
### synset, i.e. the entry of LEXFILES found at the synset's file number
### (converted with #to_i). The lookup is done under a shared lock.
def lex_info
  @mutex.synchronize( Sync::SH ) do
    index = self.filenum.to_i
    return LEXFILES[ index ]
  end
end
686
+
687
+
688
### Associate this synset with the "lexicographer's file" identified by
### +id+, which must be an index that has an entry in WordNet::LEXFILES.
### Raises an ArgumentError if LEXFILES has no entry for +id+; otherwise
### the synset's file number is set to +id+ under an exclusive lock.
def lexInfo=( id )
  if !LEXFILES[ id ]
    raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES"
  end

  @mutex.synchronize( Sync::EX ) do
    self.filenum = id
  end
end
698
+
699
+
700
### Returns an +Array+ of verb frame +String+s for the synset.
###
### The synset's frames list is split on WordNet::SUB_DELIM_RE, and each
### entry is split on whitespace into a frame number and a word number. The
### frame number selects a sentence from VERB_SENTS. If the word number is
### greater than zero, the corresponding entry of #words is appended to the
### sentence in parentheses; otherwise the bare sentence is used.
def frames
  entries = self.frameslist.split( WordNet::SUB_DELIM_RE )
  verb_frames = []

  @mutex.synchronize( Sync::SH ) do
    entries.each do |entry|
      # String#split returns Strings, which can neither be compared with 0
      # nor used as Array indexes, so convert both fields to Integers. A
      # missing field (nil) becomes 0 via nil.to_i.
      fnum, wnum = entry.split
      fnum = fnum.to_i
      wnum = wnum.to_i

      sentence = VERB_SENTS[ fnum ]

      if wnum > 0
        # NOTE(review): the word number is used directly as the index into
        # #words, as in the original code. If the stored word numbers are
        # 1-based this is off by one -- confirm against the data converter.
        verb_frames.push( sentence + " (" + self.words[wnum] + ")" )
      else
        verb_frames.push( sentence )
      end
    end
  end

  return verb_frames
end
719
+
720
+
721
### Traversal iterator: Iterates depth-first over a particular pointer
### +type+ of the receiver, and over the same pointer type of every synset
### reached that way.
###
### If called with a block, the block is called once for each synset with
### the found synset and its depth in relation to the originating synset as
### arguments. The first call will be for the originating synset with a
### depth of +0+ unless +includeOrigin+ is +false+. If the block returns
### exactly +true+, the traversal is halted and the method returns
### immediately.
###
### A synset that is already on the path between the origin and the synset
### currently being expanded (compared with #==) is not descended into
### again, so pointer types that link synsets in both directions terminate
### instead of recursing without end. Synsets reachable along several
### distinct paths are still visited once per path.
###
### Returns an Array of the synsets which were traversed if no block is
### given, or a flag which indicates whether or not the traversal was
### halted by the block if a block is given.
###
### Raises an ArgumentError if +type+ is neither a String nor a Symbol, or
### if the receiver does not respond to +type+.
def traverse( type, includeOrigin=true )
  unless type.kind_of?( String ) || type.kind_of?( Symbol )
    raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol"
  end
  unless self.respond_to?( type )
    raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type."
  end

  # Recursive walker. Takes the synset to visit, its depth and the list of
  # synsets on the path leading to it; returns the synsets it collected
  # and a flag telling whether the block asked to halt.
  walker = nil
  walker = lambda do |syn, depth, trail|
    # The origin is only reported/collected when includeOrigin is set
    reportable = depth > 0 || includeOrigin
    halted = false

    if block_given? && reportable
      halted = true if yield( syn, depth ).is_a?( TrueClass )
    end

    collected = reportable ? [ syn ] : []

    # Descend into each synset the pointer leads to, and stop iterating as
    # soon as the block has asked to halt by returning true.
    unless halted
      path = trail + [ syn ]
      syn.send( type ).each do |subSyn|
        # Skip synsets we are already in the middle of expanding (cycle)
        next if path.include?( subSyn )

        below, halted = walker.call( subSyn, depth + 1, path )
        collected.concat( below )
        break if halted
      end
    end

    [ collected, halted ]
  end

  traversed, interrupted = walker.call( self, 0, [] )

  # With a block, report whether it halted the traversal; without one,
  # hand back the traversed synsets.
  return block_given? ? interrupted : traversed
end
789
+
790
+
791
### Returns the distance in pointers between the receiver and +otherSynset+
### using +type+ as the search path: the depth at which #traverse first
### yields a synset equal to +otherSynset+, or +nil+ if none is found.
def distance( type, otherSynset )
  found_at = nil

  self.traverse( type ) do |candidate, level|
    next unless candidate == otherSynset
    found_at = level
    true    # halt the traversal
  end

  return found_at
end
804
+
805
+
806
### Recursively searches all of the receiver's pointers of the specified
### +type+ for +otherSynset+. Returns the halt flag of #traverse, which is
### +true+ if a traversed synset compared equal (+true+) to +otherSynset+.
def search( type, otherSynset )
  was_found = self.traverse( type ) do |candidate, _level|
    candidate == otherSynset
  end

  return was_found
end
813
+
814
+
815
### Union: Return the least general synset that the receiver and
### +otherSyn+ have in common as a hypernym, or +nil+ if they don't share
### any. This is the first synset yielded while traversing +otherSyn+'s
### hypernyms that is also among the synsets traversed from the receiver.
def |( otherSyn )
  # Everything reachable from the receiver via hypernym pointers
  own_line = self.traverse( :hypernyms )
  shared = nil

  # Walk the other synset's hypernyms until one of ours turns up
  otherSyn.traverse( :hypernyms ) do |candidate, _level|
    next unless own_line.include?( candidate )
    shared = candidate
    true    # halt the traversal
  end

  return shared
end
835
+
836
+
837
### Returns the pointers in this synset's pointerlist as an +Array+. While
### the cached Array is empty, the pointerlist is split on SUB_DELIM_RE and
### each piece is parsed with Pointer::parse (under an exclusive lock) to
### fill the cache.
def pointers
  @mutex.synchronize( Sync::SH ) do
    if @pointers.empty?
      @mutex.synchronize( Sync::EX ) do
        pieces = @pointerlist.split( SUB_DELIM_RE )
        @pointers = pieces.map {|pstr| Pointer.parse( pstr ) }
      end
    end

    @pointers
  end
end
848
+
849
+
850
### Set the pointers in this synset's pointerlist to +newPointers+, which
### may be a single pointer, an Array of pointers, or (via #send) several
### arguments of either kind. The pointerlist becomes the pointers' String
### forms (#to_s) joined with SUB_DELIM, and the same pointers are cached
### as a flat Array. Both are updated under an exclusive lock.
def pointers=( *newPointers )
  # Because of the splat, `syn.pointers = [p1, p2]` arrives here as
  # [[p1, p2]]; flatten so each pointer is serialized individually and the
  # cached list is not nested.
  flat = newPointers.flatten

  @mutex.synchronize( Sync::EX ) do
    @pointerlist = flat.map {|ptr| ptr.to_s }.join( SUB_DELIM )
    @pointers = flat
  end
end
857
+
858
+
859
### Returns the synset's pointers in a Hash keyed by their type. Each value
### is an Array of the pointers of that type, in the order in which they
### appear in #pointers.
def pointer_map
  by_type = {}

  self.pointers.each do |ptr|
    ( by_type[ ptr.type ] ||= [] ) << ptr
  end

  return by_type
end
867
+
868
+
869
+
870
+ #########
871
+ protected
872
+ #########
873
+
874
### Returns an Array of synset objects for the receiver's pointers of the
### specified +type+. If +subtype+ is given, only pointers whose subtype
### matches it are followed.
def fetch_synset_pointers( type, subtype=nil )
  found = nil

  # Pick out the pointers of the requested type (and subtype, if any),
  # take the synset key from each, and look each key up in the lexicon.
  @mutex.synchronize( Sync::SH ) do
    wanted = self.pointers.select do |ptr|
      ptr.type == type && ( subtype.nil? || ptr.subtype == subtype )
    end
    keys = wanted.map {|ptr| ptr.synset }
    found = keys.map {|key| @lexicon.lookup_synsets_by_key( key ) }
  end

  return found.flatten
end
894
+
895
+
896
### Sets the receiver's synset pointers for the specified +type+ to
### the specified +synsets+ (a single object is wrapped in an Array). The
### entries of all other types in #pointer_map are kept, and the combined
### list is handed to #pointers= as a flat list of arguments.
###
### NOTE(review): +subtype+ is accepted but not used, and +synsets+ are
### stored as given rather than converted into Pointer objects. Whether
### callers pass Pointers or Synsets here should be confirmed.
def set_synset_pointers( type, synsets, subtype=nil )
  synsets = [ synsets ] unless synsets.is_a?( Array )

  pmap = self.pointer_map
  pmap[ type ] = synsets

  # pmap.values is an Array of per-type Arrays; pass the individual
  # entries so #pointers= doesn't receive (and store) nested Arrays.
  self.send( :pointers=, *pmap.values.flatten )
end
904
+
905
+
906
+ end # class Synset
907
+ end # module WordNet
908
+