wordnet 0.0.5 → 1.0.0.pre.126
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.rdoc +5 -0
- data/LICENSE +9 -9
- data/Manifest.txt +39 -0
- data/README.rdoc +60 -0
- data/Rakefile +47 -267
- data/TODO +9 -0
- data/WordNet30-license.txt +31 -0
- data/examples/add-laced-boots.rb +35 -0
- data/examples/clothes-with-collars.rb +42 -0
- data/examples/clothesWithTongues.rb +0 -0
- data/examples/domainTree.rb +0 -0
- data/examples/memberTree.rb +0 -0
- data/lib/wordnet/constants.rb +259 -296
- data/lib/wordnet/lexicallink.rb +34 -0
- data/lib/wordnet/lexicon.rb +158 -386
- data/lib/wordnet/mixins.rb +62 -0
- data/lib/wordnet/model.rb +78 -0
- data/lib/wordnet/morph.rb +25 -0
- data/lib/wordnet/semanticlink.rb +52 -0
- data/lib/wordnet/sense.rb +55 -0
- data/lib/wordnet/sumoterm.rb +21 -0
- data/lib/wordnet/synset.rb +404 -859
- data/lib/wordnet/utils.rb +126 -0
- data/lib/wordnet/word.rb +119 -0
- data/lib/wordnet.rb +113 -76
- data/spec/lib/helpers.rb +102 -133
- data/spec/linguawordnet.tests.rb +38 -0
- data/spec/wordnet/lexicon_spec.rb +96 -186
- data/spec/wordnet/model_spec.rb +59 -0
- data/spec/wordnet/semanticlink_spec.rb +42 -0
- data/spec/wordnet/synset_spec.rb +27 -256
- data/spec/wordnet/word_spec.rb +58 -0
- data/spec/wordnet_spec.rb +52 -0
- data.tar.gz.sig +0 -0
- metadata +227 -188
- metadata.gz.sig +0 -0
- data/ChangeLog +0 -720
- data/README +0 -93
- data/Rakefile.local +0 -46
- data/convertdb.rb +0 -417
- data/examples/addLacedBoots.rb +0 -27
- data/examples/clothesWithCollars.rb +0 -36
- data/rake/dependencies.rb +0 -76
- data/rake/helpers.rb +0 -384
- data/rake/manual.rb +0 -755
- data/rake/packaging.rb +0 -112
- data/rake/publishing.rb +0 -303
- data/rake/rdoc.rb +0 -35
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -469
- data/rake/testing.rb +0 -192
- data/rake/verifytask.rb +0 -64
- data/utils.rb +0 -838
data/lib/wordnet/synset.rb
CHANGED
@@ -1,908 +1,453 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
-
|
2
|
+
|
3
|
+
require 'wordnet' unless defined?( WordNet )
|
4
|
+
require 'wordnet/constants'
|
5
|
+
require 'wordnet/model'
|
6
|
+
|
7
|
+
|
3
8
|
# WordNet synonym-set object class
|
4
|
-
#
|
5
|
-
# == Synopsis
|
6
|
-
#
|
7
|
-
# ss = lexicon.lookupSynset( "word", WordNet::Noun, 1 )
|
8
|
-
# puts "Definition: %s" % ss.gloss
|
9
|
-
# coords = ss.coordinates
|
10
9
|
#
|
11
|
-
# == Description
|
12
|
-
#
|
13
10
|
# Instances of this class encapsulate the data for a synonym set ('synset') in a
|
14
|
-
#
|
11
|
+
# WordNet lexical database. A synonym set is a set of words that are
|
15
12
|
# interchangeable in some context.
|
16
|
-
#
|
17
|
-
# == Author
|
18
13
|
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
24
|
-
# software under the terms of the Perl Artistic License. (See
|
25
|
-
# http://language.perl.com/misc/Artistic.html)
|
26
|
-
#
|
27
|
-
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
28
|
-
# by Dan Brian.
|
29
|
-
#
|
30
|
-
# == Version
|
14
|
+
# ss = WordNet::Synset[ 106286395 ]
|
15
|
+
# # => #<WordNet::Synset @values={:synsetid=>106286395, :pos=>"n",
|
16
|
+
# :lexdomainid=>10,
|
17
|
+
# :definition=>"a unit of language that native speakers can identify"}>
|
31
18
|
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
return POINTER_SUBTYPES[ @type ][ @subtype ]
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
### Comparison operator. Pointers are equivalent if they point at the
|
205
|
-
### same synset and are of the same type.
|
206
|
-
def ==( other )
|
207
|
-
return false unless other.is_a?( self.class )
|
208
|
-
other.offset == self.offset &&
|
209
|
-
other.type == self.type
|
210
|
-
end
|
211
|
-
|
212
|
-
|
213
|
-
### Return the pointer in its stringified form.
|
214
|
-
def to_s
|
215
|
-
"%s %d%%%s %02x%02x" % [
|
216
|
-
ptr.type_symbol,
|
217
|
-
ptr.offset,
|
218
|
-
ptr.posSymbol,
|
219
|
-
ptr.source_wn,
|
220
|
-
ptr.target_wn,
|
221
|
-
]
|
222
|
-
end
|
223
|
-
end # class Pointer
|
224
|
-
|
225
|
-
|
226
|
-
#############################################################
|
227
|
-
### C L A S S M E T H O D S
|
228
|
-
#############################################################
|
229
|
-
|
230
|
-
### Define a group of pointer methods based on +symbol+ that will fetch,
|
231
|
-
### add, and delete pointer synsets of the type indicated. If no pointer
|
232
|
-
### type corresponding to the given +symbol+ is found, a variant without
|
233
|
-
### a trailing 's' is tried (e.g., 'def_pointer_methods :antonyms' will
|
234
|
-
### create methods called #antonyms and #antonyms=, but will fetch
|
235
|
-
### pointers of type :antonym). If the pointer type has subtypes
|
236
|
-
### (according to WordNet::POINTER_SUBTYPES), accessors/mutators for the
|
237
|
-
### subtypes will be generated as well.
|
238
|
-
def self::def_pointer_methods( symbol ) # :nodoc:
|
239
|
-
name = symbol.to_s
|
240
|
-
casename = name.dup
|
241
|
-
casename[ 0,1 ] = casename[ 0,1 ].upcase
|
242
|
-
type = nil
|
243
|
-
$stderr.puts '-' * 50,
|
244
|
-
">>> defining pointer methods for %p" % [symbol] if $DEBUG
|
245
|
-
|
246
|
-
if POINTER_TYPES.key?( symbol )
|
247
|
-
type = symbol
|
248
|
-
elsif POINTER_TYPES.key?( symbol.to_s.sub(/s$/, '').to_sym )
|
249
|
-
type = symbol.to_s.sub(/s$/, '').to_sym
|
250
|
-
else
|
251
|
-
raise ArgumentError, "Unknown pointer type %p" % symbol
|
252
|
-
end
|
253
|
-
|
254
|
-
# Define the accessor
|
255
|
-
$stderr.puts "Defining accessors for %p" % [ type ] if $DEBUG
|
256
|
-
define_method( name.to_sym ) { self.fetch_synset_pointers(type) }
|
257
|
-
define_method( "#{name}=".to_sym ) do |*synsets|
|
258
|
-
self.set_synset_pointers( type, synsets, nil )
|
259
|
-
end
|
260
|
-
|
261
|
-
# If the pointer is one that has subtypes, make the variants list
|
262
|
-
# out of the subtypes. If it doesn't have subtypes, make the only
|
263
|
-
# variant nil, which will cause the mutators to be defined for the
|
264
|
-
# main pointer type.
|
265
|
-
if POINTER_SUBTYPES.key?( type )
|
266
|
-
variants = POINTER_SUBTYPES[ type ].keys
|
267
|
-
else
|
268
|
-
variants = [nil]
|
269
|
-
end
|
270
|
-
|
271
|
-
# Define a set of methods for each variant, or for the main method
|
272
|
-
# if the variant is nil.
|
273
|
-
variants.each do |subtype|
|
274
|
-
varname = subtype ? [subtype, name].join('_') : name
|
275
|
-
|
276
|
-
unless subtype.nil?
|
277
|
-
$stderr.puts "Defining reader for #{varname}" if $DEBUG
|
278
|
-
define_method( varname ) do
|
279
|
-
self.fetch_synset_pointers( type, subtype )
|
280
|
-
end
|
281
|
-
else
|
282
|
-
$stderr.puts "No subtype for %s (subtype = %p)" %
|
283
|
-
[ varname, subtype ] if $DEBUG
|
284
|
-
end
|
285
|
-
|
286
|
-
$stderr.puts "Defining mutator for #{varname}" if $DEBUG
|
287
|
-
define_method( "#{varname}=" ) do |*synsets|
|
288
|
-
self.set_synset_pointers( type, synsets, subtype )
|
289
|
-
end
|
290
|
-
end
|
291
|
-
end
|
292
|
-
|
293
|
-
|
294
|
-
#############################################################
|
295
|
-
### I N S T A N C E M E T H O D S
|
296
|
-
#############################################################
|
297
|
-
|
298
|
-
### Create a new Synset object in the specified +lexicon+ for the
|
299
|
-
### specified +word+ and +part_of_speech+. If +data+ is specified,
|
300
|
-
### initialize the synset's other object data from it. This method
|
301
|
-
### shouldn't be called directly: you should use one of the Lexicon
|
302
|
-
### class's factory methods: #create_synset, #lookup_synsets, or
|
303
|
-
### #lookup_synsetsByOffset.
|
304
|
-
def initialize( lexicon, offset, pos, word=nil, data=nil )
|
305
|
-
@lexicon = lexicon or
|
306
|
-
raise ArgumentError, "%p is not a WordNet::Lexicon" % lexicon
|
307
|
-
@part_of_speech = SYNTACTIC_SYMBOLS[ pos ] or
|
308
|
-
raise ArgumentError, "No such part of speech %p" % pos
|
309
|
-
@mutex = Sync::new
|
310
|
-
@pointers = []
|
311
|
-
|
312
|
-
if data
|
313
|
-
@offset = offset.to_i
|
314
|
-
@filenum, @wordlist, @pointerlist,
|
315
|
-
@frameslist, @gloss = data.split( DELIM_RE )
|
316
|
-
else
|
317
|
-
@offset = 1
|
318
|
-
@wordlist = word ? word : ''
|
319
|
-
@filenum, @pointerlist, @frameslist, @gloss = [''] * 4
|
320
|
-
end
|
321
|
-
end
|
322
|
-
|
323
|
-
|
324
|
-
######
|
325
|
-
public
|
326
|
-
######
|
327
|
-
|
328
|
-
# The WordNet::Lexicon that was used to look up this synset
|
329
|
-
attr_reader :lexicon
|
330
|
-
|
331
|
-
# The syntactic category of this Synset. Will be one of "n" (noun), "v"
|
332
|
-
# (verb), "a" (adjective), "r" (adverb), or "s" (other).
|
333
|
-
attr_accessor :part_of_speech
|
334
|
-
|
335
|
-
# The original byte offset of the synset in the data file; acts as the
|
336
|
-
# unique identifier (when combined with #part_of_speech) of this Synset in
|
337
|
-
# the database.
|
338
|
-
attr_accessor :offset
|
339
|
-
|
340
|
-
# The number corresponding to the lexicographer file name containing the
|
341
|
-
# synset. Calling #lexInfo will return the actual filename. See the
|
342
|
-
# "System Description" of wngloss(7WN) for more info about this.
|
343
|
-
attr_accessor :filenum
|
344
|
-
|
345
|
-
# The raw list of word/lex_id pairs associated with this synset. Each
|
346
|
-
# word and lex_id is separated by a '%' character, and each pair is
|
347
|
-
# delimited with a '|'. E.g., the wordlist for "animal" is:
|
348
|
-
# "animal%0|animate_being%0|beast%0|brute%1|creature%0|fauna%1"
|
349
|
-
attr_accessor :wordlist
|
350
|
-
|
351
|
-
# The list of raw pointers to related synsets. E.g., the pointerlist for
|
352
|
-
# "mourning dove" is:
|
353
|
-
# "@ 01731700%n 0000|#m 01733452%n 0000"
|
354
|
-
attr_accessor :pointerlist
|
355
|
-
|
356
|
-
# The list of raw verb sentence frames for this synset.
|
357
|
-
attr_accessor :frameslist
|
358
|
-
|
359
|
-
# Definition and/or example sentences for the Synset.
|
360
|
-
attr_accessor :gloss
|
361
|
-
|
362
|
-
|
363
|
-
### Return a human-readable representation of the Synset suitable for
|
364
|
-
### debugging.
|
365
|
-
def inspect
|
366
|
-
pointer_counts = self.pointer_map.collect {|type,ptrs|
|
367
|
-
"#{type}s: #{ptrs.length}"
|
368
|
-
}.join( ", " )
|
369
|
-
|
370
|
-
%q{#<%s:0x%08x/%s %s (%s): "%s" (%s)>} % [
|
371
|
-
self.class.name,
|
372
|
-
self.object_id * 2,
|
373
|
-
self.offset,
|
374
|
-
self.words.join(", "),
|
375
|
-
self.part_of_speech,
|
376
|
-
self.gloss,
|
377
|
-
pointer_counts,
|
378
|
-
]
|
379
|
-
end
|
380
|
-
|
381
|
-
|
382
|
-
### Returns the Synset's unique identifier, made up of its offset and
|
383
|
-
### syntactic category catenated together with a '%' symbol.
|
384
|
-
def key
|
385
|
-
"%d%%%s" % [ self.offset, self.pos ]
|
386
|
-
end
|
387
|
-
|
388
|
-
|
389
|
-
### The symbol which represents this synset's syntactic category. Will
|
390
|
-
### be one of :noun, :verb, :adjective, :adverb, or :other.
|
391
|
-
def pos
|
392
|
-
return SYNTACTIC_CATEGORIES[ @part_of_speech ]
|
393
|
-
end
|
394
|
-
|
395
|
-
|
396
|
-
### Return each of the sentences of the gloss for this synset as an
|
397
|
-
### array. The gloss is a definition of the synset, and optionally one
|
398
|
-
### or more example sentences.
|
399
|
-
def glosses
|
400
|
-
return self.gloss.split( /\s*;\s*/ )
|
401
|
-
end
|
402
|
-
|
403
|
-
|
404
|
-
### Returns true if the receiver and otherSyn are identical according to
|
405
|
-
### their offsets.
|
406
|
-
def ==( otherSyn )
|
407
|
-
return false unless otherSyn.kind_of?( WordNet::Synset )
|
408
|
-
return self.offset == otherSyn.offset
|
19
|
+
# ss.words.map( &:lemma )
|
20
|
+
# # => ["word"]
|
21
|
+
#
|
22
|
+
# ss.hypernyms
|
23
|
+
# # => [#<WordNet::Synset @values={:synsetid=>106284225, :pos=>"n",
|
24
|
+
# :lexdomainid=>10,
|
25
|
+
# :definition=>"one of the natural units into which [...]"}>]
|
26
|
+
#
|
27
|
+
# ss.hyponyms
|
28
|
+
# # => [#<WordNet::Synset @values={:synsetid=>106287620, :pos=>"n",
|
29
|
+
# :lexdomainid=>10,
|
30
|
+
# :definition=>"a word or phrase spelled by rearranging [...]"}>,
|
31
|
+
# #<WordNet::Synset @values={:synsetid=>106287859, :pos=>"n",
|
32
|
+
# :lexdomainid=>10,
|
33
|
+
# :definition=>"a word (such as a pronoun) used to avoid [...]"}>,
|
34
|
+
# #<WordNet::Synset @values={:synsetid=>106288024, :pos=>"n",
|
35
|
+
# :lexdomainid=>10,
|
36
|
+
# :definition=>"a word that expresses a meaning opposed [...]"}>,
|
37
|
+
# ...
|
38
|
+
# ]
|
39
|
+
#
|
40
|
+
class WordNet::Synset < WordNet::Model( :synsets )
|
41
|
+
include WordNet::Constants
|
42
|
+
|
43
|
+
require 'wordnet/lexicallink'
|
44
|
+
require 'wordnet/semanticlink'
|
45
|
+
|
46
|
+
# Semantic link type keys; maps what the API calls them to what
|
47
|
+
# they are in the DB.
|
48
|
+
SEMANTIC_TYPEKEYS = Hash.new {|h,type| h[type] = type.to_s.chomp('s').to_sym }
|
49
|
+
|
50
|
+
# Now set the ones that aren't just the API name with
|
51
|
+
# the 's' at the end removed.
|
52
|
+
SEMANTIC_TYPEKEYS.merge!(
|
53
|
+
also_see: :also,
|
54
|
+
domain_categories: :domain_category,
|
55
|
+
domain_member_categories: :domain_member_category,
|
56
|
+
entailments: :entail,
|
57
|
+
similar_words: :similar,
|
58
|
+
)
|
59
|
+
|
60
|
+
|
61
|
+
set_primary_key :synsetid
|
62
|
+
|
63
|
+
##
|
64
|
+
# :singleton-method:
|
65
|
+
# The WordNet::Words associated with the receiver
|
66
|
+
many_to_many :words,
|
67
|
+
:join_table => :senses,
|
68
|
+
:left_key => :synsetid,
|
69
|
+
:right_key => :wordid
|
70
|
+
|
71
|
+
|
72
|
+
##
|
73
|
+
# :singleton-method:
|
74
|
+
# The WordNet::Senses associated with the receiver
|
75
|
+
one_to_many :senses,
|
76
|
+
:key => :synsetid,
|
77
|
+
:primary_key => :synsetid
|
78
|
+
|
79
|
+
|
80
|
+
##
|
81
|
+
# :singleton-method:
|
82
|
+
# The WordNet::SemanticLinks indicating a relationship with other
|
83
|
+
# WordNet::Synsets
|
84
|
+
one_to_many :semlinks,
|
85
|
+
:class => :"WordNet::SemanticLink",
|
86
|
+
:key => :synset1id,
|
87
|
+
:primary_key => :synsetid,
|
88
|
+
:eager => :target
|
89
|
+
|
90
|
+
|
91
|
+
##
|
92
|
+
# :singleton-method:
|
93
|
+
# The WordNet::SemanticLinks pointing *to* this Synset
|
94
|
+
many_to_one :semlinks_to,
|
95
|
+
:class => :"WordNet::SemanticLink",
|
96
|
+
:key => :synsetid,
|
97
|
+
:primary_key => :synset2id
|
98
|
+
|
99
|
+
|
100
|
+
##
|
101
|
+
# :singleton-method:
|
102
|
+
# Terms from the Suggested Upper Merged Ontology
|
103
|
+
many_to_many :sumo_terms,
|
104
|
+
:join_table => :sumomaps,
|
105
|
+
:left_key => :synsetid,
|
106
|
+
:right_key => :sumoid
|
107
|
+
|
108
|
+
|
109
|
+
#################################################################
|
110
|
+
### C L A S S M E T H O D S
|
111
|
+
#################################################################
|
112
|
+
|
113
|
+
# Cached lookup tables (lazy-loaded)
|
114
|
+
@lexdomain_table = nil
|
115
|
+
@lexdomains = nil
|
116
|
+
@linktype_table = nil
|
117
|
+
@linktypes = nil
|
118
|
+
@postype_table = nil
|
119
|
+
@postypes = nil
|
120
|
+
|
121
|
+
|
122
|
+
#
|
123
|
+
# :section: Dataset Methods
|
124
|
+
# This is a set of methods that return a Sequel::Dataset for Synsets pre-filtered
|
125
|
+
# by certain criteria. They can be used to do things like:
|
126
|
+
#
|
127
|
+
# lexicon[ :language ].synsets_dataset.nouns
|
128
|
+
#
|
129
|
+
|
130
|
+
##
|
131
|
+
# :singleton-method: nouns
|
132
|
+
# Dataset method: filtered by part of speech: nouns.
|
133
|
+
def_dataset_method( :nouns ) { filter(pos: 'n') }
|
134
|
+
|
135
|
+
##
|
136
|
+
# :singleton-method: verbs
|
137
|
+
# Dataset method: filtered by part of speech: verbs.
|
138
|
+
def_dataset_method( :verbs ) { filter(pos: 'v') }
|
139
|
+
|
140
|
+
##
|
141
|
+
# :singleton-method: adjectives
|
142
|
+
# Dataset method: filtered by part of speech: adjectives.
|
143
|
+
def_dataset_method( :adjectives ) { filter(pos: 'a') }
|
144
|
+
|
145
|
+
##
|
146
|
+
# :singleton-method: adverbs
|
147
|
+
# Dataset method: filtered by part of speech: adverbs.
|
148
|
+
def_dataset_method( :adverbs ) { filter(pos: 'r') }
|
149
|
+
|
150
|
+
##
|
151
|
+
# :singleton-method: adjective_satellites
|
152
|
+
# Dataset method: filtered by part of speech: adjective satellites.
|
153
|
+
def_dataset_method( :adjective_satellites ) { filter(pos: 's') }
|
154
|
+
|
155
|
+
|
156
|
+
### Overridden to reset any lookup tables that may have been loaded from the previous
|
157
|
+
### database.
|
158
|
+
def self::db=( newdb )
|
159
|
+
self.reset_lookup_tables
|
160
|
+
super
|
161
|
+
end
|
162
|
+
|
163
|
+
|
164
|
+
### Unload all of the cached lookup tables that have been loaded.
|
165
|
+
def self::reset_lookup_tables
|
166
|
+
@lexdomain_table = nil
|
167
|
+
@lexdomains = nil
|
168
|
+
@linktype_table = nil
|
169
|
+
@linktypes = nil
|
170
|
+
@postype_table = nil
|
171
|
+
@postypes = nil
|
172
|
+
end
|
173
|
+
|
174
|
+
|
175
|
+
### Return the table of lexical domains, keyed by id.
|
176
|
+
def self::lexdomain_table
|
177
|
+
@lexdomain_table ||= self.db[:lexdomains].to_hash( :lexdomainid )
|
178
|
+
end
|
179
|
+
|
180
|
+
|
181
|
+
### Lexical domains, keyed by name as a String (e.g., "verb.cognition")
|
182
|
+
def self::lexdomains
|
183
|
+
@lexdomains ||= self.lexdomain_table.inject({}) do |hash,(id,domain)|
|
184
|
+
hash[ domain[:lexdomainname] ] = domain
|
185
|
+
hash
|
409
186
|
end
|
187
|
+
end
|
410
188
|
|
411
189
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
190
|
+
### Return the table of link types, keyed by linkid
|
191
|
+
def self::linktype_table
|
192
|
+
@linktype_table ||= self.db[:linktypes].inject({}) do |hash,row|
|
193
|
+
hash[ row[:linkid] ] = {
|
194
|
+
:id => row[:linkid],
|
195
|
+
:typename => row[:link],
|
196
|
+
:type => row[:link].gsub( /\s+/, '_' ).to_sym,
|
197
|
+
:recurses => row[:recurses].nonzero? ? true : false,
|
420
198
|
}
|
199
|
+
hash
|
421
200
|
end
|
422
|
-
|
201
|
+
end
|
423
202
|
|
424
203
|
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
204
|
+
### Return the table of link types, keyed by name.
|
205
|
+
def self::linktypes
|
206
|
+
@linktypes ||= self.linktype_table.inject({}) do |hash,(id,link)|
|
207
|
+
hash[ link[:type] ] = link
|
208
|
+
hash
|
430
209
|
end
|
210
|
+
end
|
431
211
|
|
432
212
|
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
}
|
213
|
+
### Return the table of part-of-speech types, keyed by letter identifier.
|
214
|
+
def self::postype_table
|
215
|
+
@postype_table ||= self.db[:postypes].inject({}) do |hash, row|
|
216
|
+
hash[ row[:pos].untaint.to_sym ] = row[:posname]
|
217
|
+
hash
|
439
218
|
end
|
219
|
+
end
|
440
220
|
|
441
221
|
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
self.words -= oldWords
|
447
|
-
}
|
448
|
-
end
|
222
|
+
### Return the table of part-of-speech names to letter identifiers (the inverse of ::postype_table).
|
223
|
+
def self::postypes
|
224
|
+
@postypes ||= self.postype_table.invert
|
225
|
+
end
|
449
226
|
|
450
227
|
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
return "#{wordlist} [#{self.part_of_speech}] -- (#{self.gloss})"
|
456
|
-
}
|
457
|
-
end
|
458
|
-
alias_method :overview, :to_s
|
459
|
-
|
228
|
+
### Generate methods that will return Synsets related by the given semantic pointer
|
229
|
+
### +type+.
|
230
|
+
def self::semantic_link( type )
|
231
|
+
WordNet.log.debug "Generating a %p method" % [ type ]
|
460
232
|
|
461
|
-
|
462
|
-
|
463
|
-
### before #write is called, the changes are lost.
|
464
|
-
def store
|
465
|
-
@mutex.synchronize( Sync::EX ) {
|
466
|
-
self.lexicon.store_synset( self )
|
467
|
-
}
|
233
|
+
ds_method_body = Proc.new do
|
234
|
+
self.semanticlink_dataset( type )
|
468
235
|
end
|
469
|
-
|
470
|
-
|
236
|
+
define_method( "#{type}_dataset", &ds_method_body )
|
471
237
|
|
472
|
-
|
473
|
-
|
474
|
-
@mutex.synchronize( Sync::EX ) {
|
475
|
-
self.lexicon.remove_synset( self )
|
476
|
-
}
|
238
|
+
ss_method_body = Proc.new do
|
239
|
+
self.semanticlink_dataset( type ).all
|
477
240
|
end
|
241
|
+
define_method( type, &ss_method_body )
|
242
|
+
end
|
478
243
|
|
479
244
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
@mutex.synchronize( Sync::SH ) {
|
484
|
-
return [
|
485
|
-
@filenum,
|
486
|
-
@wordlist,
|
487
|
-
@pointerlist,
|
488
|
-
@frameslist,
|
489
|
-
@gloss
|
490
|
-
].join( WordNet::DELIM )
|
491
|
-
}
|
492
|
-
end
|
245
|
+
######
|
246
|
+
public
|
247
|
+
######
|
493
248
|
|
249
|
+
### Return a Sequel::Dataset for synsets related to the receiver via the semantic
|
250
|
+
### link of the specified +type+.
|
251
|
+
def semanticlink_dataset( type )
|
252
|
+
typekey = SEMANTIC_TYPEKEYS[ type ]
|
253
|
+
linkinfo = self.class.linktypes[ typekey ] or
|
254
|
+
raise ArgumentError, "no such link type %p" % [ typekey ]
|
255
|
+
ssids = self.semlinks_dataset.filter( :linkid => linkinfo[:id] ).select( :synset2id )
|
494
256
|
|
495
|
-
|
496
|
-
|
497
|
-
# The synsets for the receiver's antonyms (opposites). E.g.,
|
498
|
-
# $lexicon.lookup_synsets( "opaque", :adjective, 1 ).antonyms
|
499
|
-
# ==> [#<WordNet::Synset:0x010a9acc/454927 clear (adjective): "free
|
500
|
-
# from cloudiness; allowing light to pass through; "clear water";
|
501
|
-
# "clear plastic bags"; "clear glass"; "the air is clear and
|
502
|
-
# clean"" (similar_tos: 6, attributes: 1, derivations: 2,
|
503
|
-
# antonyms: 1, see_alsos: 1)>]
|
504
|
-
def_pointer_methods :antonyms
|
505
|
-
|
506
|
-
# Synsets for the receiver's entailments (a verb X entails Y if X cannot
|
507
|
-
# be done unless Y is or has been done). E.g.,
|
508
|
-
# $lexicon.lookup_synsets( 'rasp', :verb, 1 ).entailment
|
509
|
-
# ==> [#<WordNet::Synset:0x010dc24c rub (verb): "move over something
|
510
|
-
# with pressure; "rub my hands"; "rub oil into her skin""
|
511
|
-
# (derivations: 2, entailments: 1, hypernyms: 1, hyponyms: 13,
|
512
|
-
# see_alsos: 4)>]
|
513
|
-
def_pointer_methods :entailment
|
514
|
-
|
515
|
-
# Get/set synsets for the receiver's cause pointers (a verb X causes Y
|
516
|
-
# to happen).
|
517
|
-
def_pointer_methods :causes
|
518
|
-
|
519
|
-
# Get/set synsets for the receiver's verb groups. Verb groups link verbs
|
520
|
-
# with similar senses together.
|
521
|
-
def_pointer_methods :verb_groups
|
522
|
-
|
523
|
-
# Get/set list of synsets for the receiver's "similar to" pointers. This
|
524
|
-
# type of pointer links together head adjective synsets with its
|
525
|
-
# satellite adjective synsets.
|
526
|
-
def_pointer_methods :similar_to
|
527
|
-
|
528
|
-
# Get/set synsets for the receiver's participles. Participles are
|
529
|
-
# non-finite forms of a verb; used adjectivally and to form compound
|
530
|
-
# tenses. For example, the first participle for "working" is:
|
531
|
-
# "function, work, operate, go, run (verb)"
|
532
|
-
def_pointer_methods :participles
|
533
|
-
|
534
|
-
# Get/set synsets for the receiver's pertainyms. Pertainyms are
|
535
|
-
# relational adjectives. Adjectives that are pertainyms are usually
|
536
|
-
# defined by such phrases as "of or pertaining to" and do not have
|
537
|
-
# antonyms. A pertainym can point to a noun or another pertainym.
|
538
|
-
def_pointer_methods :pertainyms
|
539
|
-
|
540
|
-
# Get/set synsets for the receiver's attributes.
|
541
|
-
def_pointer_methods :attributes
|
542
|
-
|
543
|
-
# Get/set synsets for the receiver's derived_from.
|
544
|
-
def_pointer_methods :derived_from
|
545
|
-
|
546
|
-
# Get/set synsets for the receiver's derivations.
|
547
|
-
def_pointer_methods :derivations
|
548
|
-
|
549
|
-
# Get/set synsets for the receiver's see_also.
|
550
|
-
def_pointer_methods :see_also
|
551
|
-
|
552
|
-
|
553
|
-
# Auto-generate types with subtypes
|
554
|
-
|
555
|
-
# Synsets for the receiver's hypernyms (more-general terms). E.g.,
|
556
|
-
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hypernyms
|
557
|
-
# ==> [#<WordNet::Synset:0x0109a644/3023321 club (noun): "stout
|
558
|
-
# stick that is larger at one end; "he carried a club in self
|
559
|
-
# defense"; "he felt as if he had been hit with a club""
|
560
|
-
# (derivations: 1, hypernyms: 1, hyponyms: 7)>]
|
561
|
-
#
|
562
|
-
# Also generates accessors for subtypes:
|
563
|
-
#
|
564
|
-
# [instance_hypernyms]
|
565
|
-
# A proper noun that refers to a particular, unique referent (as
|
566
|
-
# distinguished from nouns that refer to classes).
|
567
|
-
def_pointer_methods :hypernyms
|
568
|
-
|
569
|
-
|
570
|
-
# :TODO: Generate an example for this
|
571
|
-
|
572
|
-
# Get/set synsets for the receiver's hyponyms (more-specific terms). E.g.,
|
573
|
-
# $lexicon.lookup_synsets( "cudgel", :noun, 1 ).hyponyms
|
574
|
-
# ==> [...]
|
575
|
-
# [instance_hyponyms]
|
576
|
-
# The specific term used to designate a member of a class. X is a
|
577
|
-
# hyponym of Y if X is a (kind of) Y.
|
578
|
-
# Also generates accessors for subtypes:
|
579
|
-
#
|
580
|
-
# [instance_hyponyms]
|
581
|
-
# A proper noun that refers to a particular, unique referent (as
|
582
|
-
# distinguished from nouns that refer to classes).
|
583
|
-
def_pointer_methods :hyponyms
|
584
|
-
|
585
|
-
|
586
|
-
# Get/set synsets for the receiver's meronyms. In addition to the
|
587
|
-
# general accessors for all meronyms, there are also accessors for
|
588
|
-
# subtypes as well:
|
589
|
-
#
|
590
|
-
# [member_meronyms]
|
591
|
-
# Get/set synsets for the receiver's "member" meronyms (HAS MEMBER
|
592
|
-
# relation).
|
593
|
-
# [stuff_meronyms]
|
594
|
-
# Get/set synsets for the receiver's "stuff" meronyms (IS MADE OUT OF
|
595
|
-
# relation).
|
596
|
-
# [portion_meronyms]
|
597
|
-
# Get/set synsets for the receiver's "portion" meronyms (HAS PORTION
|
598
|
-
# relation).
|
599
|
-
# [component_meronyms]
|
600
|
-
# Get/set synsets for the receiver's "component" meronyms (HAS
|
601
|
-
# COMPONENT relation).
|
602
|
-
# [feature_meronyms]
|
603
|
-
# Get/set synsets for the receiver's "feature" meronyms (HAS FEATURE
|
604
|
-
# relation).
|
605
|
-
# [phase_meronyms]
|
606
|
-
# Get/set synsets for the receiver's "phase" meronyms (HAS PHASE
|
607
|
-
# relation).
|
608
|
-
# [place_meronyms]
|
609
|
-
# Get/set synsets for the receiver's "place" meronyms (HAS PLACE
|
610
|
-
# relation).
|
611
|
-
def_pointer_methods :meronyms
|
612
|
-
|
613
|
-
# Get/set synsets for the receiver's holonyms. In addition to the
|
614
|
-
# general accessors for all holonyms, there are also accessors for
|
615
|
-
# subtypes as well:
|
616
|
-
#
|
617
|
-
# [member_holonyms]
|
618
|
-
# Get/set synsets for the receiver's "member" holonyms (IS A MEMBER OF
|
619
|
-
# relation).
|
620
|
-
# [stuff_holonyms]
|
621
|
-
# Get/set synsets for the receiver's "stuff" holonyms (IS MATERIAL OF
|
622
|
-
# relation).
|
623
|
-
# [portion_holonyms]
|
624
|
-
# Get/set synsets for the receiver's "portion" holonyms (IS A PORTION
|
625
|
-
# OF relation).
|
626
|
-
# [component_holonyms]
|
627
|
-
# Get/set synsets for the receiver's "component" holonyms (IS A
|
628
|
-
# COMPONENT OF relation).
|
629
|
-
# [feature_holonyms]
|
630
|
-
# Get/set synsets for the receiver's "feature" holonyms (IS A FEATURE
|
631
|
-
# OF relation).
|
632
|
-
# [phase_holonyms]
|
633
|
-
# Get/set synsets for the receiver's "phase" holonyms (IS A PHASE OF
|
634
|
-
# relation).
|
635
|
-
# [place_holonyms]
|
636
|
-
# Get/set synsets for the receiver's "place" holonyms (IS A PLACE IN
|
637
|
-
# relation).
|
638
|
-
def_pointer_methods :holonyms
|
639
|
-
|
640
|
-
# Get/set synsets for the receiver's topical domain members. In addition
|
641
|
-
# to the general members accessor, there are also accessors for
|
642
|
-
# membership subtypes:
|
643
|
-
#
|
644
|
-
# [category_members]
|
645
|
-
# Get/set synsets for the receiver's
|
646
|
-
# "category" topical domain members.
|
647
|
-
# [region_members]
|
648
|
-
# Get/set synsets for the receiver's "region"
|
649
|
-
# topical domain members.
|
650
|
-
# [usage_members]
|
651
|
-
# Get/set synsets for the receiver's "usage"
|
652
|
-
# topical domain members.
|
653
|
-
def_pointer_methods :members
|
654
|
-
|
655
|
-
# Get/set synsets for the receiver's topical domain domains. In addition
|
656
|
-
# to the general domains accessor, there are also accessors for
|
657
|
-
# domainship subtypes:
|
658
|
-
#
|
659
|
-
# [category_domains]
|
660
|
-
# Get/set synsets for the receiver's
|
661
|
-
# "category" topical domain domains.
|
662
|
-
# [region_domains]
|
663
|
-
# Get/set synsets for the receiver's "region"
|
664
|
-
# topical domain domains.
|
665
|
-
# [usage_domains]
|
666
|
-
# Get/set synsets for the receiver's "usage"
|
667
|
-
# topical domain domains.
|
668
|
-
def_pointer_methods :domains
|
669
|
-
|
670
|
-
|
671
|
-
### Returns an Array of the coordinate sisters of the receiver.
|
672
|
-
def coordinates
|
673
|
-
self.hypernyms.collect {|syn|
|
674
|
-
syn.hyponyms
|
675
|
-
}.flatten
|
676
|
-
end
|
257
|
+
return self.class.filter( :synsetid => ssids )
|
258
|
+
end
|
677
259
|
|
678
260
|
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
}
|
685
|
-
end
|
686
|
-
|
687
|
-
|
688
|
-
### Sets the "lexicographer's file" association for this synset to
|
689
|
-
### +id+. The value in +id+ should correspond to one of the values in
|
690
|
-
### #WordNet::LEXFILES
|
691
|
-
def lexInfo=( id )
|
692
|
-
raise ArgumentError, "Bad index: Lexinfo id must be within LEXFILES" unless
|
693
|
-
LEXFILES[id]
|
694
|
-
@mutex.synchronize( Sync::EX ) {
|
695
|
-
self.filenum = id
|
696
|
-
}
|
697
|
-
end
|
698
|
-
|
699
|
-
|
700
|
-
### Returns an +Array+ of verb frame +String+s for the synset.
|
701
|
-
def frames
|
702
|
-
frarray = self.frameslist.split( WordNet::SUB_DELIM_RE )
|
703
|
-
verbFrames = []
|
704
|
-
|
705
|
-
@mutex.synchronize( Sync::SH ) {
|
706
|
-
frarray.each {|fr|
|
707
|
-
fnum, wnum = fr.split
|
708
|
-
if wnum > 0
|
709
|
-
wordtext = " (" + self.words[wnum] + ")"
|
710
|
-
verbFrames.push VERB_SENTS[ fnum ] + wordtext
|
711
|
-
else
|
712
|
-
verbFrames.push VERB_SENTS[ fnum ]
|
713
|
-
end
|
714
|
-
}
|
715
|
-
}
|
716
|
-
|
717
|
-
return verbFrames
|
718
|
-
end
|
719
|
-
|
720
|
-
|
721
|
-
### Traversal iterator: Iterates depth-first over a particular
|
722
|
-
### +type+ of the receiver, and all of the pointed-to synset's
|
723
|
-
### pointers. If called with a block, the block is called once for each
|
724
|
-
### synset with the +foundSyn+ and its +depth+ in relation to the
|
725
|
-
### originating synset as arguments. The first call will be the
|
726
|
-
### originating synset with a depth of +0+ unless +includeOrigin+ is
|
727
|
-
### +false+. If the +callback+ returns +true+, the traversal is halted,
|
728
|
-
### and the method returns immediately. This method returns an Array of
|
729
|
-
### the synsets which were traversed if no block is given, or a flag
|
730
|
-
### which indicates whether or not the traversal was interrupted if a
|
731
|
-
### block is given.
|
732
|
-
def traverse( type, includeOrigin=true )
|
733
|
-
raise ArgumentError, "Illegal parameter 1: Must be either a String or a Symbol" unless
|
734
|
-
type.kind_of?( String ) || type.kind_of?( Symbol )
|
735
|
-
|
736
|
-
raise ArgumentError, "Synset doesn't support the #{type.to_s} pointer type." unless
|
737
|
-
self.respond_to?( type )
|
738
|
-
|
739
|
-
foundSyns = []
|
740
|
-
depth = 0
|
741
|
-
traversalFunc = nil
|
742
|
-
|
743
|
-
# Build a traversal function which we can call recursively. It'll return
|
744
|
-
# the synsets it traverses.
|
745
|
-
traversalFunc = Proc.new {|syn,newDepth|
|
746
|
-
|
747
|
-
# Flag to continue traversal
|
748
|
-
haltFlag = false
|
749
|
-
|
750
|
-
# Call the block if it exists and we're either past the origin or
|
751
|
-
# including it
|
752
|
-
if block_given? && (newDepth > 0 || includeOrigin)
|
753
|
-
res = yield( syn, newDepth )
|
754
|
-
haltFlag = true if res.is_a? TrueClass
|
755
|
-
end
|
756
|
-
|
757
|
-
# Make an array for holding sub-synsets we see
|
758
|
-
subSyns = []
|
759
|
-
subSyns.push( syn ) unless newDepth == 0 && !includeOrigin
|
760
|
-
|
761
|
-
# Iterate over each synset returned by calling the pointer on the
|
762
|
-
# current syn. For each one, we call ourselves recursively, and
|
763
|
-
# break out of the iterator with a false value if the block has
|
764
|
-
# indicated we should abort by returning true.
|
765
|
-
unless haltFlag
|
766
|
-
syn.send( type ).each {|subSyn|
|
767
|
-
subSubSyns, haltFlag = traversalFunc.call( subSyn, newDepth + 1 )
|
768
|
-
subSyns.push( *subSubSyns ) unless subSubSyns.empty?
|
769
|
-
break if haltFlag
|
770
|
-
}
|
771
|
-
end
|
772
|
-
|
773
|
-
# return
|
774
|
-
[ subSyns, haltFlag ]
|
775
|
-
}
|
776
|
-
|
777
|
-
# Call the iterator
|
778
|
-
traversedSets, haltFlag = traversalFunc.call( self, depth )
|
779
|
-
|
780
|
-
# If a block was given, just return whether or not the block was halted.
|
781
|
-
if block_given?
|
782
|
-
return haltFlag
|
783
|
-
|
784
|
-
# If no block was given, return the traversed synsets
|
785
|
-
else
|
786
|
-
return traversedSets
|
787
|
-
end
|
788
|
-
end
|
789
|
-
|
790
|
-
|
791
|
-
### Returns the distance in pointers between the receiver and +otherSynset+
|
792
|
-
### using +type+ as the search path.
|
793
|
-
def distance( type, otherSynset )
|
794
|
-
dist = nil
|
795
|
-
self.traverse( type ) {|syn,depth|
|
796
|
-
if syn == otherSynset
|
797
|
-
dist = depth
|
798
|
-
true
|
799
|
-
end
|
800
|
-
}
|
801
|
-
|
802
|
-
return dist
|
803
|
-
end
|
261
|
+
### Return an Enumerator that will iterate over the Synsets related to the receiver
|
262
|
+
### via the semantic links of the specified +linktype+.
|
263
|
+
def semanticlink_enum( linktype )
|
264
|
+
return self.semanticlink_dataset( linktype ).to_enum
|
265
|
+
end
|
804
266
|
|
805
267
|
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
syn == otherSynset
|
811
|
-
}
|
812
|
-
end
|
813
|
-
|
268
|
+
### Return the name of the Synset's part of speech (#pos).
|
269
|
+
def part_of_speech
|
270
|
+
return self.class.postype_table[ self.pos.to_sym ]
|
271
|
+
end
|
814
272
|
|
815
|
-
### Union: Return the least general synset that the receiver and
|
816
|
-
### +otherSynset+ have in common as a hypernym, or nil if it doesn't share
|
817
|
-
### any.
|
818
|
-
def |( otherSyn )
|
819
273
|
|
820
|
-
|
821
|
-
|
822
|
-
commonSyn = nil
|
274
|
+
### Stringify the synset.
|
275
|
+
def to_s
|
823
276
|
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
return commonSyn
|
834
|
-
end
|
277
|
+
# Make a sorted list of the semantic link types from this synset
|
278
|
+
semlink_list = self.semlinks_dataset.
|
279
|
+
group_and_count( :linkid ).
|
280
|
+
to_hash( :linkid, :count ).
|
281
|
+
collect do |linkid, count|
|
282
|
+
'%s: %d' % [ self.class.linktype_table[linkid][:typename], count ]
|
283
|
+
end.
|
284
|
+
sort.
|
285
|
+
join( ', ' )
|
835
286
|
|
287
|
+
return "%s (%s): [%s] %s (%s)" % [
|
288
|
+
self.words.map( &:to_s ).join(', '),
|
289
|
+
self.part_of_speech,
|
290
|
+
self.lexical_domain,
|
291
|
+
self.definition,
|
292
|
+
semlink_list
|
293
|
+
]
|
294
|
+
end
|
836
295
|
|
837
|
-
### Returns the pointers in this synset's pointerlist as an +Array+
|
838
|
-
def pointers
|
839
|
-
@mutex.synchronize( Sync::SH ) {
|
840
|
-
@mutex.synchronize( Sync::EX ) {
|
841
|
-
@pointers = @pointerlist.split(SUB_DELIM_RE).collect {|pstr|
|
842
|
-
Pointer::parse( pstr )
|
843
|
-
}
|
844
|
-
} if @pointers.empty?
|
845
|
-
@pointers
|
846
|
-
}
|
847
|
-
end
|
848
296
|
|
297
|
+
### Return the name of the lexical domain the synset belongs to; this also
|
298
|
+
### corresponds to the lexicographer's file the synset was originally loaded from.
|
299
|
+
def lexical_domain
|
300
|
+
return self.class.lexdomain_table[ self.lexdomainid ][ :lexdomainname ]
|
301
|
+
end
|
849
302
|
|
850
|
-
### Set the pointers in this synset's pointerlist to +newPointers+
|
851
|
-
def pointers=( *newPointers )
|
852
|
-
@mutex.synchronize( Sync::EX ) {
|
853
|
-
@pointerlist = newPointers.collect {|ptr| ptr.to_s}.join( SUB_DELIM )
|
854
|
-
@pointers = newPointers
|
855
|
-
}
|
856
|
-
end
|
857
303
|
|
304
|
+
### Return any sample sentences.
|
305
|
+
def samples
|
306
|
+
return self.db[:samples].
|
307
|
+
filter( synsetid: self.synsetid ).
|
308
|
+
order( :sampleid ).
|
309
|
+
map( :sample ).all
|
310
|
+
end
|
858
311
|
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
312
|
+
|
313
|
+
#
|
314
|
+
# :section: Semantic Links
|
315
|
+
#
|
316
|
+
|
317
|
+
##
|
318
|
+
# "See Also" synsets
|
319
|
+
semantic_link :also_see
|
320
|
+
|
321
|
+
##
|
322
|
+
# Attribute synsets
|
323
|
+
semantic_link :attributes
|
324
|
+
|
325
|
+
##
|
326
|
+
# Cause synsets
|
327
|
+
semantic_link :causes
|
328
|
+
|
329
|
+
##
|
330
|
+
# Domain category synsets
|
331
|
+
semantic_link :domain_categories
|
332
|
+
|
333
|
+
##
|
334
|
+
# Domain member category synsets
|
335
|
+
semantic_link :domain_member_categories
|
336
|
+
|
337
|
+
##
|
338
|
+
# Domain member region synsets
|
339
|
+
semantic_link :domain_member_regions
|
340
|
+
|
341
|
+
##
|
342
|
+
# Domain member usage synsets
|
343
|
+
semantic_link :domain_member_usages
|
344
|
+
|
345
|
+
##
|
346
|
+
# Domain region synsets
|
347
|
+
semantic_link :domain_regions
|
348
|
+
|
349
|
+
##
|
350
|
+
# Domain usage synsets
|
351
|
+
semantic_link :domain_usages
|
352
|
+
|
353
|
+
##
|
354
|
+
# Verb entailment synsets
|
355
|
+
semantic_link :entailments
|
356
|
+
|
357
|
+
##
|
358
|
+
# Hypernym synsets
|
359
|
+
semantic_link :hypernyms
|
360
|
+
|
361
|
+
##
|
362
|
+
# Hyponym synsets
|
363
|
+
semantic_link :hyponyms
|
364
|
+
|
365
|
+
##
|
366
|
+
# Instance hypernym synsets
|
367
|
+
semantic_link :instance_hypernyms
|
368
|
+
|
369
|
+
##
|
370
|
+
# Instance hyponym synsets
|
371
|
+
semantic_link :instance_hyponyms
|
372
|
+
|
373
|
+
##
|
374
|
+
# Member holonym synsets
|
375
|
+
semantic_link :member_holonyms
|
376
|
+
|
377
|
+
##
|
378
|
+
# Member meronym synsets
|
379
|
+
semantic_link :member_meronyms
|
380
|
+
|
381
|
+
##
|
382
|
+
# Part holonym synsets
|
383
|
+
semantic_link :part_holonyms
|
384
|
+
|
385
|
+
##
|
386
|
+
# Part meronym synsets
|
387
|
+
semantic_link :part_meronyms
|
388
|
+
|
389
|
+
##
|
390
|
+
# Similar word synsets
|
391
|
+
semantic_link :similar_words
|
392
|
+
|
393
|
+
##
|
394
|
+
# Substance holonym synsets
|
395
|
+
semantic_link :substance_holonyms
|
396
|
+
|
397
|
+
##
|
398
|
+
# Substance meronym synsets
|
399
|
+
semantic_link :substance_meronyms
|
400
|
+
|
401
|
+
##
|
402
|
+
# Verb group synsets
|
403
|
+
semantic_link :verb_groups
|
404
|
+
|
405
|
+
|
406
|
+
### With a block, yield a WordNet::Synset related to the receiver via a link of
|
407
|
+
### the specified +type+, recursing depth first into each of its links if the link
|
408
|
+
### type is recursive. To exit from the traversal at any depth, throw :stop_traversal.
|
409
|
+
###
|
410
|
+
### If no block is given, return an Enumerator that will do the same thing instead.
|
411
|
+
###
|
412
|
+
### # Print all the parts of a boot
|
413
|
+
### puts lexicon[:boot].traverse( :member_meronyms ).to_a
|
414
|
+
###
|
415
|
+
###
|
416
|
+
def traverse( type, &block )
|
417
|
+
enum = Enumerator.new do |yielder|
|
418
|
+
traversals = [ self.semanticlink_enum(type) ]
|
419
|
+
syn = nil
|
420
|
+
typekey = SEMANTIC_TYPEKEYS[ type ]
|
421
|
+
recurses = self.linktypes[ typekey ][:recurses]
|
422
|
+
|
423
|
+
self.log.debug "Traversing %s semlinks%s" % [ type, recurses ? " (recursive)" : '' ]
|
424
|
+
|
425
|
+
catch( :stop_traversal ) do
|
426
|
+
until traversals.empty?
|
427
|
+
begin
|
428
|
+
self.log.debug " %d traversal/s left"
|
429
|
+
syn = traversals.last.next
|
430
|
+
yielder.yield( syn, traversals.length )
|
431
|
+
traversals << syn.semanticlink_enum( type ) if recurses
|
432
|
+
rescue StopIteration
|
433
|
+
traversals.pop
|
866
434
|
end
|
435
|
+
end
|
436
|
+
end
|
437
|
+
end
|
867
438
|
|
439
|
+
return enum.each( &block ) if block
|
440
|
+
return enum
|
441
|
+
end
|
868
442
|
|
869
443
|
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
synsets = nil
|
878
|
-
|
879
|
-
# Iterate over this synset's pointers, looking for ones that match
|
880
|
-
# the type we're after. When we find one, we extract its offset and
|
881
|
-
# use that to look it up.
|
882
|
-
@mutex.synchronize( Sync::SH ) do
|
883
|
-
synsets = self.pointers.
|
884
|
-
find_all {|ptr|
|
885
|
-
ptr.type == type and
|
886
|
-
subtype.nil? || ptr.subtype == subtype
|
887
|
-
}.
|
888
|
-
collect {|ptr| ptr.synset }.
|
889
|
-
collect {|key| @lexicon.lookup_synsets_by_key( key )}
|
890
|
-
end
|
891
|
-
|
892
|
-
return synsets.flatten
|
893
|
-
end
|
894
|
-
|
895
|
-
|
896
|
-
### Sets the receiver's synset pointers for the specified +type+ to
|
897
|
-
### the specified +synsets+.
|
898
|
-
def set_synset_pointers( type, synsets, subtype=nil )
|
899
|
-
synsets = [ synsets ] unless synsets.is_a?( Array )
|
900
|
-
pmap = self.pointer_map
|
901
|
-
pmap[ type ] = synsets
|
902
|
-
self.pointers = pmap.values
|
903
|
-
end
|
904
|
-
|
444
|
+
### Search for the specified +synset+ in the semantic links of the given +type+ of
|
445
|
+
### the receiver, returning the depth it was found at if it's found, or nil if it
|
446
|
+
### wasn't found.
|
447
|
+
def search( type, synset )
|
448
|
+
found, depth = self.traverse( type ).find {|ss,depth| synset == ss }
|
449
|
+
return depth
|
450
|
+
end
|
905
451
|
|
906
|
-
|
907
|
-
end # module WordNet
|
452
|
+
end # class WordNet::Synset
|
908
453
|
|