wordnet 0.0.5 → 1.0.0.pre.126
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.rdoc +5 -0
- data/LICENSE +9 -9
- data/Manifest.txt +39 -0
- data/README.rdoc +60 -0
- data/Rakefile +47 -267
- data/TODO +9 -0
- data/WordNet30-license.txt +31 -0
- data/examples/add-laced-boots.rb +35 -0
- data/examples/clothes-with-collars.rb +42 -0
- data/examples/clothesWithTongues.rb +0 -0
- data/examples/domainTree.rb +0 -0
- data/examples/memberTree.rb +0 -0
- data/lib/wordnet/constants.rb +259 -296
- data/lib/wordnet/lexicallink.rb +34 -0
- data/lib/wordnet/lexicon.rb +158 -386
- data/lib/wordnet/mixins.rb +62 -0
- data/lib/wordnet/model.rb +78 -0
- data/lib/wordnet/morph.rb +25 -0
- data/lib/wordnet/semanticlink.rb +52 -0
- data/lib/wordnet/sense.rb +55 -0
- data/lib/wordnet/sumoterm.rb +21 -0
- data/lib/wordnet/synset.rb +404 -859
- data/lib/wordnet/utils.rb +126 -0
- data/lib/wordnet/word.rb +119 -0
- data/lib/wordnet.rb +113 -76
- data/spec/lib/helpers.rb +102 -133
- data/spec/linguawordnet.tests.rb +38 -0
- data/spec/wordnet/lexicon_spec.rb +96 -186
- data/spec/wordnet/model_spec.rb +59 -0
- data/spec/wordnet/semanticlink_spec.rb +42 -0
- data/spec/wordnet/synset_spec.rb +27 -256
- data/spec/wordnet/word_spec.rb +58 -0
- data/spec/wordnet_spec.rb +52 -0
- data.tar.gz.sig +0 -0
- metadata +227 -188
- metadata.gz.sig +0 -0
- data/ChangeLog +0 -720
- data/README +0 -93
- data/Rakefile.local +0 -46
- data/convertdb.rb +0 -417
- data/examples/addLacedBoots.rb +0 -27
- data/examples/clothesWithCollars.rb +0 -36
- data/rake/dependencies.rb +0 -76
- data/rake/helpers.rb +0 -384
- data/rake/manual.rb +0 -755
- data/rake/packaging.rb +0 -112
- data/rake/publishing.rb +0 -303
- data/rake/rdoc.rb +0 -35
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -469
- data/rake/testing.rb +0 -192
- data/rake/verifytask.rb +0 -64
- data/utils.rb +0 -838
data/lib/wordnet/lexicon.rb
CHANGED
@@ -1,430 +1,202 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
# WordNet Lexicon object class
|
4
|
-
#
|
5
|
-
# == Synopsis
|
6
|
-
#
|
7
|
-
# lexicon = WordNet::Lexicon.new( dictpath )
|
8
|
-
#
|
9
|
-
# == Description
|
10
|
-
#
|
11
|
-
# Instances of this class abstract access to the various databases of the
|
12
|
-
# WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
|
13
|
-
#
|
14
|
-
# == Author
|
15
|
-
#
|
16
|
-
# Michael Granger <ged@FaerieMUD.org>
|
17
|
-
#
|
18
|
-
# Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
|
19
|
-
#
|
20
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
21
|
-
# software under the terms of the Perl Artistic License. (See
|
22
|
-
# http://language.perl.com/misc/Artistic.html)
|
23
|
-
#
|
24
|
-
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
25
|
-
# by Dan Brian.
|
26
|
-
#
|
27
|
-
# == Version
|
28
|
-
#
|
29
|
-
# $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
30
|
-
#
|
31
|
-
|
32
|
-
require 'rbconfig'
|
2
|
+
|
33
3
|
require 'pathname'
|
34
|
-
require '
|
35
|
-
require 'sync'
|
4
|
+
require 'rubygems'
|
36
5
|
|
6
|
+
require 'wordnet' unless defined?( WordNet )
|
37
7
|
require 'wordnet/constants'
|
8
|
+
require 'wordnet/mixins'
|
38
9
|
require 'wordnet/synset'
|
10
|
+
require 'wordnet/word'
|
39
11
|
|
40
|
-
### Lexicon exception - something has gone wrong in the internals of the
|
41
|
-
### lexicon.
|
42
|
-
class WordNet::LexiconError < StandardError ; end
|
43
|
-
|
44
|
-
### Lookup error - the object being looked up either doesn't exist or is
|
45
|
-
### malformed
|
46
|
-
class WordNet::LookupError < StandardError ; end
|
47
12
|
|
48
|
-
|
49
|
-
|
50
|
-
### WordNet::Synset objects.
|
13
|
+
# WordNet lexicon class - abstracts access to the WordNet lexical
|
14
|
+
# database, and provides factory methods for looking up words and synsets.
|
51
15
|
class WordNet::Lexicon
|
52
|
-
include WordNet::Constants
|
53
|
-
|
54
|
-
|
55
|
-
# Subversion Id
|
56
|
-
SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
16
|
+
include WordNet::Constants,
|
17
|
+
WordNet::Loggable
|
57
18
|
|
58
|
-
#
|
59
|
-
|
19
|
+
# class LogTracer
|
20
|
+
# def method_missing( sym, msg, &block )
|
21
|
+
# if msg =~ /does not exist/
|
22
|
+
# $stderr.puts ">>> DOES NOT EXIST TRACE"
|
23
|
+
# $stderr.puts( caller(1).grep(/wordnet/i) )
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# end
|
60
27
|
|
61
28
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# The path to the WordNet BerkeleyDB Env. It lives in the directory that
|
67
|
-
# this module is in.
|
68
|
-
DEFAULT_DB_ENV = File::join( Config::CONFIG['datadir'], "ruby-wordnet" )
|
29
|
+
# Add the logger device to the default options after it's been loaded
|
30
|
+
WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [WordNet.logger] )
|
31
|
+
# WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [LogTracer.new] )
|
69
32
|
|
70
|
-
# Options for the creation of the Env object
|
71
|
-
ENV_OPTIONS = {
|
72
|
-
:set_timeout => 50,
|
73
|
-
:set_lk_detect => 1,
|
74
|
-
:set_verbose => false,
|
75
|
-
:set_lk_max => 3000,
|
76
|
-
}
|
77
33
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
#############################################################
|
84
|
-
### I N S T A N C E M E T H O D S
|
85
|
-
#############################################################
|
34
|
+
### Get the Sequel URI of the default database, if it's installed.
|
35
|
+
def self::default_db_uri
|
36
|
+
WordNet.log.debug "Fetching the default db URI"
|
86
37
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
|
91
|
-
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
|
92
|
-
@mode = normalize_mode( mode )
|
93
|
-
debug_msg "Mode is: %04o" % [ mode ]
|
94
|
-
|
95
|
-
envflags = 0
|
96
|
-
dbflags = 0
|
97
|
-
|
98
|
-
unless self.readonly?
|
99
|
-
debug_msg "Using read/write flags"
|
100
|
-
envflags = ENV_FLAGS_RW
|
101
|
-
dbflags = BDB::CREATE
|
38
|
+
datadir = nil
|
39
|
+
if Gem.datadir( 'wordnet-defaultdb' )
|
40
|
+
datadir = Pathname( Gem.datadir('wordnet-defaultdb') )
|
102
41
|
else
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
107
|
-
|
108
|
-
debug_msg "Env flags are: %0s, dbflags are %0s" %
|
109
|
-
[ envflags.to_s(2), dbflags.to_s(2) ]
|
110
|
-
|
111
|
-
begin
|
112
|
-
@env = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
|
113
|
-
@index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
|
114
|
-
@data_db = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
|
115
|
-
@morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
|
116
|
-
rescue StandardError => err
|
117
|
-
msg = "Error while opening Ruby-WordNet data files: #{dbenv}: %s" %
|
118
|
-
[ err.message ]
|
119
|
-
raise err, msg, err.backtrace
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
######
|
126
|
-
public
|
127
|
-
######
|
128
|
-
|
129
|
-
# The BDB::Env object which contains the wordnet lexicon's databases.
|
130
|
-
attr_reader :env
|
131
|
-
|
132
|
-
# The handle to the index table
|
133
|
-
attr_reader :index_db
|
134
|
-
|
135
|
-
# The handle to the synset data table
|
136
|
-
attr_reader :data_db
|
137
|
-
|
138
|
-
# The handle to the morph table
|
139
|
-
attr_reader :morph_db
|
140
|
-
|
141
|
-
|
142
|
-
### Returns +true+ if the lexicon was opened in read-only mode.
|
143
|
-
def readonly?
|
144
|
-
( @mode & 0200 ).nonzero? ? false : true
|
145
|
-
end
|
146
|
-
|
147
|
-
|
148
|
-
### Returns +true+ if the lexicon was opened in read-write mode.
|
149
|
-
def readwrite?
|
150
|
-
! self.readonly?
|
151
|
-
end
|
152
|
-
|
153
|
-
|
154
|
-
### Close the lexicon's database environment
|
155
|
-
def close
|
156
|
-
@env.close if @env
|
157
|
-
end
|
158
|
-
|
159
|
-
|
160
|
-
### Checkpoint the database. (BerkeleyDB-specific)
|
161
|
-
def checkpoint( bytes=0, minutes=0 )
|
162
|
-
@env.checkpoint
|
163
|
-
end
|
164
|
-
|
165
|
-
|
166
|
-
### Remove any archival logfiles for the lexicon's database
|
167
|
-
### environment. (BerkeleyDB-specific).
|
168
|
-
def clean_logs
|
169
|
-
return unless self.readwrite?
|
170
|
-
self.archlogs.each do |logfile|
|
171
|
-
File::chmod( 0777, logfile )
|
172
|
-
File::delete( logfile )
|
42
|
+
WordNet.log.warn " no defaultdb gem; looking for the development database"
|
43
|
+
datadir = Pathname( __FILE__ ).dirname.parent.parent +
|
44
|
+
'wordnet-defaultdb/data/wordnet-defaultdb'
|
173
45
|
end
|
174
|
-
end
|
175
46
|
|
47
|
+
dbfile = datadir + 'wordnet30.sqlite'
|
48
|
+
WordNet.log.debug " dbfile is: %s" % [ dbfile ]
|
176
49
|
|
177
|
-
|
178
|
-
|
179
|
-
### word by counting the synsets returned by #lookup_synsets.
|
180
|
-
def familiarity( word, part_of_speech, polyCount=nil )
|
181
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
182
|
-
return nil unless @index_db.key?( wordkey )
|
183
|
-
@index_db[ wordkey ].split( WordNet::SUB_DELIM_RE ).length
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
### Look up sysets (Wordnet::Synset objects) matching +text+ as a
|
188
|
-
### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
|
189
|
-
### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
|
190
|
-
### +sense+, #lookup_synsets will return all matches that are a
|
191
|
-
### +part_of_speech+. If +sense+ is specified, only the synset object that
|
192
|
-
### matches that particular +part_of_speech+ and +sense+ is returned.
|
193
|
-
def lookup_synsets( word, part_of_speech, sense=nil )
|
194
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
195
|
-
pos = self.make_pos( part_of_speech )
|
196
|
-
synsets = []
|
197
|
-
|
198
|
-
# Look up the index entry, trying first the word as given, and if
|
199
|
-
# that fails, trying morphological conversion.
|
200
|
-
entry = @index_db[ wordkey ]
|
201
|
-
|
202
|
-
if entry.nil? && (word = self.morph( word, part_of_speech ))
|
203
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
204
|
-
entry = @index_db[ wordkey ]
|
205
|
-
end
|
206
|
-
|
207
|
-
# If the lookup failed both ways, just abort
|
208
|
-
return nil unless entry
|
209
|
-
|
210
|
-
# Make synset keys from the entry, narrowing it to just the sense
|
211
|
-
# requested if one was specified.
|
212
|
-
synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }
|
213
|
-
if sense
|
214
|
-
return lookup_synsets_by_key( synkeys[sense - 1] )
|
50
|
+
if dbfile.exist?
|
51
|
+
return "sqlite:#{dbfile}"
|
215
52
|
else
|
216
|
-
return
|
53
|
+
return nil
|
217
54
|
end
|
218
55
|
end
|
219
56
|
|
220
57
|
|
221
|
-
|
222
|
-
###
|
223
|
-
|
224
|
-
def lookup_synsets_by_key( *keys )
|
225
|
-
synsets = []
|
226
|
-
|
227
|
-
keys.each {|key|
|
228
|
-
raise WordNet::LookupError, "Failed lookup of synset '#{key}':"\
|
229
|
-
"No such synset" unless @data_db.key?( key )
|
230
|
-
|
231
|
-
data = @data_db[ key ]
|
232
|
-
offset, part_of_speech = key.split( /%/, 2 )
|
233
|
-
synsets << WordNet::Synset::new( self, offset, part_of_speech, nil, data )
|
234
|
-
}
|
235
|
-
|
236
|
-
return *synsets
|
237
|
-
end
|
238
|
-
alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
|
239
|
-
|
240
|
-
|
241
|
-
### Returns a form of +word+ as a part of speech +part_of_speech+, as
|
242
|
-
### found in the WordNet morph files. The #lookup_synsets method perfoms
|
243
|
-
### morphological conversion automatically, so a call to #morph is not
|
244
|
-
### required.
|
245
|
-
def morph( word, part_of_speech )
|
246
|
-
return @morph_db[ self.make_word_key(word, part_of_speech) ]
|
247
|
-
end
|
248
|
-
|
58
|
+
#############################################################
|
59
|
+
### I N S T A N C E M E T H O D S
|
60
|
+
#############################################################
|
249
61
|
|
250
|
-
###
|
251
|
-
###
|
252
|
-
def
|
253
|
-
|
254
|
-
|
62
|
+
### Create a new WordNet::Lexicon object that will use the database connection specified by
|
63
|
+
### the given +dbconfig+.
|
64
|
+
def initialize( *args )
|
65
|
+
uri = if args.empty?
|
66
|
+
WordNet::Lexicon.default_db_uri or
|
67
|
+
raise WordNet::LexiconError,
|
68
|
+
"No default WordNetSQL database! You can install it via the " +
|
69
|
+
"wordnet-defaultdb gem, or download a version yourself from " +
|
70
|
+
"http://sourceforge.net/projects/wnsql/"
|
71
|
+
|
72
|
+
elsif args.first.is_a?( String )
|
73
|
+
args.shift
|
74
|
+
else
|
75
|
+
nil
|
76
|
+
end
|
255
77
|
|
78
|
+
options = WordNet::DEFAULT_DB_OPTIONS.merge( args.shift || {} )
|
256
79
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
# the target text
|
265
|
-
cursor = @index_db.cursor
|
266
|
-
rec = cursor.set_range( text )
|
267
|
-
while /^#{text}/ =~ rec[0]
|
268
|
-
words.push rec[0]
|
269
|
-
rec = cursor.next
|
80
|
+
if uri
|
81
|
+
self.log.debug "Connecting using uri + options style: uri = %s, options = %p" %
|
82
|
+
[ uri, options ]
|
83
|
+
@db = Sequel.connect( uri, options )
|
84
|
+
else
|
85
|
+
self.log.debug "Connecting using hash style connect: options = %p" % [ options ]
|
86
|
+
@db = Sequel.connect( options )
|
270
87
|
end
|
271
|
-
cursor.close
|
272
|
-
|
273
|
-
return *words
|
274
|
-
end
|
275
88
|
|
89
|
+
@uri = @db.uri
|
90
|
+
self.log.debug " setting model db to: %s" % [ @uri ]
|
276
91
|
|
277
|
-
|
278
|
-
|
279
|
-
def create_synset( word, part_of_speech )
|
280
|
-
return WordNet::Synset::new( self, '', part_of_speech, word )
|
92
|
+
@db.sql_log_level = :debug
|
93
|
+
WordNet::Model.db = @db
|
281
94
|
end
|
282
|
-
alias_method :new_synset, :create_synset
|
283
|
-
|
284
|
-
|
285
|
-
### Store the specified +synset+ (a WordNet::Synset object) in the
|
286
|
-
### lexicon. Returns the key of the stored synset.
|
287
|
-
def store_synset( synset )
|
288
|
-
strippedOffset = nil
|
289
|
-
pos = nil
|
290
95
|
|
291
|
-
# Start a transaction
|
292
|
-
@env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|
|
293
|
-
|
294
|
-
# If this is a new synset, generate an offset for it
|
295
|
-
if synset.offset == 1
|
296
|
-
synset.offset =
|
297
|
-
(datadb['offsetcount'] = datadb['offsetcount'].to_i + 1)
|
298
|
-
end
|
299
|
-
|
300
|
-
# Write the data entry
|
301
|
-
datadb[ synset.key ] = synset.serialize
|
302
|
-
|
303
|
-
# Write the index entries
|
304
|
-
txn.begin( BDB::TXN_COMMIT, @index_db ) do |txn,indexdb|
|
305
|
-
|
306
|
-
# Make word/part-of-speech pairs from the words in the synset
|
307
|
-
synset.words.collect {|word| word + "%" + pos }.each {|word|
|
308
|
-
|
309
|
-
# If the index already has this word, but not this
|
310
|
-
# synset, add it
|
311
|
-
if indexdb.key?( word )
|
312
|
-
indexdb[ word ] << SUB_DELIM << synset.offset unless
|
313
|
-
indexdb[ word ].include?( synset.offset )
|
314
|
-
else
|
315
|
-
indexdb[ word ] = synset.offset
|
316
|
-
end
|
317
|
-
}
|
318
|
-
end # transaction on @index_db
|
319
|
-
end # transaction on @dataDB
|
320
|
-
|
321
|
-
return synset.offset
|
322
|
-
end
|
323
96
|
|
97
|
+
######
|
98
|
+
public
|
99
|
+
######
|
324
100
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
else
|
352
|
-
index_db.delete( word )
|
353
|
-
end
|
354
|
-
end
|
355
|
-
}
|
101
|
+
# The database URI the lexicon will use to look up WordNet data
|
102
|
+
attr_reader :uri
|
103
|
+
|
104
|
+
# The Sequel::Database object that model tables read from
|
105
|
+
attr_reader :db
|
106
|
+
|
107
|
+
|
108
|
+
### Find a Word or Synset in the WordNet database and return it. In the case of multiple
|
109
|
+
### matching Synsets, only the first will be returned. If you want them all, you can use
|
110
|
+
### #lookup_synsets instead.
|
111
|
+
###
|
112
|
+
### The +word+ can be one of:
|
113
|
+
### [Integer]
|
114
|
+
### Looks up the corresponding Word or Synset by ID. This assumes that all Synset IDs are
|
115
|
+
### 9 digits or greater, which is true as of WordNet 3.1. Any additional +args+ are
|
116
|
+
### ignored.
|
117
|
+
### [Symbol, String]
|
118
|
+
### Look up the Synsets of the Word with that lemma using #lookup_synsets, passing any additional +args+,
|
119
|
+
### and return the first one that is found.
|
120
|
+
def []( word, *args )
|
121
|
+
if word.is_a?( Integer )
|
122
|
+
# :TODO: Assumes Synset IDs are all >= 100_000_000
|
123
|
+
if word.to_s.length > 8
|
124
|
+
return WordNet::Synset[ word ]
|
125
|
+
else
|
126
|
+
return WordNet::Word[ word ]
|
356
127
|
end
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
# Delete the synset from the main db
|
361
|
-
datadb.delete( synset.offset )
|
128
|
+
else
|
129
|
+
return self.lookup_synsets( word, 1, *args ).first
|
362
130
|
end
|
363
|
-
|
364
|
-
return true
|
365
|
-
end
|
366
|
-
|
367
|
-
|
368
|
-
#########
|
369
|
-
protected
|
370
|
-
#########
|
371
|
-
|
372
|
-
### Normalize various ways of specifying a part of speech into the
|
373
|
-
### WordNet part of speech indicator from the +original+ representation,
|
374
|
-
### which may be the name (e.g., "noun"); +nil+, in which case it
|
375
|
-
### defaults to the indicator for a noun; or the indicator character
|
376
|
-
### itself, in which case it is returned unmodified.
|
377
|
-
def make_pos( original )
|
378
|
-
return WordNet::Noun if original.nil?
|
379
|
-
osym = original.to_s.intern
|
380
|
-
return WordNet::SYNTACTIC_CATEGORIES[ osym ] if
|
381
|
-
WordNet::SYNTACTIC_CATEGORIES.key?( osym )
|
382
|
-
return original if SYNTACTIC_SYMBOLS.key?( original )
|
383
|
-
return nil
|
384
|
-
end
|
385
|
-
|
386
|
-
|
387
|
-
### Make a lexicon key out of the given +word+ and part of speech
|
388
|
-
### (+pos+).
|
389
|
-
def make_word_key( word, pos )
|
390
|
-
pos = self.make_pos( pos )
|
391
|
-
word = word.gsub( /\s+/, '_' )
|
392
|
-
return "#{word}%#{pos}"
|
393
|
-
end
|
394
|
-
|
395
|
-
|
396
|
-
### Return a list of archival logfiles that can be removed
|
397
|
-
### safely. (BerkeleyDB-specific).
|
398
|
-
def archlogs
|
399
|
-
return @env.log_archive( BDB::ARCH_ABS )
|
400
131
|
end
|
401
132
|
|
402
133
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
###
|
408
|
-
###
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
134
|
+
### Look up synsets (WordNet::Synset objects) associated with +word+, optionally filtered
|
135
|
+
### by additional +args+.
|
136
|
+
###
|
137
|
+
### The *args* can contain:
|
138
|
+
###
|
139
|
+
### [Integer, Range]
|
140
|
+
### The sense/s of the Word (1-indexed) to use when searching for Synsets. If not specified,
|
141
|
+
### all senses of the +word+ are used.
|
142
|
+
### [Regexp]
|
143
|
+
### The Word's Synsets are filtered by definition using an RLIKE filter. Note that not all
|
144
|
+
### databases (including the default one, sqlite3) support RLIKE.
|
145
|
+
### [Symbol, String]
|
146
|
+
### If it matches one of either a lexical domain (e.g., "verb.motion") or a part of
|
147
|
+
### speech (e.g., "adjective", :noun, :v), the resulting Synsets are filtered by that
|
148
|
+
### criteria.
|
149
|
+
### If it doesn't match a lexical domain or part of speech, it's used to filter by
|
150
|
+
### definition using a LIKE query.
|
151
|
+
###
|
152
|
+
def lookup_synsets( word, *args )
|
153
|
+
dataset = WordNet::Synset.filter( :words => WordNet::Word.filter(lemma: word.to_s) )
|
154
|
+
self.log.debug "Looking up synsets for %p" % [ word.to_s ]
|
155
|
+
|
156
|
+
# Add filters to the dataset for each argument
|
157
|
+
args.each do |arg|
|
158
|
+
self.log.debug " constraint arg: %p" % [ arg ]
|
159
|
+
case arg
|
160
|
+
|
161
|
+
when Integer
|
162
|
+
self.log.debug " limiting to sense %d" % [ arg ]
|
163
|
+
dataset = dataset.limit( 1, arg-1 )
|
164
|
+
|
165
|
+
when Range
|
166
|
+
self.log.debug " limiting to range of senses: %p" % [ arg ]
|
167
|
+
dataset = dataset.limit( arg.end - arg.begin, arg.begin - 1 )
|
168
|
+
|
169
|
+
when Regexp
|
170
|
+
self.log.debug " filter: definition =~ %p" % [ arg ]
|
171
|
+
dataset = dataset.filter( definition: arg )
|
172
|
+
|
173
|
+
when Symbol, String
|
174
|
+
# Lexical domain, e.g., "verb.motion"
|
175
|
+
if domain = WordNet::Synset.lexdomains[ arg.to_s ]
|
176
|
+
self.log.debug " filter: lex domain: %s (%d)" % [ arg, domain[:lexdomainid] ]
|
177
|
+
dataset = dataset.filter( lexdomainid: domain[:lexdomainid] )
|
178
|
+
|
179
|
+
# Part of speech symbol, e.g., "v"
|
180
|
+
elsif WordNet::Synset.postype_table.key?( arg.to_sym )
|
181
|
+
self.log.debug " filter: part of speech: %s" % [ arg ]
|
182
|
+
dataset = dataset.filter( pos: arg.to_s )
|
183
|
+
|
184
|
+
# Part of speech name, e.g., "verb"
|
185
|
+
elsif pos = WordNet::Synset.postypes[ arg.to_s ]
|
186
|
+
self.log.debug " filter: part of speech: %s" % [ pos.to_s ]
|
187
|
+
dataset = dataset.filter( pos: pos.to_s )
|
188
|
+
|
189
|
+
# Assume it's a definition match
|
190
|
+
else
|
191
|
+
pattern = "%%%s%%" % [ arg ]
|
192
|
+
self.log.debug " filter: definition LIKE %p" % [ pattern ]
|
193
|
+
dataset = dataset.filter { :definition.like(pattern) }
|
194
|
+
end
|
195
|
+
end
|
419
196
|
end
|
420
|
-
end
|
421
197
|
|
422
|
-
|
423
|
-
def debug_msg( *msg )
|
424
|
-
return unless $DEBUG
|
425
|
-
$deferr.puts msg
|
198
|
+
return dataset.all
|
426
199
|
end
|
427
|
-
|
428
200
|
|
429
201
|
end # class WordNet::Lexicon
|
430
202
|
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'wordnet' unless defined?( WordNet )
|
4
|
+
|
5
|
+
module WordNet
|
6
|
+
|
7
|
+
# Add logging to a WordNet class. Including classes get #log and #log_debug methods.
|
8
|
+
module Loggable
|
9
|
+
|
10
|
+
# Level names to levels
|
11
|
+
LEVEL = {
|
12
|
+
:debug => Logger::DEBUG,
|
13
|
+
:info => Logger::INFO,
|
14
|
+
:warn => Logger::WARN,
|
15
|
+
:error => Logger::ERROR,
|
16
|
+
:fatal => Logger::FATAL,
|
17
|
+
}
|
18
|
+
|
19
|
+
### A logging proxy class that wraps calls to the logger into calls that include
|
20
|
+
### the name of the calling class.
|
21
|
+
### @private
|
22
|
+
class ClassNameProxy
|
23
|
+
|
24
|
+
### Create a new proxy for the given +klass+.
|
25
|
+
def initialize( klass, force_debug=false )
|
26
|
+
@classname = klass.name
|
27
|
+
@force_debug = force_debug
|
28
|
+
end
|
29
|
+
|
30
|
+
### Delegate calls to the global logger with the class name as the 'progname'
|
31
|
+
### argument.
|
32
|
+
def method_missing( sym, msg=nil, &block )
|
33
|
+
return super unless LEVEL.key?( sym )
|
34
|
+
sym = :debug if @force_debug
|
35
|
+
WordNet.logger.add( LEVEL[sym], msg, @classname, &block )
|
36
|
+
end
|
37
|
+
end # ClassNameProxy
|
38
|
+
|
39
|
+
#########
|
40
|
+
protected
|
41
|
+
#########
|
42
|
+
|
43
|
+
### Copy constructor -- clear the log proxies copied over from the original.
|
44
|
+
def initialize_copy( original )
|
45
|
+
@log_proxy = @log_debug_proxy = nil
|
46
|
+
super
|
47
|
+
end
|
48
|
+
|
49
|
+
### Return the proxied logger.
|
50
|
+
def log
|
51
|
+
@log_proxy ||= ClassNameProxy.new( self.class )
|
52
|
+
end
|
53
|
+
|
54
|
+
### Return a proxied "debug" logger that ignores other level specification.
|
55
|
+
def log_debug
|
56
|
+
@log_debug_proxy ||= ClassNameProxy.new( self.class, true )
|
57
|
+
end
|
58
|
+
end # module Loggable
|
59
|
+
|
60
|
+
|
61
|
+
end # module WordNet
|
62
|
+
|