wordnet 0.0.5 → 1.0.0.pre.126
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.rdoc +5 -0
- data/LICENSE +9 -9
- data/Manifest.txt +39 -0
- data/README.rdoc +60 -0
- data/Rakefile +47 -267
- data/TODO +9 -0
- data/WordNet30-license.txt +31 -0
- data/examples/add-laced-boots.rb +35 -0
- data/examples/clothes-with-collars.rb +42 -0
- data/examples/clothesWithTongues.rb +0 -0
- data/examples/domainTree.rb +0 -0
- data/examples/memberTree.rb +0 -0
- data/lib/wordnet/constants.rb +259 -296
- data/lib/wordnet/lexicallink.rb +34 -0
- data/lib/wordnet/lexicon.rb +158 -386
- data/lib/wordnet/mixins.rb +62 -0
- data/lib/wordnet/model.rb +78 -0
- data/lib/wordnet/morph.rb +25 -0
- data/lib/wordnet/semanticlink.rb +52 -0
- data/lib/wordnet/sense.rb +55 -0
- data/lib/wordnet/sumoterm.rb +21 -0
- data/lib/wordnet/synset.rb +404 -859
- data/lib/wordnet/utils.rb +126 -0
- data/lib/wordnet/word.rb +119 -0
- data/lib/wordnet.rb +113 -76
- data/spec/lib/helpers.rb +102 -133
- data/spec/linguawordnet.tests.rb +38 -0
- data/spec/wordnet/lexicon_spec.rb +96 -186
- data/spec/wordnet/model_spec.rb +59 -0
- data/spec/wordnet/semanticlink_spec.rb +42 -0
- data/spec/wordnet/synset_spec.rb +27 -256
- data/spec/wordnet/word_spec.rb +58 -0
- data/spec/wordnet_spec.rb +52 -0
- data.tar.gz.sig +0 -0
- metadata +227 -188
- metadata.gz.sig +0 -0
- data/ChangeLog +0 -720
- data/README +0 -93
- data/Rakefile.local +0 -46
- data/convertdb.rb +0 -417
- data/examples/addLacedBoots.rb +0 -27
- data/examples/clothesWithCollars.rb +0 -36
- data/rake/dependencies.rb +0 -76
- data/rake/helpers.rb +0 -384
- data/rake/manual.rb +0 -755
- data/rake/packaging.rb +0 -112
- data/rake/publishing.rb +0 -303
- data/rake/rdoc.rb +0 -35
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -469
- data/rake/testing.rb +0 -192
- data/rake/verifytask.rb +0 -64
- data/utils.rb +0 -838
data/lib/wordnet/lexicon.rb
CHANGED
@@ -1,430 +1,202 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
# WordNet Lexicon object class
|
4
|
-
#
|
5
|
-
# == Synopsis
|
6
|
-
#
|
7
|
-
# lexicon = WordNet::Lexicon.new( dictpath )
|
8
|
-
#
|
9
|
-
# == Description
|
10
|
-
#
|
11
|
-
# Instances of this class abstract access to the various databases of the
|
12
|
-
# WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
|
13
|
-
#
|
14
|
-
# == Author
|
15
|
-
#
|
16
|
-
# Michael Granger <ged@FaerieMUD.org>
|
17
|
-
#
|
18
|
-
# Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
|
19
|
-
#
|
20
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
21
|
-
# software under the terms of the Perl Artistic License. (See
|
22
|
-
# http://language.perl.com/misc/Artistic.html)
|
23
|
-
#
|
24
|
-
# Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
|
25
|
-
# by Dan Brian.
|
26
|
-
#
|
27
|
-
# == Version
|
28
|
-
#
|
29
|
-
# $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
30
|
-
#
|
31
|
-
|
32
|
-
require 'rbconfig'
|
2
|
+
|
33
3
|
require 'pathname'
|
34
|
-
require '
|
35
|
-
require 'sync'
|
4
|
+
require 'rubygems'
|
36
5
|
|
6
|
+
require 'wordnet' unless defined?( WordNet )
|
37
7
|
require 'wordnet/constants'
|
8
|
+
require 'wordnet/mixins'
|
38
9
|
require 'wordnet/synset'
|
10
|
+
require 'wordnet/word'
|
39
11
|
|
40
|
-
### Lexicon exception - something has gone wrong in the internals of the
|
41
|
-
### lexicon.
|
42
|
-
class WordNet::LexiconError < StandardError ; end
|
43
|
-
|
44
|
-
### Lookup error - the object being looked up either doesn't exist or is
|
45
|
-
### malformed
|
46
|
-
class WordNet::LookupError < StandardError ; end
|
47
12
|
|
48
|
-
|
49
|
-
|
50
|
-
### WordNet::Synset objects.
|
13
|
+
# WordNet lexicon class - abstracts access to the WordNet lexical
|
14
|
+
# database, and provides factory methods for looking up words and synsets.
|
51
15
|
class WordNet::Lexicon
|
52
|
-
include WordNet::Constants
|
53
|
-
|
54
|
-
|
55
|
-
# Subversion Id
|
56
|
-
SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
|
16
|
+
include WordNet::Constants,
|
17
|
+
WordNet::Loggable
|
57
18
|
|
58
|
-
#
|
59
|
-
|
19
|
+
# class LogTracer
|
20
|
+
# def method_missing( sym, msg, &block )
|
21
|
+
# if msg =~ /does not exist/
|
22
|
+
# $stderr.puts ">>> DOES NOT EXIST TRACE"
|
23
|
+
# $stderr.puts( caller(1).grep(/wordnet/i) )
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# end
|
60
27
|
|
61
28
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# The path to the WordNet BerkeleyDB Env. It lives in the directory that
|
67
|
-
# this module is in.
|
68
|
-
DEFAULT_DB_ENV = File::join( Config::CONFIG['datadir'], "ruby-wordnet" )
|
29
|
+
# Add the logger device to the default options after it's been loaded
|
30
|
+
WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [WordNet.logger] )
|
31
|
+
# WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [LogTracer.new] )
|
69
32
|
|
70
|
-
# Options for the creation of the Env object
|
71
|
-
ENV_OPTIONS = {
|
72
|
-
:set_timeout => 50,
|
73
|
-
:set_lk_detect => 1,
|
74
|
-
:set_verbose => false,
|
75
|
-
:set_lk_max => 3000,
|
76
|
-
}
|
77
33
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
#############################################################
|
84
|
-
### I N S T A N C E M E T H O D S
|
85
|
-
#############################################################
|
34
|
+
### Get the Sequel URI of the default database, if it's installed.
|
35
|
+
def self::default_db_uri
|
36
|
+
WordNet.log.debug "Fetching the default db URI"
|
86
37
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
|
91
|
-
def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
|
92
|
-
@mode = normalize_mode( mode )
|
93
|
-
debug_msg "Mode is: %04o" % [ mode ]
|
94
|
-
|
95
|
-
envflags = 0
|
96
|
-
dbflags = 0
|
97
|
-
|
98
|
-
unless self.readonly?
|
99
|
-
debug_msg "Using read/write flags"
|
100
|
-
envflags = ENV_FLAGS_RW
|
101
|
-
dbflags = BDB::CREATE
|
38
|
+
datadir = nil
|
39
|
+
if Gem.datadir( 'wordnet-defaultdb' )
|
40
|
+
datadir = Pathname( Gem.datadir('wordnet-defaultdb') )
|
102
41
|
else
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
107
|
-
|
108
|
-
debug_msg "Env flags are: %0s, dbflags are %0s" %
|
109
|
-
[ envflags.to_s(2), dbflags.to_s(2) ]
|
110
|
-
|
111
|
-
begin
|
112
|
-
@env = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
|
113
|
-
@index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
|
114
|
-
@data_db = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
|
115
|
-
@morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
|
116
|
-
rescue StandardError => err
|
117
|
-
msg = "Error while opening Ruby-WordNet data files: #{dbenv}: %s" %
|
118
|
-
[ err.message ]
|
119
|
-
raise err, msg, err.backtrace
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
######
|
126
|
-
public
|
127
|
-
######
|
128
|
-
|
129
|
-
# The BDB::Env object which contains the wordnet lexicon's databases.
|
130
|
-
attr_reader :env
|
131
|
-
|
132
|
-
# The handle to the index table
|
133
|
-
attr_reader :index_db
|
134
|
-
|
135
|
-
# The handle to the synset data table
|
136
|
-
attr_reader :data_db
|
137
|
-
|
138
|
-
# The handle to the morph table
|
139
|
-
attr_reader :morph_db
|
140
|
-
|
141
|
-
|
142
|
-
### Returns +true+ if the lexicon was opened in read-only mode.
|
143
|
-
def readonly?
|
144
|
-
( @mode & 0200 ).nonzero? ? false : true
|
145
|
-
end
|
146
|
-
|
147
|
-
|
148
|
-
### Returns +true+ if the lexicon was opened in read-write mode.
|
149
|
-
def readwrite?
|
150
|
-
! self.readonly?
|
151
|
-
end
|
152
|
-
|
153
|
-
|
154
|
-
### Close the lexicon's database environment
|
155
|
-
def close
|
156
|
-
@env.close if @env
|
157
|
-
end
|
158
|
-
|
159
|
-
|
160
|
-
### Checkpoint the database. (BerkeleyDB-specific)
|
161
|
-
def checkpoint( bytes=0, minutes=0 )
|
162
|
-
@env.checkpoint
|
163
|
-
end
|
164
|
-
|
165
|
-
|
166
|
-
### Remove any archival logfiles for the lexicon's database
|
167
|
-
### environment. (BerkeleyDB-specific).
|
168
|
-
def clean_logs
|
169
|
-
return unless self.readwrite?
|
170
|
-
self.archlogs.each do |logfile|
|
171
|
-
File::chmod( 0777, logfile )
|
172
|
-
File::delete( logfile )
|
42
|
+
WordNet.log.warn " no defaultdb gem; looking for the development database"
|
43
|
+
datadir = Pathname( __FILE__ ).dirname.parent.parent +
|
44
|
+
'wordnet-defaultdb/data/wordnet-defaultdb'
|
173
45
|
end
|
174
|
-
end
|
175
46
|
|
47
|
+
dbfile = datadir + 'wordnet30.sqlite'
|
48
|
+
WordNet.log.debug " dbfile is: %s" % [ dbfile ]
|
176
49
|
|
177
|
-
|
178
|
-
|
179
|
-
### word by counting the synsets returned by #lookup_synsets.
|
180
|
-
def familiarity( word, part_of_speech, polyCount=nil )
|
181
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
182
|
-
return nil unless @index_db.key?( wordkey )
|
183
|
-
@index_db[ wordkey ].split( WordNet::SUB_DELIM_RE ).length
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
### Look up sysets (Wordnet::Synset objects) matching +text+ as a
|
188
|
-
### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
|
189
|
-
### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
|
190
|
-
### +sense+, #lookup_synsets will return all matches that are a
|
191
|
-
### +part_of_speech+. If +sense+ is specified, only the synset object that
|
192
|
-
### matches that particular +part_of_speech+ and +sense+ is returned.
|
193
|
-
def lookup_synsets( word, part_of_speech, sense=nil )
|
194
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
195
|
-
pos = self.make_pos( part_of_speech )
|
196
|
-
synsets = []
|
197
|
-
|
198
|
-
# Look up the index entry, trying first the word as given, and if
|
199
|
-
# that fails, trying morphological conversion.
|
200
|
-
entry = @index_db[ wordkey ]
|
201
|
-
|
202
|
-
if entry.nil? && (word = self.morph( word, part_of_speech ))
|
203
|
-
wordkey = self.make_word_key( word, part_of_speech )
|
204
|
-
entry = @index_db[ wordkey ]
|
205
|
-
end
|
206
|
-
|
207
|
-
# If the lookup failed both ways, just abort
|
208
|
-
return nil unless entry
|
209
|
-
|
210
|
-
# Make synset keys from the entry, narrowing it to just the sense
|
211
|
-
# requested if one was specified.
|
212
|
-
synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }
|
213
|
-
if sense
|
214
|
-
return lookup_synsets_by_key( synkeys[sense - 1] )
|
50
|
+
if dbfile.exist?
|
51
|
+
return "sqlite:#{dbfile}"
|
215
52
|
else
|
216
|
-
return
|
53
|
+
return nil
|
217
54
|
end
|
218
55
|
end
|
219
56
|
|
220
57
|
|
221
|
-
|
222
|
-
###
|
223
|
-
|
224
|
-
def lookup_synsets_by_key( *keys )
|
225
|
-
synsets = []
|
226
|
-
|
227
|
-
keys.each {|key|
|
228
|
-
raise WordNet::LookupError, "Failed lookup of synset '#{key}':"\
|
229
|
-
"No such synset" unless @data_db.key?( key )
|
230
|
-
|
231
|
-
data = @data_db[ key ]
|
232
|
-
offset, part_of_speech = key.split( /%/, 2 )
|
233
|
-
synsets << WordNet::Synset::new( self, offset, part_of_speech, nil, data )
|
234
|
-
}
|
235
|
-
|
236
|
-
return *synsets
|
237
|
-
end
|
238
|
-
alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
|
239
|
-
|
240
|
-
|
241
|
-
### Returns a form of +word+ as a part of speech +part_of_speech+, as
|
242
|
-
### found in the WordNet morph files. The #lookup_synsets method perfoms
|
243
|
-
### morphological conversion automatically, so a call to #morph is not
|
244
|
-
### required.
|
245
|
-
def morph( word, part_of_speech )
|
246
|
-
return @morph_db[ self.make_word_key(word, part_of_speech) ]
|
247
|
-
end
|
248
|
-
|
58
|
+
#############################################################
|
59
|
+
### I N S T A N C E M E T H O D S
|
60
|
+
#############################################################
|
249
61
|
|
250
|
-
###
|
251
|
-
###
|
252
|
-
def
|
253
|
-
|
254
|
-
|
62
|
+
### Create a new WordNet::Lexicon object that will use the database connection specified by
|
63
|
+
### the given +dbconfig+.
|
64
|
+
def initialize( *args )
|
65
|
+
uri = if args.empty?
|
66
|
+
WordNet::Lexicon.default_db_uri or
|
67
|
+
raise WordNet::LexiconError,
|
68
|
+
"No default WordNetSQL database! You can install it via the " +
|
69
|
+
"wordnet-defaultdb gem, or download a version yourself from " +
|
70
|
+
"http://sourceforge.net/projects/wnsql/"
|
71
|
+
|
72
|
+
elsif args.first.is_a?( String )
|
73
|
+
args.shift
|
74
|
+
else
|
75
|
+
nil
|
76
|
+
end
|
255
77
|
|
78
|
+
options = WordNet::DEFAULT_DB_OPTIONS.merge( args.shift || {} )
|
256
79
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
# the target text
|
265
|
-
cursor = @index_db.cursor
|
266
|
-
rec = cursor.set_range( text )
|
267
|
-
while /^#{text}/ =~ rec[0]
|
268
|
-
words.push rec[0]
|
269
|
-
rec = cursor.next
|
80
|
+
if uri
|
81
|
+
self.log.debug "Connecting using uri + options style: uri = %s, options = %p" %
|
82
|
+
[ uri, options ]
|
83
|
+
@db = Sequel.connect( uri, options )
|
84
|
+
else
|
85
|
+
self.log.debug "Connecting using hash style connect: options = %p" % [ options ]
|
86
|
+
@db = Sequel.connect( options )
|
270
87
|
end
|
271
|
-
cursor.close
|
272
|
-
|
273
|
-
return *words
|
274
|
-
end
|
275
88
|
|
89
|
+
@uri = @db.uri
|
90
|
+
self.log.debug " setting model db to: %s" % [ @uri ]
|
276
91
|
|
277
|
-
|
278
|
-
|
279
|
-
def create_synset( word, part_of_speech )
|
280
|
-
return WordNet::Synset::new( self, '', part_of_speech, word )
|
92
|
+
@db.sql_log_level = :debug
|
93
|
+
WordNet::Model.db = @db
|
281
94
|
end
|
282
|
-
alias_method :new_synset, :create_synset
|
283
|
-
|
284
|
-
|
285
|
-
### Store the specified +synset+ (a WordNet::Synset object) in the
|
286
|
-
### lexicon. Returns the key of the stored synset.
|
287
|
-
def store_synset( synset )
|
288
|
-
strippedOffset = nil
|
289
|
-
pos = nil
|
290
95
|
|
291
|
-
# Start a transaction
|
292
|
-
@env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|
|
293
|
-
|
294
|
-
# If this is a new synset, generate an offset for it
|
295
|
-
if synset.offset == 1
|
296
|
-
synset.offset =
|
297
|
-
(datadb['offsetcount'] = datadb['offsetcount'].to_i + 1)
|
298
|
-
end
|
299
|
-
|
300
|
-
# Write the data entry
|
301
|
-
datadb[ synset.key ] = synset.serialize
|
302
|
-
|
303
|
-
# Write the index entries
|
304
|
-
txn.begin( BDB::TXN_COMMIT, @index_db ) do |txn,indexdb|
|
305
|
-
|
306
|
-
# Make word/part-of-speech pairs from the words in the synset
|
307
|
-
synset.words.collect {|word| word + "%" + pos }.each {|word|
|
308
|
-
|
309
|
-
# If the index already has this word, but not this
|
310
|
-
# synset, add it
|
311
|
-
if indexdb.key?( word )
|
312
|
-
indexdb[ word ] << SUB_DELIM << synset.offset unless
|
313
|
-
indexdb[ word ].include?( synset.offset )
|
314
|
-
else
|
315
|
-
indexdb[ word ] = synset.offset
|
316
|
-
end
|
317
|
-
}
|
318
|
-
end # transaction on @index_db
|
319
|
-
end # transaction on @dataDB
|
320
|
-
|
321
|
-
return synset.offset
|
322
|
-
end
|
323
96
|
|
97
|
+
######
|
98
|
+
public
|
99
|
+
######
|
324
100
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
else
|
352
|
-
index_db.delete( word )
|
353
|
-
end
|
354
|
-
end
|
355
|
-
}
|
101
|
+
# The database URI the lexicon will use to look up WordNet data
|
102
|
+
attr_reader :uri
|
103
|
+
|
104
|
+
# The Sequel::Database object that model tables read from
|
105
|
+
attr_reader :db
|
106
|
+
|
107
|
+
|
108
|
+
### Find a Word or Synset in the WordNet database and return it. In the case of multiple
|
109
|
+
### matching Synsets, only the first will be returned. If you want them all, you can use
|
110
|
+
### #lookup_synsets instead.
|
111
|
+
###
|
112
|
+
### The +word+ can be one of:
|
113
|
+
### [Integer]
|
114
|
+
### Looks up the corresponding Word or Synset by ID. This assumes that all Synset IDs are
|
115
|
+
### 9 digits or greater, which is true as of WordNet 3.1. Any additional +args+ are
|
116
|
+
### ignored.
|
117
|
+
### [Symbol, String]
|
118
|
+
### Look up a Word by its gloss using #lookup_synsets, passing any additional +args+,
|
119
|
+
### and return the first one that is found.
|
120
|
+
def []( word, *args )
|
121
|
+
if word.is_a?( Integer )
|
122
|
+
# :TODO: Assumes Synset IDs are all >= 100_000_000
|
123
|
+
if word.to_s.length > 8
|
124
|
+
return WordNet::Synset[ word ]
|
125
|
+
else
|
126
|
+
return WordNet::Word[ word ]
|
356
127
|
end
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
# Delete the synset from the main db
|
361
|
-
datadb.delete( synset.offset )
|
128
|
+
else
|
129
|
+
return self.lookup_synsets( word, 1, *args ).first
|
362
130
|
end
|
363
|
-
|
364
|
-
return true
|
365
|
-
end
|
366
|
-
|
367
|
-
|
368
|
-
#########
|
369
|
-
protected
|
370
|
-
#########
|
371
|
-
|
372
|
-
### Normalize various ways of specifying a part of speech into the
|
373
|
-
### WordNet part of speech indicator from the +original+ representation,
|
374
|
-
### which may be the name (e.g., "noun"); +nil+, in which case it
|
375
|
-
### defaults to the indicator for a noun; or the indicator character
|
376
|
-
### itself, in which case it is returned unmodified.
|
377
|
-
def make_pos( original )
|
378
|
-
return WordNet::Noun if original.nil?
|
379
|
-
osym = original.to_s.intern
|
380
|
-
return WordNet::SYNTACTIC_CATEGORIES[ osym ] if
|
381
|
-
WordNet::SYNTACTIC_CATEGORIES.key?( osym )
|
382
|
-
return original if SYNTACTIC_SYMBOLS.key?( original )
|
383
|
-
return nil
|
384
|
-
end
|
385
|
-
|
386
|
-
|
387
|
-
### Make a lexicon key out of the given +word+ and part of speech
|
388
|
-
### (+pos+).
|
389
|
-
def make_word_key( word, pos )
|
390
|
-
pos = self.make_pos( pos )
|
391
|
-
word = word.gsub( /\s+/, '_' )
|
392
|
-
return "#{word}%#{pos}"
|
393
|
-
end
|
394
|
-
|
395
|
-
|
396
|
-
### Return a list of archival logfiles that can be removed
|
397
|
-
### safely. (BerkeleyDB-specific).
|
398
|
-
def archlogs
|
399
|
-
return @env.log_archive( BDB::ARCH_ABS )
|
400
131
|
end
|
401
132
|
|
402
133
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
###
|
408
|
-
###
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
134
|
+
### Look up synsets (Wordnet::Synset objects) associated with +word+, optionally filtered
|
135
|
+
### by additional +args+.
|
136
|
+
###
|
137
|
+
### The *args* can contain:
|
138
|
+
###
|
139
|
+
### [Integer, Range]
|
140
|
+
### The sense/s of the Word (1-indexed) to use when searching for Synsets. If not specified,
|
141
|
+
### all senses of the +word+ are used.
|
142
|
+
### [Regexp]
|
143
|
+
### The Word's Synsets are filtered by definition using an RLIKE filter. Note that not all
|
144
|
+
### databases (including the default one, sqlite3) support RLIKE.
|
145
|
+
### [Symbol, String]
|
146
|
+
### If it matches one of either a lexical domain (e.g., "verb.motion") or a part of
|
147
|
+
### speech (e.g., "adjective", :noun, :v), the resulting Synsets are filtered by that
|
148
|
+
### criteria.
|
149
|
+
### If it doesn't match a lexical domain or part of speech, it's used to filter by
|
150
|
+
### definition using a LIKE query.
|
151
|
+
###
|
152
|
+
def lookup_synsets( word, *args )
|
153
|
+
dataset = WordNet::Synset.filter( :words => WordNet::Word.filter(lemma: word.to_s) )
|
154
|
+
self.log.debug "Looking up synsets for %p" % [ word.to_s ]
|
155
|
+
|
156
|
+
# Add filters to the dataset for each argument
|
157
|
+
args.each do |arg|
|
158
|
+
self.log.debug " constraint arg: %p" % [ arg ]
|
159
|
+
case arg
|
160
|
+
|
161
|
+
when Integer
|
162
|
+
self.log.debug " limiting to sense %d" % [ arg ]
|
163
|
+
dataset = dataset.limit( 1, arg-1 )
|
164
|
+
|
165
|
+
when Range
|
166
|
+
self.log.debug " limiting to range of senses: %p" % [ arg ]
|
167
|
+
dataset = dataset.limit( arg.end - arg.begin, arg.begin - 1 )
|
168
|
+
|
169
|
+
when Regexp
|
170
|
+
self.log.debug " filter: definition =~ %p" % [ arg ]
|
171
|
+
dataset = dataset.filter( definition: arg )
|
172
|
+
|
173
|
+
when Symbol, String
|
174
|
+
# Lexical domain, e.g., "verb.motion"
|
175
|
+
if domain = WordNet::Synset.lexdomains[ arg.to_s ]
|
176
|
+
self.log.debug " filter: lex domain: %s (%d)" % [ arg, domain[:lexdomainid] ]
|
177
|
+
dataset = dataset.filter( lexdomainid: domain[:lexdomainid] )
|
178
|
+
|
179
|
+
# Part of speech symbol, e.g., "v"
|
180
|
+
elsif WordNet::Synset.postype_table.key?( arg.to_sym )
|
181
|
+
self.log.debug " filter: part of speech: %s" % [ arg ]
|
182
|
+
dataset = dataset.filter( pos: arg.to_s )
|
183
|
+
|
184
|
+
# Part of speech name, e.g., "verb"
|
185
|
+
elsif pos = WordNet::Synset.postypes[ arg.to_s ]
|
186
|
+
self.log.debug " filter: part of speech: %s" % [ pos.to_s ]
|
187
|
+
dataset = dataset.filter( pos: pos.to_s )
|
188
|
+
|
189
|
+
# Assume it's a definition match
|
190
|
+
else
|
191
|
+
pattern = "%%%s%%" % [ arg ]
|
192
|
+
self.log.debug " filter: definition LIKE %p" % [ pattern ]
|
193
|
+
dataset = dataset.filter { :definition.like(pattern) }
|
194
|
+
end
|
195
|
+
end
|
419
196
|
end
|
420
|
-
end
|
421
197
|
|
422
|
-
|
423
|
-
def debug_msg( *msg )
|
424
|
-
return unless $DEBUG
|
425
|
-
$deferr.puts msg
|
198
|
+
return dataset.all
|
426
199
|
end
|
427
|
-
|
428
200
|
|
429
201
|
end # class WordNet::Lexicon
|
430
202
|
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'wordnet' unless defined?( WordNet )
|
4
|
+
|
5
|
+
module WordNet
|
6
|
+
|
7
|
+
# Add logging to a WordNet class. Including classes get #log and #log_debug methods.
|
8
|
+
module Loggable
|
9
|
+
|
10
|
+
# Level names to levels
|
11
|
+
LEVEL = {
|
12
|
+
:debug => Logger::DEBUG,
|
13
|
+
:info => Logger::INFO,
|
14
|
+
:warn => Logger::WARN,
|
15
|
+
:error => Logger::ERROR,
|
16
|
+
:fatal => Logger::FATAL,
|
17
|
+
}
|
18
|
+
|
19
|
+
### A logging proxy class that wraps calls to the logger into calls that include
|
20
|
+
### the name of the calling class.
|
21
|
+
### @private
|
22
|
+
class ClassNameProxy
|
23
|
+
|
24
|
+
### Create a new proxy for the given +klass+.
|
25
|
+
def initialize( klass, force_debug=false )
|
26
|
+
@classname = klass.name
|
27
|
+
@force_debug = force_debug
|
28
|
+
end
|
29
|
+
|
30
|
+
### Delegate calls to the global logger with the class name as the 'progname'
|
31
|
+
### argument.
|
32
|
+
def method_missing( sym, msg=nil, &block )
|
33
|
+
return super unless LEVEL.key?( sym )
|
34
|
+
sym = :debug if @force_debug
|
35
|
+
WordNet.logger.add( LEVEL[sym], msg, @classname, &block )
|
36
|
+
end
|
37
|
+
end # ClassNameProxy
|
38
|
+
|
39
|
+
#########
|
40
|
+
protected
|
41
|
+
#########
|
42
|
+
|
43
|
+
### Copy constructor -- clear the original's log proxy.
|
44
|
+
def initialize_copy( original )
|
45
|
+
@log_proxy = @log_debug_proxy = nil
|
46
|
+
super
|
47
|
+
end
|
48
|
+
|
49
|
+
### Return the proxied logger.
|
50
|
+
def log
|
51
|
+
@log_proxy ||= ClassNameProxy.new( self.class )
|
52
|
+
end
|
53
|
+
|
54
|
+
### Return a proxied "debug" logger that ignores other level specification.
|
55
|
+
def log_debug
|
56
|
+
@log_debug_proxy ||= ClassNameProxy.new( self.class, true )
|
57
|
+
end
|
58
|
+
end # module Loggable
|
59
|
+
|
60
|
+
|
61
|
+
end # module WordNet
|
62
|
+
|