wordnet 0.0.5 → 1.0.0.pre.126

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.gemtest +0 -0
  2. data/History.rdoc +5 -0
  3. data/LICENSE +9 -9
  4. data/Manifest.txt +39 -0
  5. data/README.rdoc +60 -0
  6. data/Rakefile +47 -267
  7. data/TODO +9 -0
  8. data/WordNet30-license.txt +31 -0
  9. data/examples/add-laced-boots.rb +35 -0
  10. data/examples/clothes-with-collars.rb +42 -0
  11. data/examples/clothesWithTongues.rb +0 -0
  12. data/examples/domainTree.rb +0 -0
  13. data/examples/memberTree.rb +0 -0
  14. data/lib/wordnet/constants.rb +259 -296
  15. data/lib/wordnet/lexicallink.rb +34 -0
  16. data/lib/wordnet/lexicon.rb +158 -386
  17. data/lib/wordnet/mixins.rb +62 -0
  18. data/lib/wordnet/model.rb +78 -0
  19. data/lib/wordnet/morph.rb +25 -0
  20. data/lib/wordnet/semanticlink.rb +52 -0
  21. data/lib/wordnet/sense.rb +55 -0
  22. data/lib/wordnet/sumoterm.rb +21 -0
  23. data/lib/wordnet/synset.rb +404 -859
  24. data/lib/wordnet/utils.rb +126 -0
  25. data/lib/wordnet/word.rb +119 -0
  26. data/lib/wordnet.rb +113 -76
  27. data/spec/lib/helpers.rb +102 -133
  28. data/spec/linguawordnet.tests.rb +38 -0
  29. data/spec/wordnet/lexicon_spec.rb +96 -186
  30. data/spec/wordnet/model_spec.rb +59 -0
  31. data/spec/wordnet/semanticlink_spec.rb +42 -0
  32. data/spec/wordnet/synset_spec.rb +27 -256
  33. data/spec/wordnet/word_spec.rb +58 -0
  34. data/spec/wordnet_spec.rb +52 -0
  35. data.tar.gz.sig +0 -0
  36. metadata +227 -188
  37. metadata.gz.sig +0 -0
  38. data/ChangeLog +0 -720
  39. data/README +0 -93
  40. data/Rakefile.local +0 -46
  41. data/convertdb.rb +0 -417
  42. data/examples/addLacedBoots.rb +0 -27
  43. data/examples/clothesWithCollars.rb +0 -36
  44. data/rake/dependencies.rb +0 -76
  45. data/rake/helpers.rb +0 -384
  46. data/rake/manual.rb +0 -755
  47. data/rake/packaging.rb +0 -112
  48. data/rake/publishing.rb +0 -303
  49. data/rake/rdoc.rb +0 -35
  50. data/rake/style.rb +0 -62
  51. data/rake/svn.rb +0 -469
  52. data/rake/testing.rb +0 -192
  53. data/rake/verifytask.rb +0 -64
  54. data/utils.rb +0 -838
@@ -1,430 +1,202 @@
1
1
  #!/usr/bin/ruby
2
- #
3
- # WordNet Lexicon object class
4
- #
5
- # == Synopsis
6
- #
7
- # lexicon = WordNet::Lexicon.new( dictpath )
8
- #
9
- # == Description
10
- #
11
- # Instances of this class abstract access to the various databases of the
12
- # WordNet lexicon. It can be used to look up and search for WordNet::Synsets.
13
- #
14
- # == Author
15
- #
16
- # Michael Granger <ged@FaerieMUD.org>
17
- #
18
- # Copyright (c) 2002, 2003, 2005 The FaerieMUD Consortium. All rights reserved.
19
- #
20
- # This module is free software. You may use, modify, and/or redistribute this
21
- # software under the terms of the Perl Artistic License. (See
22
- # http://language.perl.com/misc/Artistic.html)
23
- #
24
- # Much of this code was inspired by/ported from the Lingua::Wordnet Perl module
25
- # by Dan Brian.
26
- #
27
- # == Version
28
- #
29
- # $Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
30
- #
31
-
32
- require 'rbconfig'
2
+
33
3
  require 'pathname'
34
- require 'bdb'
35
- require 'sync'
4
+ require 'rubygems'
36
5
 
6
+ require 'wordnet' unless defined?( WordNet )
37
7
  require 'wordnet/constants'
8
+ require 'wordnet/mixins'
38
9
  require 'wordnet/synset'
10
+ require 'wordnet/word'
39
11
 
40
- ### Lexicon exception - something has gone wrong in the internals of the
41
- ### lexicon.
42
- class WordNet::LexiconError < StandardError ; end
43
-
44
- ### Lookup error - the object being looked up either doesn't exist or is
45
- ### malformed
46
- class WordNet::LookupError < StandardError ; end
47
12
 
48
- ### WordNet lexicon class - abstracts access to the WordNet lexical
49
- ### databases, and provides factory methods for looking up and creating new
50
- ### WordNet::Synset objects.
13
+ # WordNet lexicon class - abstracts access to the WordNet lexical
14
+ # database, and provides factory methods for looking up words and synsets.
51
15
  class WordNet::Lexicon
52
- include WordNet::Constants
53
- include CrossCase if defined?( CrossCase )
54
-
55
- # Subversion Id
56
- SvnId = %q$Id: lexicon.rb 93 2008-07-12 00:56:49Z deveiant $
16
+ include WordNet::Constants,
17
+ WordNet::Loggable
57
18
 
58
- # Subversion revision
59
- SvnRev = %q$Rev: 93 $
19
+ # class LogTracer
20
+ # def method_missing( sym, msg, &block )
21
+ # if msg =~ /does not exist/
22
+ # $stderr.puts ">>> DOES NOT EXIST TRACE"
23
+ # $stderr.puts( caller(1).grep(/wordnet/i) )
24
+ # end
25
+ # end
26
+ # end
60
27
 
61
28
 
62
- #############################################################
63
- ### B E R K E L E Y D B C O N F I G U R A T I O N
64
- #############################################################
65
-
66
- # The path to the WordNet BerkeleyDB Env. It lives in the directory that
67
- # this module is in.
68
- DEFAULT_DB_ENV = File::join( Config::CONFIG['datadir'], "ruby-wordnet" )
29
+ # Add the logger device to the default options after it's been loaded
30
+ WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [WordNet.logger] )
31
+ # WordNet::DEFAULT_DB_OPTIONS.merge!( :logger => [LogTracer.new] )
69
32
 
70
- # Options for the creation of the Env object
71
- ENV_OPTIONS = {
72
- :set_timeout => 50,
73
- :set_lk_detect => 1,
74
- :set_verbose => false,
75
- :set_lk_max => 3000,
76
- }
77
33
 
78
- # Flags for the creation of the Env object (read-write and read-only)
79
- ENV_FLAGS_RW = BDB::CREATE|BDB::INIT_TRANSACTION|BDB::RECOVER|BDB::INIT_MPOOL
80
- ENV_FLAGS_RO = BDB::INIT_MPOOL
81
-
82
-
83
- #############################################################
84
- ### I N S T A N C E M E T H O D S
85
- #############################################################
34
+ ### Get the Sequel URI of the default database, if it's installed.
35
+ def self::default_db_uri
36
+ WordNet.log.debug "Fetching the default db URI"
86
37
 
87
- ### Create a new WordNet::Lexicon object that will read its data from
88
- ### the given +dbenv+ (a BerkeleyDB env directory). The database will be
89
- ### opened with the specified +mode+, which can either be a numeric
90
- ### octal mode (e.g., 0444) or one of (:readonly, :readwrite).
91
- def initialize( dbenv=DEFAULT_DB_ENV, mode=:readonly )
92
- @mode = normalize_mode( mode )
93
- debug_msg "Mode is: %04o" % [ mode ]
94
-
95
- envflags = 0
96
- dbflags = 0
97
-
98
- unless self.readonly?
99
- debug_msg "Using read/write flags"
100
- envflags = ENV_FLAGS_RW
101
- dbflags = BDB::CREATE
38
+ datadir = nil
39
+ if Gem.datadir( 'wordnet-defaultdb' )
40
+ datadir = Pathname( Gem.datadir('wordnet-defaultdb') )
102
41
  else
103
- debug_msg "Using readonly flags"
104
- envflags = ENV_FLAGS_RO
105
- dbflags = 0
106
- end
107
-
108
- debug_msg "Env flags are: %0s, dbflags are %0s" %
109
- [ envflags.to_s(2), dbflags.to_s(2) ]
110
-
111
- begin
112
- @env = BDB::Env.new( dbenv, envflags, ENV_OPTIONS )
113
- @index_db = @env.open_db( BDB::BTREE, "index", nil, dbflags, @mode )
114
- @data_db = @env.open_db( BDB::BTREE, "data", nil, dbflags, @mode )
115
- @morph_db = @env.open_db( BDB::BTREE, "morph", nil, dbflags, @mode )
116
- rescue StandardError => err
117
- msg = "Error while opening Ruby-WordNet data files: #{dbenv}: %s" %
118
- [ err.message ]
119
- raise err, msg, err.backtrace
120
- end
121
- end
122
-
123
-
124
-
125
- ######
126
- public
127
- ######
128
-
129
- # The BDB::Env object which contains the wordnet lexicon's databases.
130
- attr_reader :env
131
-
132
- # The handle to the index table
133
- attr_reader :index_db
134
-
135
- # The handle to the synset data table
136
- attr_reader :data_db
137
-
138
- # The handle to the morph table
139
- attr_reader :morph_db
140
-
141
-
142
- ### Returns +true+ if the lexicon was opened in read-only mode.
143
- def readonly?
144
- ( @mode & 0200 ).nonzero? ? false : true
145
- end
146
-
147
-
148
- ### Returns +true+ if the lexicon was opened in read-write mode.
149
- def readwrite?
150
- ! self.readonly?
151
- end
152
-
153
-
154
- ### Close the lexicon's database environment
155
- def close
156
- @env.close if @env
157
- end
158
-
159
-
160
- ### Checkpoint the database. (BerkeleyDB-specific)
161
- def checkpoint( bytes=0, minutes=0 )
162
- @env.checkpoint
163
- end
164
-
165
-
166
- ### Remove any archival logfiles for the lexicon's database
167
- ### environment. (BerkeleyDB-specific).
168
- def clean_logs
169
- return unless self.readwrite?
170
- self.archlogs.each do |logfile|
171
- File::chmod( 0777, logfile )
172
- File::delete( logfile )
42
+ WordNet.log.warn " no defaultdb gem; looking for the development database"
43
+ datadir = Pathname( __FILE__ ).dirname.parent.parent +
44
+ 'wordnet-defaultdb/data/wordnet-defaultdb'
173
45
  end
174
- end
175
46
 
47
+ dbfile = datadir + 'wordnet30.sqlite'
48
+ WordNet.log.debug " dbfile is: %s" % [ dbfile ]
176
49
 
177
- ### Returns an integer of the familiarity/polysemy count for +word+ as a
178
- ### +part_of_speech+. Note that polysemy can be identified for a given
179
- ### word by counting the synsets returned by #lookup_synsets.
180
- def familiarity( word, part_of_speech, polyCount=nil )
181
- wordkey = self.make_word_key( word, part_of_speech )
182
- return nil unless @index_db.key?( wordkey )
183
- @index_db[ wordkey ].split( WordNet::SUB_DELIM_RE ).length
184
- end
185
-
186
-
187
- ### Look up sysets (Wordnet::Synset objects) matching +text+ as a
188
- ### +part_of_speech+, where +part_of_speech+ is one of +WordNet::Noun+,
189
- ### +WordNet::Verb+, +WordNet::Adjective+, or +WordNet::Adverb+. Without
190
- ### +sense+, #lookup_synsets will return all matches that are a
191
- ### +part_of_speech+. If +sense+ is specified, only the synset object that
192
- ### matches that particular +part_of_speech+ and +sense+ is returned.
193
- def lookup_synsets( word, part_of_speech, sense=nil )
194
- wordkey = self.make_word_key( word, part_of_speech )
195
- pos = self.make_pos( part_of_speech )
196
- synsets = []
197
-
198
- # Look up the index entry, trying first the word as given, and if
199
- # that fails, trying morphological conversion.
200
- entry = @index_db[ wordkey ]
201
-
202
- if entry.nil? && (word = self.morph( word, part_of_speech ))
203
- wordkey = self.make_word_key( word, part_of_speech )
204
- entry = @index_db[ wordkey ]
205
- end
206
-
207
- # If the lookup failed both ways, just abort
208
- return nil unless entry
209
-
210
- # Make synset keys from the entry, narrowing it to just the sense
211
- # requested if one was specified.
212
- synkeys = entry.split( SUB_DELIM_RE ).collect {|off| "#{off}%#{pos}" }
213
- if sense
214
- return lookup_synsets_by_key( synkeys[sense - 1] )
50
+ if dbfile.exist?
51
+ return "sqlite:#{dbfile}"
215
52
  else
216
- return [ lookup_synsets_by_key(*synkeys) ].flatten
53
+ return nil
217
54
  end
218
55
  end
219
56
 
220
57
 
221
- ### Returns the WordNet::Synset objects corresponding to the +keys+
222
- ### specified. The +keys+ are made up of the target synset's "offset"
223
- ### and syntactic category catenated together with a '%' character.
224
- def lookup_synsets_by_key( *keys )
225
- synsets = []
226
-
227
- keys.each {|key|
228
- raise WordNet::LookupError, "Failed lookup of synset '#{key}':"\
229
- "No such synset" unless @data_db.key?( key )
230
-
231
- data = @data_db[ key ]
232
- offset, part_of_speech = key.split( /%/, 2 )
233
- synsets << WordNet::Synset::new( self, offset, part_of_speech, nil, data )
234
- }
235
-
236
- return *synsets
237
- end
238
- alias_method :lookup_synsetsByOffset, :lookup_synsets_by_key
239
-
240
-
241
- ### Returns a form of +word+ as a part of speech +part_of_speech+, as
242
- ### found in the WordNet morph files. The #lookup_synsets method perfoms
243
- ### morphological conversion automatically, so a call to #morph is not
244
- ### required.
245
- def morph( word, part_of_speech )
246
- return @morph_db[ self.make_word_key(word, part_of_speech) ]
247
- end
248
-
58
+ #############################################################
59
+ ### I N S T A N C E M E T H O D S
60
+ #############################################################
249
61
 
250
- ### Returns the result of looking up +word+ in the inverse of the WordNet
251
- ### morph files. _(This is undocumented in Lingua::Wordnet)_
252
- def reverse_morph( word )
253
- @morph_db.invert[ word ]
254
- end
62
+ ### Create a new WordNet::Lexicon object that will use the database connection specified by
63
+ ### the given +args+: an optional database URI String, followed by an optional Hash of connection options.
64
+ def initialize( *args )
65
+ uri = if args.empty?
66
+ WordNet::Lexicon.default_db_uri or
67
+ raise WordNet::LexiconError,
68
+ "No default WordNetSQL database! You can install it via the " +
69
+ "wordnet-defaultdb gem, or download a version yourself from " +
70
+ "http://sourceforge.net/projects/wnsql/"
71
+
72
+ elsif args.first.is_a?( String )
73
+ args.shift
74
+ else
75
+ nil
76
+ end
255
77
 
78
+ options = WordNet::DEFAULT_DB_OPTIONS.merge( args.shift || {} )
256
79
 
257
- ### Returns an array of compound words matching +text+.
258
- def grep( text )
259
- return [] if text.empty?
260
-
261
- words = []
262
-
263
- # Grab a cursor into the database and fetch while the key matches
264
- # the target text
265
- cursor = @index_db.cursor
266
- rec = cursor.set_range( text )
267
- while /^#{text}/ =~ rec[0]
268
- words.push rec[0]
269
- rec = cursor.next
80
+ if uri
81
+ self.log.debug "Connecting using uri + options style: uri = %s, options = %p" %
82
+ [ uri, options ]
83
+ @db = Sequel.connect( uri, options )
84
+ else
85
+ self.log.debug "Connecting using hash style connect: options = %p" % [ options ]
86
+ @db = Sequel.connect( options )
270
87
  end
271
- cursor.close
272
-
273
- return *words
274
- end
275
88
 
89
+ @uri = @db.uri
90
+ self.log.debug " setting model db to: %s" % [ @uri ]
276
91
 
277
- ### Factory method: Creates and returns a new WordNet::Synset object in
278
- ### this lexicon for the specified +word+ and +part_of_speech+.
279
- def create_synset( word, part_of_speech )
280
- return WordNet::Synset::new( self, '', part_of_speech, word )
92
+ @db.sql_log_level = :debug
93
+ WordNet::Model.db = @db
281
94
  end
282
- alias_method :new_synset, :create_synset
283
-
284
-
285
- ### Store the specified +synset+ (a WordNet::Synset object) in the
286
- ### lexicon. Returns the key of the stored synset.
287
- def store_synset( synset )
288
- strippedOffset = nil
289
- pos = nil
290
95
 
291
- # Start a transaction
292
- @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|
293
-
294
- # If this is a new synset, generate an offset for it
295
- if synset.offset == 1
296
- synset.offset =
297
- (datadb['offsetcount'] = datadb['offsetcount'].to_i + 1)
298
- end
299
-
300
- # Write the data entry
301
- datadb[ synset.key ] = synset.serialize
302
-
303
- # Write the index entries
304
- txn.begin( BDB::TXN_COMMIT, @index_db ) do |txn,indexdb|
305
-
306
- # Make word/part-of-speech pairs from the words in the synset
307
- synset.words.collect {|word| word + "%" + pos }.each {|word|
308
-
309
- # If the index already has this word, but not this
310
- # synset, add it
311
- if indexdb.key?( word )
312
- indexdb[ word ] << SUB_DELIM << synset.offset unless
313
- indexdb[ word ].include?( synset.offset )
314
- else
315
- indexdb[ word ] = synset.offset
316
- end
317
- }
318
- end # transaction on @index_db
319
- end # transaction on @dataDB
320
-
321
- return synset.offset
322
- end
323
96
 
97
+ ######
98
+ public
99
+ ######
324
100
 
325
- ### Remove the specified +synset+ (a WordNet::Synset object) in the
326
- ### lexicon. Returns the offset of the stored synset.
327
- def remove_synset( synset )
328
- # If it's not in the database (ie., doesn't have a real offset),
329
- # just return.
330
- return nil if synset.offset == 1
331
-
332
- # Start a transaction on the data table
333
- @env.begin( BDB::TXN_COMMIT, @data_db ) do |txn,datadb|
334
-
335
- # First remove the index entries for this synset by iterating
336
- # over each of its words
337
- txn.begin( BDB::TXN_COMMIT, @index_db ) do |txn,indexdb|
338
- synset.words.collect {|word| word + "%" + pos }.each {|word|
339
-
340
- # If the index contains an entry for this word, either
341
- # splice out the offset for the synset being deleted if
342
- # there are more than one, or just delete the whole
343
- # entry if it's the only one.
344
- if indexdb.key?( word )
345
- offsets = indexdb[ word ].
346
- split( SUB_DELIM_RE ).
347
- reject {|offset| offset == synset.offset}
348
-
349
- unless offsets.empty?
350
- index_db[ word ] = newoffsets.join( SUB_DELIM )
351
- else
352
- index_db.delete( word )
353
- end
354
- end
355
- }
101
+ # The database URI the lexicon will use to look up WordNet data
102
+ attr_reader :uri
103
+
104
+ # The Sequel::Database object that model tables read from
105
+ attr_reader :db
106
+
107
+
108
+ ### Find a Word or Synset in the WordNet database and return it. In the case of multiple
109
+ ### matching Synsets, only the first will be returned. If you want them all, you can use
110
+ ### #lookup_synsets instead.
111
+ ###
112
+ ### The +word+ can be one of:
113
+ ### [Integer]
114
+ ### Looks up the corresponding Word or Synset by ID. This assumes that all Synset IDs are
115
+ ### 9 digits or greater, which is true as of WordNet 3.1. Any additional +args+ are
116
+ ### ignored.
117
+ ### [Symbol, String]
118
+ ### Look up the Synsets for the word (matched by lemma) using #lookup_synsets, passing any additional +args+,
119
+ ### and return the first one that is found.
120
+ def []( word, *args )
121
+ if word.is_a?( Integer )
122
+ # :TODO: Assumes Synset IDs are all >= 100_000_000
123
+ if word.to_s.length > 8
124
+ return WordNet::Synset[ word ]
125
+ else
126
+ return WordNet::Word[ word ]
356
127
  end
357
-
358
- # :TODO: Delete synset from pointers of related synsets
359
-
360
- # Delete the synset from the main db
361
- datadb.delete( synset.offset )
128
+ else
129
+ return self.lookup_synsets( word, 1, *args ).first
362
130
  end
363
-
364
- return true
365
- end
366
-
367
-
368
- #########
369
- protected
370
- #########
371
-
372
- ### Normalize various ways of specifying a part of speech into the
373
- ### WordNet part of speech indicator from the +original+ representation,
374
- ### which may be the name (e.g., "noun"); +nil+, in which case it
375
- ### defaults to the indicator for a noun; or the indicator character
376
- ### itself, in which case it is returned unmodified.
377
- def make_pos( original )
378
- return WordNet::Noun if original.nil?
379
- osym = original.to_s.intern
380
- return WordNet::SYNTACTIC_CATEGORIES[ osym ] if
381
- WordNet::SYNTACTIC_CATEGORIES.key?( osym )
382
- return original if SYNTACTIC_SYMBOLS.key?( original )
383
- return nil
384
- end
385
-
386
-
387
- ### Make a lexicon key out of the given +word+ and part of speech
388
- ### (+pos+).
389
- def make_word_key( word, pos )
390
- pos = self.make_pos( pos )
391
- word = word.gsub( /\s+/, '_' )
392
- return "#{word}%#{pos}"
393
- end
394
-
395
-
396
- ### Return a list of archival logfiles that can be removed
397
- ### safely. (BerkeleyDB-specific).
398
- def archlogs
399
- return @env.log_archive( BDB::ARCH_ABS )
400
131
  end
401
132
 
402
133
 
403
- #######
404
- private
405
- #######
406
-
407
- ### Turn the given +origmode+ into an octal file mode such as that
408
- ### given to File.open.
409
- def normalize_mode( origmode )
410
- case origmode
411
- when :readonly
412
- 0444 & ~File.umask
413
- when :readwrite, :writable
414
- 0666 & ~File.umask
415
- when Fixnum
416
- origmode
417
- else
418
- raise ArgumentError, "unrecognized mode %p" % [origmode]
134
+ ### Look up synsets (Wordnet::Synset objects) associated with +word+, optionally filtered
135
+ ### by additional +args+.
136
+ ###
137
+ ### The *args* can contain:
138
+ ###
139
+ ### [Integer, Range]
140
+ ### The sense/s of the Word (1-indexed) to use when searching for Synsets. If not specified,
141
+ ### all senses of the +word+ are used.
142
+ ### [Regexp]
143
+ ### The Word's Synsets are filtered by definition using an RLIKE filter. Note that not all
144
+ ### databases (including the default one, sqlite3) support RLIKE.
145
+ ### [Symbol, String]
146
+ ### If it matches one of either a lexical domain (e.g., "verb.motion") or a part of
147
+ ### speech (e.g., "adjective", :noun, :v), the resulting Synsets are filtered by that
148
+ ### criteria.
149
+ ### If it doesn't match a lexical domain or part of speech, it's used to filter by
150
+ ### definition using a LIKE query.
151
+ ###
152
+ def lookup_synsets( word, *args )
153
+ dataset = WordNet::Synset.filter( :words => WordNet::Word.filter(lemma: word.to_s) )
154
+ self.log.debug "Looking up synsets for %p" % [ word.to_s ]
155
+
156
+ # Add filters to the dataset for each argument
157
+ args.each do |arg|
158
+ self.log.debug " constraint arg: %p" % [ arg ]
159
+ case arg
160
+
161
+ when Integer
162
+ self.log.debug " limiting to sense %d" % [ arg ]
163
+ dataset = dataset.limit( 1, arg-1 )
164
+
165
+ when Range
166
+ self.log.debug " limiting to range of senses: %p" % [ arg ]
167
+ dataset = dataset.limit( arg.end - arg.begin, arg.begin - 1 )
168
+
169
+ when Regexp
170
+ self.log.debug " filter: definition =~ %p" % [ arg ]
171
+ dataset = dataset.filter( definition: arg )
172
+
173
+ when Symbol, String
174
+ # Lexical domain, e.g., "verb.motion"
175
+ if domain = WordNet::Synset.lexdomains[ arg.to_s ]
176
+ self.log.debug " filter: lex domain: %s (%d)" % [ arg, domain[:lexdomainid] ]
177
+ dataset = dataset.filter( lexdomainid: domain[:lexdomainid] )
178
+
179
+ # Part of speech symbol, e.g., "v"
180
+ elsif WordNet::Synset.postype_table.key?( arg.to_sym )
181
+ self.log.debug " filter: part of speech: %s" % [ arg ]
182
+ dataset = dataset.filter( pos: arg.to_s )
183
+
184
+ # Part of speech name, e.g., "verb"
185
+ elsif pos = WordNet::Synset.postypes[ arg.to_s ]
186
+ self.log.debug " filter: part of speech: %s" % [ pos.to_s ]
187
+ dataset = dataset.filter( pos: pos.to_s )
188
+
189
+ # Assume it's a definition match
190
+ else
191
+ pattern = "%%%s%%" % [ arg ]
192
+ self.log.debug " filter: definition LIKE %p" % [ pattern ]
193
+ dataset = dataset.filter { :definition.like(pattern) }
194
+ end
195
+ end
419
196
  end
420
- end
421
197
 
422
- ### Output the given +msg+ to STDERR if $DEBUG is turned on.
423
- def debug_msg( *msg )
424
- return unless $DEBUG
425
- $deferr.puts msg
198
+ return dataset.all
426
199
  end
427
-
428
200
 
429
201
  end # class WordNet::Lexicon
430
202
 
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'wordnet' unless defined?( WordNet )
4
+
5
+ module WordNet
6
+
7
+ # Add logging to a WordNet class. Including classes get #log and #log_debug methods.
8
+ module Loggable
9
+
10
+ # Level names to levels
11
+ LEVEL = {
12
+ :debug => Logger::DEBUG,
13
+ :info => Logger::INFO,
14
+ :warn => Logger::WARN,
15
+ :error => Logger::ERROR,
16
+ :fatal => Logger::FATAL,
17
+ }
18
+
19
+ ### A logging proxy class that wraps calls to the logger into calls that include
20
+ ### the name of the calling class.
21
+ ### @private
22
+ class ClassNameProxy
23
+
24
+ ### Create a new proxy for the given +klass+.
25
+ def initialize( klass, force_debug=false )
26
+ @classname = klass.name
27
+ @force_debug = force_debug
28
+ end
29
+
30
+ ### Delegate calls to the global logger with the class name as the 'progname'
31
+ ### argument.
32
+ def method_missing( sym, msg=nil, &block )
33
+ return super unless LEVEL.key?( sym )
34
+ sym = :debug if @force_debug
35
+ WordNet.logger.add( LEVEL[sym], msg, @classname, &block )
36
+ end
37
+ end # ClassNameProxy
38
+
39
+ #########
40
+ protected
41
+ #########
42
+
43
+ ### Copy constructor -- clear the log proxies carried over from the original.
44
+ def initialize_copy( original )
45
+ @log_proxy = @log_debug_proxy = nil
46
+ super
47
+ end
48
+
49
+ ### Return the proxied logger.
50
+ def log
51
+ @log_proxy ||= ClassNameProxy.new( self.class )
52
+ end
53
+
54
+ ### Return a proxied "debug" logger that ignores other level specification.
55
+ def log_debug
56
+ @log_debug_proxy ||= ClassNameProxy.new( self.class, true )
57
+ end
58
+ end # module Loggable
59
+
60
+
61
+ end # module WordNet
62
+