wordnet 0.0.5 → 1.0.0.pre.126
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.rdoc +5 -0
- data/LICENSE +9 -9
- data/Manifest.txt +39 -0
- data/README.rdoc +60 -0
- data/Rakefile +47 -267
- data/TODO +9 -0
- data/WordNet30-license.txt +31 -0
- data/examples/add-laced-boots.rb +35 -0
- data/examples/clothes-with-collars.rb +42 -0
- data/examples/clothesWithTongues.rb +0 -0
- data/examples/domainTree.rb +0 -0
- data/examples/memberTree.rb +0 -0
- data/lib/wordnet/constants.rb +259 -296
- data/lib/wordnet/lexicallink.rb +34 -0
- data/lib/wordnet/lexicon.rb +158 -386
- data/lib/wordnet/mixins.rb +62 -0
- data/lib/wordnet/model.rb +78 -0
- data/lib/wordnet/morph.rb +25 -0
- data/lib/wordnet/semanticlink.rb +52 -0
- data/lib/wordnet/sense.rb +55 -0
- data/lib/wordnet/sumoterm.rb +21 -0
- data/lib/wordnet/synset.rb +404 -859
- data/lib/wordnet/utils.rb +126 -0
- data/lib/wordnet/word.rb +119 -0
- data/lib/wordnet.rb +113 -76
- data/spec/lib/helpers.rb +102 -133
- data/spec/linguawordnet.tests.rb +38 -0
- data/spec/wordnet/lexicon_spec.rb +96 -186
- data/spec/wordnet/model_spec.rb +59 -0
- data/spec/wordnet/semanticlink_spec.rb +42 -0
- data/spec/wordnet/synset_spec.rb +27 -256
- data/spec/wordnet/word_spec.rb +58 -0
- data/spec/wordnet_spec.rb +52 -0
- data.tar.gz.sig +0 -0
- metadata +227 -188
- metadata.gz.sig +0 -0
- data/ChangeLog +0 -720
- data/README +0 -93
- data/Rakefile.local +0 -46
- data/convertdb.rb +0 -417
- data/examples/addLacedBoots.rb +0 -27
- data/examples/clothesWithCollars.rb +0 -36
- data/rake/dependencies.rb +0 -76
- data/rake/helpers.rb +0 -384
- data/rake/manual.rb +0 -755
- data/rake/packaging.rb +0 -112
- data/rake/publishing.rb +0 -303
- data/rake/rdoc.rb +0 -35
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -469
- data/rake/testing.rb +0 -192
- data/rake/verifytask.rb +0 -64
- data/utils.rb +0 -838
data/README
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
= Ruby-WordNet
|
2
|
-
== General Information
|
3
|
-
|
4
|
-
This library is a Ruby interface to WordNet®. WordNet® is an online lexical
|
5
|
-
reference system whose design is inspired by current psycholinguistic theories
|
6
|
-
of human lexical memory. English nouns, verbs, adjectives and adverbs are
|
7
|
-
organized into synonym sets, each representing one underlying lexical
|
8
|
-
concept. Different relations link the synonym sets.
|
9
|
-
|
10
|
-
You can find out more about WordNet® at <http://wordnet.princeton.edu/>.
|
11
|
-
|
12
|
-
This code was loosely based on the Lingua::Wordnet Perl module by Dan Brian, and
|
13
|
-
uses a similar strategy of converting the WordNet data files into a BerkeleyDB
|
14
|
-
database. The 'convertdb.rb' script in this directory can be used to build these
|
15
|
-
databases from the WordNet dictionaries.
|
16
|
-
|
17
|
-
This module is intended to offer basically the same functionality as
|
18
|
-
Lingua::Wordnet, and you should be able to use either (or both) to access and
|
19
|
-
modify the lexical database interchangeably. This module attempts to remain
|
20
|
-
fairly close in API to Lingua::Wordnet, so if you're familiar with it already,
|
21
|
-
you should be able to port things from one to the other with relatively little
|
22
|
-
trouble. This module, however, uses BerkeleyDB's transaction subsystem to allow
|
23
|
-
safe concurrent access to the databases.
|
24
|
-
|
25
|
-
Many thanks to Dan Brian, who did most of the hard work. His efforts made my job
|
26
|
-
mostly a matter of playing around.
|
27
|
-
|
28
|
-
|
29
|
-
== Caveats
|
30
|
-
|
31
|
-
The database-writing portions of this code have not been extensively tested, and
|
32
|
-
there are almost certainly bugs which will cause data to be lost or
|
33
|
-
miswritten. You should make backups of changes you make periodically.
|
34
|
-
|
35
|
-
I would greatly appreciate feedback on any aspect of this software. Suggestions,
|
36
|
-
feature requests, questions, design critiques, and bug reports are most
|
37
|
-
welcome. Relevant patches are particularly helpful. I may be reached at
|
38
|
-
<ged@FaerieMUD.org>.
|
39
|
-
|
40
|
-
|
41
|
-
== Installation
|
42
|
-
|
43
|
-
If you use RubyGems, you can install via:
|
44
|
-
|
45
|
-
$ sudo gem install wordnet
|
46
|
-
|
47
|
-
You can also install as a site library via the Rakefile:
|
48
|
-
|
49
|
-
$ wget http://deveiate.org/code/wordnet-x.y.z.tar.gz
|
50
|
-
$ tar xzvf wordnet-x.y.z.tar.gz
|
51
|
-
$ cd wordnet-x.y.z
|
52
|
-
$ sudo rake install
|
53
|
-
|
54
|
-
|
55
|
-
== More Information
|
56
|
-
|
57
|
-
There is a project page for Ruby-WordNet which can be found at:
|
58
|
-
<http://deveiate.org/projects/Ruby-WordNet/>.
|
59
|
-
|
60
|
-
This library was developed as part of the FaerieMUD Project. For more
|
61
|
-
information about the FaerieMUD project see <http://www.FaerieMUD.org/>.
|
62
|
-
|
63
|
-
You may also check out the latest development source for this module (which may or
|
64
|
-
may not be different than the release) using Subversion from the following URL:
|
65
|
-
|
66
|
-
svn://deveiate.org/Ruby-WordNet/trunk
|
67
|
-
|
68
|
-
|
69
|
-
== Authors
|
70
|
-
|
71
|
-
* Michael Granger <ged@FaerieMUD.org>
|
72
|
-
|
73
|
-
|
74
|
-
== Legal
|
75
|
-
|
76
|
-
Ruby-WordNet is Open Source Software which is Copyright © 2001-2008 by The
|
77
|
-
FaerieMUD Consortium.
|
78
|
-
|
79
|
-
It is licensed under the modified BSD license. See the LICENSE file for details.
|
80
|
-
|
81
|
-
WordNet® is a registered trademark of Princeton University.
|
82
|
-
|
83
|
-
Lingua::Wordnet is code licensed under the following terms:
|
84
|
-
|
85
|
-
Lingua::Wordnet
|
86
|
-
Copyright 1999,2000,2001 by Dan Brian.
|
87
|
-
|
88
|
-
This program is free software; you can redistribute it and/or modify
|
89
|
-
it under the same terms as Perl itself.
|
90
|
-
|
91
|
-
|
92
|
-
$Id: README 95 2008-09-05 18:49:25Z deveiant $
|
93
|
-
|
data/Rakefile.local
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
#!rake
|
2
|
-
#
|
3
|
-
# Project-local tasks for Ruby-WordNet
|
4
|
-
# $Id: Rakefile.local 95 2008-09-05 18:49:25Z deveiant $
|
5
|
-
#
|
6
|
-
# Authors:
|
7
|
-
# * Michael Granger <ged@FaerieMUD.org>
|
8
|
-
#
|
9
|
-
|
10
|
-
unless defined?( BASEDIR )
|
11
|
-
fail "This is meant to be loaded from the main Rakefile, not run directly."
|
12
|
-
end
|
13
|
-
|
14
|
-
|
15
|
-
require 'wordnet'
|
16
|
-
require 'rake'
|
17
|
-
|
18
|
-
CONVERT_UTIL = BASEDIR + 'convertdb.rb'
|
19
|
-
DATA_BUILD_DIR = BASEDIR + File.basename( WordNet::Lexicon::DEFAULT_DB_ENV )
|
20
|
-
DATA_DATABASE_FILE = DATA_BUILD_DIR + 'data'
|
21
|
-
|
22
|
-
|
23
|
-
### Tasks
|
24
|
-
|
25
|
-
# Add 'convert' to the default task, and the testing tasks
|
26
|
-
Rake::Task[:default].prerequisites << :convert
|
27
|
-
Rake::Task[:spec].prerequisites << :convert
|
28
|
-
|
29
|
-
### Task: convert
|
30
|
-
desc "Convert WordNet dict files to a database"
|
31
|
-
task :convert => DATA_DATABASE_FILE
|
32
|
-
|
33
|
-
# Conversion utility
|
34
|
-
file CONVERT_UTIL.to_s
|
35
|
-
|
36
|
-
# Build directory for the database files
|
37
|
-
directory DATA_BUILD_DIR.to_s
|
38
|
-
CLOBBER.include( DATA_BUILD_DIR.to_s )
|
39
|
-
|
40
|
-
# BerkeleyDB main database file
|
41
|
-
file DATA_DATABASE_FILE.to_s
|
42
|
-
task DATA_DATABASE_FILE.to_s => CONVERT_UTIL do
|
43
|
-
load CONVERT_UTIL
|
44
|
-
WordNetConverter.new( DATA_BUILD_DIR ).convertdb
|
45
|
-
end
|
46
|
-
|
data/convertdb.rb
DELETED
@@ -1,417 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
#
|
3
|
-
# Conversion script for Ruby-WordNet
|
4
|
-
#
|
5
|
-
# == Synopsis
|
6
|
-
#
|
7
|
-
# ./convertdb.rb [DATADIR]
|
8
|
-
#
|
9
|
-
# == Authors
|
10
|
-
#
|
11
|
-
# This is a port of Dan Brian's convertdb.pl in the Lingua::Wordnet
|
12
|
-
# distribution. It requires the 'strscan' library, which is in the standard
|
13
|
-
# library of Ruby 1.8.
|
14
|
-
#
|
15
|
-
# * Michael Granger <ged@FaerieMUD.org>
|
16
|
-
#
|
17
|
-
# == Copyright
|
18
|
-
#
|
19
|
-
# Copyright (c) 2003-2008 The FaerieMUD Consortium. All rights reserved.
|
20
|
-
#
|
21
|
-
# This module is free software. You may use, modify, and/or redistribute this
|
22
|
-
# software under the terms of the Perl Artistic License. (See
|
23
|
-
# http://language.perl.com/misc/Artistic.html)
|
24
|
-
#
|
25
|
-
# == Version
|
26
|
-
#
|
27
|
-
# $Id: convertdb.rb 94 2008-07-25 02:47:42Z deveiant $
|
28
|
-
#
|
29
|
-
|
30
|
-
begin
|
31
|
-
base = File::dirname( File::expand_path(__FILE__) )
|
32
|
-
$LOAD_PATH.unshift "#{base}/lib" unless $LOAD_PATH.include?( "#{base}/lib" )
|
33
|
-
$LOAD_PATH.unshift base
|
34
|
-
|
35
|
-
unless defined?( UtilityFunctions )
|
36
|
-
require "#{base}/utils.rb"
|
37
|
-
include UtilityFunctions
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
require 'pathname'
|
42
|
-
require 'strscan'
|
43
|
-
require 'wordnet'
|
44
|
-
require 'optparse'
|
45
|
-
require 'fileutils'
|
46
|
-
|
47
|
-
|
48
|
-
# Globals: Index of words => senses, StringScanner for parsing.
|
49
|
-
$senseIndex = {}
|
50
|
-
$scanner = StringScanner::new( "" )
|
51
|
-
|
52
|
-
class WordNetConverter
|
53
|
-
|
54
|
-
# Source WordNet files
|
55
|
-
IndexFiles = %w[ index.noun index.verb index.adj index.adv ]
|
56
|
-
MorphFiles = {
|
57
|
-
'adj.exc' => WordNet::Adjective,
|
58
|
-
'adv.exc' => WordNet::Adverb,
|
59
|
-
'noun.exc' => WordNet::Noun,
|
60
|
-
'verb.exc' => WordNet::Verb,
|
61
|
-
'cousin.exc' => '',
|
62
|
-
}
|
63
|
-
DataFiles = {
|
64
|
-
'data.adj' => WordNet::Adjective,
|
65
|
-
'data.adv' => WordNet::Adverb,
|
66
|
-
'data.noun' => WordNet::Noun,
|
67
|
-
'data.verb' => WordNet::Verb,
|
68
|
-
}
|
69
|
-
|
70
|
-
# Struct which represents a list of files, a database, and a processor function
|
71
|
-
# for moving records from each of the files into the database.
|
72
|
-
Fileset = Struct::new( "WordNetFileset", :files, :name, :db, :processor )
|
73
|
-
|
74
|
-
# How many records to insert between commits
|
75
|
-
CommitThreshold = 2000
|
76
|
-
|
77
|
-
# Temporary location for the lexicon data files
|
78
|
-
BuildDir = Pathname.new( __FILE__ ).expand_path.dirname +
|
79
|
-
Pathname.new( WordNet::Lexicon::DEFAULT_DB_ENV ).basename
|
80
|
-
|
81
|
-
|
82
|
-
### Create a new converter that will dump WordNet dictionary files into a BerkeleyDB
|
83
|
-
### in the given +builddir+
|
84
|
-
def initialize( builddir=BuildDir )
|
85
|
-
@builddir = Pathname.new( builddir )
|
86
|
-
end
|
87
|
-
|
88
|
-
|
89
|
-
### Convert the various dict files from the WordNet project into a BerkeleyDB database
|
90
|
-
def convertdb( errorLimit=0 )
|
91
|
-
$stderr.sync = $stdout.sync = true
|
92
|
-
header "WordNet Lexicon Converter"
|
93
|
-
|
94
|
-
# Make sure the user knows what they're in for
|
95
|
-
message "This program will convert WordNet data files into databases\n"\
|
96
|
-
"used by Ruby-WordNet. This will not affect existing WordNet files,\n"\
|
97
|
-
"but will require up to 40Mb of disk space.\n"
|
98
|
-
exit unless /^y/i =~ prompt_with_default("Continue?", "y")
|
99
|
-
|
100
|
-
# Open the database and check to be sure it's empty. Confirm overwrite if
|
101
|
-
# not. Checkpoint and set up logging proc if debugging.
|
102
|
-
if @builddir.exist? && ( @builddir + 'data' ).exist?
|
103
|
-
message ">>> Warning: Existing data in the Ruby-WordNet databases\n"\
|
104
|
-
"will be overwritten.\n"
|
105
|
-
abort( "user cancelled." ) unless
|
106
|
-
/^y/i =~ prompt_with_default( "Continue?", "n" )
|
107
|
-
@builddir.rmtree
|
108
|
-
end
|
109
|
-
|
110
|
-
# Find the source data files
|
111
|
-
default = nil
|
112
|
-
wndirs = Pathname.glob( Pathname.getwd + 'WordNet-*' )
|
113
|
-
localdict = Pathname.getwd + 'dict'
|
114
|
-
if !wndirs.empty?
|
115
|
-
default = wndirs.first + 'dict'
|
116
|
-
elsif localdict.exist?
|
117
|
-
default = localdict
|
118
|
-
else
|
119
|
-
default = '/usr/local/WordNet-3.0/dict'
|
120
|
-
end
|
121
|
-
|
122
|
-
message "Where can I find the WordNet data files?\n"
|
123
|
-
datadir = prompt_with_default( "Data directory", default )
|
124
|
-
datadir = Pathname.new( datadir )
|
125
|
-
|
126
|
-
abort( "Directory '#{datadir}' does not exist" ) unless datadir.exist?
|
127
|
-
abort( "'#{datadir}' is not a directory" ) unless datadir.directory?
|
128
|
-
testfile = datadir + "data.noun"
|
129
|
-
abort( "'#{datadir}' doesn't seem to contain the necessary files.") unless testfile.exist?
|
130
|
-
|
131
|
-
# Open the lexicon readwrite into the temporary datadir
|
132
|
-
@builddir.mkpath
|
133
|
-
lexicon = WordNet::Lexicon::new( @builddir.to_s, 0666 )
|
134
|
-
|
135
|
-
# Process each fileset
|
136
|
-
[ # Fileset, name, database handle, processor
|
137
|
-
Fileset::new( IndexFiles, "index", lexicon.index_db, method(:parse_index_line) ),
|
138
|
-
Fileset::new( MorphFiles, "morph", lexicon.morph_db, method(:parse_morph_line) ),
|
139
|
-
Fileset::new( DataFiles, "data", lexicon.data_db, method(:parse_synset_line) ),
|
140
|
-
].each do |set|
|
141
|
-
message "Converting %s files...\n" % set.name
|
142
|
-
set.db.truncate
|
143
|
-
|
144
|
-
# Process each file in the set with the appropriate processor method and
|
145
|
-
# insert results into the corresponding table.
|
146
|
-
set.files.each do |file,pos|
|
147
|
-
message " #{file}..."
|
148
|
-
|
149
|
-
filepath = File::join( datadir, file )
|
150
|
-
if !File::exists?( filepath )
|
151
|
-
message "missing: skipped\n"
|
152
|
-
next
|
153
|
-
end
|
154
|
-
|
155
|
-
txn, dbh = lexicon.env.txn_begin( 0, set.db )
|
156
|
-
entries = lineNumber = errors = 0
|
157
|
-
File::readlines( filepath ).each do |line|
|
158
|
-
lineNumber += 1
|
159
|
-
next if /^\s/ =~ line
|
160
|
-
|
161
|
-
key, value = set.processor.call( line.chomp, lineNumber, pos )
|
162
|
-
unless key
|
163
|
-
errors += 1
|
164
|
-
if errorLimit.nonzero? && errors >= errorLimit
|
165
|
-
abort( "Too many errors" )
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
dbh[ key ] = value
|
170
|
-
entries += 1
|
171
|
-
print "%d%s" % [ entries, "\x08" * entries.to_s.length ]
|
172
|
-
|
173
|
-
# Commit and start a new transaction every CommitThreshold records
|
174
|
-
if (entries % CommitThreshold).zero?
|
175
|
-
print "."
|
176
|
-
txn.commit( BDB::TXN_NOSYNC )
|
177
|
-
txn, dbh = lexicon.env.txn_begin( 0, set.db )
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
message "committing..."
|
182
|
-
txn.commit( BDB::TXN_SYNC )
|
183
|
-
message "done (%d entries, %d errors).\n" %
|
184
|
-
[ entries, errors ]
|
185
|
-
end
|
186
|
-
|
187
|
-
lock_stats = lexicon.env.lock_stat
|
188
|
-
message "Lock statistics:\n"
|
189
|
-
puts " Lock objects: #{lock_stats['st_nobjects']}/#{lock_stats['st_maxnobjects']}",
|
190
|
-
" Locks: #{lock_stats['st_nlocks']}/#{lock_stats['st_maxnlocks']}",
|
191
|
-
" Lockers: #{lock_stats['st_nlockers']}/#{lock_stats['st_maxnlockers']}"
|
192
|
-
|
193
|
-
|
194
|
-
message "Checkpointing DB and cleaning logs..."
|
195
|
-
lexicon.checkpoint
|
196
|
-
lexicon.clean_logs
|
197
|
-
puts "done."
|
198
|
-
end
|
199
|
-
|
200
|
-
message "done.\n\n"
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
#######
|
205
|
-
private
|
206
|
-
#######
|
207
|
-
|
208
|
-
# Index entry patterns
|
209
|
-
IndexEntry = /^(\S+)\s(\w)\s(\d+)\s(\d+)\s/
|
210
|
-
PointerSymbol = /(\S{1,2})\s/
|
211
|
-
SenseCounts = /(\d+)\s(\d+)\s/
|
212
|
-
SynsetId = /(\d{8})\s*/
|
213
|
-
|
214
|
-
### Parse an entry from one of the index files and return the key and
|
215
|
-
### data. Returns +nil+ if any part of the entry isn't able to be parsed. The
|
216
|
-
### +pos+ argument is not used -- it's just to make the interface between all
|
217
|
-
### three processor methods the same.
|
218
|
-
def parse_index_line( string, lineNumber, pos=nil )
|
219
|
-
$scanner.string = string
|
220
|
-
synsets = []
|
221
|
-
lemma, pos, polycnt = nil, nil, nil
|
222
|
-
|
223
|
-
raise "whole error" unless $scanner.scan( IndexEntry )
|
224
|
-
lemma, pos, polycnt, pcnt = $scanner[1], $scanner[2], $scanner[3], $scanner[4]
|
225
|
-
|
226
|
-
# Discard pointer symbols
|
227
|
-
pcnt.to_i.times do |i|
|
228
|
-
$scanner.skip( PointerSymbol ) or raise "couldn't skip pointer #{i}"
|
229
|
-
end
|
230
|
-
|
231
|
-
# Parse sense and tagsense counts
|
232
|
-
$scanner.scan( SenseCounts ) or raise "couldn't parse sense counts"
|
233
|
-
senseCount, tagSenseCount = $scanner[1], $scanner[2]
|
234
|
-
|
235
|
-
# Find synsets
|
236
|
-
senseCount.to_i.times do |i|
|
237
|
-
$scanner.scan( SynsetId ) or raise "couldn't parse synset #{i}"
|
238
|
-
synset = $scanner[1]
|
239
|
-
synsets.push( synset )
|
240
|
-
$senseIndex[ synset + "%" + pos + "%" + lemma ] = i.to_s
|
241
|
-
end
|
242
|
-
|
243
|
-
# Make the index entry and return it
|
244
|
-
key = lemma + "%" + pos
|
245
|
-
data = synsets.join(WordNet::SUB_DELIM)
|
246
|
-
|
247
|
-
return key, data
|
248
|
-
rescue => err
|
249
|
-
message "Index entry did not parse: %s at '%s...' (line %d)\n\t%s\n" % [
|
250
|
-
err.message,
|
251
|
-
$scanner.rest[0,20],
|
252
|
-
lineNumber,
|
253
|
-
err.backtrace[0]
|
254
|
-
]
|
255
|
-
return nil
|
256
|
-
end
|
257
|
-
|
258
|
-
|
259
|
-
### "Parse" a morph line and return it as a key and value.
|
260
|
-
def parse_morph_line( string, lineNumber, pos )
|
261
|
-
key, value = string.split
|
262
|
-
return "#{key}%#{pos}", value
|
263
|
-
rescue => err
|
264
|
-
message "Morph entry did not parse: %s for %s (pos = %s, line %d)\n\t%s\n" % [
|
265
|
-
err.message,
|
266
|
-
string.inspect,
|
267
|
-
pos.inspect,
|
268
|
-
lineNumber,
|
269
|
-
err.backtrace[0]
|
270
|
-
]
|
271
|
-
return nil
|
272
|
-
end
|
273
|
-
|
274
|
-
|
275
|
-
# Synset data patterns
|
276
|
-
Synset = /(\d+)\s(\d{2})\s(\w)\s(\w{2})\s/
|
277
|
-
SynWord = /(\S+)\s(\w)*\s*/
|
278
|
-
SynPtrCnt = /(\d{3})\s/
|
279
|
-
SynPtr = /(\S{1,2})\s(\d+)\s(\w)\s(\w{4})\s/
|
280
|
-
SynFrameCnt = /\s*(\d{2})\s/
|
281
|
-
SynFrame = /\+\s(\d{2})\s(\w{2})\s/
|
282
|
-
SynGloss = /\s*\|\s*(.+)?/
|
283
|
-
|
284
|
-
### Parse an entry from a data file and return the key and data. Returns +nil+
|
285
|
-
### if any part of the entry isn't able to be parsed.
|
286
|
-
def parse_synset_line( string, lineNumber, pos )
|
287
|
-
$scanner.string = string
|
288
|
-
|
289
|
-
filenum, synsetType, gloss = nil, nil, nil
|
290
|
-
words = []
|
291
|
-
ptrs = []
|
292
|
-
frames = []
|
293
|
-
|
294
|
-
# Parse the first part of the synset
|
295
|
-
$scanner.scan( Synset ) or raise "unable to parse synset"
|
296
|
-
offset, filenum, synsetType, wordCount =
|
297
|
-
$scanner[1], $scanner[2], $scanner[3], $scanner[4]
|
298
|
-
|
299
|
-
# Parse the words
|
300
|
-
wordCount.to_i(16).times do |i|
|
301
|
-
$scanner.scan( SynWord ) or raise "unable to parse word #{i}"
|
302
|
-
word, lexid = $scanner[1], $scanner[2]
|
303
|
-
senseKey = (offset + "%" + pos + "%" + word).downcase
|
304
|
-
if !$senseIndex.key?( senseKey )
|
305
|
-
newKey = senseKey.sub( /\(\w+\)$/, '' )
|
306
|
-
if !$senseIndex.key?( newKey )
|
307
|
-
raise "Sense index does not contain sense '#{senseKey}' "\
|
308
|
-
"(tried #{newKey}, too)."
|
309
|
-
end
|
310
|
-
senseKey = newKey
|
311
|
-
end
|
312
|
-
|
313
|
-
words.push( word + "%" + $senseIndex[senseKey].to_s )
|
314
|
-
end
|
315
|
-
|
316
|
-
# Parse pointers
|
317
|
-
if $scanner.scan( SynPtrCnt )
|
318
|
-
$scanner[1].to_i.times do |i|
|
319
|
-
$scanner.scan( SynPtr ) or raise "unable to parse synptr #{i}"
|
320
|
-
ptrs.push "%s %s%%%s %s" % [
|
321
|
-
$scanner[1],
|
322
|
-
$scanner[2],
|
323
|
-
$scanner[3],
|
324
|
-
$scanner[4],
|
325
|
-
]
|
326
|
-
end
|
327
|
-
else
|
328
|
-
raise "Couldn't parse pointer count"
|
329
|
-
end
|
330
|
-
|
331
|
-
# Parse frames if this synset is a verb
|
332
|
-
if synsetType == WordNet::Verb
|
333
|
-
if $scanner.scan( SynFrameCnt )
|
334
|
-
$scanner[1].to_i.times do |i|
|
335
|
-
$scanner.scan( SynFrame ) or raise "unable to parse frame #{i}"
|
336
|
-
frames.push "#{$scanner[1]} #{$scanner[2]}"
|
337
|
-
end
|
338
|
-
else
|
339
|
-
raise "Couldn't parse frame count"
|
340
|
-
end
|
341
|
-
end
|
342
|
-
|
343
|
-
# Find the gloss
|
344
|
-
if $scanner.scan( SynGloss )
|
345
|
-
gloss = $scanner[1].strip
|
346
|
-
end
|
347
|
-
|
348
|
-
# This should never happen, as the gloss matches pretty much anything to
|
349
|
-
# the end of line.
|
350
|
-
if !$scanner.empty?
|
351
|
-
raise "Trailing miscellaneous found at end of entry"
|
352
|
-
end
|
353
|
-
|
354
|
-
# Build the synset entry and return it
|
355
|
-
synsetType = WordNet::Adjective if synsetType == WordNet::Other
|
356
|
-
key = [ offset, synsetType ].join("%")
|
357
|
-
data = [
|
358
|
-
filenum,
|
359
|
-
words.join( WordNet::SUB_DELIM ),
|
360
|
-
ptrs.join( WordNet::SUB_DELIM ),
|
361
|
-
frames.join( WordNet::SUB_DELIM ),
|
362
|
-
gloss,
|
363
|
-
].join( WordNet::DELIM )
|
364
|
-
|
365
|
-
return key, data
|
366
|
-
rescue => err
|
367
|
-
message "Synset did not parse: %s at '%s...' (pos = %s, line %d)\n\t%s\n" % [
|
368
|
-
err.message,
|
369
|
-
$scanner.rest[0,20],
|
370
|
-
pos.inspect,
|
371
|
-
lineNumber,
|
372
|
-
err.backtrace[0]
|
373
|
-
]
|
374
|
-
return nil
|
375
|
-
end
|
376
|
-
|
377
|
-
end # class WordNetConverter
|
378
|
-
|
379
|
-
|
380
|
-
# Start the program if it's run directly
|
381
|
-
if $0 == __FILE__
|
382
|
-
errorLimit = 0
|
383
|
-
|
384
|
-
ARGV.options {|oparser|
|
385
|
-
oparser.banner = "Usage: #{File::basename($0)} -dv\n"
|
386
|
-
|
387
|
-
# Debugging on/off
|
388
|
-
oparser.on( "--debug", "-d", TrueClass, "Turn debugging on" ) {
|
389
|
-
$DEBUG = true
|
390
|
-
debugMsg "Turned debugging on."
|
391
|
-
}
|
392
|
-
|
393
|
-
# Verbose
|
394
|
-
oparser.on( "--verbose", "-v", TrueClass, "Verbose progress messages" ) {
|
395
|
-
$VERBOSE = true
|
396
|
-
debugMsg "Turned verbose on."
|
397
|
-
}
|
398
|
-
|
399
|
-
# Error-limit
|
400
|
-
oparser.on( "--error-limit=COUNT", "-eCOUNT", Integer,
|
401
|
-
"Error limit -- quit after COUNT errors" ) {|arg|
|
402
|
-
errorLimit = arg.to_i
|
403
|
-
debugMsg "Set error limit to #{errorLimit}"
|
404
|
-
}
|
405
|
-
|
406
|
-
# Handle the 'help' option
|
407
|
-
oparser.on( "--help", "-h", "Display this text." ) {
|
408
|
-
$stderr.puts oparser
|
409
|
-
exit!(0)
|
410
|
-
}
|
411
|
-
|
412
|
-
oparser.parse!
|
413
|
-
}
|
414
|
-
|
415
|
-
WordNetConverter.new.convertdb( errorLimit )
|
416
|
-
end
|
417
|
-
|
data/examples/addLacedBoots.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Add a synset for laced boots
|
4
|
-
#
|
5
|
-
|
6
|
-
$: << "lib"
|
7
|
-
require "WordNet"
|
8
|
-
|
9
|
-
lex = WordNet::Lexicon.new( "ruby-wordnet" )
|
10
|
-
|
11
|
-
boot = lex.lookup_synsets( "boot", "n", 1 )
|
12
|
-
laced_boot = lex.create_synset( "laced boot", "n" )
|
13
|
-
tongue = lex.lookup_synsets( "tongue", "n", 6 )
|
14
|
-
|
15
|
-
laced_boot.add_hypernyms( boot )
|
16
|
-
laced_boot.add_component_meronyms( tongue )
|
17
|
-
|
18
|
-
lex.unlock {
|
19
|
-
laced_boot.write
|
20
|
-
boot.write
|
21
|
-
tongue.write
|
22
|
-
}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
data/examples/clothesWithCollars.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Find all articles of clothing that have collars (Adapted from the synopsis
|
4
|
-
# of Lingua::Wordnet::Analysis)
|
5
|
-
#
|
6
|
-
|
7
|
-
$LOAD_PATH.unshift "lib"
|
8
|
-
require "wordnet"
|
9
|
-
|
10
|
-
# Create the lexicon
|
11
|
-
lex = WordNet::Lexicon.new
|
12
|
-
|
13
|
-
# Look up the clothing synset as the origin
|
14
|
-
clothing = lex.lookup_synsets( "clothing", WordNet::Noun, 1 )
|
15
|
-
|
16
|
-
part_word = ARGV.shift || "collar"
|
17
|
-
part = lex.lookup_synsets( part_word, WordNet::Noun, 1 ) or
|
18
|
-
abort( "Couldn't find synset for #{part_word}" )
|
19
|
-
|
20
|
-
|
21
|
-
puts "Looking for instances of:",
|
22
|
-
" #{part}",
|
23
|
-
"in the hyponyms of",
|
24
|
-
" #{clothing}",
|
25
|
-
""
|
26
|
-
|
27
|
-
# Now traverse all hyponyms of the clothing synset, and check for "part" among
|
28
|
-
# each one's meronyms, printing any we find
|
29
|
-
clothing.traverse( :hyponyms ) do |syn,depth|
|
30
|
-
if syn.search( :meronyms, part )
|
31
|
-
puts "Has a #{part_word}: #{syn}"
|
32
|
-
else
|
33
|
-
puts "Doesn't have a #{part_word}: #{syn}" if $DEBUG
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
data/rake/dependencies.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Dependency-checking and Installation Rake Tasks
|
3
|
-
# $Id: dependencies.rb 43 2008-09-05 18:19:16Z deveiant $
|
4
|
-
#
|
5
|
-
|
6
|
-
require 'rubygems/dependency_installer'
|
7
|
-
require 'rubygems/source_index'
|
8
|
-
require 'rubygems/requirement'
|
9
|
-
require 'rubygems/doc_manager'
|
10
|
-
|
11
|
-
### Install the specified +gems+ if they aren't already installed.
|
12
|
-
def install_gems( gems )
|
13
|
-
|
14
|
-
defaults = Gem::DependencyInstaller::DEFAULT_OPTIONS.merge({
|
15
|
-
:generate_rdoc => true,
|
16
|
-
:generate_ri => true,
|
17
|
-
:install_dir => Gem.dir,
|
18
|
-
:format_executable => false,
|
19
|
-
:test => false,
|
20
|
-
:version => Gem::Requirement.default,
|
21
|
-
})
|
22
|
-
|
23
|
-
# Check for root
|
24
|
-
if Process.euid != 0
|
25
|
-
$stderr.puts "This probably won't work, as you aren't root, but I'll try anyway"
|
26
|
-
end
|
27
|
-
|
28
|
-
gemindex = Gem::SourceIndex.from_installed_gems
|
29
|
-
|
30
|
-
gems.each do |gemname, reqstring|
|
31
|
-
requirement = Gem::Requirement.new( reqstring )
|
32
|
-
trace "requirement is: %p" % [ requirement ]
|
33
|
-
|
34
|
-
trace "Searching for an installed #{gemname}..."
|
35
|
-
specs = gemindex.find_name( gemname )
|
36
|
-
trace "...found %d specs: %s" %
|
37
|
-
[ specs.length, specs.collect {|s| "%s %s" % [s.name, s.version] }.join(', ') ]
|
38
|
-
|
39
|
-
if spec = specs.find {|spec| requirement.satisfied_by?(spec.version) }
|
40
|
-
log "Version %s of %s is already installed (needs %s); skipping..." %
|
41
|
-
[ spec.version, spec.name, requirement ]
|
42
|
-
next
|
43
|
-
end
|
44
|
-
|
45
|
-
rgv = Gem::Version.new( Gem::RubyGemsVersion )
|
46
|
-
installer = nil
|
47
|
-
|
48
|
-
log "Trying to install #{gemname.inspect} #{requirement}..."
|
49
|
-
if rgv >= Gem::Version.new( '1.1.1' )
|
50
|
-
installer = Gem::DependencyInstaller.new
|
51
|
-
installer.install( gemname, requirement )
|
52
|
-
else
|
53
|
-
installer = Gem::DependencyInstaller.new( gemname )
|
54
|
-
installer.install
|
55
|
-
end
|
56
|
-
|
57
|
-
installer.installed_gems.each do |spec|
|
58
|
-
log "Installed: %s" % [ spec.full_name ]
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
### Task: install runtime dependencies
|
66
|
-
desc "Install runtime dependencies as gems"
|
67
|
-
task :install_dependencies do
|
68
|
-
install_gems( DEPENDENCIES )
|
69
|
-
end
|
70
|
-
|
71
|
-
### Task: install gems for development tasks
|
72
|
-
desc "Install development dependencies as gems"
|
73
|
-
task :install_dev_dependencies do
|
74
|
-
install_gems( DEVELOPMENT_DEPENDENCIES )
|
75
|
-
end
|
76
|
-
|