Linguistics 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Artistic +127 -0
- data/ChangeLog +444 -0
- data/MANIFEST +19 -0
- data/README +178 -0
- data/README.english +245 -0
- data/TODO +17 -0
- data/experiments/randobjlist.rb +34 -0
- data/install.rb +154 -0
- data/lib/linguistics/en/infinitive.rb +1149 -0
- data/lib/linguistics/en/linkparser.rb +142 -0
- data/lib/linguistics/en/wordnet.rb +253 -0
- data/lib/linguistics/en.rb +1694 -0
- data/lib/linguistics/iso639.rb +456 -0
- data/lib/linguistics.rb +368 -0
- data/redist/crosscase.rb +298 -0
- data/test.rb +110 -0
- data/tests/en/conjunction.tests.rb +114 -0
- data/tests/en/inflect.tests.rb +1378 -0
- data/tests/lingtestcase.rb +239 -0
- data/tests/use.tests.rb +99 -0
- data/utils.rb +689 -0
- metadata +58 -0
@@ -0,0 +1,142 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# This file contains the extensions to the Linguistics::EN module which provide
|
4
|
+
# support for the Ruby LinkParser module. LinkParser enables grammatical queries
|
5
|
+
# of English language sentences.
|
6
|
+
#
|
7
|
+
# == Synopsis
|
8
|
+
#
|
9
|
+
# # Test to see whether or not the link parser is loaded.
|
10
|
+
# Linguistics::EN.has_link_parser?
|
11
|
+
# # => true
|
12
|
+
#
|
13
|
+
# # Diagram the first linkage for a test sentence
|
14
|
+
# puts "he is a big dog".en.sentence.linkages.first.to_s
|
15
|
+
# +---O*---+
|
16
|
+
# | +--Ds--+
|
17
|
+
# +Ss+ | +-A-+
|
18
|
+
# | | | | |
|
19
|
+
# he is a big dog
|
20
|
+
#
|
21
|
+
# # Find the verb in the sentence
|
22
|
+
# "he is a big dog".en.sentence.verb.to_s
|
23
|
+
# # => "is"
|
24
|
+
#
|
25
|
+
# # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
26
|
+
# given sentence.
|
27
|
+
# "he is a big dog".en.sentence.verb.infinitive
|
28
|
+
# # => "be"
|
29
|
+
#
|
30
|
+
# # Find the direct object of the sentence
|
31
|
+
# "he is a big dog".en.sentence.object.to_s
|
32
|
+
# # => "dog"
|
33
|
+
#
|
34
|
+
# # Look at the raw LinkParser::Word for the direct object of the sentence.
|
35
|
+
# "he is a big dog".en.sentence.object
|
36
|
+
# # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
|
37
|
+
# Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
|
38
|
+
# Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
|
39
|
+
# {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
|
40
|
+
# {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
|
41
|
+
# @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
|
42
|
+
# ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
|
43
|
+
# B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
|
44
|
+
# {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
|
45
|
+
# B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
|
46
|
+
# @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
|
47
|
+
# @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
|
48
|
+
# @position=4>
|
49
|
+
#
|
50
|
+
# # Combine WordNet + LinkParser to find the definition of the direct object of
|
51
|
+
# # the sentence
|
52
|
+
# "he is a big dog".en.sentence.object.gloss
|
53
|
+
# # => "a member of the genus Canis (probably descended from the common wolf) that
|
54
|
+
# has been domesticated by man since prehistoric times; occurs in many breeds;
|
55
|
+
# \"the dog barked all night\""
|
56
|
+
#
|
57
|
+
# == Authors
|
58
|
+
#
|
59
|
+
# * Martin Chase <stillflame@FaerieMUD.org>
|
60
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
61
|
+
#
|
62
|
+
# == Copyright
|
63
|
+
#
|
64
|
+
# Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
|
65
|
+
#
|
66
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
67
|
+
# software under the terms of the Perl Artistic License. (See
|
68
|
+
# http://language.perl.com/misc/Artistic.html)
|
69
|
+
#
|
70
|
+
# == Version
|
71
|
+
#
|
72
|
+
# $Id: linkparser.rb,v 1.4 2003/09/14 11:15:33 deveiant Exp $
|
73
|
+
#
|
74
|
+
|
75
|
+
require 'linguistics/en'
|
76
|
+
|
77
|
+
module Linguistics::EN

  # Load-time state for the optional LinkParser dependency. These are
  # instance variables of the EN module itself and are read back through the
  # singleton methods defined below.
  @hasLinkParser = false
  @lpParser      = nil
  @lpError       = nil

  # Try to pull in LinkParser. A LoadError is remembered instead of being
  # propagated, so that requiring this file never fails just because the
  # library is missing.
  begin
    require "linkparser"
  rescue LoadError => load_failure
    @lpError = load_failure
  else
    @hasLinkParser = true
  end


  #################################################################
  ### M O D U L E   M E T H O D S
  #################################################################
  class << self

    ### Returns +true+ if the LinkParser library was required successfully.
    def hasLinkParser?
      @hasLinkParser
    end

    ### Returns the LoadError captured while requiring LinkParser, or +nil+
    ### if no error was captured.
    def lpError
      @lpError
    end

    ### Returns the LinkParser instance shared by all Linguistics LinkParser
    ### functions, building it on the first call and reusing it afterwards.
    ### Raises NotImplementedError if LinkParser could not be loaded.
    def linkParser
      if @lpError
        reason = @lpError.message
        raise NotImplementedError,
          "LinkParser functions are not loaded: %s" % reason
      end

      return @lpParser unless @lpParser.nil?

      # First use: hook LinkParser::Word into Linguistics so parsed words
      # get the :en delegator proxy.
      LinkParser::Word::extend( Linguistics )
      Linguistics::installDelegatorProxy( LinkParser::Word, :en )

      # NOTE(review): the data directory is hard-coded to a Ruby 1.8
      # site_ruby location under /usr -- confirm it matches where
      # LinkParser's dictionaries are actually installed.
      dictionary_options = Hash.new('')
      dictionary_options['datadir'] = '/usr/lib/ruby/site_ruby/1.8/linkparser/data'
      dictionary_options['dict'] = 'tiny.dict'

      @lpParser = LinkParser.new( dictionary_options, {} )
    end
  end


  #################################################################
  ### L I N K P A R S E R   I N T E R F A C E
  #################################################################

  ###############
  module_function
  ###############

  ### Parse +sent+ (converted with #to_s) using the shared parser from
  ### Linguistics::EN::linkParser and return whatever the parser returns
  ### (documented upstream as a LinkParser::Sentence). Also available as
  ### #sentence.
  def linkParse( sent )
    parser = Linguistics::EN::linkParser
    parser.parse( sent.to_s )
  end
  alias_method :sentence, :linkParse
  module_function :sentence

end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# This file contains functions for finding relations for English words. It
|
4
|
+
# requires the Ruby-WordNet module to be installed; if it is not installed,
|
5
|
+
# calling the functions defined by this file will raise NotImplementedError
|
6
|
+
# exceptions. Requiring this file adds functions and constants to the
|
7
|
+
# Linguistics::EN module.
|
8
|
+
#
|
9
|
+
# == Synopsis
|
10
|
+
#
|
11
|
+
# # Test to be sure the WordNet module loaded okay.
|
12
|
+
# Linguistics::EN.has_wordnet?
|
13
|
+
# # => true
|
14
|
+
#
|
15
|
+
# # Fetch the default synset for the word "balance"
|
16
|
+
# "balance".en.synset
|
17
|
+
# # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
18
|
+
# (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
19
|
+
#
|
20
|
+
# # Fetch the synset for the first verb sense of "balance"
|
21
|
+
# "balance".en.synset( :verb )
|
22
|
+
# # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
23
|
+
# (verb): "bring into balance or equilibrium; "She has to balance work and her
|
24
|
+
# domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
25
|
+
# verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
26
|
+
#
|
27
|
+
# # Fetch the second noun sense
|
28
|
+
# "balance".en.synset( 2, :noun )
|
29
|
+
# # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
30
|
+
# on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
31
|
+
#
|
32
|
+
# # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
33
|
+
# "balance".en.synset( 2, :noun ).hypernyms
|
34
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
35
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
36
|
+
# hyponyms: 2)>]
|
37
|
+
#
|
38
|
+
# # A simpler way of doing the same thing:
|
39
|
+
# "balance".en.hypernyms( 2, :noun )
|
40
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
41
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
42
|
+
# hyponyms: 2)>]
|
43
|
+
#
|
44
|
+
# # Fetch the first hypernym's hypernyms
|
45
|
+
# "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
46
|
+
# # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
47
|
+
# measuring device (noun): "instrument that shows the extent or amount or quantity
|
48
|
+
# or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
49
|
+
#
|
50
|
+
# # Find the synset to which both the second noun sense of "balance" and the
|
51
|
+
# # default sense of "shovel" belong.
|
52
|
+
# ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
53
|
+
# # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
54
|
+
# artifact (or system of artifacts) that is instrumental in accomplishing some
|
55
|
+
# end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
56
|
+
#
|
57
|
+
# # Fetch just the words for the other kinds of "instruments"
|
58
|
+
# "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
59
|
+
# # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
60
|
+
# "extractor", "instrument of execution", "instrument of punishment", "measuring
|
61
|
+
# instrument", "measuring system", "measuring device", "medical instrument",
|
62
|
+
# "navigational instrument", "optical instrument", "plotter", "scientific
|
63
|
+
# instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
64
|
+
# "tracer", "weapon", "arm", "weapon system", "whip"]
|
65
|
+
#
|
66
|
+
#
|
67
|
+
# == Authors
|
68
|
+
#
|
69
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
70
|
+
#
|
71
|
+
# == Copyright
|
72
|
+
#
|
73
|
+
# Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
|
74
|
+
#
|
75
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
76
|
+
# software under the terms of the Perl Artistic License. (See
|
77
|
+
# http://language.perl.com/misc/Artistic.html)
|
78
|
+
#
|
79
|
+
# == Version
|
80
|
+
#
|
81
|
+
# $Id: wordnet.rb,v 1.3 2003/09/14 11:28:02 deveiant Exp $
|
82
|
+
#
|
83
|
+
|
84
|
+
module Linguistics
module EN

  @hasWordnet = false
  @wnError    = nil
  @wnLexicon  = nil

  # Load WordNet if possible, saving the error that occurs if it cannot be
  # required. The lexicon itself is not opened here; it is created lazily
  # the first time ::wnLexicon is called.
  begin
    require 'wordnet'
    @hasWordnet = true
  rescue LoadError => err
    @wnError = err
  end


  #################################################################
  ### M O D U L E   M E T H O D S
  #################################################################
  class << self

    ### Returns +true+ if WordNet was loaded okay
    def hasWordnet? ; @hasWordnet; end

    ### If #hasWordnet? returns +false+, this can be called to fetch the
    ### exception which was raised when WordNet was loaded.
    def wnError ; @wnError; end

    ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
    ### functions. It is created on first use and cached. Raises
    ### NotImplementedError if the WordNet library failed to load.
    def wnLexicon
      if @wnError
        raise NotImplementedError,
          "WordNet functions are not loaded: %s" %
          @wnError.message
      end

      @wnLexicon ||= WordNet::Lexicon::new
    end

    ### Make a function that calls the method +meth+ on the synset of an input
    ### word. The generated function is a singleton method of this module
    ### and takes <tt>(word, pos=nil, sense=1)</tt>; the extra arguments are
    ### handed to #synset unchanged, so the <tt>(word, sense, pos)</tt> order
    ### is accepted too. It returns +nil+ when no synset is found and raises
    ### ArgumentError when +word+ is missing.
    def def_synset_function( meth )
      (class << self; self; end).instance_eval do
        define_method( meth ) {|*args|
          word, pos, sense = *args
          raise ArgumentError,
            "wrong number of arguments (0 for 1)" unless word
          sense ||= 1

          syn = synset( word.to_s, pos, sense )
          syn.nil? ? nil : syn.send( meth )
        }
      end
    end
  end



  #################################################################
  ### W O R D N E T   I N T E R F A C E
  #################################################################

  ###############
  module_function
  ###############

  ### Look up the synset associated with the given word or collocation in the
  ### WordNet lexicon and return the result of the lexicon lookup (a
  ### WordNet::Synset per the file synopsis), or +nil+ if nothing matched.
  ###
  ### +pos+ is the part of speech to search; when it is +nil+, :noun, :verb,
  ### :adjective, :adverb and :other are tried in that order and the first
  ### hit wins. +sense+ is the sense number. The sense may also be given
  ### first, as in <tt>synset( word, 2 )</tt> or
  ### <tt>synset( word, 2, :noun )</tt>, matching the examples in the file
  ### synopsis.
  def synset( word, pos=nil, sense=1 )
    lex = Linguistics::EN::wnLexicon

    # An integer in the +pos+ slot is really the sense number. Whatever was
    # passed after it is the part of speech, unless it is just the integer
    # default for +sense+. Integer is used rather than Fixnum because Fixnum
    # no longer exists on current Rubies.
    if pos.is_a?( Integer )
      pos, sense = (sense.is_a?( Integer ) ? nil : sense), pos
    end

    postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
    syn = nil

    # The block parameter is deliberately not called +pos+ so it cannot
    # shadow or overwrite the method argument.
    postries.each {|candidate|
      syn = lex.lookupSynsets( word.to_s, candidate, sense )
      break if syn
    }

    return syn
  end


  ### Look up all the synsets associated with the given word or collocation in
  ### the WordNet lexicon and return an Array of WordNet::Synset objects. If
  ### +pos+ is +nil+, return synsets for all parts of speech.
  def synsets( word, pos=nil )
    lex = Linguistics::EN::wnLexicon
    postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

    # Each lookup result is collected as-is; nested arrays are flattened and
    # nil results dropped at the end.
    syns = postries.collect {|candidate|
      lex.lookupSynsets( word.to_s, candidate )
    }

    return syns.flatten.compact
  end


  # Returns definitions and/or example sentences as a String.
  def_synset_function :gloss

  # Returns definitions and/or example sentences as an Array.
  def_synset_function :glosses

  # Return nouns or verbs that have the same hypernym as the receiver.
  def_synset_function :coordinates

  # Returns the Array of synonyms contained in the synset for the receiver.
  def_synset_function :words
  def_synset_function :synonyms

  # Returns the name of the lexicographer file that contains the raw data for
  # the receiver.
  def_synset_function :lexInfo

  # :TODO: Finish these comments, and figure out how the hell to get the
  # methods to show up in RDoc.
  def_synset_function :frames

  def_synset_function :antonyms
  def_synset_function :hypernyms
  def_synset_function :entailment
  def_synset_function :hyponyms
  def_synset_function :causes
  def_synset_function :verbgroups
  def_synset_function :similarTo
  def_synset_function :participles
  def_synset_function :pertainyms
  def_synset_function :attributes
  def_synset_function :derivedFrom
  def_synset_function :seeAlso
  def_synset_function :functions

  def_synset_function :meronyms
  def_synset_function :memberMeronyms
  def_synset_function :stuffMeronyms
  def_synset_function :portionMeronyms
  def_synset_function :componentMeronyms
  def_synset_function :featureMeronyms
  def_synset_function :phaseMeronyms
  def_synset_function :placeMeronyms

  def_synset_function :holonyms
  def_synset_function :memberHolonyms
  def_synset_function :stuffHolonyms
  def_synset_function :portionHolonyms
  def_synset_function :componentHolonyms
  def_synset_function :featureHolonyms
  def_synset_function :phaseHolonyms
  def_synset_function :placeHolonyms

  def_synset_function :domains
  def_synset_function :categoryDomains
  def_synset_function :regionDomains
  def_synset_function :usageDomains

  def_synset_function :members
  def_synset_function :categoryMembers
  def_synset_function :regionMembers
  def_synset_function :usageMembers


end # module EN
end # module Linguistics
|
253
|
+
|