Linguistics 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Artistic +127 -0
- data/ChangeLog +444 -0
- data/MANIFEST +19 -0
- data/README +178 -0
- data/README.english +245 -0
- data/TODO +17 -0
- data/experiments/randobjlist.rb +34 -0
- data/install.rb +154 -0
- data/lib/linguistics/en/infinitive.rb +1149 -0
- data/lib/linguistics/en/linkparser.rb +142 -0
- data/lib/linguistics/en/wordnet.rb +253 -0
- data/lib/linguistics/en.rb +1694 -0
- data/lib/linguistics/iso639.rb +456 -0
- data/lib/linguistics.rb +368 -0
- data/redist/crosscase.rb +298 -0
- data/test.rb +110 -0
- data/tests/en/conjunction.tests.rb +114 -0
- data/tests/en/inflect.tests.rb +1378 -0
- data/tests/lingtestcase.rb +239 -0
- data/tests/use.tests.rb +99 -0
- data/utils.rb +689 -0
- metadata +58 -0
@@ -0,0 +1,142 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# This file contains the extensions to the Linguistics::EN module which provide
|
4
|
+
# support for the Ruby LinkParser module. LinkParser enables grammatical queries
|
5
|
+
# of English language sentences.
|
6
|
+
#
|
7
|
+
# == Synopsis
|
8
|
+
#
|
9
|
+
# # Test to see whether or not the link parser is loaded.
|
10
|
+
# Linguistics::EN.has_link_parser?
|
11
|
+
# # => true
|
12
|
+
#
|
13
|
+
# # Diagram the first linkage for a test sentence
|
14
|
+
# puts "he is a big dog".en.sentence.linkages.first.to_s
|
15
|
+
# +---O*---+
|
16
|
+
# | +--Ds--+
|
17
|
+
# +Ss+ | +-A-+
|
18
|
+
# | | | | |
|
19
|
+
# he is a big dog
|
20
|
+
#
|
21
|
+
# # Find the verb in the sentence
|
22
|
+
# "he is a big dog".en.sentence.verb.to_s
|
23
|
+
# # => "is"
|
24
|
+
#
|
25
|
+
# # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
26
|
+
# # given sentence.
|
27
|
+
# "he is a big dog".en.sentence.verb.infinitive
|
28
|
+
# # => "be"
|
29
|
+
#
|
30
|
+
# # Find the direct object of the sentence
|
31
|
+
# "he is a big dog".en.sentence.object.to_s
|
32
|
+
# # => "dog"
|
33
|
+
#
|
34
|
+
# # Look at the raw LinkParser::Word for the direct object of the sentence.
|
35
|
+
# "he is a big dog".en.sentence.object
|
36
|
+
# # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
|
37
|
+
# Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
|
38
|
+
# Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
|
39
|
+
# {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
|
40
|
+
# {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
|
41
|
+
# @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
|
42
|
+
# ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
|
43
|
+
# B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
|
44
|
+
# {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
|
45
|
+
# B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
|
46
|
+
# @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
|
47
|
+
# @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
|
48
|
+
# @position=4>
|
49
|
+
#
|
50
|
+
# # Combine WordNet + LinkParser to find the definition of the direct object of
|
51
|
+
# # the sentence
|
52
|
+
# "he is a big dog".en.sentence.object.gloss
|
53
|
+
# # => "a member of the genus Canis (probably descended from the common wolf) that
|
54
|
+
# has been domesticated by man since prehistoric times; occurs in many breeds;
|
55
|
+
# \"the dog barked all night\""
|
56
|
+
#
|
57
|
+
# == Authors
|
58
|
+
#
|
59
|
+
# * Martin Chase <stillflame@FaerieMUD.org>
|
60
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
61
|
+
#
|
62
|
+
# == Copyright
|
63
|
+
#
|
64
|
+
# Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
|
65
|
+
#
|
66
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
67
|
+
# software under the terms of the Perl Artistic License. (See
|
68
|
+
# http://language.perl.com/misc/Artistic.html)
|
69
|
+
#
|
70
|
+
# == Version
|
71
|
+
#
|
72
|
+
# $Id: linkparser.rb,v 1.4 2003/09/14 11:15:33 deveiant Exp $
|
73
|
+
#
|
74
|
+
|
75
|
+
require 'linguistics/en'
|
76
|
+
|
77
|
+
# LinkParser support for Linguistics::EN. Loading this code tries to require
# the "linkparser" library; a LoadError is captured instead of propagated so
# that callers can test for availability with hasLinkParser? and inspect the
# failure with lpError.
module Linguistics
  module EN

    # Load state: whether "linkparser" could be required, the lazily-built
    # shared parser, and the LoadError raised by the require (if any).
    @hasLinkParser = false
    @lpParser = nil
    @lpError = nil

    begin
      require "linkparser"
      @hasLinkParser = true
    rescue LoadError => err
      @lpError = err
    end


    #################################################################
    ###	M O D U L E   M E T H O D S
    #################################################################
    class << self

      ### Returns +true+ if LinkParser was loaded okay
      def hasLinkParser?
        @hasLinkParser
      end

      ### If #hasLinkParser? returns +false+, this can be called to fetch the
      ### exception which was raised when trying to load LinkParser.
      def lpError
        @lpError
      end

      ### The instance of LinkParser used for all Linguistics LinkParser
      ### functions. The parser is built on the first successful call and
      ### the same object is returned afterwards.
      ###
      ### +dictOverrides+ is an optional Hash of dictionary options (String
      ### keys such as 'datadir' and 'dict') which replace the built-in
      ### defaults. It is only consulted by the call that actually builds
      ### the parser; once a parser exists it is ignored.
      ###
      ### Raises NotImplementedError if requiring "linkparser" failed.
      def linkParser( dictOverrides={} )
        if @lpError
          raise NotImplementedError,
            "LinkParser functions are not loaded: %s" %
            @lpError.message
        end

        return @lpParser unless @lpParser.nil?

        # Give LinkParser::Word objects the same language proxy that other
        # Linguistics-enabled classes get.
        LinkParser::Word.extend( Linguistics )
        Linguistics::installDelegatorProxy( LinkParser::Word, :en )

        # Unknown dictionary options read back as the empty string.
        dictOpts = Hash.new( '' )
        dictOpts['datadir'] = '/usr/lib/ruby/site_ruby/1.8/linkparser/data'
        dictOpts['dict'] = 'tiny.dict'
        dictOpts.update( dictOverrides )
        parseOpts = {}

        @lpParser = LinkParser.new( dictOpts, parseOpts )
      end
    end


    #################################################################
    ###	L I N K P A R S E R   I N T E R F A C E
    #################################################################

    ###############
    module_function
    ###############

    ### Parse +sent+ (converted with #to_s) using the shared parser returned
    ### by Linguistics::EN::linkParser and return the parser's result.
    ### Raises NotImplementedError if LinkParser could not be loaded.
    def linkParse( sent )
      Linguistics::EN::linkParser.parse( sent.to_s )
    end
    alias_method :sentence, :linkParse
    module_function :sentence

  end # module EN
end # module Linguistics
|
@@ -0,0 +1,253 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# This file contains functions for finding relations for English words. It
|
4
|
+
# requires the Ruby-WordNet module to be installed; if it is not installed,
|
5
|
+
# calling the functions defined by this file will raise NotImplementedError
|
6
|
+
# exceptions. Requiring this file adds functions and constants to the
|
7
|
+
# Linguistics::EN module.
|
8
|
+
#
|
9
|
+
# == Synopsis
|
10
|
+
#
|
11
|
+
# # Test to be sure the WordNet module loaded okay.
|
12
|
+
# Linguistics::EN.has_wordnet?
|
13
|
+
# # => true
|
14
|
+
#
|
15
|
+
# # Fetch the default synset for the word "balance"
|
16
|
+
# "balance".en.synset
|
17
|
+
# # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
18
|
+
# (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
19
|
+
#
|
20
|
+
# # Fetch the synset for the first verb sense of "balance"
|
21
|
+
# "balance".en.synset( :verb )
|
22
|
+
# # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
23
|
+
# (verb): "bring into balance or equilibrium; "She has to balance work and her
|
24
|
+
# domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
25
|
+
# verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
26
|
+
#
|
27
|
+
# # Fetch the second noun sense
|
28
|
+
# "balance".en.synset( 2, :noun )
|
29
|
+
# # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
30
|
+
# on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
31
|
+
#
|
32
|
+
# # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
33
|
+
# "balance".en.synset( 2, :noun ).hypernyms
|
34
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
35
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
36
|
+
# hyponyms: 2)>]
|
37
|
+
#
|
38
|
+
# # A simpler way of doing the same thing:
|
39
|
+
# "balance".en.hypernyms( 2, :noun )
|
40
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
41
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
42
|
+
# hyponyms: 2)>]
|
43
|
+
#
|
44
|
+
# # Fetch the first hypernym's hypernyms
|
45
|
+
# "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
46
|
+
# # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
47
|
+
# measuring device (noun): "instrument that shows the extent or amount or quantity
|
48
|
+
# or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
49
|
+
#
|
50
|
+
# # Find the synset to which both the second noun sense of "balance" and the
|
51
|
+
# # default sense of "shovel" belong.
|
52
|
+
# ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
53
|
+
# # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
54
|
+
# artifact (or system of artifacts) that is instrumental in accomplishing some
|
55
|
+
# end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
56
|
+
#
|
57
|
+
# # Fetch just the words for the other kinds of "instruments"
|
58
|
+
# "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
59
|
+
# # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
60
|
+
# "extractor", "instrument of execution", "instrument of punishment", "measuring
|
61
|
+
# instrument", "measuring system", "measuring device", "medical instrument",
|
62
|
+
# "navigational instrument", "optical instrument", "plotter", "scientific
|
63
|
+
# instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
64
|
+
# "tracer", "weapon", "arm", "weapon system", "whip"]
|
65
|
+
#
|
66
|
+
#
|
67
|
+
# == Authors
|
68
|
+
#
|
69
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
70
|
+
#
|
71
|
+
# == Copyright
|
72
|
+
#
|
73
|
+
# Copyright (c) 2003 The FaerieMUD Consortium. All rights reserved.
|
74
|
+
#
|
75
|
+
# This module is free software. You may use, modify, and/or redistribute this
|
76
|
+
# software under the terms of the Perl Artistic License. (See
|
77
|
+
# http://language.perl.com/misc/Artistic.html)
|
78
|
+
#
|
79
|
+
# == Version
|
80
|
+
#
|
81
|
+
# $Id: wordnet.rb,v 1.3 2003/09/14 11:28:02 deveiant Exp $
|
82
|
+
#
|
83
|
+
|
84
|
+
# WordNet support for Linguistics::EN. Loading this code tries to require the
# "wordnet" library; a LoadError is captured instead of propagated so that
# callers can test for availability with hasWordnet? and inspect the failure
# with wnError.
module Linguistics
  module EN

    # Load state: whether "wordnet" could be required, the LoadError raised by
    # the require (if any), and the lazily-created shared lexicon.
    @hasWordnet = false
    @wnError = nil
    @wnLexicon = nil

    # Load WordNet if possible, saving the error that occurs if anything goes
    # wrong.
    begin
      require 'wordnet'
      @hasWordnet = true
    rescue LoadError => err
      @wnError = err
    end


    #################################################################
    ###	M O D U L E   M E T H O D S
    #################################################################
    class << self

      ### Returns +true+ if WordNet was loaded okay
      def hasWordnet?
        @hasWordnet
      end

      ### If #hasWordnet? returns +false+, this can be called to fetch the
      ### exception which was raised when WordNet was loaded.
      def wnError
        @wnError
      end

      ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
      ### functions. It is created on first use and reused afterwards.
      ### Raises NotImplementedError if requiring "wordnet" failed.
      def wnLexicon
        if @wnError
          raise NotImplementedError,
            "WordNet functions are not loaded: %s" %
            @wnError.message
        end

        @wnLexicon ||= WordNet::Lexicon::new
      end

      ### Make a function that calls the method +meth+ on the synset of an input
      ### word. The generated function is defined on this module itself and
      ### takes ( word, pos, sense ); +sense+ defaults to 1. It returns +nil+
      ### when no synset is found, and raises ArgumentError when called
      ### without a word.
      def def_synset_function( meth )
        # define_method is private, so evaluate inside the singleton class.
        (class << self; self; end).instance_eval do
          define_method( meth ) {|*args|
            word, pos, sense = *args
            raise ArgumentError,
              "wrong number of arguments (0 for 1)" unless word
            sense ||= 1

            syn = synset( word.to_s, pos, sense )
            syn.nil? ? nil : syn.send( meth )
          }
        end
      end
    end



    #################################################################
    ###	W O R D N E T   I N T E R F A C E
    #################################################################

    ###############
    module_function
    ###############

    ### Look up the synset associated with the given word or collocation in the
    ### WordNet lexicon and return the lexicon's result for it.
    ###
    ### +pos+ is the part of speech to search; when it is +nil+ the parts of
    ### speech :noun, :verb, :adjective, :adverb and :other are tried in that
    ### order and the first hit wins. +sense+ is passed through to the
    ### lexicon's lookupSynsets.
    ###
    ### The sense number may also be given first, as in
    ### <tt>synset( word, 2 )</tt> or <tt>synset( word, 2, :noun )</tt>; in
    ### that form the part of speech, if present, is taken from the third
    ### argument.
    ###
    ### Raises NotImplementedError if WordNet could not be loaded.
    def synset( word, pos=nil, sense=1 )
      lex = Linguistics::EN::wnLexicon

      # Integer rather than Fixnum: Fixnum no longer exists on current Rubies.
      if pos.is_a?( Integer )
        # Called as ( word, sense [, pos] ): swap the two, keeping a trailing
        # part of speech instead of throwing it away.
        pos, sense = (sense.is_a?( Integer ) ? nil : sense), pos
      end

      postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
      syn = nil

      postries.each do |tryPos|
        syn = lex.lookupSynsets( word.to_s, tryPos, sense )
        break if syn
      end

      return syn
    end


    ### Look up all the synsets associated with the given word or collocation in
    ### the WordNet lexicon and return them as a flat Array with +nil+ entries
    ### removed. If +pos+ is +nil+, return synsets for all parts of speech.
    ### Raises NotImplementedError if WordNet could not be loaded.
    def synsets( word, pos=nil )
      lex = Linguistics::EN::wnLexicon
      postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

      found = postries.map do |tryPos|
        lex.lookupSynsets( word.to_s, tryPos )
      end

      return found.flatten.compact
    end


    # Returns definitions and/or example sentences as a String.
    def_synset_function :gloss

    # Returns definitions and/or example sentences as an Array.
    def_synset_function :glosses

    # Return nouns or verbs that have the same hypernym as the receiver.
    def_synset_function :coordinates

    # Returns the Array of synonyms contained in the synset for the receiver.
    def_synset_function :words
    def_synset_function :synonyms

    # Returns the name of the lexicographer file that contains the raw data for
    # the receiver.
    def_synset_function :lexInfo

    # :TODO: Finish these comments, and figure out how the hell to get the
    # methods to show up in RDoc.
    def_synset_function :frames

    def_synset_function :antonyms
    def_synset_function :hypernyms
    def_synset_function :entailment
    def_synset_function :hyponyms
    def_synset_function :causes
    def_synset_function :verbgroups
    def_synset_function :similarTo
    def_synset_function :participles
    def_synset_function :pertainyms
    def_synset_function :attributes
    def_synset_function :derivedFrom
    def_synset_function :seeAlso
    def_synset_function :functions

    def_synset_function :meronyms
    def_synset_function :memberMeronyms
    def_synset_function :stuffMeronyms
    def_synset_function :portionMeronyms
    def_synset_function :componentMeronyms
    def_synset_function :featureMeronyms
    def_synset_function :phaseMeronyms
    def_synset_function :placeMeronyms

    def_synset_function :holonyms
    def_synset_function :memberHolonyms
    def_synset_function :stuffHolonyms
    def_synset_function :portionHolonyms
    def_synset_function :componentHolonyms
    def_synset_function :featureHolonyms
    def_synset_function :phaseHolonyms
    def_synset_function :placeHolonyms

    def_synset_function :domains
    def_synset_function :categoryDomains
    def_synset_function :regionDomains
    def_synset_function :usageDomains

    def_synset_function :members
    def_synset_function :categoryMembers
    def_synset_function :regionMembers
    def_synset_function :usageMembers


  end # module EN
end # module Linguistics
|
253
|
+
|