linguistics 1.0.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ChangeLog +849 -342
- data/History.rdoc +11 -0
- data/LICENSE +9 -9
- data/Manifest.txt +44 -0
- data/README.rdoc +226 -0
- data/Rakefile +32 -349
- data/examples/endocs.rb +272 -0
- data/examples/generalize_sentence.rb +2 -1
- data/examples/klingon.rb +22 -0
- data/lib/linguistics.rb +130 -292
- data/lib/linguistics/en.rb +337 -1628
- data/lib/linguistics/en/articles.rb +138 -0
- data/lib/linguistics/en/conjugation.rb +2245 -0
- data/lib/linguistics/en/conjunctions.rb +202 -0
- data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
- data/lib/linguistics/en/linkparser.rb +41 -49
- data/lib/linguistics/en/numbers.rb +483 -0
- data/lib/linguistics/en/participles.rb +33 -0
- data/lib/linguistics/en/pluralization.rb +810 -0
- data/lib/linguistics/en/stemmer.rb +75 -0
- data/lib/linguistics/en/titlecase.rb +121 -0
- data/lib/linguistics/en/wordnet.rb +63 -97
- data/lib/linguistics/inflector.rb +89 -0
- data/lib/linguistics/iso639.rb +534 -448
- data/lib/linguistics/languagebehavior.rb +36 -0
- data/lib/linguistics/monkeypatches.rb +42 -0
- data/spec/lib/constants.rb +15 -0
- data/spec/lib/helpers.rb +38 -0
- data/spec/linguistics/en/articles_spec.rb +797 -0
- data/spec/linguistics/en/conjugation_spec.rb +2083 -0
- data/spec/linguistics/en/conjunctions_spec.rb +154 -0
- data/spec/linguistics/en/infinitives_spec.rb +518 -0
- data/spec/linguistics/en/linkparser_spec.rb +66 -0
- data/spec/linguistics/en/numbers_spec.rb +1295 -0
- data/spec/linguistics/en/participles_spec.rb +55 -0
- data/spec/linguistics/en/pluralization_spec.rb +4636 -0
- data/spec/linguistics/en/stemmer_spec.rb +72 -0
- data/spec/linguistics/en/titlecase_spec.rb +841 -0
- data/spec/linguistics/en/wordnet_spec.rb +85 -0
- data/spec/linguistics/en_spec.rb +45 -167
- data/spec/linguistics/inflector_spec.rb +40 -0
- data/spec/linguistics/iso639_spec.rb +49 -53
- data/spec/linguistics/monkeypatches_spec.rb +40 -0
- data/spec/linguistics_spec.rb +46 -76
- metadata +241 -113
- metadata.gz.sig +0 -0
- data/README +0 -166
- data/README.english +0 -245
- data/rake/191_compat.rb +0 -26
- data/rake/dependencies.rb +0 -76
- data/rake/documentation.rb +0 -123
- data/rake/helpers.rb +0 -502
- data/rake/hg.rb +0 -318
- data/rake/manual.rb +0 -787
- data/rake/packaging.rb +0 -129
- data/rake/publishing.rb +0 -341
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -668
- data/rake/testing.rb +0 -152
- data/rake/verifytask.rb +0 -64
- data/tests/en/infinitive.tests.rb +0 -207
- data/tests/en/inflect.tests.rb +0 -1389
- data/tests/en/lafcadio.tests.rb +0 -77
- data/tests/en/linkparser.tests.rb +0 -42
- data/tests/en/lprintf.tests.rb +0 -77
- data/tests/en/titlecase.tests.rb +0 -73
- data/tests/en/wordnet.tests.rb +0 -95
data/examples/endocs.rb
ADDED
@@ -0,0 +1,272 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift( 'lib' )
|
4
|
+
$stdout.sync = $stderr.sync = true
|
5
|
+
|
6
|
+
require 'loggability'
|
7
|
+
require 'linguistics'
|
8
|
+
require 'pry'
|
9
|
+
|
10
|
+
lines = File.readlines( __FILE__ ).slice_before( /^__END__/ ).to_a
|
11
|
+
header = lines.shift
|
12
|
+
source = lines.shift
|
13
|
+
source.shift
|
14
|
+
|
15
|
+
header_lines = header.length + 1
|
16
|
+
code = ''
|
17
|
+
result = nil
|
18
|
+
|
19
|
+
Loggability.level = $VERBOSE ? :debug : :warn
|
20
|
+
Loggability.format_with( :color )
|
21
|
+
|
22
|
+
source.each_with_index do |line, i|
|
23
|
+
case line
|
24
|
+
|
25
|
+
# Eval any accumulated code on a blank line
|
26
|
+
when /^\s*$/
|
27
|
+
puts
|
28
|
+
next if code.empty?
|
29
|
+
puts( code )
|
30
|
+
eval( code, binding(), __FILE__, header_lines + i )
|
31
|
+
code = ''
|
32
|
+
|
33
|
+
# Eval the code on a result marker, but also render the result after the marker
|
34
|
+
when /^#\s+=>/
|
35
|
+
puts( code )
|
36
|
+
$stdout.flush
|
37
|
+
result = eval( code, binding(), __FILE__, header_lines + i )
|
38
|
+
print '# => '
|
39
|
+
pp( result )
|
40
|
+
code = ''
|
41
|
+
|
42
|
+
# Output comment lines as-is
|
43
|
+
when /^#/
|
44
|
+
puts( line )
|
45
|
+
|
46
|
+
# Anything else gets appended to the code accumulator
|
47
|
+
else
|
48
|
+
code << line
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
__END__
|
53
|
+
|
54
|
+
# This module is a container for various English-language linguistic
|
55
|
+
# functions for the Linguistics library. It can be either loaded
|
56
|
+
# directly, or by passing some variant of +:en+ or +:eng+ to the
|
57
|
+
# Linguistics.use method.
|
58
|
+
|
59
|
+
require 'linguistics'
|
60
|
+
Linguistics.use( :en ) # extends Array, String, and Numeric
|
61
|
+
|
62
|
+
# == Pluralization
|
63
|
+
|
64
|
+
"box".en.plural
|
65
|
+
# => "boxes"
|
66
|
+
|
67
|
+
"mouse".en.plural
|
68
|
+
# => "mice"
|
69
|
+
|
70
|
+
"ruby".en.plural
|
71
|
+
# => "rubies"
|
72
|
+
|
73
|
+
|
74
|
+
# == Indefinite Articles
|
75
|
+
|
76
|
+
"book".en.a
|
77
|
+
# => "a book"
|
78
|
+
|
79
|
+
"article".en.a
|
80
|
+
# => "an article"
|
81
|
+
|
82
|
+
|
83
|
+
# == Present Participles
|
84
|
+
|
85
|
+
"runs".en.present_participle
|
86
|
+
# => "running"
|
87
|
+
|
88
|
+
"eats".en.present_participle
|
89
|
+
# => "eating"
|
90
|
+
|
91
|
+
"spies".en.present_participle
|
92
|
+
# => "spying"
|
93
|
+
|
94
|
+
|
95
|
+
# == Ordinal Numbers
|
96
|
+
|
97
|
+
5.en.ordinal
|
98
|
+
# => "5th"
|
99
|
+
|
100
|
+
2004.en.ordinal
|
101
|
+
# => "2004th"
|
102
|
+
|
103
|
+
|
104
|
+
# == Numbers to Words
|
105
|
+
|
106
|
+
5.en.numwords
|
107
|
+
# => "five"
|
108
|
+
|
109
|
+
2004.en.numwords
|
110
|
+
# => "two thousand and four"
|
111
|
+
|
112
|
+
2385762345876.en.numwords
|
113
|
+
# => "two trillion, three hundred and eighty-five billion, seven hundred and sixty-two million, three hundred and forty-five thousand, eight hundred and seventy-six"
|
114
|
+
|
115
|
+
|
116
|
+
# == Quantification
|
117
|
+
|
118
|
+
"cow".en.quantify( 5 )
|
119
|
+
# => "several cows"
|
120
|
+
|
121
|
+
"cow".en.quantify( 1005 )
|
122
|
+
# => "thousands of cows"
|
123
|
+
|
124
|
+
"cow".en.quantify( 20_432_123_000_000 )
|
125
|
+
# => "tens of trillions of cows"
|
126
|
+
|
127
|
+
|
128
|
+
# == Conjunctions
|
129
|
+
|
130
|
+
animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat dog cat cat dog cow goat goose goose ox alpaca}
|
131
|
+
"The farm has: " + animals.en.conjunction
|
132
|
+
# => The farm has: four dogs, three cows, three geese, three goats, two oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca
|
133
|
+
|
134
|
+
# Note that 'goose' and 'ox' are both correctly pluralized, and the correct
|
135
|
+
# indefinite article 'an' has been used for 'alpaca'.
|
136
|
+
#
|
137
|
+
# You can also use the generalization function of the #quantify method to give
|
138
|
+
# general descriptions of object lists instead of literal counts:
|
139
|
+
|
140
|
+
allobjs = []
|
141
|
+
ObjectSpace::each_object {|obj| allobjs << obj.class.name }
|
142
|
+
puts "The current Ruby objectspace contains: " + allobjs.en.conjunction( :generalize => true )
|
143
|
+
# =>
|
144
|
+
|
145
|
+
|
146
|
+
# == Infinitives
|
147
|
+
|
148
|
+
"leaving".en.infinitive
|
149
|
+
# => "leave"
|
150
|
+
|
151
|
+
"left".en.infinitive
|
152
|
+
# => "leave"
|
153
|
+
|
154
|
+
"leaving".en.infinitive.suffix
|
155
|
+
# => "ing"
|
156
|
+
|
157
|
+
|
158
|
+
# == Conjugation
|
159
|
+
|
160
|
+
# Conjugate a verb given an infinitive:
|
161
|
+
|
162
|
+
"run".en.past_tense
|
163
|
+
# => "ran"
|
164
|
+
|
165
|
+
"run".en.past_participle
|
166
|
+
# => "run"
|
167
|
+
|
168
|
+
"run".en.present_tense
|
169
|
+
# => "run"
|
170
|
+
|
171
|
+
"run".en.present_participle
|
172
|
+
# => "running"
|
173
|
+
|
174
|
+
# Conjugate an infinitive with an explicit tense and grammatical person:
|
175
|
+
|
176
|
+
"be".en.conjugate( :present, :third_person_singular )
|
177
|
+
# => "is"
|
178
|
+
|
179
|
+
"be".en.conjugate( :present, :first_person_singular )
|
180
|
+
# => "am"
|
181
|
+
|
182
|
+
"be".en.conjugate( :past, :first_person_singular )
|
183
|
+
# => "was"
|
184
|
+
|
185
|
+
# The functionality is a port of the verb conjugation portion of Morph
|
186
|
+
# Adorner (http://morphadorner.northwestern.edu/).
|
187
|
+
#
|
188
|
+
# It includes a good number of irregular verbs, but it's not going to be
|
189
|
+
# 100% correct every time.
|
190
|
+
|
191
|
+
|
192
|
+
# == WordNet® Integration
|
193
|
+
|
194
|
+
# If you have the 'wordnet' gem installed, you can look up WordNet synsets using
|
195
|
+
# the Linguistics interface:
|
196
|
+
|
197
|
+
# Test to be sure the WordNet module loaded okay.
|
198
|
+
Linguistics::EN.has_wordnet?
|
199
|
+
# => true
|
200
|
+
|
201
|
+
# Fetch the default synset for the word "balance"
|
202
|
+
"balance".en.synset
|
203
|
+
# => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium" (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
204
|
+
|
205
|
+
# Fetch the synset for the first verb sense of "balance"
|
206
|
+
"balance".en.synset( :verb )
|
207
|
+
# => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise (verb): "bring into balance or equilibrium; "She has to balance work and her domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1, verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
208
|
+
|
209
|
+
# Fetch the second noun sense
|
210
|
+
"balance".en.synset( 2, :noun )
|
211
|
+
# => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
212
|
+
|
213
|
+
# Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
214
|
+
"balance".en.synset( 2, :noun ).hypernyms
|
215
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, hyponyms: 2)>]
|
216
|
+
|
217
|
+
# A simpler way of doing the same thing:
|
218
|
+
"balance".en.hypernyms( 2, :noun )
|
219
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1, hyponyms: 2)>]
|
220
|
+
|
221
|
+
# Fetch the first hypernym's hypernyms
|
222
|
+
"balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
223
|
+
# => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system, measuring device (noun): "instrument that shows the extent or amount or quantity or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
224
|
+
|
225
|
+
# Find the synset to which both the second noun sense of "balance" and the
|
226
|
+
# default sense of "shovel" belong.
|
227
|
+
("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
228
|
+
# => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an artifact (or system of artifacts) that is instrumental in accomplishing some end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
229
|
+
|
230
|
+
# Fetch words for the specific kinds of (device-ish) "instruments"
|
231
|
+
"instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
|
232
|
+
# => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", "extractor", "instrument of execution", "instrument of punishment", "measuring instrument", "measuring system", "measuring device", "medical instrument", "navigational instrument", "optical instrument", "plotter", "scientific instrument", "sonograph", "surveying instrument", "surveyor's instrument", "tracer", "weapon", "arm", "weapon system", "whip"]
|
233
|
+
|
234
|
+
# ...or musical instruments
|
235
|
+
"instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
|
236
|
+
# => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument", "extractor", "instrument of execution", "instrument of punishment", "measuring instrument", "measuring system", "measuring device", "medical instrument", "navigational instrument", "optical instrument", "plotter", "scientific instrument", "sonograph", "surveying instrument", "surveyor's instrument", "tracer", "weapon", "arm", "weapon system", "whip"]
|
237
|
+
|
238
|
+
# There are many more WordNet methods supported--too many to list here. See the
|
239
|
+
# documentation for the complete list.
|
240
|
+
|
241
|
+
|
242
|
+
# == LinkParser Integration
|
243
|
+
|
244
|
+
# If you have the 'linkparser' gem installed, you can create linkages
|
245
|
+
# from English sentences that let you query for parts of speech:
|
246
|
+
|
247
|
+
# Test to see whether or not the link parser is loaded.
|
248
|
+
Linguistics::EN.has_linkparser?
|
249
|
+
# => true
|
250
|
+
|
251
|
+
# Diagram the first linkage for a test sentence
|
252
|
+
puts "he is a big dog".en.sentence.linkages.first.diagram
|
253
|
+
|
254
|
+
# Find the verb in the sentence
|
255
|
+
"he is a big dog".en.sentence.verb
|
256
|
+
# => "is"
|
257
|
+
|
258
|
+
# Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
259
|
+
# given sentence.
|
260
|
+
"he is a big dog".en.sentence.verb.en.infinitive
|
261
|
+
# => "be"
|
262
|
+
|
263
|
+
# Find the direct object of the sentence
|
264
|
+
"he is a big dog".en.sentence.object
|
265
|
+
# => "dog"
|
266
|
+
|
267
|
+
# Combine WordNet + LinkParser to find the definition of the direct object of
|
268
|
+
# the sentence
|
269
|
+
"he is a big dog".en.sentence.object.en.definition
|
270
|
+
# =>
|
271
|
+
|
272
|
+
|
@@ -5,7 +5,8 @@ BEGIN {
|
|
5
5
|
|
6
6
|
basedir = Pathname.new( __FILE__ ).dirname.parent.expand_path
|
7
7
|
libdir = basedir + "lib"
|
8
|
-
$LOAD_PATH.unshift(
|
8
|
+
$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
|
9
|
+
$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
|
9
10
|
}
|
10
11
|
|
11
12
|
require 'linguistics'
|
data/examples/klingon.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
|
3
|
+
require 'linguistics'
|
4
|
+
|
5
|
+
# An example of how you'd start writing a language module that provides
|
6
|
+
# Klingon-language inflection. It's obviously not really a useful
|
7
|
+
# implementation.
|
8
|
+
|
9
|
+
module Linguistics::TLH
|
10
|
+
|
11
|
+
# Register the module with the framework
|
12
|
+
Linguistics.register_language( :tlh, self )
|
13
|
+
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
if __FILE__ == $0
|
18
|
+
require 'pp'
|
19
|
+
Linguistics.use( :tlh, :classes => [Object] )
|
20
|
+
pp Object.new.tlh
|
21
|
+
end
|
22
|
+
|
data/lib/linguistics.rb
CHANGED
@@ -1,244 +1,131 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
# coding: utf-8
|
2
3
|
|
3
|
-
require '
|
4
|
-
|
5
|
-
#
|
6
|
-
# classes.
|
7
|
-
#
|
8
|
-
# == Synopsis
|
9
|
-
#
|
10
|
-
# require 'linguistics'
|
11
|
-
# Linguistics::use( :en )
|
12
|
-
# MyClass::extend( Linguistics )
|
13
|
-
#
|
14
|
-
# == Authors
|
15
|
-
#
|
16
|
-
# * Michael Granger <ged@FaerieMUD.org>
|
17
|
-
#
|
18
|
-
# :include: LICENSE
|
19
|
-
#
|
20
|
-
#--
|
21
|
-
#
|
22
|
-
# Please see the file LICENSE in the base directory for licensing details.
|
23
|
-
#
|
4
|
+
require 'loggability'
|
5
|
+
|
6
|
+
# An interface for extending core Ruby classes with natural-language methods.
|
24
7
|
module Linguistics
|
8
|
+
extend Loggability
|
9
|
+
|
10
|
+
# Loggability API -- set up a logger for Linguistics objects
|
11
|
+
log_as :linguistics
|
25
12
|
|
26
|
-
### Class constants
|
27
13
|
|
28
14
|
# Release version
|
29
|
-
VERSION = '
|
15
|
+
VERSION = '2.0.0'
|
30
16
|
|
31
|
-
#
|
32
|
-
|
33
|
-
# so that direct requiring of a language module sets the default.
|
34
|
-
DefaultLanguages = []
|
17
|
+
# VCS version
|
18
|
+
REVISION = %q$Revision: a7cda4b8747c $
|
35
19
|
|
36
20
|
# The list of Classes to add linguistic behaviours to.
|
37
|
-
|
21
|
+
DEFAULT_EXT_CLASSES = [ String, Numeric, Array ]
|
38
22
|
|
39
23
|
|
40
|
-
|
41
|
-
|
42
|
-
|
24
|
+
vvec = lambda {|version| version.split('.').collect {|v| v.to_i }.pack('N*') }
|
25
|
+
abort "This version of Linguistics requires Ruby 1.9.2 or greater." unless
|
26
|
+
vvec[RUBY_VERSION] >= vvec['1.9.2']
|
43
27
|
|
44
|
-
### A class which is inherited from by proxies for classes being extended
|
45
|
-
### with one or more linguistic interfaces. It provides on-the-fly creation
|
46
|
-
### of linguistic methods when the <tt>:installProxy</tt> option is passed
|
47
|
-
### to the call to Linguistics#use.
|
48
|
-
class LanguageProxyClass
|
49
|
-
|
50
|
-
### Class instance variable + accessor. Contains the module which knows
|
51
|
-
### the specifics of the language the languageProxy class is providing
|
52
|
-
### methods for.
|
53
|
-
@langmod = nil
|
54
|
-
class << self
|
55
|
-
attr_accessor :langmod
|
56
|
-
end
|
57
28
|
|
29
|
+
require 'linguistics/monkeypatches'
|
30
|
+
require 'linguistics/iso639'
|
31
|
+
require 'linguistics/inflector'
|
58
32
|
|
59
|
-
|
60
|
-
def initialize( receiver )
|
61
|
-
@receiver = receiver
|
62
|
-
end
|
33
|
+
include Linguistics::ISO639
|
63
34
|
|
64
35
|
|
65
|
-
|
66
|
-
|
67
|
-
|
36
|
+
### Language modules and the inflector classes that act as their interfaces
|
37
|
+
@languages = {}
|
38
|
+
@inflector_mixins = {}
|
68
39
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
40
|
+
class << self
|
41
|
+
# The Hash of loaded languages keyed by 3-letter bibliographic ISO639-2 code
|
42
|
+
attr_reader :languages
|
73
43
|
|
44
|
+
# The Hash of anonymous inflector modules that act as the mixin interface to
|
45
|
+
# a language module's inflector, keyed by the language module they belong to
|
46
|
+
attr_reader :inflector_mixins
|
47
|
+
end
|
74
48
|
|
75
|
-
### Autoload linguistic methods defined in the module this object's
|
76
|
-
### class uses for inflection.
|
77
|
-
def method_missing( sym, *args, &block )
|
78
|
-
return super unless self.class.langmod.respond_to?( sym )
|
79
49
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
50
|
+
### Return the library's version string
|
51
|
+
def self::version_string( include_buildnum=false )
|
52
|
+
vstring = "%s %s" % [ self.name, VERSION ]
|
53
|
+
vstring << " (build %s)" % [ REVISION[/: ([[:xdigit:]]+)/, 1] || '0' ] if include_buildnum
|
54
|
+
return vstring
|
55
|
+
end
|
85
56
|
|
86
|
-
self.method( sym ).call( *args, &block )
|
87
|
-
end
|
88
57
|
|
58
|
+
### Register a module as providing linguistic functions for the specified +language+ (a two-
|
59
|
+
### or three-letter ISO639-2 language code as a Symbol)
|
60
|
+
def self::register_language( language, mod )
|
61
|
+
language_entry = LANGUAGE_CODES[ language.to_sym ] or
|
62
|
+
raise "Unknown ISO639-2 language code '#{language}'"
|
63
|
+
self.log.info "Registering %s for language %p" % [ mod, language_entry ]
|
89
64
|
|
90
|
-
|
91
|
-
|
92
|
-
def inspect
|
93
|
-
"<%s languageProxy for %s object %s>" % [
|
94
|
-
self.class.langmod.language,
|
95
|
-
@receiver.class.name,
|
96
|
-
@receiver.inspect,
|
97
|
-
]
|
65
|
+
language_entry[:codes].each do |lang|
|
66
|
+
self.languages[ lang.to_sym ] = mod
|
98
67
|
end
|
99
68
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
# $stderr.puts "Extending a object's metaclass: %p" % obj if $DEBUG
|
114
|
-
self::install_language_proxy( sclass )
|
69
|
+
# Load in plugins for the language
|
70
|
+
Gem.find_files( "linguistics/#{language}/*.rb" ).each do |extension|
|
71
|
+
next if extension.include?( '/spec/' ) # Skip specs
|
72
|
+
extension.sub!( %r{.*/linguistics/}, 'linguistics/' )
|
73
|
+
self.log.debug " trying to load #{language_entry[:eng_name]} extension %p" % [ extension ]
|
74
|
+
begin
|
75
|
+
require extension
|
76
|
+
rescue LoadError => err
|
77
|
+
self.log.debug " failed (%s): %s %s" %
|
78
|
+
[ err.class.name, err.message, err.backtrace.first ]
|
79
|
+
else
|
80
|
+
self.log.debug " success."
|
81
|
+
end
|
115
82
|
end
|
116
83
|
|
117
|
-
super
|
118
84
|
end
|
119
85
|
|
120
86
|
|
121
|
-
###
|
122
|
-
|
123
|
-
|
124
|
-
mod
|
125
|
-
end
|
87
|
+
### Try to load the module that implements the given language, returning
|
88
|
+
### the Module object if successful.
|
89
|
+
def self::load_language( lang )
|
90
|
+
unless mod = self.languages[ lang.to_sym ]
|
126
91
|
|
92
|
+
self.log.debug "Trying to load language %p" % [ lang ]
|
93
|
+
language = LANGUAGE_CODES[ lang.to_sym ] or
|
94
|
+
raise "Unknown ISO639-2 language code '#{lang}'"
|
95
|
+
self.log.debug " got language code %p" % [ language ]
|
127
96
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
Class::new( LanguageProxyClass ) {
|
133
|
-
@langmod = mod
|
134
|
-
}
|
135
|
-
end
|
97
|
+
# Try each of the codes for the specified language in plain sorted
|
98
|
+
# (lexical) order, skipping any empty ones
|
99
|
+
msgs = []
|
100
|
+
mod = nil
|
136
101
|
|
102
|
+
language[:codes].sort.each do |code|
|
103
|
+
next if code == ''
|
137
104
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
# loaded), make a languageProxy class that delegates to it, and
|
149
|
-
# figure out what the languageProxy method will be called.
|
150
|
-
mod = load_language( lang.to_s.downcase )
|
151
|
-
ifaceMeth = mod.name.downcase.sub( /.*:/, '' )
|
152
|
-
languageProxyClass = make_language_proxy( mod )
|
153
|
-
|
154
|
-
# Install a hash for languageProxy classes and an accessor for the
|
155
|
-
# hash if it's not already present.
|
156
|
-
if !klass.class_variables.include?( "@@__languageProxy_class" )
|
157
|
-
klass.module_eval %{
|
158
|
-
@@__languageProxy_class = {}
|
159
|
-
def self::__languageProxy_class; @@__languageProxy_class; end
|
160
|
-
}, __FILE__, __LINE__
|
105
|
+
begin
|
106
|
+
require "linguistics/#{code}"
|
107
|
+
self.log.debug " loaded linguistics/#{code}!"
|
108
|
+
mod = self.languages[ lang.to_sym ]
|
109
|
+
self.log.debug " set mod to %p" % [ mod ]
|
110
|
+
break
|
111
|
+
rescue LoadError => err
|
112
|
+
self.log.error " require of linguistics/#{code} failed: #{err.message}"
|
113
|
+
msgs << "Tried 'linguistics/#{code}': #{err.message}\n"
|
114
|
+
end
|
161
115
|
end
|
162
116
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
# already
|
168
|
-
unless klass.instance_methods(true).include?( ifaceMeth )
|
169
|
-
klass.module_eval %{
|
170
|
-
def #{ifaceMeth}
|
171
|
-
@__#{ifaceMeth}_languageProxy ||=
|
172
|
-
self.class.__languageProxy_class["#{ifaceMeth}"].
|
173
|
-
new( self )
|
174
|
-
end
|
175
|
-
}, __FILE__, __LINE__
|
117
|
+
if mod.is_a?( Array )
|
118
|
+
raise LoadError,
|
119
|
+
"Failed to load language extension %s:\n%s" %
|
120
|
+
[ lang, msgs.join ]
|
176
121
|
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
|
181
122
|
|
182
|
-
### Install a regular proxy method in the given klass that will delegate
|
183
|
-
### calls to missing method to the languageProxy for the given +language+.
|
184
|
-
def self::install_delegator_proxy( klass, langcode )
|
185
|
-
raise ArgumentError, "Missing langcode" if langcode.nil?
|
186
|
-
|
187
|
-
# Alias any currently-extant
|
188
|
-
if klass.instance_methods( false ).include?( "method_missing" )
|
189
|
-
klass.module_eval %{
|
190
|
-
alias_method :__orig_method_missing, :method_missing
|
191
|
-
}
|
192
123
|
end
|
193
124
|
|
194
|
-
|
195
|
-
# for methods supported by the linguistic proxy objects.
|
196
|
-
klass.module_eval %{
|
197
|
-
def method_missing( sym, *args, &block )
|
198
|
-
|
199
|
-
# If the linguistic delegator answers the message, install a
|
200
|
-
# delegator method and call it.
|
201
|
-
if self.send( :#{langcode} ).respond_to?( sym )
|
202
|
-
|
203
|
-
# $stderr.puts "Installing linguistic delegator method \#{sym} " \
|
204
|
-
# "for the '#{langcode}' proxy"
|
205
|
-
self.class.module_eval %{
|
206
|
-
def \#{sym}( *args, &block )
|
207
|
-
self.#{langcode}.\#{sym}( *args, &block )
|
208
|
-
end
|
209
|
-
}
|
210
|
-
self.method( sym ).call( *args, &block )
|
211
|
-
|
212
|
-
# Otherwise either call the overridden proxy method if there is
|
213
|
-
# one, or just let our parent deal with it.
|
214
|
-
else
|
215
|
-
if self.respond_to?( :__orig_method_missing )
|
216
|
-
return self.__orig_method_missing( sym, *args, &block )
|
217
|
-
else
|
218
|
-
super( sym, *args, &block )
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
}
|
223
|
-
end
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
#################################################################
|
228
|
-
### L A N G U A G E - I N D E P E N D E N T F U N C T I O N S
|
229
|
-
#################################################################
|
230
|
-
|
231
|
-
|
232
|
-
### Handle auto-magic usage
|
233
|
-
def self::const_missing( sym )
|
234
|
-
load_language( sym.to_s.downcase )
|
125
|
+
return mod
|
235
126
|
end
|
236
127
|
|
237
128
|
|
238
|
-
###############
|
239
|
-
module_function
|
240
|
-
###############
|
241
|
-
|
242
129
|
### Add linguistics functions for the specified languages to Ruby's core
|
243
130
|
### classes. The interface to all linguistic functions for a given language
|
244
131
|
### is through a method which is the same as the language's international 2- or
|
@@ -247,120 +134,71 @@ module Linguistics
|
|
247
134
|
###
|
248
135
|
### [<b>:classes</b>]
|
249
136
|
### Specify the classes which are to be extended. If this is not specified,
|
250
|
-
### the Class objects in Linguistics::
|
137
|
+
### the Class objects in Linguistics::DEFAULT_EXT_CLASSES (an Array) are
|
251
138
|
### extended.
|
252
|
-
### [<b>:
|
253
|
-
###
|
254
|
-
###
|
255
|
-
|
256
|
-
### to be called directly on extended objects directly (e.g.,
|
257
|
-
### 12.en.ordinal becomes 12.ordinal). Obviously, methods which would
|
258
|
-
### collide with the object's builtin methods will need to be invoked
|
259
|
-
### through the languageProxy. Any existing proxy methods in the extended
|
260
|
-
### classes will be preserved.
|
261
|
-
def use( *languages )
|
262
|
-
config = {}
|
139
|
+
### [<b>:monkeypatch</b>]
|
140
|
+
### Monkeypatch directly (albeit responsibly, via a mixin) the specified
|
141
|
+
### +classes+ instead of adding a single language-code method.
|
142
|
+
def self::use( *languages )
|
263
143
|
config = languages.pop if languages.last.is_a?( Hash )
|
144
|
+
config ||= {}
|
264
145
|
|
265
|
-
classes = config
|
266
|
-
classes
|
146
|
+
classes = Array(config[:classes]) if config[:classes]
|
147
|
+
classes ||= DEFAULT_EXT_CLASSES
|
267
148
|
|
268
|
-
|
269
|
-
|
149
|
+
self.log.debug "Extending %d classes with %d language modules." %
|
150
|
+
[ classes.length, languages.length ]
|
270
151
|
|
271
|
-
|
272
|
-
|
152
|
+
# Mix the language module for each requested language into each
|
153
|
+
# specified class
|
154
|
+
classes.each do |klass|
|
155
|
+
self.log.debug " extending %p" % [ klass ]
|
156
|
+
languages.each do |lang|
|
157
|
+
mod = load_language( lang ) or
|
158
|
+
raise LoadError, "failed to load a language extension for %p" % [ lang ]
|
159
|
+
self.log.debug " using %s language module: %p" % [ lang, mod ]
|
273
160
|
|
274
|
-
|
275
|
-
|
276
|
-
case config[:installProxy]
|
277
|
-
when Symbol
|
278
|
-
langcode = config[:installProxy]
|
279
|
-
when String
|
280
|
-
langcode = config[:installProxy].intern
|
281
|
-
when TrueClass
|
282
|
-
langcode = languages[0] || DefaultLanguages[0] || :en
|
161
|
+
if config[:monkeypatch]
|
162
|
+
klass.send( :include, mod )
|
283
163
|
else
|
284
|
-
|
285
|
-
|
286
|
-
|
164
|
+
inflector = make_inflector_mixin( lang, mod )
|
165
|
+
self.log.debug " made an inflector mixin: %p" % [ inflector ]
|
166
|
+
klass.send( :include, inflector )
|
287
167
|
end
|
288
|
-
|
289
|
-
install_delegator_proxy( klass, langcode )
|
290
168
|
end
|
291
|
-
|
292
|
-
end
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
### Support Lingua::EN::Inflect-style globals in a threadsafe way by using
|
297
|
-
### Thread-local variables.
|
298
|
-
|
299
|
-
### Set the default count for all unspecified plurals to +val+. Setting is
|
300
|
-
### local to calling thread.
|
301
|
-
def num=( val )
|
302
|
-
Thread.current[:persistent_count] = val
|
303
|
-
end
|
304
|
-
alias_method :NUM=, :num=
|
169
|
+
end
|
305
170
|
|
306
|
-
|
307
|
-
### calling thread.
|
308
|
-
def num
|
309
|
-
Thread.current[:persistent_count]
|
171
|
+
return classes
|
310
172
|
end
|
311
|
-
alias_method :NUM, :num
|
312
173
|
|
313
|
-
|
314
|
-
### Set the 'classical pluralizations' flag to +val+. Setting is local to
|
315
|
-
### calling thread.
|
316
|
-
def classical=( val )
|
317
|
-
Thread.current[:classical_plurals] = val
|
318
|
-
end
|
319
174
|
|
320
|
-
###
|
321
|
-
###
|
322
|
-
def
|
323
|
-
|
324
|
-
|
175
|
+
### Create a mixin module/class pair that act as the per-object interface to
|
176
|
+
### the given language +mod+'s inflector.
|
177
|
+
def self::make_inflector_mixin( lang, mod )
|
178
|
+
language = LANGUAGE_CODES[ lang.to_sym ] or
|
179
|
+
raise "Unknown ISO639-2 language code '#{lang}'"
|
325
180
|
|
181
|
+
unless mixin = self.inflector_mixins[ mod ]
|
182
|
+
self.log.debug "Making an inflector mixin for %p" % [ mod ]
|
326
183
|
|
327
|
-
|
328
|
-
|
329
|
-
|
184
|
+
bibcode, alpha2code, termcode = *language[:codes]
|
185
|
+
inflector = Class.new( Linguistics::Inflector ) { include(mod) }
|
186
|
+
self.log.debug " created inflector class %p for [%p, %p, %p]" %
|
187
|
+
[ inflector, bibcode, termcode, alpha2code ]
|
330
188
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
raise "Unknown language code '#{lang}'" unless
|
335
|
-
LanguageCodes.key?( lang )
|
336
|
-
|
337
|
-
# Sort all the codes for the specified language, trying the 2-letter
|
338
|
-
# versions first in alphabetical order, then the 3-letter ones
|
339
|
-
msgs = []
|
340
|
-
mod = LanguageCodes[ lang ][:codes].sort {|a,b|
|
341
|
-
(a.length <=> b.length).nonzero? ||
|
342
|
-
(a <=> b)
|
343
|
-
}.each do |code|
|
344
|
-
unless Linguistics::const_defined?( code.upcase )
|
345
|
-
begin
|
346
|
-
require "linguistics/#{code}"
|
347
|
-
rescue LoadError => err
|
348
|
-
msgs << "Tried 'linguistics/#{code}': #{err.message}\n"
|
349
|
-
next
|
189
|
+
mixin = Module.new do
|
190
|
+
define_method( bibcode ) do
|
191
|
+
@__inflector ||= inflector.new( bibcode, self )
|
350
192
|
end
|
193
|
+
alias_method termcode, bibcode unless termcode.nil? || termcode.empty?
|
194
|
+
alias_method alpha2code, bibcode unless alpha2code.nil? || alpha2code.empty?
|
351
195
|
end
|
352
|
-
|
353
|
-
break Linguistics::const_get( code.upcase ) if
|
354
|
-
Linguistics::const_defined?( code.upcase )
|
196
|
+
self.inflector_mixins[ mod ] = mixin
|
355
197
|
end
|
356
198
|
|
357
|
-
|
358
|
-
raise LoadError,
|
359
|
-
"Failed to load language extension %s:\n%s" %
|
360
|
-
[ lang, msgs.join ]
|
361
|
-
end
|
362
|
-
return mod
|
199
|
+
return mixin
|
363
200
|
end
|
364
201
|
|
365
|
-
|
202
|
+
|
203
|
+
end # module Linguistics
|
366
204
|
|