linguistics 1.0.9 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69)
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env spec -cfs
2
+
3
+ BEGIN {
4
+ require 'pathname'
5
+ basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
6
+
7
+ libdir = basedir + "lib"
8
+
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11
+ }
12
+
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/en'
18
+ require 'linguistics/en/wordnet'
19
+
20
+
21
+ describe Linguistics::EN::WordNet do
22
+
23
+ before( :all ) do
24
+ setup_logging()
25
+ Linguistics.use( :en )
26
+ end
27
+
28
+ after( :all ) do
29
+ reset_logging()
30
+ end
31
+
32
+
33
+ it "adds EN::WordNet to the list of English language modules" do
34
+ Linguistics::EN::MODULES.include?( Linguistics::EN::WordNet )
35
+ end
36
+
37
+
38
+ describe "on a system that has the 'wordnet' library installed" do
39
+
40
+ before( :each ) do
41
+ pending "installation of the wordnet library" unless
42
+ Linguistics::EN.has_wordnet?
43
+ end
44
+
45
+ it "can create a WordNet::Synset from a word" do
46
+ "jackal".en.synset.should be_a( WordNet::Synset )
47
+ end
48
+
49
+ it "can load all synsets for a word" do
50
+ result = "appear".en.synsets
51
+ result.should have( 7 ).members
52
+ result.should include( WordNet::Synset[200422090] )
53
+ end
54
+
55
+ end
56
+
57
+
58
+ describe "on a system that doesn't have the 'wordnet' library" do
59
+ before( :all ) do
60
+ # If the system *does* have wordnet support, pretend it doesn't.
61
+ if Linguistics::EN.has_wordnet?
62
+ @had_wordnet = true
63
+ error = LoadError.new( "no such file to load -- wordnet" )
64
+ Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, false )
65
+ Linguistics::EN::WordNet.instance_variable_set( :@wn_error, error )
66
+ end
67
+ end
68
+
69
+ after( :all ) do
70
+ if @had_wordnet
71
+ Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, true )
72
+ Linguistics::EN::WordNet.instance_variable_set( :@wn_error, nil )
73
+ end
74
+ end
75
+
76
+ it "raises the appropriate LoadError when you try to use wordnet functionality" do
77
+ expect {
78
+ "persimmon".en.synset
79
+ }.to raise_error( LoadError, %r{wordnet}i )
80
+ end
81
+
82
+ end
83
+
84
+ end
85
+
@@ -6,210 +6,88 @@ BEGIN {
6
6
 
7
7
  libdir = basedir + "lib"
8
8
 
9
- $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
10
11
  }
11
12
 
12
- begin
13
- require 'spec/runner'
14
- require 'linguistics'
15
- require 'linguistics/en'
16
- rescue LoadError
17
- unless Object.const_defined?( :Gem )
18
- require 'rubygems'
19
- retry
20
- end
21
- raise
22
- end
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/en'
18
+ require 'linguistics/languagebehavior'
23
19
 
24
20
 
25
21
  describe Linguistics::EN do
26
22
 
27
23
  before( :all ) do
28
- Linguistics::use( :en )
24
+ setup_logging( :fatal )
25
+ Linguistics.use( :en, :proxy => true )
29
26
  include Linguistics::EN
30
27
  end
31
28
 
32
-
33
- describe "conjunctions with an Array of a single element" do
34
-
35
- before( :each ) do
36
- @array = ['cat']
37
- end
38
-
39
- it "results in a phrase with indefinite article" do
40
- @array.en.conjunction.should == "a cat"
41
- end
42
-
29
+ after( :all ) do
30
+ reset_logging()
43
31
  end
44
32
 
45
33
 
46
- describe "conjunction with an Array of two different words" do
34
+ it_behaves_like "a Linguistics language module"
47
35
 
48
- before( :each ) do
49
- @array = ['cat', 'dog']
50
- end
51
36
 
52
- it "results in a phrase joined with 'and' with default options" do
53
- @array.en.conjunction.should == "a cat and a dog"
54
- end
55
-
56
- it "results in a phrase joined with 'plus' if 'plus' is set as the conjunctive" do
57
- @array.en.conjunction(:conjunctive => 'plus').should == "a cat plus a dog"
58
- end
59
-
60
- it "results in a phrase joined with a space if an empty string is set as the conjunctive" do
61
- @array.en.conjunction(:conjunctive => '').should == "a cat a dog"
62
- end
63
-
37
+ it "provides a predicate for testing for the presence of modules by name" do
38
+ Linguistics::EN.should_not have_extension( 'nonexistant' )
39
+ Linguistics::EN.should have_extension( 'articles' )
64
40
  end
65
41
 
66
-
67
- describe "conjunction with an Array of two words that differ only in case" do
68
-
69
- before( :each ) do
70
- @array = ['cat', 'Cat']
71
- end
72
-
73
- it "combines them into their downcased equivalents with default options" do
74
- @array.en.conjunction.should == "two cats"
75
- end
76
-
77
- it "lists them separately if :combine is set to false" do
78
- @array.en.conjunction(:combine => false).should == "a cat and a Cat"
79
- end
80
-
81
- it "doesn't combine them if :casefold is turned off" do
82
- @array.en.conjunction(:casefold => false).should == "a cat and a Cat"
83
- end
84
-
85
- it "combines and lists them with a non-specific count if :generalize is set" do
86
- @array.en.conjunction(:generalize => true).should == "several cats"
87
- end
88
-
42
+ it "knows that it's not in 'classical' mode by default" do
43
+ Linguistics::EN.should_not be_classical()
89
44
  end
90
45
 
91
-
92
- describe "conjunction with an Array of many (more than two) words of varying cases" do
93
-
94
- before( :each ) do
95
- @array = %w{cat dog fox dog chicken chicken Fox chicken goose Dog goose}
96
- end
97
-
98
- it "combines them into their downcased equivalents and lists them in order of amount " +
99
- "with default options" do
100
- @array.en.conjunction.should ==
101
- 'three dogs, three chickens, two foxes, two geese, and a cat'
102
- end
103
-
104
- it "lists them separately if :combine is set to false" do
105
- @array.en.conjunction(:combine => false).should ==
106
- 'a cat, a dog, a fox, a dog, a chicken, a chicken, a Fox, a '\
107
- 'chicken, a goose, a Dog, and a goose'
108
- end
109
-
110
- it "doesn't combine the differently-cased ones if :casefold is turned off" do
111
- @array.en.conjunction(:casefold => false).should ==
112
- 'three chickens, two dogs, two geese, a cat, a fox, a Fox, '\
113
- 'and a Dog'
46
+ it "can run a single block in classical mode" do
47
+ Linguistics::EN.in_classical_mode do
48
+ Linguistics::EN.should be_classical()
114
49
  end
115
-
116
- it "combines and lists them with a non-specific count if :generalize is set" do
117
- @array.en.conjunction(:generalize => true).should ==
118
- 'several dogs, several chickens, several foxes, several '\
119
- 'geese, and a cat'
120
- end
121
-
122
50
  end
123
-
124
-
125
- describe "conjunction with an object-transform block" do
126
-
127
- it "doesn't still have #6: #conjunction doesn't invoke supplied block under some conditions"
128
- before( :each ) do
129
- # Create a new class, as we need to guarantee that this will be the
130
- # first #conjunction call to it.
131
- @collection = Class::new {
132
- include Enumerable, Linguistics
133
- def initialize( *ary )
134
- @ary = ary.flatten
135
- end
136
-
137
- # Delegate #each to the contained Array
138
- def each( &block )
139
- @ary.each( &block )
140
- end
141
- }
142
-
143
- @obj = @collection.new( 'foo', 'bar', 'baz', 'tree', 'node', 'sonogram' )
144
- end
145
51
 
146
- it "uses supplied block for object transform on first invocation" do
147
- @obj.en.conjunction {|word| "%s-letter word" % word.length.en.numwords }.should ==
148
- "three three-letter words, two four-letter words, and an eight-letter word"
52
+ it "handles nested classical blocks correctly" do
53
+ Linguistics::EN.in_classical_mode do
54
+ Linguistics::EN.in_classical_mode do
55
+ Linguistics::EN.should be_classical()
56
+ end
57
+ Linguistics::EN.should be_classical()
149
58
  end
59
+ Linguistics::EN.should_not be_classical()
150
60
  end
151
61
 
152
62
 
153
-
154
- def test_conjunction_should_use_supplied_block_for_object_transform
155
- rval = nil
156
-
157
- assert_nothing_raised do
158
- rval = Items.en.conjunction {|word| "%s-word" % word[0,1]}
159
- end
160
-
161
- assert_equal "three c-words and a b-word", rval
63
+ it "provides a sprintf-like function for interpolating variables into a String" do
64
+ "I have %CONJUNCT.".en.lprintf( ["cat", "cat", "dog"] ).
65
+ should == "I have two cats and a dog."
162
66
  end
163
67
 
164
68
 
165
- def test_conjunction_should_use_supplied_block_for_object_transform_through_autoproxy
166
- rval = nil
69
+ context "lprintf formatters" do
167
70
 
168
- assert_nothing_raised do
169
- rval = Items.conjunction {|word| "%s-word" % word[0,1]}
71
+ before( :all ) do
72
+ @real_formatters = Linguistics::EN.lprintf_formatters
170
73
  end
171
74
 
172
- assert_equal "three c-words and a b-word", rval
173
- end
174
-
175
- def test_conjunction_with_penultimate_separator_turned_off_should_not_use_one
176
- rval = nil
177
-
178
- assert_nothing_raised do
179
- rval = Items.en.conjunction( :penultimate => false )
75
+ before( :each ) do
76
+ Linguistics::EN.lprintf_formatters.clear
180
77
  end
181
-
182
- assert_equal "a cow, a chicken, a blancmange and a cyclist", rval
183
- end
184
78
 
185
- def test_three_item_conjunction_should_honor_penultimate_setting
186
- rval = nil
187
-
188
- assert_nothing_raised do
189
- rval = %w{duck cow dog}.en.conjunction( :penultimate => false )
79
+ after( :all ) do
80
+ Linguistics::EN.lprintf_formatters.replace( @real_formatters )
190
81
  end
191
-
192
- assert_equal "a duck, a cow and a dog", rval
193
- end
194
82
 
195
- def test_conjunction_uses_alt_separator_if_phrases_include_the_primary_one
196
- rval = nil
197
- scene_items = [
198
- "desk with stamps, paper, and envelopes on it",
199
- "basket containing milk, eggs, and broccoli",
200
- "chair",
201
- "wooden chest",
202
- "hat rack",
203
- ]
204
-
205
- assert_nothing_raised do
206
- rval = scene_items.conjunction
83
+
84
+ it "provides a way to register new lprintf formatters with a Symbol" do
85
+ Linguistics::EN.register_lprintf_formatter :TEST, :plural
86
+ Linguistics::EN.lprintf_formatters.should have( 1 ).member
87
+ Linguistics::EN.lprintf_formatters.should include( :TEST )
88
+ Linguistics::EN.lprintf_formatters[ :TEST ].should be_a( Proc )
207
89
  end
208
-
209
- assert_equal "a desk with stamps, paper, and envelopes on it; " +
210
- "a basket containing milk, eggs, and broccoli; " +
211
- "a chair; a wooden chest; and a hat rack", rval
212
- end
213
90
 
91
+ end
214
92
  end
215
93
 
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env spec -cfs
2
+
3
+ BEGIN {
4
+ require 'pathname'
5
+ basedir = Pathname.new( __FILE__ ).dirname.parent.parent
6
+
7
+ libdir = basedir + "lib"
8
+
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11
+ }
12
+
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/inflector'
18
+
19
+
20
+ describe Linguistics::Inflector do
21
+
22
+ before( :all ) do
23
+ setup_logging( :fatal )
24
+ end
25
+
26
+ after( :all ) do
27
+ reset_logging()
28
+ end
29
+
30
+
31
+ it "provides a human-readable representation of the object suitable for debugging" do
32
+ obj = Object.new
33
+ result = Linguistics::Inflector.new( :en, obj ).inspect
34
+
35
+ result.should include( (obj.object_id / 2).to_s(16) )
36
+ result.should =~ /english-language/i
37
+ end
38
+
39
+ end
40
+
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env spec -cfs
2
+ #encoding: utf-8
2
3
 
3
4
  BEGIN {
4
5
  require 'pathname'
@@ -6,67 +7,62 @@ BEGIN {
6
7
 
7
8
  libdir = basedir + "lib"
8
9
 
9
- $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
10
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
11
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
10
12
  }
11
13
 
12
- begin
13
- require 'spec/runner'
14
- require 'linguistics/iso639'
15
- rescue LoadError
16
- unless Object.const_defined?( :Gem )
17
- require 'rubygems'
18
- retry
19
- end
20
- raise
21
- end
14
+ require 'rspec'
15
+ require 'spec/lib/helpers'
22
16
 
17
+ require 'linguistics'
18
+ require 'linguistics/iso639'
23
19
 
24
- describe Linguistics, " language codes" do
25
20
 
21
+ describe Linguistics::ISO639 do
22
+
23
+ # eng||en|English|anglais
26
24
  it "loads simple language codes from its __DATA__ section" do
27
- Linguistics::LanguageCodes.should have_key( "en" )
28
- Linguistics::LanguageCodes[ "en" ].should have(2).members
29
-
30
- Linguistics::LanguageCodes[ "en" ].should have_key( :codes )
31
- Linguistics::LanguageCodes[ "en" ][:codes].should have(2).members
32
- Linguistics::LanguageCodes[ "en" ][:codes].should include("en")
33
- Linguistics::LanguageCodes[ "en" ][:codes].should include("eng")
34
-
35
- Linguistics::LanguageCodes[ "en" ].should have_key( :desc )
36
- Linguistics::LanguageCodes[ "en" ][:desc].should == 'English'
25
+ Linguistics::LANGUAGE_CODES.should have_key( :en )
26
+ Linguistics::LANGUAGE_CODES[ :en ].should have(3).members
27
+
28
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :codes )
29
+ Linguistics::LANGUAGE_CODES[ :en ][:codes].should have(2).members
30
+ Linguistics::LANGUAGE_CODES[ :en ][:codes].should include("en", "eng")
31
+
32
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :eng_name )
33
+ Linguistics::LANGUAGE_CODES[ :en ][:eng_name].should == 'English'
34
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :fre_name )
35
+ Linguistics::LANGUAGE_CODES[ :en ][:fre_name].should == 'anglais'
37
36
  end
38
-
37
+
39
38
  it "loads language codes with variants from its __DATA__ section" do
40
39
 
41
- # ces/cze cs Czech
42
- Linguistics::LanguageCodes.should have_key( "cs" )
43
- Linguistics::LanguageCodes[ "cs" ].should have(2).members
44
-
45
- Linguistics::LanguageCodes[ "cs" ].should have_key( :codes )
46
- Linguistics::LanguageCodes[ "cs" ][:codes].should have(3).members
47
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("cs")
48
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("ces")
49
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("cze")
50
-
51
- Linguistics::LanguageCodes[ "cs" ].should have_key( :desc )
52
- Linguistics::LanguageCodes[ "cs" ][:desc].should == 'Czech'
53
-
54
- # jav/jaw jv/jw Javanese
55
- Linguistics::LanguageCodes.should have_key( "jv" )
56
- Linguistics::LanguageCodes.should have_key( "jw" )
57
- Linguistics::LanguageCodes[ "jv" ].should == Linguistics::LanguageCodes[ "jw" ]
58
- Linguistics::LanguageCodes[ "jv" ].should have(2).members
59
-
60
- Linguistics::LanguageCodes[ "jv" ].should have_key( :codes )
61
- Linguistics::LanguageCodes[ "jv" ][:codes].should have(4).members
62
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jv")
63
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jw")
64
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jav")
65
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jaw")
66
-
67
- Linguistics::LanguageCodes[ "jv" ].should have_key( :desc )
68
- Linguistics::LanguageCodes[ "jv" ][:desc].should == 'Javanese'
69
-
40
+ # cze|ces|cs|Czech|tchèque
41
+ Linguistics::LANGUAGE_CODES.should have_key( :cs )
42
+ Linguistics::LANGUAGE_CODES[ :cs ].should have(3).members
43
+
44
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :codes )
45
+ Linguistics::LANGUAGE_CODES[ :cs ][:codes].should have(3).members
46
+ Linguistics::LANGUAGE_CODES[ :cs ][:codes].should include("cs", "ces", "cze")
47
+
48
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :eng_name )
49
+ Linguistics::LANGUAGE_CODES[ :cs ][:eng_name].should == 'Czech'
50
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :fre_name )
51
+ Linguistics::LANGUAGE_CODES[ :cs ][:fre_name].should == 'tchèque'
52
+
53
+ # mac|mkd|mk|Macedonian|macédonien
54
+ Linguistics::LANGUAGE_CODES.should have_key( :mk )
55
+ Linguistics::LANGUAGE_CODES[ :mk ].should have( 3 ).members
56
+
57
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :codes )
58
+ Linguistics::LANGUAGE_CODES[ :mk ][:codes].should have(3).members
59
+ Linguistics::LANGUAGE_CODES[ :mk ][:codes].should include("mk", "mac", "mkd")
60
+
61
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :eng_name )
62
+ Linguistics::LANGUAGE_CODES[ :mk ][:eng_name].should == 'Macedonian'
63
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :fre_name )
64
+ Linguistics::LANGUAGE_CODES[ :mk ][:fre_name].should == 'macédonien'
65
+
70
66
  end
71
-
67
+
72
68
  end