linguistics 1.0.9 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env spec -cfs
2
+
3
+ BEGIN {
4
+ require 'pathname'
5
+ basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
6
+
7
+ libdir = basedir + "lib"
8
+
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11
+ }
12
+
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/en'
18
+ require 'linguistics/en/wordnet'
19
+
20
+
21
+ describe Linguistics::EN::WordNet do
22
+
23
+ before( :all ) do
24
+ setup_logging()
25
+ Linguistics.use( :en )
26
+ end
27
+
28
+ after( :all ) do
29
+ reset_logging()
30
+ end
31
+
32
+
33
+ it "adds EN::WordNet to the list of English language modules" do
34
+ Linguistics::EN::MODULES.include?( Linguistics::EN::WordNet )
35
+ end
36
+
37
+
38
+ describe "on a system that has the 'wordnet' library installed" do
39
+
40
+ before( :each ) do
41
+ pending "installation of the wordnet library" unless
42
+ Linguistics::EN.has_wordnet?
43
+ end
44
+
45
+ it "can create a WordNet::Synset from a word" do
46
+ "jackal".en.synset.should be_a( WordNet::Synset )
47
+ end
48
+
49
+ it "can load all synsets for a word" do
50
+ result = "appear".en.synsets
51
+ result.should have( 7 ).members
52
+ result.should include( WordNet::Synset[200422090] )
53
+ end
54
+
55
+ end
56
+
57
+
58
+ describe "on a system that doesn't have the 'wordnet' library" do
59
+ before( :all ) do
60
+ # If the system *does* have wordnet support, pretend it doesn't.
61
+ if Linguistics::EN.has_wordnet?
62
+ @had_wordnet = true
63
+ error = LoadError.new( "no such file to load -- wordnet" )
64
+ Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, false )
65
+ Linguistics::EN::WordNet.instance_variable_set( :@wn_error, error )
66
+ end
67
+ end
68
+
69
+ after( :all ) do
70
+ if @had_wordnet
71
+ Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, true )
72
+ Linguistics::EN::WordNet.instance_variable_set( :@wn_error, nil )
73
+ end
74
+ end
75
+
76
+ it "raises the appropriate LoadError when you try to use wordnet functionality" do
77
+ expect {
78
+ "persimmon".en.synset
79
+ }.to raise_error( LoadError, %r{wordnet}i )
80
+ end
81
+
82
+ end
83
+
84
+ end
85
+
@@ -6,210 +6,88 @@ BEGIN {
6
6
 
7
7
  libdir = basedir + "lib"
8
8
 
9
- $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
10
11
  }
11
12
 
12
- begin
13
- require 'spec/runner'
14
- require 'linguistics'
15
- require 'linguistics/en'
16
- rescue LoadError
17
- unless Object.const_defined?( :Gem )
18
- require 'rubygems'
19
- retry
20
- end
21
- raise
22
- end
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/en'
18
+ require 'linguistics/languagebehavior'
23
19
 
24
20
 
25
21
  describe Linguistics::EN do
26
22
 
27
23
  before( :all ) do
28
- Linguistics::use( :en )
24
+ setup_logging( :fatal )
25
+ Linguistics.use( :en, :proxy => true )
29
26
  include Linguistics::EN
30
27
  end
31
28
 
32
-
33
- describe "conjunctions with an Array of a single element" do
34
-
35
- before( :each ) do
36
- @array = ['cat']
37
- end
38
-
39
- it "results in a phrase with indefinite article" do
40
- @array.en.conjunction.should == "a cat"
41
- end
42
-
29
+ after( :all ) do
30
+ reset_logging()
43
31
  end
44
32
 
45
33
 
46
- describe "conjunction with an Array of two different words" do
34
+ it_behaves_like "a Linguistics language module"
47
35
 
48
- before( :each ) do
49
- @array = ['cat', 'dog']
50
- end
51
36
 
52
- it "results in a phrase joined with 'and' with default options" do
53
- @array.en.conjunction.should == "a cat and a dog"
54
- end
55
-
56
- it "results in a phrase joined with 'plus' if 'plus' is set as the conjunctive" do
57
- @array.en.conjunction(:conjunctive => 'plus').should == "a cat plus a dog"
58
- end
59
-
60
- it "results in a phrase joined with a space if an empty string is set as the conjunctive" do
61
- @array.en.conjunction(:conjunctive => '').should == "a cat a dog"
62
- end
63
-
37
+ it "provides a predicate for testing for the presence of modules by name" do
38
+ Linguistics::EN.should_not have_extension( 'nonexistant' )
39
+ Linguistics::EN.should have_extension( 'articles' )
64
40
  end
65
41
 
66
-
67
- describe "conjunction with an Array of two words that differ only in case" do
68
-
69
- before( :each ) do
70
- @array = ['cat', 'Cat']
71
- end
72
-
73
- it "combines them into their downcased equivalents with default options" do
74
- @array.en.conjunction.should == "two cats"
75
- end
76
-
77
- it "lists them separately if :combine is set to false" do
78
- @array.en.conjunction(:combine => false).should == "a cat and a Cat"
79
- end
80
-
81
- it "doesn't combine them if :casefold is turned off" do
82
- @array.en.conjunction(:casefold => false).should == "a cat and a Cat"
83
- end
84
-
85
- it "combines and lists them with a non-specific count if :generalize is set" do
86
- @array.en.conjunction(:generalize => true).should == "several cats"
87
- end
88
-
42
+ it "knows that it's not in 'classical' mode by default" do
43
+ Linguistics::EN.should_not be_classical()
89
44
  end
90
45
 
91
-
92
- describe "conjunction with an Array of many (more than two) words of varying cases" do
93
-
94
- before( :each ) do
95
- @array = %w{cat dog fox dog chicken chicken Fox chicken goose Dog goose}
96
- end
97
-
98
- it "combines them into their downcased equivalents and lists them in order of amount " +
99
- "with default options" do
100
- @array.en.conjunction.should ==
101
- 'three dogs, three chickens, two foxes, two geese, and a cat'
102
- end
103
-
104
- it "lists them separately if :combine is set to false" do
105
- @array.en.conjunction(:combine => false).should ==
106
- 'a cat, a dog, a fox, a dog, a chicken, a chicken, a Fox, a '\
107
- 'chicken, a goose, a Dog, and a goose'
108
- end
109
-
110
- it "doesn't combine the differently-cased ones if :casefold is turned off" do
111
- @array.en.conjunction(:casefold => false).should ==
112
- 'three chickens, two dogs, two geese, a cat, a fox, a Fox, '\
113
- 'and a Dog'
46
+ it "can run a single block in classical mode" do
47
+ Linguistics::EN.in_classical_mode do
48
+ Linguistics::EN.should be_classical()
114
49
  end
115
-
116
- it "combines and lists them with a non-specific count if :generalize is set" do
117
- @array.en.conjunction(:generalize => true).should ==
118
- 'several dogs, several chickens, several foxes, several '\
119
- 'geese, and a cat'
120
- end
121
-
122
50
  end
123
-
124
-
125
- describe "conjunction with an object-transform block" do
126
-
127
- it "doesn't still have #6: #conjunction doesn't invoke supplied block under some conditions"
128
- before( :each ) do
129
- # Create a new class, as we need to guarantee that this will be the
130
- # first #conjunction call to it.
131
- @collection = Class::new {
132
- include Enumerable, Linguistics
133
- def initialize( *ary )
134
- @ary = ary.flatten
135
- end
136
-
137
- # Delegate #each to the contained Array
138
- def each( &block )
139
- @ary.each( &block )
140
- end
141
- }
142
-
143
- @obj = @collection.new( 'foo', 'bar', 'baz', 'tree', 'node', 'sonogram' )
144
- end
145
51
 
146
- it "uses supplied block for object transform on first invocation" do
147
- @obj.en.conjunction {|word| "%s-letter word" % word.length.en.numwords }.should ==
148
- "three three-letter words, two four-letter words, and an eight-letter word"
52
+ it "handles nested classical blocks correctly" do
53
+ Linguistics::EN.in_classical_mode do
54
+ Linguistics::EN.in_classical_mode do
55
+ Linguistics::EN.should be_classical()
56
+ end
57
+ Linguistics::EN.should be_classical()
149
58
  end
59
+ Linguistics::EN.should_not be_classical()
150
60
  end
151
61
 
152
62
 
153
-
154
- def test_conjunction_should_use_supplied_block_for_object_transform
155
- rval = nil
156
-
157
- assert_nothing_raised do
158
- rval = Items.en.conjunction {|word| "%s-word" % word[0,1]}
159
- end
160
-
161
- assert_equal "three c-words and a b-word", rval
63
+ it "provides a sprintf-like function for interpolating variables into a String" do
64
+ "I have %CONJUNCT.".en.lprintf( ["cat", "cat", "dog"] ).
65
+ should == "I have two cats and a dog."
162
66
  end
163
67
 
164
68
 
165
- def test_conjunction_should_use_supplied_block_for_object_transform_through_autoproxy
166
- rval = nil
69
+ context "lprintf formatters" do
167
70
 
168
- assert_nothing_raised do
169
- rval = Items.conjunction {|word| "%s-word" % word[0,1]}
71
+ before( :all ) do
72
+ @real_formatters = Linguistics::EN.lprintf_formatters
170
73
  end
171
74
 
172
- assert_equal "three c-words and a b-word", rval
173
- end
174
-
175
- def test_conjunction_with_penultimate_separator_turned_off_should_not_use_one
176
- rval = nil
177
-
178
- assert_nothing_raised do
179
- rval = Items.en.conjunction( :penultimate => false )
75
+ before( :each ) do
76
+ Linguistics::EN.lprintf_formatters.clear
180
77
  end
181
-
182
- assert_equal "a cow, a chicken, a blancmange and a cyclist", rval
183
- end
184
78
 
185
- def test_three_item_conjunction_should_honor_penultimate_setting
186
- rval = nil
187
-
188
- assert_nothing_raised do
189
- rval = %w{duck cow dog}.en.conjunction( :penultimate => false )
79
+ after( :all ) do
80
+ Linguistics::EN.lprintf_formatters.replace( @real_formatters )
190
81
  end
191
-
192
- assert_equal "a duck, a cow and a dog", rval
193
- end
194
82
 
195
- def test_conjunction_uses_alt_separator_if_phrases_include_the_primary_one
196
- rval = nil
197
- scene_items = [
198
- "desk with stamps, paper, and envelopes on it",
199
- "basket containing milk, eggs, and broccoli",
200
- "chair",
201
- "wooden chest",
202
- "hat rack",
203
- ]
204
-
205
- assert_nothing_raised do
206
- rval = scene_items.conjunction
83
+
84
+ it "provides a way to register new lprintf formatters with a Symbol" do
85
+ Linguistics::EN.register_lprintf_formatter :TEST, :plural
86
+ Linguistics::EN.lprintf_formatters.should have( 1 ).member
87
+ Linguistics::EN.lprintf_formatters.should include( :TEST )
88
+ Linguistics::EN.lprintf_formatters[ :TEST ].should be_a( Proc )
207
89
  end
208
-
209
- assert_equal "a desk with stamps, paper, and envelopes on it; " +
210
- "a basket containing milk, eggs, and broccoli; " +
211
- "a chair; a wooden chest; and a hat rack", rval
212
- end
213
90
 
91
+ end
214
92
  end
215
93
 
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env spec -cfs
2
+
3
+ BEGIN {
4
+ require 'pathname'
5
+ basedir = Pathname.new( __FILE__ ).dirname.parent.parent
6
+
7
+ libdir = basedir + "lib"
8
+
9
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
10
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
11
+ }
12
+
13
+ require 'rspec'
14
+ require 'spec/lib/helpers'
15
+
16
+ require 'linguistics'
17
+ require 'linguistics/inflector'
18
+
19
+
20
+ describe Linguistics::Inflector do
21
+
22
+ before( :all ) do
23
+ setup_logging( :fatal )
24
+ end
25
+
26
+ after( :all ) do
27
+ reset_logging()
28
+ end
29
+
30
+
31
+ it "provides a human-readable representation of the object suitable for debugging" do
32
+ obj = Object.new
33
+ result = Linguistics::Inflector.new( :en, obj ).inspect
34
+
35
+ result.should include( (obj.object_id / 2).to_s(16) )
36
+ result.should =~ /english-language/i
37
+ end
38
+
39
+ end
40
+
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env spec -cfs
2
+ #encoding: utf-8
2
3
 
3
4
  BEGIN {
4
5
  require 'pathname'
@@ -6,67 +7,62 @@ BEGIN {
6
7
 
7
8
  libdir = basedir + "lib"
8
9
 
9
- $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
10
+ $LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
11
+ $LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
10
12
  }
11
13
 
12
- begin
13
- require 'spec/runner'
14
- require 'linguistics/iso639'
15
- rescue LoadError
16
- unless Object.const_defined?( :Gem )
17
- require 'rubygems'
18
- retry
19
- end
20
- raise
21
- end
14
+ require 'rspec'
15
+ require 'spec/lib/helpers'
22
16
 
17
+ require 'linguistics'
18
+ require 'linguistics/iso639'
23
19
 
24
- describe Linguistics, " language codes" do
25
20
 
21
+ describe Linguistics::ISO639 do
22
+
23
+ # eng||en|English|anglais
26
24
  it "loads simple language codes from its __DATA__ section" do
27
- Linguistics::LanguageCodes.should have_key( "en" )
28
- Linguistics::LanguageCodes[ "en" ].should have(2).members
29
-
30
- Linguistics::LanguageCodes[ "en" ].should have_key( :codes )
31
- Linguistics::LanguageCodes[ "en" ][:codes].should have(2).members
32
- Linguistics::LanguageCodes[ "en" ][:codes].should include("en")
33
- Linguistics::LanguageCodes[ "en" ][:codes].should include("eng")
34
-
35
- Linguistics::LanguageCodes[ "en" ].should have_key( :desc )
36
- Linguistics::LanguageCodes[ "en" ][:desc].should == 'English'
25
+ Linguistics::LANGUAGE_CODES.should have_key( :en )
26
+ Linguistics::LANGUAGE_CODES[ :en ].should have(3).members
27
+
28
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :codes )
29
+ Linguistics::LANGUAGE_CODES[ :en ][:codes].should have(2).members
30
+ Linguistics::LANGUAGE_CODES[ :en ][:codes].should include("en", "eng")
31
+
32
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :eng_name )
33
+ Linguistics::LANGUAGE_CODES[ :en ][:eng_name].should == 'English'
34
+ Linguistics::LANGUAGE_CODES[ :en ].should have_key( :fre_name )
35
+ Linguistics::LANGUAGE_CODES[ :en ][:fre_name].should == 'anglais'
37
36
  end
38
-
37
+
39
38
  it "loads language codes with variants from its __DATA__ section" do
40
39
 
41
- # ces/cze cs Czech
42
- Linguistics::LanguageCodes.should have_key( "cs" )
43
- Linguistics::LanguageCodes[ "cs" ].should have(2).members
44
-
45
- Linguistics::LanguageCodes[ "cs" ].should have_key( :codes )
46
- Linguistics::LanguageCodes[ "cs" ][:codes].should have(3).members
47
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("cs")
48
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("ces")
49
- Linguistics::LanguageCodes[ "cs" ][:codes].should include("cze")
50
-
51
- Linguistics::LanguageCodes[ "cs" ].should have_key( :desc )
52
- Linguistics::LanguageCodes[ "cs" ][:desc].should == 'Czech'
53
-
54
- # jav/jaw jv/jw Javanese
55
- Linguistics::LanguageCodes.should have_key( "jv" )
56
- Linguistics::LanguageCodes.should have_key( "jw" )
57
- Linguistics::LanguageCodes[ "jv" ].should == Linguistics::LanguageCodes[ "jw" ]
58
- Linguistics::LanguageCodes[ "jv" ].should have(2).members
59
-
60
- Linguistics::LanguageCodes[ "jv" ].should have_key( :codes )
61
- Linguistics::LanguageCodes[ "jv" ][:codes].should have(4).members
62
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jv")
63
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jw")
64
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jav")
65
- Linguistics::LanguageCodes[ "jv" ][:codes].should include("jaw")
66
-
67
- Linguistics::LanguageCodes[ "jv" ].should have_key( :desc )
68
- Linguistics::LanguageCodes[ "jv" ][:desc].should == 'Javanese'
69
-
40
+ # cze|ces|cs|Czech|tchèque
41
+ Linguistics::LANGUAGE_CODES.should have_key( :cs )
42
+ Linguistics::LANGUAGE_CODES[ :cs ].should have(3).members
43
+
44
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :codes )
45
+ Linguistics::LANGUAGE_CODES[ :cs ][:codes].should have(3).members
46
+ Linguistics::LANGUAGE_CODES[ :cs ][:codes].should include("cs", "ces", "cze")
47
+
48
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :eng_name )
49
+ Linguistics::LANGUAGE_CODES[ :cs ][:eng_name].should == 'Czech'
50
+ Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :fre_name )
51
+ Linguistics::LANGUAGE_CODES[ :cs ][:fre_name].should == 'tchèque'
52
+
53
+ # mac|mkd|mk|Macedonian|macédonien
54
+ Linguistics::LANGUAGE_CODES.should have_key( :mk )
55
+ Linguistics::LANGUAGE_CODES[ :mk ].should have( 3 ).members
56
+
57
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :codes )
58
+ Linguistics::LANGUAGE_CODES[ :mk ][:codes].should have(3).members
59
+ Linguistics::LANGUAGE_CODES[ :mk ][:codes].should include("mk", "mac", "mkd")
60
+
61
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :eng_name )
62
+ Linguistics::LANGUAGE_CODES[ :mk ][:eng_name].should == 'Macedonian'
63
+ Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :fre_name )
64
+ Linguistics::LANGUAGE_CODES[ :mk ][:fre_name].should == 'macédonien'
65
+
70
66
  end
71
-
67
+
72
68
  end