RubyGems - linguistics - Versions diffs - 1.0.9 → 2.0.0 - Mend

linguistics 1.0.9 → 2.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (69)

data.tar.gz.sig +0 -0
data/.gemtest +0 -0
data/ChangeLog +849 -342
data/History.rdoc +11 -0
data/LICENSE +9 -9
data/Manifest.txt +44 -0
data/README.rdoc +226 -0
data/Rakefile +32 -349
data/examples/endocs.rb +272 -0
data/examples/generalize_sentence.rb +2 -1
data/examples/klingon.rb +22 -0
data/lib/linguistics.rb +130 -292
data/lib/linguistics/en.rb +337 -1628
data/lib/linguistics/en/articles.rb +138 -0
data/lib/linguistics/en/conjugation.rb +2245 -0
data/lib/linguistics/en/conjunctions.rb +202 -0
data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
data/lib/linguistics/en/linkparser.rb +41 -49
data/lib/linguistics/en/numbers.rb +483 -0
data/lib/linguistics/en/participles.rb +33 -0
data/lib/linguistics/en/pluralization.rb +810 -0
data/lib/linguistics/en/stemmer.rb +75 -0
data/lib/linguistics/en/titlecase.rb +121 -0
data/lib/linguistics/en/wordnet.rb +63 -97
data/lib/linguistics/inflector.rb +89 -0
data/lib/linguistics/iso639.rb +534 -448
data/lib/linguistics/languagebehavior.rb +36 -0
data/lib/linguistics/monkeypatches.rb +42 -0
data/spec/lib/constants.rb +15 -0
data/spec/lib/helpers.rb +38 -0
data/spec/linguistics/en/articles_spec.rb +797 -0
data/spec/linguistics/en/conjugation_spec.rb +2083 -0
data/spec/linguistics/en/conjunctions_spec.rb +154 -0
data/spec/linguistics/en/infinitives_spec.rb +518 -0
data/spec/linguistics/en/linkparser_spec.rb +66 -0
data/spec/linguistics/en/numbers_spec.rb +1295 -0
data/spec/linguistics/en/participles_spec.rb +55 -0
data/spec/linguistics/en/pluralization_spec.rb +4636 -0
data/spec/linguistics/en/stemmer_spec.rb +72 -0
data/spec/linguistics/en/titlecase_spec.rb +841 -0
data/spec/linguistics/en/wordnet_spec.rb +85 -0
data/spec/linguistics/en_spec.rb +45 -167
data/spec/linguistics/inflector_spec.rb +40 -0
data/spec/linguistics/iso639_spec.rb +49 -53
data/spec/linguistics/monkeypatches_spec.rb +40 -0
data/spec/linguistics_spec.rb +46 -76
metadata +241 -113
metadata.gz.sig +0 -0
data/README +0 -166
data/README.english +0 -245
data/rake/191_compat.rb +0 -26
data/rake/dependencies.rb +0 -76
data/rake/documentation.rb +0 -123
data/rake/helpers.rb +0 -502
data/rake/hg.rb +0 -318
data/rake/manual.rb +0 -787
data/rake/packaging.rb +0 -129
data/rake/publishing.rb +0 -341
data/rake/style.rb +0 -62
data/rake/svn.rb +0 -668
data/rake/testing.rb +0 -152
data/rake/verifytask.rb +0 -64
data/tests/en/infinitive.tests.rb +0 -207
data/tests/en/inflect.tests.rb +0 -1389
data/tests/en/lafcadio.tests.rb +0 -77
data/tests/en/linkparser.tests.rb +0 -42
data/tests/en/lprintf.tests.rb +0 -77
data/tests/en/titlecase.tests.rb +0 -73
data/tests/en/wordnet.tests.rb +0 -95

data/spec/linguistics/en/wordnet_spec.rb ADDED Viewed

@@ -0,0 +1,85 @@
+#!/usr/bin/env spec -cfs
+BEGIN {
+	require 'pathname'
+	basedir = Pathname.new( __FILE__ ).dirname.parent.parent.parent
+	libdir = basedir + "lib"
+	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
+	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
+}
+require 'rspec'
+require 'spec/lib/helpers'
+require 'linguistics'
+require 'linguistics/en'
+require 'linguistics/en/wordnet'
+describe Linguistics::EN::WordNet do
+	before( :all ) do
+		setup_logging()
+		Linguistics.use( :en )
+	end
+	after( :all ) do
+		reset_logging()
+	end
+	it "adds EN::WordNet to the list of English language modules" do
+		Linguistics::EN::MODULES.include?( Linguistics::EN::WordNet )
+	end
+	describe "on a system that has the 'wordnet' library installed" do
+		before( :each ) do
+			pending "installation of the wordnet library" unless
+				Linguistics::EN.has_wordnet?
+		end
+		it "can create a WordNet::Synset from a word" do
+			"jackal".en.synset.should be_a( WordNet::Synset )
+		end
+		it "can load all synsets for a word" do
+			result = "appear".en.synsets
+			result.should have( 7 ).members
+			result.should include( WordNet::Synset[200422090] )
+		end
+	end
+	describe "on a system that doesn't have the 'wordnet' library" do
+		before( :all ) do
+			# If the system *does* have wordnet support, pretend it doesn't.
+			if Linguistics::EN.has_wordnet?
+				@had_wordnet = true
+				error = LoadError.new( "no such file to load -- wordnet" )
+				Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, false )
+				Linguistics::EN::WordNet.instance_variable_set( :@wn_error, error )
+			end
+		end
+		after( :all ) do
+			if @had_wordnet
+				Linguistics::EN::WordNet.instance_variable_set( :@has_wordnet, true )
+				Linguistics::EN::WordNet.instance_variable_set( :@wn_error, nil )
+			end
+		end
+		it "raises the appropriate LoadError when you try to use wordnet functionality" do
+			expect {
+				"persimmon".en.synset
+			}.to raise_error( LoadError, %r{wordnet}i )
+		end
+	end
+end

data/spec/linguistics/en_spec.rb CHANGED Viewed

@@ -6,210 +6,88 @@ BEGIN {
 	libdir = basedir + "lib"
-	$LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
+	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
 }
-begin
-	require 'spec/runner'
-	require 'linguistics'
-	require 'linguistics/en'
-rescue LoadError
-	unless Object.const_defined?( :Gem )
-		require 'rubygems'
-		retry
-	end
-	raise
-end
+require 'rspec'
+require 'spec/lib/helpers'
+require 'linguistics'
+require 'linguistics/en'
+require 'linguistics/languagebehavior'
 describe Linguistics::EN do
 	before( :all ) do
-		Linguistics::use( :en )
+		setup_logging( :fatal )
+		Linguistics.use( :en, :proxy => true )
 		include Linguistics::EN
 	end
-	describe "conjunctions with an Array of a single element" do
-		before( :each ) do
-			@array = ['cat']
-		end
-		it "results in a phrase with indefinite article" do
-			@array.en.conjunction.should == "a cat"
-		end
+	after( :all ) do
+		reset_logging()
 	end
-	describe "conjunction with an Array of two different words" do
+	it_behaves_like "a Linguistics language module"
-		before( :each ) do
-			@array = ['cat', 'dog']
-		end
-		it "results in a phrase joined with 'and' with default options" do
-			@array.en.conjunction.should == "a cat and a dog"
-		end
-		it "results in a phrase joined with 'plus' if 'plus' is set as the conjunctive" do
-			@array.en.conjunction(:conjunctive => 'plus').should == "a cat plus a dog"
-		end
-		it "results in a phrase joined with a space if an empty string is set as the conjunctive" do
-			@array.en.conjunction(:conjunctive => '').should == "a cat a dog"
-		end
+	it "provides a predicate for testing for the presence of modules by name" do
+		Linguistics::EN.should_not have_extension( 'nonexistant' )
+		Linguistics::EN.should have_extension( 'articles' )
 	end
-	describe "conjunction with an Array of two words that differ only in case" do
-		before( :each ) do
-			@array = ['cat', 'Cat']
-		end
-		it "combines them into their downcased equivalents with default options" do
-			@array.en.conjunction.should == "two cats"
-		end
-		it "lists them separately if :combine is set to false" do
-			@array.en.conjunction(:combine => false).should == "a cat and a Cat"
-		end
-		it "doesn't combine them if :casefold is turned off" do
-			@array.en.conjunction(:casefold => false).should == "a cat and a Cat"
-		end
-		it "combines and lists them with a non-specific count if :generalize is set" do
-			@array.en.conjunction(:generalize => true).should == "several cats"
-		end
+	it "knows that it's not in 'classical' mode by default" do
+		Linguistics::EN.should_not be_classical()
 	end
-	describe "conjunction with an Array of many (more than two) words of varying cases" do
-		before( :each ) do
-			@array = %w{cat dog fox dog chicken chicken Fox chicken goose Dog goose}
-		end
-		it "combines them into their downcased equivalents and lists them in order of amount " +
-		   "with default options" do
-			@array.en.conjunction.should ==
-				'three dogs, three chickens, two foxes, two geese, and a cat'
-		end
-		it "lists them separately if :combine is set to false" do
-			@array.en.conjunction(:combine => false).should ==
-				'a cat, a dog, a fox, a dog, a chicken, a chicken, a Fox, a '\
-				'chicken, a goose, a Dog, and a goose'
-		end
-		it "doesn't combine the differently-cased ones if :casefold is turned off" do
-			@array.en.conjunction(:casefold => false).should ==
-				'three chickens, two dogs, two geese, a cat, a fox, a Fox, '\
-				'and a Dog'
+	it "can run a single block in classical mode" do
+		Linguistics::EN.in_classical_mode do
+			Linguistics::EN.should be_classical()
 		end
-		it "combines and lists them with a non-specific count if :generalize is set" do
-			@array.en.conjunction(:generalize => true).should ==
-				'several dogs, several chickens, several foxes, several '\
-				'geese, and a cat'
-		end
 	end
-	describe "conjunction with an object-transform block" do
-		it "doesn't still have #6: #conjunction doesn't invoke supplied block under some conditions"
-		before( :each ) do
-			# Create a new class, as we need to guarantee that this will be the
-			# first #conjunction call to it.
-			@collection = Class::new {
-				include Enumerable, Linguistics
-				def initialize( *ary )
-					@ary = ary.flatten
-				end
-				# Delegate #each to the contained Array
-				def each( &block )
-					@ary.each( &block )
-				end
-			}
-			@obj = @collection.new( 'foo', 'bar', 'baz', 'tree', 'node', 'sonogram' )
-		end
-		it "uses supplied block for object transform on first invocation" do
-			@obj.en.conjunction {|word| "%s-letter word" % word.length.en.numwords }.should ==
-				"three three-letter words, two four-letter words, and an eight-letter word"
+	it "handles nested classical blocks correctly" do
+		Linguistics::EN.in_classical_mode do
+			Linguistics::EN.in_classical_mode do
+				Linguistics::EN.should be_classical()
+			end
+			Linguistics::EN.should be_classical()
 		end
+		Linguistics::EN.should_not be_classical()
 	end
-	def test_conjunction_should_use_supplied_block_for_object_transform
-		rval = nil
-		assert_nothing_raised do
-			rval = Items.en.conjunction {|word| "%s-word" % word[0,1]}
-		end
-		assert_equal "three c-words and a b-word", rval
+	it "provides a sprintf-like function for interpolating variables into a String" do
+		"I have %CONJUNCT.".en.lprintf( ["cat", "cat", "dog"] ).
+			should == "I have two cats and a dog."
 	end
-	def test_conjunction_should_use_supplied_block_for_object_transform_through_autoproxy
-		rval = nil
+	context "lprintf formatters" do
-		assert_nothing_raised do
-			rval = Items.conjunction {|word| "%s-word" % word[0,1]}
+		before( :all ) do
+			@real_formatters = Linguistics::EN.lprintf_formatters
 		end
-		assert_equal "three c-words and a b-word", rval
-	end
-	def test_conjunction_with_penultimate_separator_turned_off_should_not_use_one
-		rval = nil
-		assert_nothing_raised do
-			rval = Items.en.conjunction( :penultimate => false )
+		before( :each ) do
+			Linguistics::EN.lprintf_formatters.clear
 		end
-		assert_equal "a cow, a chicken, a blancmange and a cyclist", rval
-	end
-	def test_three_item_conjunction_should_honor_penultimate_setting
-		rval = nil
-		assert_nothing_raised do
-			rval = %w{duck cow dog}.en.conjunction( :penultimate => false )
+		after( :all ) do
+			Linguistics::EN.lprintf_formatters.replace( @real_formatters )
 		end
-		assert_equal "a duck, a cow and a dog", rval
-	end
-	def test_conjunction_uses_alt_separator_if_phrases_include_the_primary_one
-		rval = nil
-		scene_items = [
-			"desk with stamps, paper, and envelopes on it",
-			"basket containing milk, eggs, and broccoli",
-			"chair",
-			"wooden chest",
-			"hat rack",
-		]
-		assert_nothing_raised do
-			rval = scene_items.conjunction
+		it "provides a way to register new lprintf formatters with a Symbol" do
+			Linguistics::EN.register_lprintf_formatter :TEST, :plural
+			Linguistics::EN.lprintf_formatters.should have( 1 ).member
+			Linguistics::EN.lprintf_formatters.should include( :TEST )
+			Linguistics::EN.lprintf_formatters[ :TEST ].should be_a( Proc )
 		end
-		assert_equal "a desk with stamps, paper, and envelopes on it; " +
-			"a basket containing milk, eggs, and broccoli; " +
-			"a chair; a wooden chest; and a hat rack", rval
-	end
+	end
 end

data/spec/linguistics/inflector_spec.rb ADDED Viewed

@@ -0,0 +1,40 @@
+#!/usr/bin/env spec -cfs
+BEGIN {
+	require 'pathname'
+	basedir = Pathname.new( __FILE__ ).dirname.parent.parent
+	libdir = basedir + "lib"
+	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
+	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
+}
+require 'rspec'
+require 'spec/lib/helpers'
+require 'linguistics'
+require 'linguistics/inflector'
+describe Linguistics::Inflector do
+	before( :all ) do
+		setup_logging( :fatal )
+	end
+	after( :all ) do
+		reset_logging()
+	end
+	it "provides a human-readable representation of the object suitable for debugging" do
+		obj = Object.new
+		result = Linguistics::Inflector.new( :en, obj ).inspect
+		result.should include( (obj.object_id / 2).to_s(16) )
+		result.should =~ /english-language/i
+	end
+end

data/spec/linguistics/iso639_spec.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 #!/usr/bin/env spec -cfs
+#encoding: utf-8
 BEGIN {
 	require 'pathname'
@@ -6,67 +7,62 @@ BEGIN {
 	libdir = basedir + "lib"
-	$LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
+	$LOAD_PATH.unshift( basedir.to_s ) unless $LOAD_PATH.include?( basedir.to_s )
+	$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
 }
-begin
-	require 'spec/runner'
-	require 'linguistics/iso639'
-rescue LoadError
-	unless Object.const_defined?( :Gem )
-		require 'rubygems'
-		retry
-	end
-	raise
-end
+require 'rspec'
+require 'spec/lib/helpers'
+require 'linguistics'
+require 'linguistics/iso639'
-describe Linguistics, " language codes" do
+describe Linguistics::ISO639 do
+	# eng||en|English|anglais
 	it "loads simple language codes from its __DATA__ section" do
-		Linguistics::LanguageCodes.should have_key( "en" )
-		Linguistics::LanguageCodes[ "en" ].should have(2).members
-		Linguistics::LanguageCodes[ "en" ].should have_key( :codes )
-		Linguistics::LanguageCodes[ "en" ][:codes].should have(2).members
-		Linguistics::LanguageCodes[ "en" ][:codes].should include("en")
-		Linguistics::LanguageCodes[ "en" ][:codes].should include("eng")
-		Linguistics::LanguageCodes[ "en" ].should have_key( :desc )
-		Linguistics::LanguageCodes[ "en" ][:desc].should == 'English'
+		Linguistics::LANGUAGE_CODES.should have_key( :en )
+		Linguistics::LANGUAGE_CODES[ :en ].should have(3).members
+		Linguistics::LANGUAGE_CODES[ :en ].should have_key( :codes )
+		Linguistics::LANGUAGE_CODES[ :en ][:codes].should have(2).members
+		Linguistics::LANGUAGE_CODES[ :en ][:codes].should include("en", "eng")
+		Linguistics::LANGUAGE_CODES[ :en ].should have_key( :eng_name )
+		Linguistics::LANGUAGE_CODES[ :en ][:eng_name].should == 'English'
+		Linguistics::LANGUAGE_CODES[ :en ].should have_key( :fre_name )
+		Linguistics::LANGUAGE_CODES[ :en ][:fre_name].should == 'anglais'
 	end
 	it "loads language codes with variants from its __DATA__ section" do
-		# ces/cze  cs    Czech
-		Linguistics::LanguageCodes.should have_key( "cs" )
-		Linguistics::LanguageCodes[ "cs" ].should have(2).members
-		Linguistics::LanguageCodes[ "cs" ].should have_key( :codes )
-		Linguistics::LanguageCodes[ "cs" ][:codes].should have(3).members
-		Linguistics::LanguageCodes[ "cs" ][:codes].should include("cs")
-		Linguistics::LanguageCodes[ "cs" ][:codes].should include("ces")
-		Linguistics::LanguageCodes[ "cs" ][:codes].should include("cze")
-		Linguistics::LanguageCodes[ "cs" ].should have_key( :desc )
-		Linguistics::LanguageCodes[ "cs" ][:desc].should == 'Czech'
-		# jav/jaw  jv/jw Javanese
-		Linguistics::LanguageCodes.should have_key( "jv" )
-		Linguistics::LanguageCodes.should have_key( "jw" )
-		Linguistics::LanguageCodes[ "jv" ].should == Linguistics::LanguageCodes[ "jw" ]
-		Linguistics::LanguageCodes[ "jv" ].should have(2).members
-		Linguistics::LanguageCodes[ "jv" ].should have_key( :codes )
-		Linguistics::LanguageCodes[ "jv" ][:codes].should have(4).members
-		Linguistics::LanguageCodes[ "jv" ][:codes].should include("jv")
-		Linguistics::LanguageCodes[ "jv" ][:codes].should include("jw")
-		Linguistics::LanguageCodes[ "jv" ][:codes].should include("jav")
-		Linguistics::LanguageCodes[ "jv" ][:codes].should include("jaw")
-		Linguistics::LanguageCodes[ "jv" ].should have_key( :desc )
-		Linguistics::LanguageCodes[ "jv" ][:desc].should == 'Javanese'
+		# cze|ces|cs|Czech|tchèque
+		Linguistics::LANGUAGE_CODES.should have_key( :cs )
+		Linguistics::LANGUAGE_CODES[ :cs ].should have(3).members
+		Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :codes )
+		Linguistics::LANGUAGE_CODES[ :cs ][:codes].should have(3).members
+		Linguistics::LANGUAGE_CODES[ :cs ][:codes].should include("cs", "ces", "cze")
+		Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :eng_name )
+		Linguistics::LANGUAGE_CODES[ :cs ][:eng_name].should == 'Czech'
+		Linguistics::LANGUAGE_CODES[ :cs ].should have_key( :fre_name )
+		Linguistics::LANGUAGE_CODES[ :cs ][:fre_name].should == 'tchèque'
+		# mac|mkd|mk|Macedonian|macédonien
+		Linguistics::LANGUAGE_CODES.should have_key( :mk )
+		Linguistics::LANGUAGE_CODES[ :mk ].should have( 3 ).members
+		Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :codes )
+		Linguistics::LANGUAGE_CODES[ :mk ][:codes].should have(3).members
+		Linguistics::LANGUAGE_CODES[ :mk ][:codes].should include("mk", "mac", "mkd")
+		Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :eng_name )
+		Linguistics::LANGUAGE_CODES[ :mk ][:eng_name].should == 'Macedonian'
+		Linguistics::LANGUAGE_CODES[ :mk ].should have_key( :fre_name )
+		Linguistics::LANGUAGE_CODES[ :mk ][:fre_name].should == 'macédonien'
 	end
 end