RubyGems - linguistics - Versions diffs - 1.0.9 → 2.0.0 - Mend

linguistics 1.0.9 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

data.tar.gz.sig +0 -0
data/.gemtest +0 -0
data/ChangeLog +849 -342
data/History.rdoc +11 -0
data/LICENSE +9 -9
data/Manifest.txt +44 -0
data/README.rdoc +226 -0
data/Rakefile +32 -349
data/examples/endocs.rb +272 -0
data/examples/generalize_sentence.rb +2 -1
data/examples/klingon.rb +22 -0
data/lib/linguistics.rb +130 -292
data/lib/linguistics/en.rb +337 -1628
data/lib/linguistics/en/articles.rb +138 -0
data/lib/linguistics/en/conjugation.rb +2245 -0
data/lib/linguistics/en/conjunctions.rb +202 -0
data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
data/lib/linguistics/en/linkparser.rb +41 -49
data/lib/linguistics/en/numbers.rb +483 -0
data/lib/linguistics/en/participles.rb +33 -0
data/lib/linguistics/en/pluralization.rb +810 -0
data/lib/linguistics/en/stemmer.rb +75 -0
data/lib/linguistics/en/titlecase.rb +121 -0
data/lib/linguistics/en/wordnet.rb +63 -97
data/lib/linguistics/inflector.rb +89 -0
data/lib/linguistics/iso639.rb +534 -448
data/lib/linguistics/languagebehavior.rb +36 -0
data/lib/linguistics/monkeypatches.rb +42 -0
data/spec/lib/constants.rb +15 -0
data/spec/lib/helpers.rb +38 -0
data/spec/linguistics/en/articles_spec.rb +797 -0
data/spec/linguistics/en/conjugation_spec.rb +2083 -0
data/spec/linguistics/en/conjunctions_spec.rb +154 -0
data/spec/linguistics/en/infinitives_spec.rb +518 -0
data/spec/linguistics/en/linkparser_spec.rb +66 -0
data/spec/linguistics/en/numbers_spec.rb +1295 -0
data/spec/linguistics/en/participles_spec.rb +55 -0
data/spec/linguistics/en/pluralization_spec.rb +4636 -0
data/spec/linguistics/en/stemmer_spec.rb +72 -0
data/spec/linguistics/en/titlecase_spec.rb +841 -0
data/spec/linguistics/en/wordnet_spec.rb +85 -0
data/spec/linguistics/en_spec.rb +45 -167
data/spec/linguistics/inflector_spec.rb +40 -0
data/spec/linguistics/iso639_spec.rb +49 -53
data/spec/linguistics/monkeypatches_spec.rb +40 -0
data/spec/linguistics_spec.rb +46 -76
metadata +241 -113
metadata.gz.sig +0 -0
data/README +0 -166
data/README.english +0 -245
data/rake/191_compat.rb +0 -26
data/rake/dependencies.rb +0 -76
data/rake/documentation.rb +0 -123
data/rake/helpers.rb +0 -502
data/rake/hg.rb +0 -318
data/rake/manual.rb +0 -787
data/rake/packaging.rb +0 -129
data/rake/publishing.rb +0 -341
data/rake/style.rb +0 -62
data/rake/svn.rb +0 -668
data/rake/testing.rb +0 -152
data/rake/verifytask.rb +0 -64
data/tests/en/infinitive.tests.rb +0 -207
data/tests/en/inflect.tests.rb +0 -1389
data/tests/en/lafcadio.tests.rb +0 -77
data/tests/en/linkparser.tests.rb +0 -42
data/tests/en/lprintf.tests.rb +0 -77
data/tests/en/titlecase.tests.rb +0 -73
data/tests/en/wordnet.tests.rb +0 -95

data/lib/linguistics/en/conjunctions.rb ADDED Viewed

@@ -0,0 +1,202 @@
+#!/usr/bin/ruby
+require 'linguistics/en' unless defined?( Linguistics::EN )
+# Conjunction methods for the English-language Linguistics module.
+module Linguistics::EN::Conjunctions
+	# Register this module to the list of modules to include
+	Linguistics::EN.register_extension( self )
+	# :stopdoc:
+	# Default configuration arguments for the #conjunction (junction, what's
+	# your) function. Caller-supplied options are merged over these.
+	CONJUNCTION_DEFAULTS = {
+		# String placed between items of the resulting list
+		:separator		=> ', ',
+		# Separator used instead when a clause already contains :separator
+		:altsep			=> '; ',
+		# Whether the last clause is joined on with a penultimate separator
+		:penultimate	=> true,
+		# Word used to introduce the last clause
+		:conjunctive	=> 'and',
+		# Whether identical items are collapsed into one counted item
+		:combine		=> true,
+		# Whether items are compared case-insensitively when combining
+		:casefold		=> true,
+		# Whether combined quantities are described generally, not exactly
+		:generalize		=> false,
+		# Whether combined items are listed most-numerous first
+		:quantsort		=> true,
+	}
+	# Re-enable RDoc output, which the :stopdoc: directive above turned off, so
+	# that the documentation for #conjunction is generated.
+	# :startdoc:
+	# :TODO: Needs refactoring
+	### Return the specified +obj+ (which must support the <tt>#collect</tt>
+	### method) as a conjunction. Each item is converted to a String if it is
+	### not already (using #to_s) unless a block is given, in which case it is
+	### called once for each object in the array, and the stringified return
+	### value from the block is used instead. Returning +nil+ causes that
+	### particular element to be omitted from the resulting conjunction. The
+	### following options can be used to control the makeup of the returned
+	### conjunction String:
+	###
+	### [<b>:separator</b>]
+	###   Specify one or more characters to separate items in the resulting
+	###   list. Defaults to <tt>', '</tt>.
+	### [<b>:altsep</b>]
+	###   An alternate separator to use if any of the resulting conjunction's
+	###   clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
+	### [<b>:penultimate</b>]
+	###   Flag that indicates whether or not to join the last clause onto the
+	###   rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
+	###     %w{duck cow dog}.en.conjunction
+	###     # => "a duck, a cow, and a dog"
+	###     %w{duck cow dog}.en.conjunction( :penultimate => false )
+	###     # => "a duck, a cow and a dog"
+	###   Default to <tt>true</tt>.
+	### [<b>:conjunctive</b>]
+	###   Sets the word used as the conjunctive (separating word) of the
+	###   resulting string. Default to <tt>'and'</tt>.
+	### [<b>:combine</b>]
+	###   If set to <tt>true</tt> (the default), items which are identical (after
+	###   surrounding spaces are stripped) will be combined in the resulting
+	###   conjunction. E.g.,
+	###     %w{goose cow goose dog}.en.conjunction
+	###     # => "two geese, a cow, and a dog"
+	###     %w{goose cow goose dog}.en.conjunction( :combine => false )
+	###     # => "a goose, a cow, a goose, and a dog"
+	### [<b>:casefold</b>]
+	###   If set to <tt>true</tt> (the default), then items are compared
+	###   case-insensitively when combining them. This has no effect if
+	###   <tt>:combine</tt> is <tt>false</tt>.
+	### [<b>:generalize</b>]
+	###   If set to <tt>true</tt>, then quantities of combined items are turned into
+	###   general descriptions instead of exact amounts.
+	###     ary = %w{goose pig dog horse goose reindeer goose dog horse}
+	###     ary.en.conjunction
+	###     # => "three geese, two dogs, two horses, a pig, and a reindeer"
+	###     ary.en.conjunction( :generalize => true )
+	###     # => "several geese, several dogs, several horses, a pig, and a reindeer"
+	###   See the #quantify method for specifics on how quantities are
+	###   generalized. Generalization defaults to <tt>false</tt>, and has no effect if
+	###   :combine is <tt>false</tt>.
+	### [<b>:quantsort</b>]
+	###   If set to <tt>true</tt> (the default), items which are combined in the
+	###   resulting conjunction will be listed in order of amount, with greater
+	###   quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
+	###   will appear where the first instance of them occurred in the
+	###   list. This sort is also the fallback for identical quantities (i.e.,
+	###   items of the same quantity will be listed in the order they appeared
+	###   in the source list).
+	###
+	def conjunction( args={} )
+		config = CONJUNCTION_DEFAULTS.merge( args )
+		# Transform items in the obj to phrases
+		phrases = if block_given?
+				self.log.debug "  collecting with a block"
+				self.collect {|item| yield(item) }.compact
+			else
+				self.log.debug "  collecting without a block"
+				rval = self.collect( &:to_s )
+				self.log.debug "  collected: %p" % [ rval ]
+				rval
+			end
+		self.log.debug "  phrases is: %p" % [ phrases ]
+		# No need for a conjunction if there's only one thing
+		return phrases[0].en.a if phrases.length < 2
+		# Set up a Proc to derive a collector key from a phrase depending on the
+		# configuration
+		keyfunc =
+			if config[:casefold]
+				proc {|key| key.downcase.strip}
+			else
+				proc {|key| key.strip}
+			end
+		# Count and delete phrases that hash the same when the keyfunc munges
+		# them into the same thing if we're combining (:combine => true).
+		collector = {}
+		if config[:combine]
+			phrases.each_index do |i|
+				# Stop when reaching the end of a truncated list
+				break if phrases[i].nil?
+				# Make the key using the configured key function
+				phrase = keyfunc[ phrases[i] ]
+				# If the collector already has this key, increment its count,
+				# eliminate the duplicate from the phrase list, and redo the loop.
+				if collector.key?( phrase )
+					collector[ phrase ] += 1
+					phrases.delete_at( i )
+					redo
+				end
+				collector[ phrase ] = 1
+			end
+		else
+			# If we're not combining, just make everything have a count of 1.
+			phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
+		end
+		# If sort-by-quantity is turned on, sort the phrases first by how many
+		# there are (most-first), and then by the order they were specified in.
+		if config[:quantsort] && config[:combine]
+			origorder = {}
+			phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
+			phrases.sort! {|a,b|
+				(collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
+				(origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
+			}
+		end
+		# Set up a filtering function that adds either an indefinite article, an
+		# indefinite quantifier, or a definite quantifier to each phrase
+		# depending on the configuration and the count of phrases in the
+		# collector.
+		filter =
+			if config[:generalize]
+				proc {|phrase, count| phrase.en.quantify(count) }
+			else
+				proc do |phrase, count|
+					if count > 1
+						"%s %s" % [
+							# :TODO: Make this threshold settable
+							count < 10 ? count.en.numwords : count.to_s,
+							phrase.en.plural( count )
+						]
+					else
+						phrase.en.a
+					end
+				end
+			end
+		# Now use the configured filter to turn each phrase into its final
+		# form. Hmmm... square-bracket Lisp?
+		phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }
+		# Prepend the conjunctive to the last element unless it's empty or
+		# there's only one element
+		phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
+			config[:conjunctive].strip.empty? or
+			phrases.length < 2
+		# Concatenate the last two elements if there's no penultimate separator,
+		# and pick a separator based on how many phrases there are and whether
+		# or not there's already an instance of it in the phrases.
+		phrase_count = phrases.length
+		phrases[-2] << " " << phrases.pop unless config[:penultimate]
+		sep = config[:separator]
+		if phrase_count <= 2
+			sep = ' '
+		elsif phrases.find {|str| str.include?(config[:separator]) }
+			sep = config[:altsep]
+		end
+		return phrases.join( sep )
+	end
+	Linguistics::EN.register_lprintf_formatter :CONJUNCT, :conjunction
+end # module Linguistics::EN::Conjunctions

data/lib/linguistics/en/{infinitive.rb → infinitives.rb} RENAMED Viewed

@@ -1,38 +1,18 @@
 #!/usr/bin/ruby
-#
-# This file contains functions for deriving the infinitive forms of conjugated
-# English words. Requiring this file adds functions and constants to the
-# Linguistics::EN module.
-#
-# == Authors
-#
-# * Michael Granger <ged@FaerieMUD.org>
-#
-# == Acknowledgments
-#
-# This code was ported from the excellent 'Lingua::EN::Infinitive' Perl module
-# by Ron Savage, which is distributed under the following license:
-#
-#    Australian copyright (c) 1999-2002 Ron Savage.
-#
-#    	All Programs of mine are 'OSI Certified Open Source Software';
-#    	you can redistribute them and/or modify them under the terms of
-#    	The Artistic License, a copy of which is available at:
-#    	http://www.opensource.org/licenses/index.html
-#
-#
-# :include: LICENSE
-#
-#--
-#
-# Please see the file LICENSE in the base directory for licensing details.
-#
-module Linguistics::EN
+require 'linguistics/en' unless defined?( Linguistics::EN )
+# Methods for deriving the infinitive forms of conjugated words for
+# the English-language Linguistics module.
+module Linguistics::EN::Infinitives
+	# Register this module to the list of modules to include
+	Linguistics::EN.register_extension( self )
 	# :stopdoc:
 	# Irregular words => infinitive forms
-	IrregularInfinitives = {
+	IRREGULAR_INFINITIVES = {
 		'abided'			=> 'abide',
 		'abode'				=> 'abide',
 		'am'				=> 'be',
@@ -543,7 +523,7 @@ module Linguistics::EN
 	}
 	# Mapping of word suffixes to infinitive rules.
-	InfSuffixRules = {
+	INF_SUFFIX_RULES = {
 		# '<suffix>' => {
 		#	:order => <sort order>,
 		#	:rule  => <rule number>,
@@ -1008,7 +988,7 @@ module Linguistics::EN
 			:suffix2	=> '',
 		},
 	}
-	InfSuffixRuleOrder = InfSuffixRules.keys.sort_by {|rule| InfSuffixRules[rule][:order]}
+	INF_SUFFIX_RULE_ORDER = INF_SUFFIX_RULES.keys.sort_by {|rule| INF_SUFFIX_RULES[rule][:order]}
 	# :startdoc:
@@ -1037,20 +1017,26 @@ module Linguistics::EN
 		# The rule used
 		attr_reader :rule
+		### Equality operator: returns +true+ if +other+ is == to either of the receiver's words.
+		def ==( other )
+			return super(other) || @word2 == other
+		end
 	end
-	###############
-	module_function
-	###############
+	######
+	public
+	######
 	### Return the infinitive form of the given word
-	def infinitive( word )
-		word = word.to_s
+	def infinitive
+		word = self.to_s
 		word1 = word2 = suffix = rule = newword = ''
-		if IrregularInfinitives.key?( word )
-			word1	= IrregularInfinitives[ word ]
+		if IRREGULAR_INFINITIVES.key?( word )
+			word1	= IRREGULAR_INFINITIVES[ word ]
 			rule	= 'irregular'
 		else
 			# Build up $prefix{$suffix} as an array of prefixes, from longest to shortest.
@@ -1068,49 +1054,49 @@ module Linguistics::EN
 				}
 			}
-			$stderr.puts "prefixes: %p" % prefixes if $DEBUG
+			self.log.debug "prefixes: %p" % [ prefixes ]
 			# Now check for rules covering the prefixes for this word, picking
 			# the first one if one was found.
-			if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
-				rule = InfSuffixRules[ suffix ][:rule]
-				shortestPrefix = InfSuffixRules[ suffix ][:word1]
-				$stderr.puts "Using rule %p (%p) for suffix %p" %
+			if (( suffix = ((INF_SUFFIX_RULE_ORDER & prefixes.keys).first) ))
+				rule = INF_SUFFIX_RULES[ suffix ][:rule]
+				shortestPrefix = INF_SUFFIX_RULES[ suffix ][:word1]
+				self.log.debug "Using rule %p (%p) for suffix %p" %
 					[ rule, shortestPrefix, suffix ] if $DEBUG
 				case shortestPrefix
 				when 0
 					word1 = prefixes[ suffix ][ 0 ]
 					word2 = prefixes[ suffix ][ 1 ]
-					$stderr.puts "For sp = 0: word1: %p, word2: %p" %
+					self.log.debug "For sp = 0: word1: %p, word2: %p" %
 						[ word1, word2 ] if $DEBUG
 				when -1
 					word1 = prefixes[ suffix ].last +
-						InfSuffixRules[ suffix ][:suffix1]
+						INF_SUFFIX_RULES[ suffix ][:suffix1]
 					word2 = ''
-					$stderr.puts "For sp = -1: word1: %p, word2: %p" %
+					self.log.debug "For sp = -1: word1: %p, word2: %p" %
 						[ word1, word2 ] if $DEBUG
 				when -2
 					word1 = prefixes[ suffix ].last +
-						InfSuffixRules[ suffix ][:suffix1]
+						INF_SUFFIX_RULES[ suffix ][:suffix1]
 					word2 = prefixes[ suffix ].last
-					$stderr.puts "For sp = -2: word1: %p, word2: %p" %
+					self.log.debug "For sp = -2: word1: %p, word2: %p" %
 						[ word1, word2 ] if $DEBUG
 				when -3
 					word1 = prefixes[ suffix ].last +
-						InfSuffixRules[ suffix ][:suffix1]
+						INF_SUFFIX_RULES[ suffix ][:suffix1]
 					word2 = prefixes[ suffix ].last +
-						InfSuffixRules[ suffix ][:suffix2]
-					$stderr.puts "For sp = -3: word1: %p, word2: %p" %
+						INF_SUFFIX_RULES[ suffix ][:suffix2]
+					self.log.debug "For sp = -3: word1: %p, word2: %p" %
 						[ word1, word2 ] if $DEBUG
 				when -4
 					word1 = word
 					word2 = ''
-					$stderr.puts "For sp = -4: word1: %p, word2: %p" %
+					self.log.debug "For sp = -4: word1: %p, word2: %p" %
 						[ word1, word2 ] if $DEBUG
 				else
@@ -1128,7 +1114,7 @@ module Linguistics::EN
 					# Eg: tipped => tipp?
 					# Then return tip and tipp.
 					# Eg: swimming => swimm?
-					# Then return tipswim and swimm.
+					# Then return swim and swimm.
 					if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
 						word1 = $1 + $2
@@ -1138,7 +1124,7 @@ module Linguistics::EN
 			end
 		end
-		return Infinitive::new( word1, word2, suffix, rule )
+		return Infinitive.new( word1, word2, suffix, rule )
 	end
 end # module EN::Linguistics

data/lib/linguistics/en/linkparser.rb CHANGED Viewed

@@ -1,20 +1,16 @@
 #!/usr/bin/ruby
-require 'linguistics/en'
+require 'linguistics/en' unless defined?( Linguistics::EN )
-#
-# This file contains the extensions to the Linguistics::EN module which provide
-# support for the Ruby LinkParser module. LinkParser enables grammatic queries
-# of English language sentences.
-#
-# == Synopsis
+# LinkParser support for the English-language Linguistics module.
+# LinkParser enables grammatic queries of English language sentences.
 #
 #   # Test to see whether or not the link parser is loaded.
 #   Linguistics::EN.has_link_parser?
 #   # => true
 #
 #   # Diagram the first linkage for a test sentence
-#   puts "he is a big dog".sentence.linkages.first.to_s
+#   puts "he is a big dog".en.sentence.linkages.first.to_s
 # 	  +---O*---+
 # 	  | +--Ds--+
 #    +Ss+ |  +-A-+
@@ -41,54 +37,51 @@ require 'linguistics/en'
 #   has been domesticated by man since prehistoric times; occurs in many breeds;
 #   \"the dog barked all night\""
 #
-# == Authors
-#
-# * Martin Chase <stillflame@FaerieMUD.org>
-# * Michael Granger <ged@FaerieMUD.org>
-#
-# :include: LICENSE
-#
-#--
-#
-# Please see the file LICENSE in the base directory for licensing details.
-#
-module Linguistics::EN
+module Linguistics::EN::LinkParser
-	@has_link_parser	= false
-	@lp_dict			= nil
-	@lp_error			= nil
+	@has_linkparser = false
+	@lp_dict        = nil
+	@lp_error       = nil
 	begin
 		require "linkparser"
-		@has_link_parser = true
+		@has_linkparser = true
 	rescue LoadError => err
 		@lp_error = err
 	end
-	#################################################################
-	###	M O D U L E   M E T H O D S
-	#################################################################
-	class << self
+	# Container for methods intended to extend the EN module as singleton methods.
+	# NOTE(review): both readers return instance variables of whichever object
+	# is extended with this module, while the values are assigned in the
+	# LinkParser module body -- confirm that an extended EN module sees them.
+	module SingletonMethods
+		### Returns +true+ if LinkParser was loaded okay
+		def has_linkparser? ; @has_linkparser; end
+		### If #has_linkparser? returns +false+, this can be called to fetch the
+		### exception which was raised when LinkParser was loaded.
+		def linkparser_error ; @lp_error; end
+	end # module SingletonMethods
+	extend SingletonMethods
-		### Returns +true+ if LinkParser was loaded okay
-		def has_link_parser? ; @has_link_parser ; end
-		### If #has_link_parser? returns +false+, this can be called to fetch the
-		### exception which was raised when trying to load LinkParser.
-		def lp_error ; @lp_error ; end
+	# Register this module to the list of modules to include
+	Linguistics::EN.register_extension( self )
-		### The instance of LinkParser used for all Linguistics LinkParser
-		### functions.
-		def lp_dict
-			if @lp_error
-				raise NotImplementedError,
-					"LinkParser functions are not loaded: %s" %
-					@lp_error.message
-			end
+	#################################################################
+	###	M O D U L E   M E T H O D S
+	#################################################################
-			return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
+	### The instance of LinkParser used for all Linguistics LinkParser
+	### functions.
+	def self::lp_dict
+		if !self.has_linkparser?
+			raise NotImplementedError,
+				"LinkParser functions are not loaded: %s" %
+				self.lp_error.message
 		end
+		return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
 	end
@@ -96,14 +89,13 @@ module Linguistics::EN
 	###	L I N K P A R S E R   I N T E R F A C E
 	#################################################################
-	###############
-	module_function
-	###############
+	######
+	public
+	######
 	### Return a LinkParser::Sentence for the stringified +obj+.
-	def sentence( obj )
-		return Linguistics::EN::lp_dict.parse( obj.to_s )
+	def sentence
+		return Linguistics::EN::LinkParser.lp_dict.parse( self.to_s )
 	end
-	module_function :sentence
-end
+end # module Linguistics::EN::LinkParser