RubyGems - Linguistics - Versions diffs - 1.0.3 → 1.0.5 - Mend

Linguistics 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

data/ChangeLog +2 -2
data/README +30 -39
data/install.rb +59 -28
data/lib/linguistics.rb +41 -31
data/lib/linguistics/en.rb +106 -50
data/lib/linguistics/en/linkparser.rb +15 -42
data/lib/linguistics/en/wordnet.rb +30 -21
data/test.rb +4 -4
data/tests/en/conjunction.tests.rb +88 -2
data/tests/en/inflect.tests.rb +16 -5
data/tests/lingtestcase.rb +6 -6
data/tests/use.tests.rb +2 -2
data/utils.rb +7 -7
metadata +42 -34

data/lib/linguistics/en.rb CHANGED

@@ -118,6 +118,20 @@ module Linguistics::EN
 		re = parts.flatten.join("|")
 		"(?:#{re})"
 	end
+	@lprintf_formatters = {}
+	class << self
+		attr_accessor :lprintf_formatters
+	end
+	### Add the specified method (which can be either a Method object or a
+	### Symbol for looking up a method)
+	def self::def_lprintf_formatter( name, meth )
+		meth = self.method( meth ) unless meth.is_a?( Method )
+		self.lprintf_formatters[ name ] = meth
+	end
 	#################################################################
@@ -668,13 +682,13 @@ module Linguistics::EN
 	###############
 	### Debugging output
-	def debugMsg( *msgs ) # :nodoc:
+	def debug_msg( *msgs ) # :nodoc:
 		$stderr.puts msgs.join(" ") if $DEBUG
 	end
 	### Normalize a count to either 1 or 2 (singular or plural)
-	def normalizeCount( count, default=2 )
+	def normalize_count( count, default=2 )
 		return default if count.nil? # Default to plural
 		if /^(#{PL_count_one})$/i =~ count.to_s ||
 				Linguistics::classical? &&
@@ -713,7 +727,7 @@ module Linguistics::EN
 	def pluralize_noun( word, count=nil )
 		value = nil
 		count ||= Linguistics::num
-		count = normalizeCount( count )
+		count = normalize_count( count )
 		return word if count == 1
@@ -842,7 +856,7 @@ module Linguistics::EN
 	### Pluralize special verbs
 	def pluralize_special_verb( word, count )
 		count ||= Linguistics::num
-		count = normalizeCount( count )
+		count = normalize_count( count )
 		return nil if /^(#{PL_count_one})$/i =~ count.to_s
@@ -885,7 +899,7 @@ module Linguistics::EN
 	### Pluralize regular verbs
 	def pluralize_general_verb( word, count )
 		count ||= Linguistics::num
-		count = normalizeCount( count )
+		count = normalize_count( count )
 		return word if /^(#{PL_count_one})$/i =~ count.to_s
@@ -909,7 +923,7 @@ module Linguistics::EN
 	### Handle special adjectives
 	def pluralize_special_adjective( word, count )
 		count ||= Linguistics::num
-		count = normalizeCount( count )
+		count = normalize_count( count )
 		return word if /^(#{PL_count_one})$/i =~ count.to_s
@@ -1064,10 +1078,10 @@ module Linguistics::EN
 			# Scan the string, and call the word-chunk function that deals with
 			# chunks of the found number of digits.
 			num.to_s.scan( re ) {|digits|
-				debugMsg "   digits = #{digits.inspect}"
+				debug_msg "   digits = #{digits.inspect}"
 				fn = NumberToWordsFunctions[ digits.nitems ]
 				numerals = digits.flatten.compact.collect {|i| i.to_i}
-				debugMsg "   numerals = #{numerals.inspect}"
+				debug_msg "   numerals = #{numerals.inspect}"
 				chunks.push fn.call( config[:zero], *numerals ).strip
 			}
 		else
@@ -1104,7 +1118,7 @@ module Linguistics::EN
 	#################################################################
 	### Return the name of the language this module is for.
-	def language
+	def language( unused=nil )
 		"English"
 	end
@@ -1112,6 +1126,8 @@ module Linguistics::EN
 	### Return the plural of the given +phrase+ if +count+ indicates it should
 	### be plural.
 	def plural( phrase, count=nil )
+		phrase = numwords( phrase ) if phrase.is_a?( Numeric )
 		md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
 		pre, word, post = md.to_a[1,3]
 		return phrase if word.nil? or word.empty?
@@ -1123,7 +1139,7 @@ module Linguistics::EN
 		return pre + plural + post
 	end
-	alias_method :PL, :plural
+	def_lprintf_formatter :PL, :plural
 	### Return the plural of the given noun +phrase+ if +count+ indicates it
@@ -1136,7 +1152,7 @@ module Linguistics::EN
 		plural = postprocess( word, pluralize_noun(word, count) )
 		return pre + plural + post
 	end
-	alias_method :PL_N, :plural_noun
+	def_lprintf_formatter :PL_N, :plural_noun
 	### Return the plural of the given verb +phrase+ if +count+ indicates it
@@ -1151,7 +1167,7 @@ module Linguistics::EN
 			pluralize_general_verb(word, count) )
 		return pre + plural + post
 	end
-	alias_method :PL_V, :plural_verb
+	def_lprintf_formatter :PL_V, :plural_verb
 	### Return the plural of the given adjectival +phrase+ if +count+ indicates
@@ -1166,7 +1182,7 @@ module Linguistics::EN
 		return pre + plural + post
 	end
 	alias_method :plural_adj, :plural_adjective
-	alias_method :PL_ADJ, :plural_adjective
+	def_lprintf_formatter :PL_ADJ, :plural_adjective
 	### Return the given phrase with the appropriate indefinite article ("a" or
@@ -1180,8 +1196,8 @@ module Linguistics::EN
 		return pre + result + post
 	end
 	alias_method :an, :a
-	alias_method :A, :a
-	alias_method :AN, :a
+	def_lprintf_formatter :A, :a
+	def_lprintf_formatter :AN, :a
 	### Translate zero-quantified +phrase+ to "no +phrase.plural+"
@@ -1196,7 +1212,7 @@ module Linguistics::EN
 			return "#{pre}no " + plural( word, 0 ) + post
 		end
 	end
-	alias_method :NO, :no
+	def_lprintf_formatter :NO, :no
 	### Participles
@@ -1214,7 +1230,7 @@ module Linguistics::EN
         return "#{plural}ing"
 	end
 	alias_method :part_pres, :present_participle
-	alias_method :PART_PRES, :present_participle
+	def_lprintf_formatter :PART_PRES, :present_participle
@@ -1223,29 +1239,31 @@ module Linguistics::EN
 	###
 	### [<b>:group</b>]
 	###   Controls how many numbers at a time are grouped together. Valid values
-	###   are +0+ (normal grouping), +1+ (single-digit grouping, e.g., "one,
-	###   two, three, four"), +2+ (double-digit grouping, e.g., "twelve,
-	###   thirty-four", or +3+ (triple-digit grouping, e.g., "one twenty-three,
-	###   four").
+	###   are <code>0</code> (normal grouping), <code>1</code> (single-digit
+	###   grouping, e.g., "one, two, three, four"), <code>2</code>
+	###   (double-digit grouping, e.g., "twelve, thirty-four", or <code>3</code>
+	###   (triple-digit grouping, e.g., "one twenty-three, four").
 	### [<b>:comma</b>]
-	###   Set the character/s used to separate word groups. Defaults to +", "+.
+	###   Set the character/s used to separate word groups. Defaults to
+	###   <code>", "</code>.
 	### [<b>:and</b>]
-	###   Set the word and/or characters used where ' and ' (the default) is
-	###   normally used. Setting <tt>:and</tt> to +' '+, for example, will cause
-	###   +2556+ to be returned as "two-thousand, five hundred fifty-six"
-	###   instead of ""two-thousand, five hundred and fifty-six".
+	###   Set the word and/or characters used where <code>' and ' </code>(the
+	###   default) is normally used. Setting <code>:and</code> to
+	###   <code>' '</code>, for example, will cause <code>2556</code> to be
+	###   returned as "two-thousand, five hundred fifty-six" instead of
+	###   "two-thousand, five hundred and fifty-six".
 	### [<b>:zero</b>]
-	###   Set the word used to represent the numeral +0+ in the result. +'zero'+
-	###   is the default.
+	###   Set the word used to represent the numeral <code>0</code> in the
+	###   result. <code>'zero'</code> is the default.
 	### [<b>:decimal</b>]
 	###   Set the translation of any decimal points in the number; the default
-	###   is +'point'+.
+	###   is <code>'point'</code>.
 	### [<b>:asArray</b>]
 	###   If set to a true value, the number will be returned as an array of
 	###   word groups instead of a String.
 	def numwords( number, hashargs={} )
 		num = number.to_s
-		config = NumwordDefaults.dup.update( hashargs )
+		config = NumwordDefaults.merge( hashargs )
 		raise "Bad chunking option: #{config[:group]}" unless
 			config[:group].between?( 0, 3 )
@@ -1289,11 +1307,11 @@ module Linguistics::EN
 			unless config[:group].zero? && section.nonzero?
 				parts.push number_to_words( chunk, config )
 			else
-				parts.push number_to_words( chunk, config.dup.update(:group => 1) )
+				parts.push number_to_words( chunk, config.merge(:group => 1) )
 			end
 		}
-		debugMsg "Parts => #{parts.inspect}"
+		debug_msg "Parts => #{parts.inspect}"
 		# Turn the last word of the whole-number part back into an ordinal if
 		# the original number came in that way.
@@ -1323,7 +1341,7 @@ module Linguistics::EN
 				# wholenum part with an 'and'. This is to get things like 'three
 				# thousand and three' instead of 'three thousand, three'.
 				if /^\s*(\S+)\s*$/ =~ parts[0].last
-					wholenum += " and #{parts[0].last}"
+					wholenum += config[:and] + parts[0].last
 				else
 					wholenum += config[:comma] + parts[0].last
 				end
@@ -1332,7 +1350,7 @@ module Linguistics::EN
 			end
 			decimals = parts[1..-1].collect {|part| part.join(" ")}
-			debugMsg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
+			debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
 			# Join with the configured decimal; if it's empty, just join with
 			# spaces.
@@ -1351,7 +1369,7 @@ module Linguistics::EN
 				strip
 		end
 	end
-	alias_method :NUMWORDS, :numwords
+	def_lprintf_formatter :NUMWORDS, :numwords
 	### Transform the given +number+ into an ordinal word. The +number+ object
@@ -1365,12 +1383,18 @@ module Linguistics::EN
 			return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
 		end
 	end
-	alias_method :ORD, :ordinal
+	def_lprintf_formatter :ORD, :ordinal
+	### Transform the given +number+ into an ordinate word.
+	def ordinate( number )
+		numwords( number ).ordinal
+	end
 	### Return a phrase describing the specified +number+ of objects in the
-	### given +phrase+. The following options can be used to control the makeup
-	### of the returned quantity String:
+	### given +phrase+ in general terms. The following options can be used to
+	### control the makeup of the returned quantity String:
 	###
     ### [<b>:joinword</b>]
     ###   Sets the word (and any surrounding spaces) used as the word separating the
@@ -1378,7 +1402,7 @@ module Linguistics::EN
     ###   '</tt>.
 	def quantify( phrase, number=0, args={} )
 		num = number.to_i
-		config = QuantifyDefaults.dup.update( args )
+		config = QuantifyDefaults.merge( args )
 		case num
 		when 0
@@ -1422,7 +1446,10 @@ module Linguistics::EN
 			].compact.join( config[:joinword] )
 		end
 	end
+	def_lprintf_formatter :QUANT, :quantify
+	# :TODO: Needs refactoring
     ### Return the specified +obj+ (which must support the <tt>#collect</tt>
     ### method) as a conjunction. Each item is converted to a String if it is
@@ -1483,7 +1510,7 @@ module Linguistics::EN
     ###   in the source list).
     ###
 	def conjunction( obj, args={} )
-		config = ConjunctionDefaults.dup.update( args )
+		config = ConjunctionDefaults.merge( args )
 		phrases = []
 		# Transform items in the obj to phrases
@@ -1574,26 +1601,29 @@ module Linguistics::EN
 			config[:conjunctive].strip.empty? or
 			phrases.length < 2
-		# Catenate the last two elements if there's no penultimate separator,
+		# Concatenate the last two elements if there's no penultimate separator,
 		# and pick a separator based on how many phrases there are and whether
 		# or not there's already an instance of it in the phrases.
+		phrase_count = phrases.length
 		phrases[-2] << " " << phrases.pop unless config[:penultimate]
-		sep = if phrases.length <= 2
-				  ' '
-			  elsif phrases.grep( /#{config[:separator]}/ ).empty?
-				  config[:separator]
-			  else
-				  config[:altsep]
-			  end
+		sep = config[:separator]
+		if phrase_count <= 2
+			sep = ' '
+		elsif phrases.find {|str| str.include?(config[:separator]) }
+			sep = config[:altsep]
+		end
 		return phrases.join( sep )
 	end
+	def_lprintf_formatter :CONJUNCT, :conjunction
 	### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
 	### ("camel case to english"). Each word is decapitalized.
 	def camel_case_to_english( string )
-		string.to_s.gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
+		string.to_s.
+			gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
+			gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
 	end
@@ -1659,11 +1689,37 @@ module Linguistics::EN
 		}.join
 	end
+	### Format the given +fmt+ string by replacing %-escaped sequences with the
+	### result of performing a specified operation on the corresponding
+	### argument, ala Kernel.sprintf.
+	### %PL::
+	###   Plural.
+	### %A, %AN::
+	###   Prepend indefinite article.
+	### %NO::
+	###   Zero-quantified phrase.
+	### %NUMWORDS::
+	###   Convert a number into the corresponding words.
+	### %CONJUNCT::
+	###   Conjunction.
+	def lprintf( fmt, *args )
+		fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
+			op = $1.to_s.upcase.to_sym
+			if self.lprintf_formatters.key?( op )
+				arg = args.shift
+				self.lprintf_formatters[ op ].call( arg )
+			else
+				raise "no such formatter %p" % op
+			end
+		end
+	end
 end # module Linguistics::EN
 ### Add the #separate and #separate! methods to Array.
-class Array # :nodoc:
+class Array
 	### Returns a new Array that has had a new member inserted between all of
 	### the current ones. The value used is the given +value+ argument unless a

data/lib/linguistics/en/linkparser.rb CHANGED

@@ -31,22 +31,6 @@
 #   "he is a big dog".en.sentence.object.to_s
 #   # => "dog"
 #
-#   # Look at the raw LinkParser::Word for the direct object of the sentence.
-#   "he is a big dog".en.sentence.object
-#   # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
-#   Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
-#   Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
-#   {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
-#   {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
-#   @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
-#   ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
-#   B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
-#   {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
-#   B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
-#   @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
-#   @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
-#   @position=4>
-#
 #   # Combine WordNet + LinkParser to find the definition of the direct object of
 #   # the sentence
 #   "he is a big dog".en.sentence.object.gloss
@@ -76,15 +60,15 @@ require 'linguistics/en'
 module Linguistics::EN
-	@hasLinkParser	= false
-	@lpParser		= nil
-	@lpError		= nil
+	@has_link_parser	= false
+	@lp_dict			= nil
+	@lp_error			= nil
 	begin
 		require "linkparser"
-		@hasLinkParser = true
+		@has_link_parser = true
 	rescue LoadError => err
-		@lpError = err
+		@lp_error = err
 	end
@@ -94,32 +78,22 @@ module Linguistics::EN
 	class << self
 		### Returns +true+ if LinkParser was loaded okay
-		def hasLinkParser? ; @hasLinkParser ; end
+		def has_link_parser? ; @has_link_parser ; end
-		### If #hasLinkParser? returns +false+, this can be called to fetch the
+		### If #has_link_parser? returns +false+, this can be called to fetch the
 		### exception which was raised when trying to load LinkParser.
-		def lpError ; @lpError ; end
+		def lp_error ; @lp_error ; end
 		### The instance of LinkParser used for all Linguistics LinkParser
 		### functions.
-		def linkParser
-			if @lpError
+		def lp_dict
+			if @lp_error
 				raise NotImplementedError,
 					"LinkParser functions are not loaded: %s" %
-					@lpError.message
+					@lp_error.message
 			end
-			return @lpParser if ! @lpParser.nil?
-			LinkParser::Word::extend( Linguistics )
-			Linguistics::installDelegatorProxy( LinkParser::Word, :en )
-			dictOpts = Hash.new('')
-			dictOpts['datadir'] = '/usr/lib/ruby/site_ruby/1.8/linkparser/data'
-			dictOpts['dict'] = 'tiny.dict'
-			parseOpts = Hash.new
-			@lpParser = LinkParser.new( dictOpts, parseOpts )
+			return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
 		end
 	end
@@ -132,11 +106,10 @@ module Linguistics::EN
 	module_function
 	###############
-	### Return a LinkParser::Sentence, with or without a sentence in it.
-	def linkParse( sent )
-		return Linguistics::EN::linkParser.parse( sent.to_s )
+	### Return a LinkParser::Sentence for the stringified +obj+.
+	def sentence( obj )
+		return Linguistics::EN::lp_dict.parse( obj.to_s )
 	end
-	alias_method :sentence, :linkParse
 	module_function :sentence
 end