Linguistics 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,6 +118,20 @@ module Linguistics::EN
118
118
  re = parts.flatten.join("|")
119
119
  "(?:#{re})"
120
120
  end
121
+
122
+
123
+ @lprintf_formatters = {}
124
+ class << self
125
+ attr_accessor :lprintf_formatters
126
+ end
127
+
128
+ ### Add the specified method (which can be either a Method object or a
129
+ ### Symbol for looking up a method)
130
+ def self::def_lprintf_formatter( name, meth )
131
+ meth = self.method( meth ) unless meth.is_a?( Method )
132
+ self.lprintf_formatters[ name ] = meth
133
+ end
134
+
121
135
 
122
136
 
123
137
  #################################################################
@@ -668,13 +682,13 @@ module Linguistics::EN
668
682
  ###############
669
683
 
670
684
  ### Debugging output
671
- def debugMsg( *msgs ) # :nodoc:
685
+ def debug_msg( *msgs ) # :nodoc:
672
686
  $stderr.puts msgs.join(" ") if $DEBUG
673
687
  end
674
688
 
675
689
 
676
690
  ### Normalize a count to either 1 or 2 (singular or plural)
677
- def normalizeCount( count, default=2 )
691
+ def normalize_count( count, default=2 )
678
692
  return default if count.nil? # Default to plural
679
693
  if /^(#{PL_count_one})$/i =~ count.to_s ||
680
694
  Linguistics::classical? &&
@@ -713,7 +727,7 @@ module Linguistics::EN
713
727
  def pluralize_noun( word, count=nil )
714
728
  value = nil
715
729
  count ||= Linguistics::num
716
- count = normalizeCount( count )
730
+ count = normalize_count( count )
717
731
 
718
732
  return word if count == 1
719
733
 
@@ -842,7 +856,7 @@ module Linguistics::EN
842
856
  ### Pluralize special verbs
843
857
  def pluralize_special_verb( word, count )
844
858
  count ||= Linguistics::num
845
- count = normalizeCount( count )
859
+ count = normalize_count( count )
846
860
 
847
861
  return nil if /^(#{PL_count_one})$/i =~ count.to_s
848
862
 
@@ -885,7 +899,7 @@ module Linguistics::EN
885
899
  ### Pluralize regular verbs
886
900
  def pluralize_general_verb( word, count )
887
901
  count ||= Linguistics::num
888
- count = normalizeCount( count )
902
+ count = normalize_count( count )
889
903
 
890
904
  return word if /^(#{PL_count_one})$/i =~ count.to_s
891
905
 
@@ -909,7 +923,7 @@ module Linguistics::EN
909
923
  ### Handle special adjectives
910
924
  def pluralize_special_adjective( word, count )
911
925
  count ||= Linguistics::num
912
- count = normalizeCount( count )
926
+ count = normalize_count( count )
913
927
 
914
928
  return word if /^(#{PL_count_one})$/i =~ count.to_s
915
929
 
@@ -1064,10 +1078,10 @@ module Linguistics::EN
1064
1078
  # Scan the string, and call the word-chunk function that deals with
1065
1079
  # chunks of the found number of digits.
1066
1080
  num.to_s.scan( re ) {|digits|
1067
- debugMsg " digits = #{digits.inspect}"
1081
+ debug_msg " digits = #{digits.inspect}"
1068
1082
  fn = NumberToWordsFunctions[ digits.nitems ]
1069
1083
  numerals = digits.flatten.compact.collect {|i| i.to_i}
1070
- debugMsg " numerals = #{numerals.inspect}"
1084
+ debug_msg " numerals = #{numerals.inspect}"
1071
1085
  chunks.push fn.call( config[:zero], *numerals ).strip
1072
1086
  }
1073
1087
  else
@@ -1104,7 +1118,7 @@ module Linguistics::EN
1104
1118
  #################################################################
1105
1119
 
1106
1120
  ### Return the name of the language this module is for.
1107
- def language
1121
+ def language( unused=nil )
1108
1122
  "English"
1109
1123
  end
1110
1124
 
@@ -1112,6 +1126,8 @@ module Linguistics::EN
1112
1126
  ### Return the plural of the given +phrase+ if +count+ indicates it should
1113
1127
  ### be plural.
1114
1128
  def plural( phrase, count=nil )
1129
+ phrase = numwords( phrase ) if phrase.is_a?( Numeric )
1130
+
1115
1131
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1116
1132
  pre, word, post = md.to_a[1,3]
1117
1133
  return phrase if word.nil? or word.empty?
@@ -1123,7 +1139,7 @@ module Linguistics::EN
1123
1139
 
1124
1140
  return pre + plural + post
1125
1141
  end
1126
- alias_method :PL, :plural
1142
+ def_lprintf_formatter :PL, :plural
1127
1143
 
1128
1144
 
1129
1145
  ### Return the plural of the given noun +phrase+ if +count+ indicates it
@@ -1136,7 +1152,7 @@ module Linguistics::EN
1136
1152
  plural = postprocess( word, pluralize_noun(word, count) )
1137
1153
  return pre + plural + post
1138
1154
  end
1139
- alias_method :PL_N, :plural_noun
1155
+ def_lprintf_formatter :PL_N, :plural_noun
1140
1156
 
1141
1157
 
1142
1158
  ### Return the plural of the given verb +phrase+ if +count+ indicates it
@@ -1151,7 +1167,7 @@ module Linguistics::EN
1151
1167
  pluralize_general_verb(word, count) )
1152
1168
  return pre + plural + post
1153
1169
  end
1154
- alias_method :PL_V, :plural_verb
1170
+ def_lprintf_formatter :PL_V, :plural_verb
1155
1171
 
1156
1172
 
1157
1173
  ### Return the plural of the given adjectival +phrase+ if +count+ indicates
@@ -1166,7 +1182,7 @@ module Linguistics::EN
1166
1182
  return pre + plural + post
1167
1183
  end
1168
1184
  alias_method :plural_adj, :plural_adjective
1169
- alias_method :PL_ADJ, :plural_adjective
1185
+ def_lprintf_formatter :PL_ADJ, :plural_adjective
1170
1186
 
1171
1187
 
1172
1188
  ### Return the given phrase with the appropriate indefinite article ("a" or
@@ -1180,8 +1196,8 @@ module Linguistics::EN
1180
1196
  return pre + result + post
1181
1197
  end
1182
1198
  alias_method :an, :a
1183
- alias_method :A, :a
1184
- alias_method :AN, :a
1199
+ def_lprintf_formatter :A, :a
1200
+ def_lprintf_formatter :AN, :a
1185
1201
 
1186
1202
 
1187
1203
  ### Translate zero-quantified +phrase+ to "no +phrase.plural+"
@@ -1196,7 +1212,7 @@ module Linguistics::EN
1196
1212
  return "#{pre}no " + plural( word, 0 ) + post
1197
1213
  end
1198
1214
  end
1199
- alias_method :NO, :no
1215
+ def_lprintf_formatter :NO, :no
1200
1216
 
1201
1217
 
1202
1218
  ### Participles
@@ -1214,7 +1230,7 @@ module Linguistics::EN
1214
1230
  return "#{plural}ing"
1215
1231
  end
1216
1232
  alias_method :part_pres, :present_participle
1217
- alias_method :PART_PRES, :present_participle
1233
+ def_lprintf_formatter :PART_PRES, :present_participle
1218
1234
 
1219
1235
 
1220
1236
 
@@ -1223,29 +1239,31 @@ module Linguistics::EN
1223
1239
  ###
1224
1240
  ### [<b>:group</b>]
1225
1241
  ### Controls how many numbers at a time are grouped together. Valid values
1226
- ### are +0+ (normal grouping), +1+ (single-digit grouping, e.g., "one,
1227
- ### two, three, four"), +2+ (double-digit grouping, e.g., "twelve,
1228
- ### thirty-four", or +3+ (triple-digit grouping, e.g., "one twenty-three,
1229
- ### four").
1242
+ ### are <code>0</code> (normal grouping), <code>1</code> (single-digit
1243
+ ### grouping, e.g., "one, two, three, four"), <code>2</code>
1244
+ ### (double-digit grouping, e.g., "twelve, thirty-four", or <code>3</code>
1245
+ ### (triple-digit grouping, e.g., "one twenty-three, four").
1230
1246
  ### [<b>:comma</b>]
1231
- ### Set the character/s used to separate word groups. Defaults to +", "+.
1247
+ ### Set the character/s used to separate word groups. Defaults to
1248
+ ### <code>", "</code>.
1232
1249
  ### [<b>:and</b>]
1233
- ### Set the word and/or characters used where ' and ' (the default) is
1234
- ### normally used. Setting <tt>:and</tt> to +' '+, for example, will cause
1235
- ### +2556+ to be returned as "two-thousand, five hundred fifty-six"
1236
- ### instead of ""two-thousand, five hundred and fifty-six".
1250
+ ### Set the word and/or characters used where <code>' and ' </code>(the
1251
+ ### default) is normally used. Setting <code>:and</code> to
1252
+ ### <code>' '</code>, for example, will cause <code>2556</code> to be
1253
+ ### returned as "two-thousand, five hundred fifty-six" instead of
1254
+ ### "two-thousand, five hundred and fifty-six".
1237
1255
  ### [<b>:zero</b>]
1238
- ### Set the word used to represent the numeral +0+ in the result. +'zero'+
1239
- ### is the default.
1256
+ ### Set the word used to represent the numeral <code>0</code> in the
1257
+ ### result. <code>'zero'</code> is the default.
1240
1258
  ### [<b>:decimal</b>]
1241
1259
  ### Set the translation of any decimal points in the number; the default
1242
- ### is +'point'+.
1260
+ ### is <code>'point'</code>.
1243
1261
  ### [<b>:asArray</b>]
1244
1262
  ### If set to a true value, the number will be returned as an array of
1245
1263
  ### word groups instead of a String.
1246
1264
  def numwords( number, hashargs={} )
1247
1265
  num = number.to_s
1248
- config = NumwordDefaults.dup.update( hashargs )
1266
+ config = NumwordDefaults.merge( hashargs )
1249
1267
  raise "Bad chunking option: #{config[:group]}" unless
1250
1268
  config[:group].between?( 0, 3 )
1251
1269
 
@@ -1289,11 +1307,11 @@ module Linguistics::EN
1289
1307
  unless config[:group].zero? && section.nonzero?
1290
1308
  parts.push number_to_words( chunk, config )
1291
1309
  else
1292
- parts.push number_to_words( chunk, config.dup.update(:group => 1) )
1310
+ parts.push number_to_words( chunk, config.merge(:group => 1) )
1293
1311
  end
1294
1312
  }
1295
1313
 
1296
- debugMsg "Parts => #{parts.inspect}"
1314
+ debug_msg "Parts => #{parts.inspect}"
1297
1315
 
1298
1316
  # Turn the last word of the whole-number part back into an ordinal if
1299
1317
  # the original number came in that way.
@@ -1323,7 +1341,7 @@ module Linguistics::EN
1323
1341
  # wholenum part with an 'and'. This is to get things like 'three
1324
1342
  # thousand and three' instead of 'three thousand, three'.
1325
1343
  if /^\s*(\S+)\s*$/ =~ parts[0].last
1326
- wholenum += " and #{parts[0].last}"
1344
+ wholenum += config[:and] + parts[0].last
1327
1345
  else
1328
1346
  wholenum += config[:comma] + parts[0].last
1329
1347
  end
@@ -1332,7 +1350,7 @@ module Linguistics::EN
1332
1350
  end
1333
1351
  decimals = parts[1..-1].collect {|part| part.join(" ")}
1334
1352
 
1335
- debugMsg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1353
+ debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1336
1354
 
1337
1355
  # Join with the configured decimal; if it's empty, just join with
1338
1356
  # spaces.
@@ -1351,7 +1369,7 @@ module Linguistics::EN
1351
1369
  strip
1352
1370
  end
1353
1371
  end
1354
- alias_method :NUMWORDS, :numwords
1372
+ def_lprintf_formatter :NUMWORDS, :numwords
1355
1373
 
1356
1374
 
1357
1375
  ### Transform the given +number+ into an ordinal word. The +number+ object
@@ -1365,12 +1383,18 @@ module Linguistics::EN
1365
1383
  return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
1366
1384
  end
1367
1385
  end
1368
- alias_method :ORD, :ordinal
1386
+ def_lprintf_formatter :ORD, :ordinal
1369
1387
 
1370
1388
 
1389
+ ### Transform the given +number+ into an ordinate word.
1390
+ def ordinate( number )
1391
+ numwords( number ).ordinal
1392
+ end
1393
+
1394
+
1371
1395
  ### Return a phrase describing the specified +number+ of objects in the
1372
- ### given +phrase+. The following options can be used to control the makeup
1373
- ### of the returned quantity String:
1396
+ ### given +phrase+ in general terms. The following options can be used to
1397
+ ### control the makeup of the returned quantity String:
1374
1398
  ###
1375
1399
  ### [<b>:joinword</b>]
1376
1400
  ### Sets the word (and any surrounding spaces) used as the word separating the
@@ -1378,7 +1402,7 @@ module Linguistics::EN
1378
1402
  ### '</tt>.
1379
1403
  def quantify( phrase, number=0, args={} )
1380
1404
  num = number.to_i
1381
- config = QuantifyDefaults.dup.update( args )
1405
+ config = QuantifyDefaults.merge( args )
1382
1406
 
1383
1407
  case num
1384
1408
  when 0
@@ -1422,7 +1446,10 @@ module Linguistics::EN
1422
1446
  ].compact.join( config[:joinword] )
1423
1447
  end
1424
1448
  end
1449
+ def_lprintf_formatter :QUANT, :quantify
1450
+
1425
1451
 
1452
+ # :TODO: Needs refactoring
1426
1453
 
1427
1454
  ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1428
1455
  ### method) as a conjunction. Each item is converted to a String if it is
@@ -1483,7 +1510,7 @@ module Linguistics::EN
1483
1510
  ### in the source list).
1484
1511
  ###
1485
1512
  def conjunction( obj, args={} )
1486
- config = ConjunctionDefaults.dup.update( args )
1513
+ config = ConjunctionDefaults.merge( args )
1487
1514
  phrases = []
1488
1515
 
1489
1516
  # Transform items in the obj to phrases
@@ -1574,26 +1601,29 @@ module Linguistics::EN
1574
1601
  config[:conjunctive].strip.empty? or
1575
1602
  phrases.length < 2
1576
1603
 
1577
- # Catenate the last two elements if there's no penultimate separator,
1604
+ # Concatenate the last two elements if there's no penultimate separator,
1578
1605
  # and pick a separator based on how many phrases there are and whether
1579
1606
  # or not there's already an instance of it in the phrases.
1607
+ phrase_count = phrases.length
1580
1608
  phrases[-2] << " " << phrases.pop unless config[:penultimate]
1581
- sep = if phrases.length <= 2
1582
- ' '
1583
- elsif phrases.grep( /#{config[:separator]}/ ).empty?
1584
- config[:separator]
1585
- else
1586
- config[:altsep]
1587
- end
1609
+ sep = config[:separator]
1610
+ if phrase_count <= 2
1611
+ sep = ' '
1612
+ elsif phrases.find {|str| str.include?(config[:separator]) }
1613
+ sep = config[:altsep]
1614
+ end
1588
1615
 
1589
1616
  return phrases.join( sep )
1590
1617
  end
1618
+ def_lprintf_formatter :CONJUNCT, :conjunction
1591
1619
 
1592
1620
 
1593
1621
  ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1594
1622
  ### ("camel case to english"). Each word is decapitalized.
1595
1623
  def camel_case_to_english( string )
1596
- string.to_s.gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1624
+ string.to_s.
1625
+ gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
1626
+ gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1597
1627
  end
1598
1628
 
1599
1629
 
@@ -1659,11 +1689,37 @@ module Linguistics::EN
1659
1689
  }.join
1660
1690
  end
1661
1691
 
1692
+
1693
+ ### Format the given +fmt+ string by replacing %-escaped sequences with the
1694
+ ### result of performing a specified operation on the corresponding
1695
+ ### argument, ala Kernel.sprintf.
1696
+ ### %PL::
1697
+ ### Plural.
1698
+ ### %A, %AN::
1699
+ ### Prepend indefinite article.
1700
+ ### %NO::
1701
+ ### Zero-quantified phrase.
1702
+ ### %NUMWORDS::
1703
+ ### Convert a number into the corresponding words.
1704
+ ### %CONJUNCT::
1705
+ ### Conjunction.
1706
+ def lprintf( fmt, *args )
1707
+ fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
1708
+ op = $1.to_s.upcase.to_sym
1709
+ if self.lprintf_formatters.key?( op )
1710
+ arg = args.shift
1711
+ self.lprintf_formatters[ op ].call( arg )
1712
+ else
1713
+ raise "no such formatter %p" % op
1714
+ end
1715
+ end
1716
+ end
1717
+
1662
1718
  end # module Linguistics::EN
1663
1719
 
1664
1720
 
1665
1721
  ### Add the #separate and #separate! methods to Array.
1666
- class Array # :nodoc:
1722
+ class Array
1667
1723
 
1668
1724
  ### Returns a new Array that has had a new member inserted between all of
1669
1725
  ### the current ones. The value used is the given +value+ argument unless a
@@ -31,22 +31,6 @@
31
31
  # "he is a big dog".en.sentence.object.to_s
32
32
  # # => "dog"
33
33
  #
34
- # # Look at the raw LinkParser::Word for the direct object of the sentence.
35
- # "he is a big dog".en.sentence.object
36
- # # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
37
- # Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
38
- # Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
39
- # {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
40
- # {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
41
- # @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
42
- # ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
43
- # B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
44
- # {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
45
- # B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
46
- # @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
47
- # @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
48
- # @position=4>
49
- #
50
34
  # # Combine WordNet + LinkParser to find the definition of the direct object of
51
35
  # # the sentence
52
36
  # "he is a big dog".en.sentence.object.gloss
@@ -76,15 +60,15 @@ require 'linguistics/en'
76
60
 
77
61
  module Linguistics::EN
78
62
 
79
- @hasLinkParser = false
80
- @lpParser = nil
81
- @lpError = nil
63
+ @has_link_parser = false
64
+ @lp_dict = nil
65
+ @lp_error = nil
82
66
 
83
67
  begin
84
68
  require "linkparser"
85
- @hasLinkParser = true
69
+ @has_link_parser = true
86
70
  rescue LoadError => err
87
- @lpError = err
71
+ @lp_error = err
88
72
  end
89
73
 
90
74
 
@@ -94,32 +78,22 @@ module Linguistics::EN
94
78
  class << self
95
79
 
96
80
  ### Returns +true+ if LinkParser was loaded okay
97
- def hasLinkParser? ; @hasLinkParser ; end
81
+ def has_link_parser? ; @has_link_parser ; end
98
82
 
99
- ### If #hasLinkParser? returns +false+, this can be called to fetch the
83
+ ### If #has_link_parser? returns +false+, this can be called to fetch the
100
84
  ### exception which was raised when trying to load LinkParser.
101
- def lpError ; @lpError ; end
85
+ def lp_error ; @lp_error ; end
102
86
 
103
87
  ### The instance of LinkParser used for all Linguistics LinkParser
104
88
  ### functions.
105
- def linkParser
106
- if @lpError
89
+ def lp_dict
90
+ if @lp_error
107
91
  raise NotImplementedError,
108
92
  "LinkParser functions are not loaded: %s" %
109
- @lpError.message
93
+ @lp_error.message
110
94
  end
111
95
 
112
- return @lpParser if ! @lpParser.nil?
113
-
114
- LinkParser::Word::extend( Linguistics )
115
- Linguistics::installDelegatorProxy( LinkParser::Word, :en )
116
-
117
- dictOpts = Hash.new('')
118
- dictOpts['datadir'] = '/usr/lib/ruby/site_ruby/1.8/linkparser/data'
119
- dictOpts['dict'] = 'tiny.dict'
120
- parseOpts = Hash.new
121
-
122
- @lpParser = LinkParser.new( dictOpts, parseOpts )
96
+ return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
123
97
  end
124
98
  end
125
99
 
@@ -132,11 +106,10 @@ module Linguistics::EN
132
106
  module_function
133
107
  ###############
134
108
 
135
- ### Return a LinkParser::Sentence, with or without a sentence in it.
136
- def linkParse( sent )
137
- return Linguistics::EN::linkParser.parse( sent.to_s )
109
+ ### Return a LinkParser::Sentence for the stringified +obj+.
110
+ def sentence( obj )
111
+ return Linguistics::EN::lp_dict.parse( obj.to_s )
138
112
  end
139
- alias_method :sentence, :linkParse
140
113
  module_function :sentence
141
114
 
142
115
  end