Linguistics 1.0.3 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -118,6 +118,20 @@ module Linguistics::EN
118
118
  re = parts.flatten.join("|")
119
119
  "(?:#{re})"
120
120
  end
121
+
122
+
123
+ @lprintf_formatters = {}
124
+ class << self
125
+ attr_accessor :lprintf_formatters
126
+ end
127
+
128
+ ### Add the specified method (which can be either a Method object or a
129
+ ### Symbol for looking up a method) as the lprintf formatter called +name+.
130
+ def self::def_lprintf_formatter( name, meth )
131
+ meth = self.method( meth ) unless meth.is_a?( Method )
132
+ self.lprintf_formatters[ name ] = meth
133
+ end
134
+
121
135
 
122
136
 
123
137
  #################################################################
@@ -668,13 +682,13 @@ module Linguistics::EN
668
682
  ###############
669
683
 
670
684
  ### Debugging output
671
- def debugMsg( *msgs ) # :nodoc:
685
+ def debug_msg( *msgs ) # :nodoc:
672
686
  $stderr.puts msgs.join(" ") if $DEBUG
673
687
  end
674
688
 
675
689
 
676
690
  ### Normalize a count to either 1 or 2 (singular or plural)
677
- def normalizeCount( count, default=2 )
691
+ def normalize_count( count, default=2 )
678
692
  return default if count.nil? # Default to plural
679
693
  if /^(#{PL_count_one})$/i =~ count.to_s ||
680
694
  Linguistics::classical? &&
@@ -713,7 +727,7 @@ module Linguistics::EN
713
727
  def pluralize_noun( word, count=nil )
714
728
  value = nil
715
729
  count ||= Linguistics::num
716
- count = normalizeCount( count )
730
+ count = normalize_count( count )
717
731
 
718
732
  return word if count == 1
719
733
 
@@ -842,7 +856,7 @@ module Linguistics::EN
842
856
  ### Pluralize special verbs
843
857
  def pluralize_special_verb( word, count )
844
858
  count ||= Linguistics::num
845
- count = normalizeCount( count )
859
+ count = normalize_count( count )
846
860
 
847
861
  return nil if /^(#{PL_count_one})$/i =~ count.to_s
848
862
 
@@ -885,7 +899,7 @@ module Linguistics::EN
885
899
  ### Pluralize regular verbs
886
900
  def pluralize_general_verb( word, count )
887
901
  count ||= Linguistics::num
888
- count = normalizeCount( count )
902
+ count = normalize_count( count )
889
903
 
890
904
  return word if /^(#{PL_count_one})$/i =~ count.to_s
891
905
 
@@ -909,7 +923,7 @@ module Linguistics::EN
909
923
  ### Handle special adjectives
910
924
  def pluralize_special_adjective( word, count )
911
925
  count ||= Linguistics::num
912
- count = normalizeCount( count )
926
+ count = normalize_count( count )
913
927
 
914
928
  return word if /^(#{PL_count_one})$/i =~ count.to_s
915
929
 
@@ -1064,10 +1078,10 @@ module Linguistics::EN
1064
1078
  # Scan the string, and call the word-chunk function that deals with
1065
1079
  # chunks of the found number of digits.
1066
1080
  num.to_s.scan( re ) {|digits|
1067
- debugMsg " digits = #{digits.inspect}"
1081
+ debug_msg " digits = #{digits.inspect}"
1068
1082
  fn = NumberToWordsFunctions[ digits.nitems ]
1069
1083
  numerals = digits.flatten.compact.collect {|i| i.to_i}
1070
- debugMsg " numerals = #{numerals.inspect}"
1084
+ debug_msg " numerals = #{numerals.inspect}"
1071
1085
  chunks.push fn.call( config[:zero], *numerals ).strip
1072
1086
  }
1073
1087
  else
@@ -1104,7 +1118,7 @@ module Linguistics::EN
1104
1118
  #################################################################
1105
1119
 
1106
1120
  ### Return the name of the language this module is for.
1107
- def language
1121
+ def language( unused=nil )
1108
1122
  "English"
1109
1123
  end
1110
1124
 
@@ -1112,6 +1126,8 @@ module Linguistics::EN
1112
1126
  ### Return the plural of the given +phrase+ if +count+ indicates it should
1113
1127
  ### be plural.
1114
1128
  def plural( phrase, count=nil )
1129
+ phrase = numwords( phrase ) if phrase.is_a?( Numeric )
1130
+
1115
1131
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1116
1132
  pre, word, post = md.to_a[1,3]
1117
1133
  return phrase if word.nil? or word.empty?
@@ -1123,7 +1139,7 @@ module Linguistics::EN
1123
1139
 
1124
1140
  return pre + plural + post
1125
1141
  end
1126
- alias_method :PL, :plural
1142
+ def_lprintf_formatter :PL, :plural
1127
1143
 
1128
1144
 
1129
1145
  ### Return the plural of the given noun +phrase+ if +count+ indicates it
@@ -1136,7 +1152,7 @@ module Linguistics::EN
1136
1152
  plural = postprocess( word, pluralize_noun(word, count) )
1137
1153
  return pre + plural + post
1138
1154
  end
1139
- alias_method :PL_N, :plural_noun
1155
+ def_lprintf_formatter :PL_N, :plural_noun
1140
1156
 
1141
1157
 
1142
1158
  ### Return the plural of the given verb +phrase+ if +count+ indicates it
@@ -1151,7 +1167,7 @@ module Linguistics::EN
1151
1167
  pluralize_general_verb(word, count) )
1152
1168
  return pre + plural + post
1153
1169
  end
1154
- alias_method :PL_V, :plural_verb
1170
+ def_lprintf_formatter :PL_V, :plural_verb
1155
1171
 
1156
1172
 
1157
1173
  ### Return the plural of the given adjectival +phrase+ if +count+ indicates
@@ -1166,7 +1182,7 @@ module Linguistics::EN
1166
1182
  return pre + plural + post
1167
1183
  end
1168
1184
  alias_method :plural_adj, :plural_adjective
1169
- alias_method :PL_ADJ, :plural_adjective
1185
+ def_lprintf_formatter :PL_ADJ, :plural_adjective
1170
1186
 
1171
1187
 
1172
1188
  ### Return the given phrase with the appropriate indefinite article ("a" or
@@ -1180,8 +1196,8 @@ module Linguistics::EN
1180
1196
  return pre + result + post
1181
1197
  end
1182
1198
  alias_method :an, :a
1183
- alias_method :A, :a
1184
- alias_method :AN, :a
1199
+ def_lprintf_formatter :A, :a
1200
+ def_lprintf_formatter :AN, :a
1185
1201
 
1186
1202
 
1187
1203
  ### Translate zero-quantified +phrase+ to "no +phrase.plural+"
@@ -1196,7 +1212,7 @@ module Linguistics::EN
1196
1212
  return "#{pre}no " + plural( word, 0 ) + post
1197
1213
  end
1198
1214
  end
1199
- alias_method :NO, :no
1215
+ def_lprintf_formatter :NO, :no
1200
1216
 
1201
1217
 
1202
1218
  ### Participles
@@ -1214,7 +1230,7 @@ module Linguistics::EN
1214
1230
  return "#{plural}ing"
1215
1231
  end
1216
1232
  alias_method :part_pres, :present_participle
1217
- alias_method :PART_PRES, :present_participle
1233
+ def_lprintf_formatter :PART_PRES, :present_participle
1218
1234
 
1219
1235
 
1220
1236
 
@@ -1223,29 +1239,31 @@ module Linguistics::EN
1223
1239
  ###
1224
1240
  ### [<b>:group</b>]
1225
1241
  ### Controls how many numbers at a time are grouped together. Valid values
1226
- ### are +0+ (normal grouping), +1+ (single-digit grouping, e.g., "one,
1227
- ### two, three, four"), +2+ (double-digit grouping, e.g., "twelve,
1228
- ### thirty-four", or +3+ (triple-digit grouping, e.g., "one twenty-three,
1229
- ### four").
1242
+ ### are <code>0</code> (normal grouping), <code>1</code> (single-digit
1243
+ ### grouping, e.g., "one, two, three, four"), <code>2</code>
1244
+ ### (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
1245
+ ### (triple-digit grouping, e.g., "one twenty-three, four").
1230
1246
  ### [<b>:comma</b>]
1231
- ### Set the character/s used to separate word groups. Defaults to +", "+.
1247
+ ### Set the character/s used to separate word groups. Defaults to
1248
+ ### <code>", "</code>.
1232
1249
  ### [<b>:and</b>]
1233
- ### Set the word and/or characters used where ' and ' (the default) is
1234
- ### normally used. Setting <tt>:and</tt> to +' '+, for example, will cause
1235
- ### +2556+ to be returned as "two-thousand, five hundred fifty-six"
1236
- ### instead of ""two-thousand, five hundred and fifty-six".
1250
+ ### Set the word and/or characters used where <code>' and '</code> (the
1251
+ ### default) is normally used. Setting <code>:and</code> to
1252
+ ### <code>' '</code>, for example, will cause <code>2556</code> to be
1253
+ ### returned as "two-thousand, five hundred fifty-six" instead of
1254
+ ### "two-thousand, five hundred and fifty-six".
1237
1255
  ### [<b>:zero</b>]
1238
- ### Set the word used to represent the numeral +0+ in the result. +'zero'+
1239
- ### is the default.
1256
+ ### Set the word used to represent the numeral <code>0</code> in the
1257
+ ### result. <code>'zero'</code> is the default.
1240
1258
  ### [<b>:decimal</b>]
1241
1259
  ### Set the translation of any decimal points in the number; the default
1242
- ### is +'point'+.
1260
+ ### is <code>'point'</code>.
1243
1261
  ### [<b>:asArray</b>]
1244
1262
  ### If set to a true value, the number will be returned as an array of
1245
1263
  ### word groups instead of a String.
1246
1264
  def numwords( number, hashargs={} )
1247
1265
  num = number.to_s
1248
- config = NumwordDefaults.dup.update( hashargs )
1266
+ config = NumwordDefaults.merge( hashargs )
1249
1267
  raise "Bad chunking option: #{config[:group]}" unless
1250
1268
  config[:group].between?( 0, 3 )
1251
1269
 
@@ -1289,11 +1307,11 @@ module Linguistics::EN
1289
1307
  unless config[:group].zero? && section.nonzero?
1290
1308
  parts.push number_to_words( chunk, config )
1291
1309
  else
1292
- parts.push number_to_words( chunk, config.dup.update(:group => 1) )
1310
+ parts.push number_to_words( chunk, config.merge(:group => 1) )
1293
1311
  end
1294
1312
  }
1295
1313
 
1296
- debugMsg "Parts => #{parts.inspect}"
1314
+ debug_msg "Parts => #{parts.inspect}"
1297
1315
 
1298
1316
  # Turn the last word of the whole-number part back into an ordinal if
1299
1317
  # the original number came in that way.
@@ -1323,7 +1341,7 @@ module Linguistics::EN
1323
1341
  # wholenum part with an 'and'. This is to get things like 'three
1324
1342
  # thousand and three' instead of 'three thousand, three'.
1325
1343
  if /^\s*(\S+)\s*$/ =~ parts[0].last
1326
- wholenum += " and #{parts[0].last}"
1344
+ wholenum += config[:and] + parts[0].last
1327
1345
  else
1328
1346
  wholenum += config[:comma] + parts[0].last
1329
1347
  end
@@ -1332,7 +1350,7 @@ module Linguistics::EN
1332
1350
  end
1333
1351
  decimals = parts[1..-1].collect {|part| part.join(" ")}
1334
1352
 
1335
- debugMsg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1353
+ debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1336
1354
 
1337
1355
  # Join with the configured decimal; if it's empty, just join with
1338
1356
  # spaces.
@@ -1351,7 +1369,7 @@ module Linguistics::EN
1351
1369
  strip
1352
1370
  end
1353
1371
  end
1354
- alias_method :NUMWORDS, :numwords
1372
+ def_lprintf_formatter :NUMWORDS, :numwords
1355
1373
 
1356
1374
 
1357
1375
  ### Transform the given +number+ into an ordinal word. The +number+ object
@@ -1365,12 +1383,18 @@ module Linguistics::EN
1365
1383
  return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
1366
1384
  end
1367
1385
  end
1368
- alias_method :ORD, :ordinal
1386
+ def_lprintf_formatter :ORD, :ordinal
1369
1387
 
1370
1388
 
1389
+ ### Transform the given +number+ into an ordinal spelled out in words, by calling +ordinal+ on the result of #numwords.
1390
+ def ordinate( number )
1391
+ numwords( number ).ordinal
1392
+ end
1393
+
1394
+
1371
1395
  ### Return a phrase describing the specified +number+ of objects in the
1372
- ### given +phrase+. The following options can be used to control the makeup
1373
- ### of the returned quantity String:
1396
+ ### given +phrase+ in general terms. The following options can be used to
1397
+ ### control the makeup of the returned quantity String:
1374
1398
  ###
1375
1399
  ### [<b>:joinword</b>]
1376
1400
  ### Sets the word (and any surrounding spaces) used as the word separating the
@@ -1378,7 +1402,7 @@ module Linguistics::EN
1378
1402
  ### '</tt>.
1379
1403
  def quantify( phrase, number=0, args={} )
1380
1404
  num = number.to_i
1381
- config = QuantifyDefaults.dup.update( args )
1405
+ config = QuantifyDefaults.merge( args )
1382
1406
 
1383
1407
  case num
1384
1408
  when 0
@@ -1422,7 +1446,10 @@ module Linguistics::EN
1422
1446
  ].compact.join( config[:joinword] )
1423
1447
  end
1424
1448
  end
1449
+ def_lprintf_formatter :QUANT, :quantify
1450
+
1425
1451
 
1452
+ # :TODO: Needs refactoring
1426
1453
 
1427
1454
  ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1428
1455
  ### method) as a conjunction. Each item is converted to a String if it is
@@ -1483,7 +1510,7 @@ module Linguistics::EN
1483
1510
  ### in the source list).
1484
1511
  ###
1485
1512
  def conjunction( obj, args={} )
1486
- config = ConjunctionDefaults.dup.update( args )
1513
+ config = ConjunctionDefaults.merge( args )
1487
1514
  phrases = []
1488
1515
 
1489
1516
  # Transform items in the obj to phrases
@@ -1574,26 +1601,29 @@ module Linguistics::EN
1574
1601
  config[:conjunctive].strip.empty? or
1575
1602
  phrases.length < 2
1576
1603
 
1577
- # Catenate the last two elements if there's no penultimate separator,
1604
+ # Concatenate the last two elements if there's no penultimate separator,
1578
1605
  # and pick a separator based on how many phrases there are and whether
1579
1606
  # or not there's already an instance of it in the phrases.
1607
+ phrase_count = phrases.length
1580
1608
  phrases[-2] << " " << phrases.pop unless config[:penultimate]
1581
- sep = if phrases.length <= 2
1582
- ' '
1583
- elsif phrases.grep( /#{config[:separator]}/ ).empty?
1584
- config[:separator]
1585
- else
1586
- config[:altsep]
1587
- end
1609
+ sep = config[:separator]
1610
+ if phrase_count <= 2
1611
+ sep = ' '
1612
+ elsif phrases.find {|str| str.include?(config[:separator]) }
1613
+ sep = config[:altsep]
1614
+ end
1588
1615
 
1589
1616
  return phrases.join( sep )
1590
1617
  end
1618
+ def_lprintf_formatter :CONJUNCT, :conjunction
1591
1619
 
1592
1620
 
1593
1621
  ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1594
1622
  ### ("camel case to english"). Each word is decapitalized.
1595
1623
  def camel_case_to_english( string )
1596
- string.to_s.gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1624
+ string.to_s.
1625
+ gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
1626
+ gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1597
1627
  end
1598
1628
 
1599
1629
 
@@ -1659,11 +1689,37 @@ module Linguistics::EN
1659
1689
  }.join
1660
1690
  end
1661
1691
 
1692
+
1693
+ ### Format the given +fmt+ string by replacing %-escaped sequences with the
1694
+ ### result of performing a specified operation on the corresponding
1695
+ ### argument, a la Kernel.sprintf.
1696
+ ### %PL::
1697
+ ### Plural.
1698
+ ### %A, %AN::
1699
+ ### Prepend indefinite article.
1700
+ ### %NO::
1701
+ ### Zero-quantified phrase.
1702
+ ### %NUMWORDS::
1703
+ ### Convert a number into the corresponding words.
1704
+ ### %CONJUNCT::
1705
+ ### Conjunction.
1706
+ def lprintf( fmt, *args )
1707
+ fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
1708
+ op = $1.to_s.upcase.to_sym
1709
+ if self.lprintf_formatters.key?( op )
1710
+ arg = args.shift
1711
+ self.lprintf_formatters[ op ].call( arg )
1712
+ else
1713
+ raise "no such formatter %p" % op
1714
+ end
1715
+ end
1716
+ end
1717
+
1662
1718
  end # module Linguistics::EN
1663
1719
 
1664
1720
 
1665
1721
  ### Add the #separate and #separate! methods to Array.
1666
- class Array # :nodoc:
1722
+ class Array
1667
1723
 
1668
1724
  ### Returns a new Array that has had a new member inserted between all of
1669
1725
  ### the current ones. The value used is the given +value+ argument unless a
@@ -31,22 +31,6 @@
31
31
  # "he is a big dog".en.sentence.object.to_s
32
32
  # # => "dog"
33
33
  #
34
- # # Look at the raw LinkParser::Word for the direct object of the sentence.
35
- # "he is a big dog".en.sentence.object
36
- # # => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
37
- # Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
38
- # Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
39
- # {R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
40
- # {R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
41
- # @left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
42
- # ...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
43
- # B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
44
- # {@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
45
- # B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
46
- # @right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
47
- # @name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
48
- # @position=4>
49
- #
50
34
  # # Combine WordNet + LinkParser to find the definition of the direct object of
51
35
  # # the sentence
52
36
  # "he is a big dog".en.sentence.object.gloss
@@ -76,15 +60,15 @@ require 'linguistics/en'
76
60
 
77
61
  module Linguistics::EN
78
62
 
79
- @hasLinkParser = false
80
- @lpParser = nil
81
- @lpError = nil
63
+ @has_link_parser = false
64
+ @lp_dict = nil
65
+ @lp_error = nil
82
66
 
83
67
  begin
84
68
  require "linkparser"
85
- @hasLinkParser = true
69
+ @has_link_parser = true
86
70
  rescue LoadError => err
87
- @lpError = err
71
+ @lp_error = err
88
72
  end
89
73
 
90
74
 
@@ -94,32 +78,22 @@ module Linguistics::EN
94
78
  class << self
95
79
 
96
80
  ### Returns +true+ if LinkParser was loaded okay
97
- def hasLinkParser? ; @hasLinkParser ; end
81
+ def has_link_parser? ; @has_link_parser ; end
98
82
 
99
- ### If #hasLinkParser? returns +false+, this can be called to fetch the
83
+ ### If #has_link_parser? returns +false+, this can be called to fetch the
100
84
  ### exception which was raised when trying to load LinkParser.
101
- def lpError ; @lpError ; end
85
+ def lp_error ; @lp_error ; end
102
86
 
103
87
  ### The instance of LinkParser used for all Linguistics LinkParser
104
88
  ### functions.
105
- def linkParser
106
- if @lpError
89
+ def lp_dict
90
+ if @lp_error
107
91
  raise NotImplementedError,
108
92
  "LinkParser functions are not loaded: %s" %
109
- @lpError.message
93
+ @lp_error.message
110
94
  end
111
95
 
112
- return @lpParser if ! @lpParser.nil?
113
-
114
- LinkParser::Word::extend( Linguistics )
115
- Linguistics::installDelegatorProxy( LinkParser::Word, :en )
116
-
117
- dictOpts = Hash.new('')
118
- dictOpts['datadir'] = '/usr/lib/ruby/site_ruby/1.8/linkparser/data'
119
- dictOpts['dict'] = 'tiny.dict'
120
- parseOpts = Hash.new
121
-
122
- @lpParser = LinkParser.new( dictOpts, parseOpts )
96
+ return @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
123
97
  end
124
98
  end
125
99
 
@@ -132,11 +106,10 @@ module Linguistics::EN
132
106
  module_function
133
107
  ###############
134
108
 
135
- ### Return a LinkParser::Sentence, with or without a sentence in it.
136
- def linkParse( sent )
137
- return Linguistics::EN::linkParser.parse( sent.to_s )
109
+ ### Return a LinkParser::Sentence for the stringified +obj+.
110
+ def sentence( obj )
111
+ return Linguistics::EN::lp_dict.parse( obj.to_s )
138
112
  end
139
- alias_method :sentence, :linkParse
140
113
  module_function :sentence
141
114
 
142
115
  end