treat 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. data/LICENSE +4 -4
  2. data/TODO +21 -54
  3. data/lib/economist/half_cocked_basel.txt +16 -0
  4. data/lib/economist/hose_and_dry.doc +0 -0
  5. data/lib/economist/hungarys_troubles.abw +70 -0
  6. data/lib/economist/republican_nomination.pdf +0 -0
  7. data/lib/economist/saving_the_euro.odt +0 -0
  8. data/lib/economist/to_infinity_and_beyond.txt +15 -0
  9. data/lib/economist/zero_sum.html +91 -0
  10. data/lib/treat.rb +58 -72
  11. data/lib/treat/buildable.rb +59 -15
  12. data/lib/treat/categories.rb +26 -14
  13. data/lib/treat/category.rb +2 -2
  14. data/lib/treat/delegatable.rb +65 -48
  15. data/lib/treat/doable.rb +44 -0
  16. data/lib/treat/entities.rb +34 -14
  17. data/lib/treat/entities/collection.rb +2 -0
  18. data/lib/treat/entities/document.rb +3 -2
  19. data/lib/treat/entities/entity.rb +105 -90
  20. data/lib/treat/entities/phrases.rb +17 -0
  21. data/lib/treat/entities/tokens.rb +28 -13
  22. data/lib/treat/entities/zones.rb +20 -0
  23. data/lib/treat/extractors.rb +49 -11
  24. data/lib/treat/extractors/coreferences/stanford.rb +68 -0
  25. data/lib/treat/extractors/date/chronic.rb +32 -0
  26. data/lib/treat/extractors/date/ruby.rb +25 -0
  27. data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
  28. data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
  29. data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
  30. data/lib/treat/extractors/language/what_language.rb +49 -0
  31. data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
  32. data/lib/treat/extractors/roles/naive.rb +73 -0
  33. data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
  34. data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
  35. data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
  36. data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
  37. data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
  38. data/lib/treat/extractors/time/nickel.rb +30 -12
  39. data/lib/treat/extractors/topic_words/lda.rb +9 -9
  40. data/lib/treat/extractors/topics/reuters.rb +14 -15
  41. data/lib/treat/extractors/topics/reuters/region.xml +1 -0
  42. data/lib/treat/features.rb +7 -0
  43. data/lib/treat/formatters/readers/abw.rb +6 -1
  44. data/lib/treat/formatters/readers/autoselect.rb +5 -6
  45. data/lib/treat/formatters/readers/doc.rb +3 -1
  46. data/lib/treat/formatters/readers/html.rb +1 -1
  47. data/lib/treat/formatters/readers/image.rb +43 -0
  48. data/lib/treat/formatters/readers/odt.rb +1 -2
  49. data/lib/treat/formatters/readers/pdf.rb +9 -1
  50. data/lib/treat/formatters/readers/xml.rb +40 -0
  51. data/lib/treat/formatters/serializers/xml.rb +50 -14
  52. data/lib/treat/formatters/serializers/yaml.rb +7 -2
  53. data/lib/treat/formatters/unserializers/xml.rb +33 -7
  54. data/lib/treat/formatters/visualizers/dot.rb +90 -20
  55. data/lib/treat/formatters/visualizers/short_value.rb +2 -2
  56. data/lib/treat/formatters/visualizers/standoff.rb +2 -2
  57. data/lib/treat/formatters/visualizers/tree.rb +1 -1
  58. data/lib/treat/formatters/visualizers/txt.rb +13 -4
  59. data/lib/treat/group.rb +16 -10
  60. data/lib/treat/helpers/linguistics_loader.rb +18 -0
  61. data/lib/treat/inflectors.rb +10 -0
  62. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  63. data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
  64. data/lib/treat/inflectors/declensions/english.rb +319 -0
  65. data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
  66. data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
  67. data/lib/treat/install.rb +59 -0
  68. data/lib/treat/kernel.rb +18 -8
  69. data/lib/treat/languages.rb +18 -11
  70. data/lib/treat/languages/arabic.rb +4 -2
  71. data/lib/treat/languages/chinese.rb +6 -2
  72. data/lib/treat/languages/dutch.rb +16 -0
  73. data/lib/treat/languages/english.rb +47 -19
  74. data/lib/treat/languages/french.rb +8 -5
  75. data/lib/treat/languages/german.rb +9 -6
  76. data/lib/treat/languages/greek.rb +16 -0
  77. data/lib/treat/languages/italian.rb +6 -3
  78. data/lib/treat/languages/polish.rb +16 -0
  79. data/lib/treat/languages/portuguese.rb +16 -0
  80. data/lib/treat/languages/russian.rb +16 -0
  81. data/lib/treat/languages/spanish.rb +16 -0
  82. data/lib/treat/languages/swedish.rb +16 -0
  83. data/lib/treat/languages/tags.rb +377 -0
  84. data/lib/treat/lexicalizers.rb +34 -23
  85. data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
  86. data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
  87. data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
  88. data/lib/treat/lexicalizers/tag/brill.rb +35 -40
  89. data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
  90. data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
  91. data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
  92. data/lib/treat/processors.rb +8 -8
  93. data/lib/treat/processors/chunkers/txt.rb +4 -4
  94. data/lib/treat/processors/parsers/enju.rb +114 -99
  95. data/lib/treat/processors/parsers/stanford.rb +109 -41
  96. data/lib/treat/processors/segmenters/punkt.rb +17 -18
  97. data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
  98. data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
  99. data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
  100. data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
  101. data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
  102. data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
  103. data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
  104. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
  105. data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
  106. data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
  107. data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
  108. data/lib/treat/processors/segmenters/stanford.rb +38 -37
  109. data/lib/treat/processors/segmenters/tactful.rb +5 -4
  110. data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
  111. data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
  112. data/lib/treat/processors/tokenizers/perl.rb +2 -2
  113. data/lib/treat/processors/tokenizers/punkt.rb +6 -2
  114. data/lib/treat/processors/tokenizers/stanford.rb +25 -24
  115. data/lib/treat/processors/tokenizers/tactful.rb +1 -2
  116. data/lib/treat/proxies.rb +2 -35
  117. data/lib/treat/registrable.rb +17 -22
  118. data/lib/treat/sugar.rb +11 -11
  119. data/lib/treat/tree.rb +27 -17
  120. data/lib/treat/viewable.rb +29 -0
  121. data/lib/treat/visitable.rb +1 -1
  122. data/test/tc_entity.rb +56 -49
  123. data/test/tc_extractors.rb +41 -18
  124. data/test/tc_formatters.rb +7 -8
  125. data/test/tc_inflectors.rb +19 -24
  126. data/test/tc_lexicalizers.rb +12 -19
  127. data/test/tc_processors.rb +26 -12
  128. data/test/tc_resources.rb +2 -7
  129. data/test/tc_treat.rb +20 -22
  130. data/test/tc_tree.rb +4 -4
  131. data/test/tests.rb +3 -5
  132. data/test/texts.rb +13 -14
  133. data/tmp/INFO +1 -0
  134. metadata +78 -158
  135. data/bin/INFO +0 -1
  136. data/examples/benchmark.rb +0 -81
  137. data/examples/keywords.rb +0 -148
  138. data/lib/treat/detectors.rb +0 -31
  139. data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
  140. data/lib/treat/detectors/format/file.rb +0 -36
  141. data/lib/treat/detectors/language/what_language.rb +0 -29
  142. data/lib/treat/entities/constituents.rb +0 -15
  143. data/lib/treat/entities/sentence.rb +0 -8
  144. data/lib/treat/extractors/named_entity/abner.rb +0 -20
  145. data/lib/treat/extractors/named_entity/stanford.rb +0 -174
  146. data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
  147. data/lib/treat/extractors/time/chronic.rb +0 -20
  148. data/lib/treat/extractors/time/native.rb +0 -18
  149. data/lib/treat/formatters/readers/gocr.rb +0 -26
  150. data/lib/treat/formatters/readers/ocropus.rb +0 -31
  151. data/lib/treat/formatters/visualizers/html.rb +0 -13
  152. data/lib/treat/formatters/visualizers/inspect.rb +0 -20
  153. data/lib/treat/inflectors/declensions/en.rb +0 -18
  154. data/lib/treat/languages/categories.rb +0 -5
  155. data/lib/treat/languages/english/categories.rb +0 -23
  156. data/lib/treat/languages/english/tags.rb +0 -352
  157. data/lib/treat/languages/xinhua.rb +0 -12
  158. data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
  159. data/lib/treat/string.rb +0 -5
  160. data/test/tc_detectors.rb +0 -26
@@ -3,7 +3,7 @@ module Treat
3
3
  module Visualizers
4
4
  class ShortValue
5
5
  # Default options for the visualizer.
6
- DefaultOptions = { max_words: 6, max_length: 30 }
6
+ DefaultOptions = { :max_words => 6, :max_length => 30 }
7
7
  # Returns the text value of an entity, shortened
8
8
  # with [..] if the value is longer than :max_words
9
9
  # or longer than :max_length.
@@ -18,7 +18,7 @@ module Treat
18
18
  words = entity.to_s.split(' ')
19
19
  if words.size < options[:max_words] ||
20
20
  entity.to_s.length < options[:max_length]
21
- entity.to_s
21
+ entity.to_s
22
22
  else
23
23
  words[0..2].join(' ') + ' [...] ' + words[-3..-1].join(' ')
24
24
  end
@@ -6,7 +6,7 @@ module Treat
6
6
  # (S (NP John) (VP has (VP come))).
7
7
  class Standoff
8
8
  # Default options for the visualizer.
9
- DefaultOptions = { indent: 0 }
9
+ DefaultOptions = { :indent => 0 }
10
10
  # A lambda to recursively visualize the children
11
11
  # of an entity.
12
12
  Recurse = lambda do |entity, options|
@@ -25,7 +25,7 @@ module Treat
25
25
  options[:indent] += 1
26
26
  if entity.is_a?(Treat::Entities::Token)
27
27
  value += "#{spaces}(#{entity.tag} #{entity.value})"
28
- elsif entity.is_a?(Treat::Entities::Constituent)
28
+ elsif entity.is_a?(Treat::Entities::Phrase)
29
29
  tag = entity.has?(:tag) ? entity.tag : ''
30
30
  value += ("#{spaces}(#{tag}\n" +
31
31
  "#{Recurse.call(entity, options)})\n")
@@ -5,7 +5,7 @@ module Treat
5
5
  # of a tree of entities.
6
6
  class Tree
7
7
  # Default options for the visualizer.
8
- DefaultOptions = { indent: 0 }
8
+ DefaultOptions = { :indent => 0 }
9
9
  # Obtain a plain text tree representation
10
10
  # of the entity.
11
11
  def self.visualize(entity, options = {})
@@ -4,30 +4,39 @@ module Treat
4
4
  # Creates a plain text visualization of an entity.
5
5
  class Txt
6
6
  # The default options for the visualizer.
7
- DefaultOptions = { sep: ' ' }
7
+ DefaultOptions = { :sep => ' ' }
8
8
  # Obtain a plain text visualization of the entity,
9
9
  # with no additional information.
10
10
  #
11
11
  # Options:
12
12
  # (String) :sep => the separator to use between words.
13
13
  def self.visualize(entity, options = {})
14
+ options[:first] = true unless options[:first] == false
15
+ first = options[:first]
14
16
  options = DefaultOptions.merge(options)
15
- return entity.value if !entity.has_children?
17
+ return entity.value.dup if !entity.has_children?
16
18
  value = ''
19
+ options[:first] = false
17
20
  entity.each do |child|
21
+ value += "\n\n" if child.is_a?(Treat::Entities::Section)
18
22
  if child.is_a?(Treat::Entities::Token) || child.value != ''
19
23
  # Remove the trailing space for tokens that
20
24
  # 'stick' to the previous one, such
21
25
  # as punctuation symbols and clitics.
22
26
  if child.is_a?(Treat::Entities::Punctuation) ||
23
- child.is_a?(Treat::Entities::Clitic)
24
- value.strip!
27
+ child.is_a?(Treat::Entities::Clitic)
28
+ value.strip!
25
29
  end
26
30
  value += child.value + options[:sep]
27
31
  else
28
32
  value += visualize(child, options)
29
33
  end
34
+ if child.is_a?(Treat::Entities::Title) ||
35
+ child.is_a?(Treat::Entities::Paragraph)
36
+ value += "\n\n"
37
+ end
30
38
  end
39
+ value = value.strip if first
31
40
  value
32
41
  end
33
42
  end
@@ -5,7 +5,11 @@ module Treat
5
5
  group.module_eval do
6
6
  class << self
7
7
  attr_accessor :type, :default, :targets
8
+ attr_accessor :presets, :preprocessors, :postprocessors
8
9
  end
10
+ self.presets = {}
11
+ self.preprocessors = {}
12
+ self.postprocessors = {}
9
13
  # Return the method corresponding to the group.
10
14
  # This method resolves the name of the method
11
15
  # that a group should provide based on the name
@@ -31,21 +35,20 @@ module Treat
31
35
  else
32
36
  n = m
33
37
  end
34
- @method = :"#{n}"
38
+ @method = n.intern
35
39
  end
36
40
  end
41
+ group.list
37
42
  end
38
43
  # Create a new algorithm within the group. Once
39
44
  # the algorithm is added, it will be automatically
40
45
  # installed on all the targets of the group.
41
46
  def add(class_name, &block)
42
- class_name = :"#{cc(class_name)}"
43
- klass = self.const_set(class_name, Class.new)
47
+ klass = self.const_set(cc(class_name).intern, Class.new)
44
48
  method = self.method
45
- klass.class_eval do
46
- @@block = block
47
- eval "def #{method}(entity);" +
48
- "@@block.call(entity); end"
49
+ @@list[ucc(cl(self))] << class_name
50
+ klass.send(:define_singleton_method, method) do |entity, options={}|
51
+ block.call(entity, options)
49
52
  end
50
53
  end
51
54
  # Boolean - does the group have the supplied class
@@ -55,7 +58,6 @@ module Treat
55
58
  self.targets.each do |entity_type|
56
59
  entity_type = Entities.const_get(cc(entity_type))
57
60
  if target < entity_type || entity_type == target
58
-
59
61
  is_target = true; break
60
62
  end
61
63
  end
@@ -72,7 +74,7 @@ module Treat
72
74
  dirs = Dir.glob("#{Treat.lib}/treat/*/#{mod}/*.rb")
73
75
  dirs.each do |file|
74
76
  @@list[mod] <<
75
- :"#{file.split('/')[-1][0..-4]}"
77
+ file.split('/')[-1][0..-4].intern
76
78
  end
77
79
  end
78
80
  @@list[mod]
@@ -87,10 +89,14 @@ module Treat
87
89
  file = bits.join('/') + "/#{ucc(const)}"
88
90
  if not File.readable?("#{Treat.lib}/#{file}.rb")
89
91
  raise Treat::Exception,
90
- "File '#{file}.rb' corresponding to requested delegate "+
92
+ "File '#{file}.rb' corresponding to requested worker "+
91
93
  "#{self}::#{const} does not exist."
92
94
  else
93
95
  require file
96
+ if not const_defined?(const)
97
+ raise Treat::Exception,
98
+ "File #{file} does not define #{self}::#{const}."
99
+ end
94
100
  const_get(const)
95
101
  end
96
102
  end
@@ -0,0 +1,18 @@
1
+ module Treat
2
+ module Helpers
3
+ class LinguisticsLoader
4
+ silence_warnings { require 'linguistics' }
5
+ def self.load(language)
6
+ begin
7
+ l = language.to_s.upcase
8
+ klass = nil
9
+ silence_warnings { klass = ::Linguistics.const_get(l) }
10
+ klass
11
+ rescue RuntimeError
12
+ raise "Ruby Linguistics does not have a module " +
13
+ " installed for the #{language} language."
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -12,12 +12,22 @@ module Treat
12
12
  extend Group
13
13
  self.type = :annotator
14
14
  self.targets = [:word]
15
+ self.presets = {
16
+ :plural => {:count => :plural},
17
+ :singular => {:count => :singular}
18
+ }
15
19
  end
16
20
  # Retrieve the different conjugations of a word.
17
21
  module Conjugations
18
22
  extend Group
19
23
  self.type = :annotator
20
24
  self.targets = [:word]
25
+ self.presets = {
26
+ :infinitive => {:mode => :infinitive},
27
+ :present_participle => {:tense => :present, :mode => :participle},
28
+ :plural_verb => {:count => :plural},
29
+ :singular_verb => {:count => :singular}
30
+ }
21
31
  end
22
32
  # Retrieve the full text description of a cardinal number.
23
33
  module CardinalWords
@@ -7,8 +7,7 @@ module Treat
7
7
  #
8
8
  # Project website: http://deveiate.org/projects/Linguistics/
9
9
  class Linguistics
10
- # Require the 'linguistics' gem.
11
- silence_warnings { require 'linguistics' }
10
+ require 'treat/helpers/linguistics_loader'
12
11
  # Return the description of a cardinal number in words.
13
12
  #
14
13
  # Options:
@@ -34,7 +33,8 @@ module Treat
34
33
  #
35
34
  # More specific options when using :type => :ordinal:
36
35
  def self.cardinal_words(entity, options = {})
37
- silence_warnings { ::Linguistics::EN.numwords(entity.to_s, options) }
36
+ klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
37
+ klass.numwords(entity.to_s, options)
38
38
  end
39
39
  end
40
40
  end
@@ -6,7 +6,7 @@ module Treat
6
6
  #
7
7
  # Project website: http://deveiate.org/projects/Linguistics/
8
8
  class Linguistics
9
- silence_warnings { require 'linguistics' }
9
+ require 'treat/helpers/linguistics_loader'
10
10
  # Conjugate a verb using ruby linguistics with the specified
11
11
  # mode, tense, count and person.
12
12
  #
@@ -17,20 +17,13 @@ module Treat
17
17
  # - (Symbol) :count => :singular, :plural
18
18
  # - (Symbol) :person => :first, :second, :third
19
19
  def self.conjugations(entity, parameters)
20
- begin
21
- l = entity.language.to_s.upcase
22
- delegate = nil
23
- silence_warnings { delegate = ::Linguistics.const_get(l) }
24
- rescue RuntimeError
25
- raise "Ruby Linguistics does not have a module " +
26
- " installed for the #{entity.language} language."
27
- end
20
+ klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
28
21
  if parameters[:mode] == :infinitive
29
- silence_warnings { delegate.infinitive(entity.to_s) }
22
+ silence_warnings { klass.infinitive(entity.to_s) }
30
23
  elsif parameters[:mode] == :participle && parameters[:tense] == :present
31
- silence_warnings { delegate.present_participle(entity.to_s) }
24
+ silence_warnings { klass.present_participle(entity.to_s) }
32
25
  elsif parameters[:count] == :plural && parameters.size == 1
33
- silence_warnings { delegate.plural_verb(entity.to_s) }
26
+ silence_warnings { klass.plural_verb(entity.to_s) }
34
27
  else
35
28
  raise Treat::Exception,
36
29
  'This combination of modes, tenses, persons ' +
@@ -0,0 +1,319 @@
1
+ module Treat
2
+ module Inflectors
3
+ module Declensions
4
+ # This class is a wrapper for the Inflect module,
5
+ # copied from the unmaintained 'english' ruby gem,
6
+ # created by Thomas Sawyer.
7
+ #
8
+ # Released under the MIT License.
9
+ #
10
+ # http://english.rubyforge.org
11
+ class English
12
+ # Retrieve the declensions (singular, plural)
13
+ # of an english word using a class lifted from
14
+ # the 'english' ruby gem.
15
+ def self.declensions(entity, options)
16
+ unless options[:count]
17
+ raise Treat::Exception,
18
+ "Must supply option count (:singular or :plural)."
19
+ end
20
+ string = entity.to_s
21
+ if entity.category == :verb
22
+ raise Treat::Exception,
23
+ "Cannot retrieve the declensions of a verb. " +
24
+ "Use #singular_verb and #plural_verb instead."
25
+ elsif options[:count] == :plural
26
+ Inflect.plural(string)
27
+ elsif options[:count] == :singular
28
+ Inflect.singular(string)
29
+ else
30
+ {:singular => Inflect.singular(string),
31
+ :plural => Inflect.plural(string)}
32
+ end
33
+ end
34
+
35
+ module Inflect
36
+
37
+ @singular_of = {}
38
+ @plural_of = {}
39
+
40
+ @singular_rules = []
41
+ @plural_rules = []
42
+
43
+ # This class provides the DSL for creating inflections, you can add additional rules.
44
+ # Examples:
45
+ #
46
+ # word "ox", "oxen"
47
+ # word "octopus", "octopi"
48
+ # word "man", "men"
49
+ #
50
+ # rule "lf", "lves"
51
+ #
52
+ # word "equipment"
53
+ #
54
+ # Rules are evaluated by size, so rules you add to override specific cases should be longer than the rule
55
+ # it overrides. For instance, if you want "pta" to pluralize to "ptas", even though a general purpose rule
56
+ # for "ta" => "tum" already exists, simply add a new rule for "pta" => "ptas", and it will automatically win
57
+ # since it is longer than the old rule.
58
+ #
59
+ # Also, single-word exceptions win over general words ("ox" pluralizes to "oxen", because it's a single word
60
+ # exception, even though "fox" pluralizes to "foxes")
61
+ class << self
62
+ # Define a general two-way exception.
63
+ #
64
+ # This also defines a general rule, so foo_child will correctly become
65
+ # foo_children.
66
+ #
67
+ # Whole words also work if they are capitalized (Goose => Geese).
68
+ def word(singular, plural=nil)
69
+ plural = singular unless plural
70
+ singular_word(singular, plural)
71
+ plural_word(singular, plural)
72
+ rule(singular, plural)
73
+ end
74
+
75
+ # Define a singularization exception.
76
+ def singular_word(singular, plural)
77
+ @singular_of[plural] = singular
78
+ @singular_of[plural.capitalize] = singular.capitalize
79
+ end
80
+
81
+ # Define a pluralization exception.
82
+ def plural_word(singular, plural)
83
+ @plural_of[singular] = plural
84
+ @plural_of[singular.capitalize] = plural.capitalize
85
+ end
86
+
87
+ # Define a general rule.
88
+ def rule(singular, plural)
89
+ singular_rule(singular, plural)
90
+ plural_rule(singular, plural)
91
+ end
92
+
93
+ # Define a singularization rule.
94
+ def singular_rule(singular, plural)
95
+ @singular_rules << [singular, plural]
96
+ end
97
+
98
+ # Define a pluralization rule.
99
+ def plural_rule(singular, plural)
100
+ @plural_rules << [singular, plural]
101
+ end
102
+
103
+ # Read prepared singularization rules.
104
+ def singularization_rules
105
+ if defined?(@singularization_regex) && @singularization_regex
106
+ return [@singularization_regex, @singularization_hash]
107
+ end
108
+ # No sorting needed: Regexen match on longest string
109
+ @singularization_regex = Regexp.new("(" + @singular_rules.map {|s,p| p}.join("|") + ")$", "i")
110
+ @singularization_hash = Hash[*@singular_rules.flatten].invert
111
+ [@singularization_regex, @singularization_hash]
112
+ end
113
+
114
+ # Read prepared singularization rules.
115
+ #def singularization_rules
116
+ # return @singularization_rules if @singularization_rules
117
+ # sorted = @singular_rules.sort_by{ |s, p| "#{p}".size }.reverse
118
+ # @singularization_rules = sorted.collect do |s, p|
119
+ # [ /#{p}$/, "#{s}" ]
120
+ # end
121
+ #end
122
+
123
+ # Read prepared pluralization rules.
124
+ def pluralization_rules
125
+ if defined?(@pluralization_regex) && @pluralization_regex
126
+ return [@pluralization_regex, @pluralization_hash]
127
+ end
128
+ @pluralization_regex = Regexp.new("(" + @plural_rules.map {|s,p| s}.join("|") + ")$", "i")
129
+ @pluralization_hash = Hash[*@plural_rules.flatten]
130
+ [@pluralization_regex, @pluralization_hash]
131
+ end
132
+
133
+ # Read prepared pluralization rules.
134
+ #def pluralization_rules
135
+ # return @pluralization_rules if @pluralization_rules
136
+ # sorted = @plural_rules.sort_by{ |s, p| "#{s}".size }.reverse
137
+ # @pluralization_rules = sorted.collect do |s, p|
138
+ # [ /#{s}$/, "#{p}" ]
139
+ # end
140
+ #end
141
+
142
+ #
143
+ def singular_of ; @singular_of ; end
144
+
145
+ #
146
+ def plural_of ; @plural_of ; end
147
+
148
+ # Convert an English word from plural to singular.
149
+ #
150
+ # "boys".singular #=> boy
151
+ # "tomatoes".singular #=> tomato
152
+ #
153
+ def singular(word)
154
+ return "" if word == ""
155
+ if result = singular_of[word]
156
+ return result.dup
157
+ end
158
+ result = word.dup
159
+
160
+ regex, hash = singularization_rules
161
+ result.sub!(regex) {|m| hash[m]}
162
+ singular_of[word] = result
163
+ return result
164
+ #singularization_rules.each do |(match, replacement)|
165
+ # break if result.gsub!(match, replacement)
166
+ #end
167
+ #return result
168
+ end
169
+
170
+ # Alias for #singular (a Railism).
171
+ #
172
+ alias_method(:singularize, :singular)
173
+
174
+ # Convert an English word from singular to plural.
175
+ #
176
+ # "boy".plural #=> boys
177
+ # "tomato".plural #=> tomatoes
178
+ #
179
+ def plural(word)
180
+ return "" if word == ""
181
+ if result = plural_of[word]
182
+ return result.dup
183
+ end
184
+ #return self.dup if /s$/ =~ self # ???
185
+ result = word.dup
186
+
187
+ regex, hash = pluralization_rules
188
+ result.sub!(regex) {|m| hash[m]}
189
+ plural_of[word] = result
190
+ return result
191
+ #pluralization_rules.each do |(match, replacement)|
192
+ # break if result.gsub!(match, replacement)
193
+ #end
194
+ #return result
195
+ end
196
+
197
+ # Alias for #plural (a Railism).
198
+ alias_method(:pluralize, :plural)
199
+
200
+ # Clear all rules.
201
+ def clear(type = :all)
202
+ if type == :singular || type == :all
203
+ @singular_of = {}
204
+ @singular_rules = []
205
+ @singularization_rules, @singularization_regex = nil, nil
206
+ end
207
+ if type == :plural || type == :all
208
+ @singular_of = {}
209
+ @singular_rules = []
210
+ @singularization_rules, @singularization_regex = nil, nil
211
+ end
212
+ end
213
+ end
214
+
215
+ # One argument means singular and plural are the same.
216
+
217
+ word 'equipment'
218
+ word 'information'
219
+ word 'money'
220
+ word 'species'
221
+ word 'series'
222
+ word 'fish'
223
+ word 'sheep'
224
+ word 'moose'
225
+ word 'hovercraft'
226
+ word 'news'
227
+ word 'rice'
228
+ word 'plurals'
229
+
230
+ # Two arguments define a singular and plural exception.
231
+
232
+ word 'Swiss' , 'Swiss'
233
+ word 'alias' , 'aliases'
234
+ word 'analysis' , 'analyses'
235
+ #word 'axis' , 'axes'
236
+ word 'basis' , 'bases'
237
+ word 'buffalo' , 'buffaloes'
238
+ word 'child' , 'children'
239
+ #word 'cow' , 'kine'
240
+ word 'crisis' , 'crises'
241
+ word 'criterion' , 'criteria'
242
+ word 'datum' , 'data'
243
+ word 'goose' , 'geese'
244
+ word 'hive' , 'hives'
245
+ word 'index' , 'indices'
246
+ word 'life' , 'lives'
247
+ word 'louse' , 'lice'
248
+ word 'man' , 'men'
249
+ word 'matrix' , 'matrices'
250
+ word 'medium' , 'media'
251
+ word 'mouse' , 'mice'
252
+ word 'movie' , 'movies'
253
+ word 'octopus' , 'octopi'
254
+ word 'ox' , 'oxen'
255
+ word 'person' , 'people'
256
+ word 'potato' , 'potatoes'
257
+ word 'quiz' , 'quizzes'
258
+ word 'shoe' , 'shoes'
259
+ word 'status' , 'statuses'
260
+ word 'testis' , 'testes'
261
+ word 'thesis' , 'theses'
262
+ word 'thief' , 'thieves'
263
+ word 'tomato' , 'tomatoes'
264
+ word 'torpedo' , 'torpedoes'
265
+ word 'vertex' , 'vertices'
266
+ word 'virus' , 'viri'
267
+ word 'wife' , 'wives'
268
+
269
+ # One-way singularization exception (convert plural to singular).
270
+
271
+ singular_word 'cactus', 'cacti'
272
+
273
+ # One-way pluralization exception (convert singular to plural).
274
+
275
+ plural_word 'axis', 'axes'
276
+
277
+ # General rules.
278
+
279
+ rule 'rf' , 'rves'
280
+ rule 'ero' , 'eroes'
281
+ rule 'ch' , 'ches'
282
+ rule 'sh' , 'shes'
283
+ rule 'ss' , 'sses'
284
+ #rule 'ess' , 'esses'
285
+ rule 'ta' , 'tum'
286
+ rule 'ia' , 'ium'
287
+ rule 'ra' , 'rum'
288
+ rule 'ay' , 'ays'
289
+ rule 'ey' , 'eys'
290
+ rule 'oy' , 'oys'
291
+ rule 'uy' , 'uys'
292
+ rule 'y' , 'ies'
293
+ rule 'x' , 'xes'
294
+ rule 'lf' , 'lves'
295
+ rule 'ffe' , 'ffes'
296
+ rule 'af' , 'aves'
297
+ rule 'us' , 'uses'
298
+ rule 'ouse' , 'ouses'
299
+ rule 'osis' , 'oses'
300
+ rule 'ox' , 'oxes'
301
+ rule '' , 's'
302
+
303
+ # One-way singular rules.
304
+
305
+ singular_rule 'of' , 'ofs' # proof
306
+ singular_rule 'o' , 'oes' # hero, heroes
307
+ #singular_rule 'f' , 'ves'
308
+
309
+ # One-way plural rules.
310
+
311
+ plural_rule 's' , 'ses'
312
+ plural_rule 'ive' , 'ives' # don't want to snag wife
313
+ plural_rule 'fe' , 'ves' # don't want to snag perspectives
314
+
315
+ end
316
+ end
317
+ end
318
+ end
319
+ end