treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
@@ -0,0 +1,55 @@
1
+ # This module is a wrapper for the functions included
2
+ # in the 'linguistics' gem that conjugate verbs.
3
+ #
4
+ # Project website: http://deveiate.org/projects/Linguistics/
5
module Treat::Inflectors::Conjugators::Linguistics

  require 'treat/loaders/linguistics'

  # Options applied when the caller does not supply them.
  DefaultOptions = {
    :strict => false
  }

  # Named shortcuts that can be passed as :form instead of spelling
  # out the individual :mode / :tense / :count options.
  Forms = {
    :present_participle =>
    {:mode => :participle, :tense => :present},
    :infinitive => {:mode => :infinitive},
    :plural_verb => {:count => :plural},
    :singular_verb => {:count => :singular}
  }

  # Options that select a grammatical form other than count. The
  # plural-verb branch is only taken when none of them is present.
  NonCountOptions = [:mode, :tense, :person]

  # Conjugate a verb using ruby linguistics with the specified
  # mode, tense, count and person.
  #
  # Options:
  #
  # - (Boolean) :strict => when true, only entities whose category
  #   is :verb are conjugated (others return nil); when false, all
  #   words are conjugated.
  # - (Symbol) :form => one of the keys of Forms; replaces the
  #   other grammatical options with the matching preset.
  # - (Symbol) :mode => :infinitive, :indicative, :subjunctive, :participle
  # - (Symbol) :tense => :past, :present, :future
  # - (Symbol) :count => :singular, :plural
  # - (Symbol) :person => :first, :second, :third
  #
  # Only three combinations are implemented: the infinitive, the
  # present participle, and the plural (count only). Anything else,
  # including an unknown :form, raises Treat::Exception.
  def self.conjugate(entity, options = {})

    options = DefaultOptions.merge(options)
    cat = entity.check_has(:category)
    return if cat != :verb && options[:strict]

    if options[:form]
      preset = Forms[options[:form]]
      # Previously an unknown form left options as nil and the
      # method died with NoMethodError further down.
      unless preset
        raise Treat::Exception,
        "Unknown verb form '#{options[:form]}'."
      end
      options = preset
    end

    klass = Treat::Loaders::Linguistics.load(entity.language)
    if options[:mode] == :infinitive
      silence_warnings { klass.infinitive(entity.to_s) }
    elsif options[:mode] == :participle && options[:tense] == :present
      silence_warnings { klass.present_participle(entity.to_s) }
    elsif options[:count] == :plural &&
      (options.keys & NonCountOptions).empty?
      # The original test was options.size == 1, which could never be
      # true for a direct :count => :plural call because the merged
      # defaults always add :strict.
      silence_warnings { klass.plural_verb(entity.to_s) }
    else
      raise Treat::Exception,
      'This combination of modes, tenses, persons ' +
      'and/or counts is not presently supported.'
    end

  end

end
@@ -0,0 +1,31 @@
1
+ # This class is a wrapper for the ActiveSupport
2
+ # declension tools.
3
# NOTE: this file is declensors/active_support.rb; the class was
# previously declared as ...::Declensors::English, which reopened
# the real English declensor instead of defining this one.
class Treat::Inflectors::Declensors::ActiveSupport

  # Loads ActiveSupport::Inflector together with its default
  # English inflection rules and the pluralize/singularize methods.
  require 'active_support/inflector'

  # Declense a word using ActiveSupport::Inflector.
  #
  # Returns nil when the entity's category is not :noun,
  # :adjective or :determiner, or when :count is neither
  # :plural nor :singular.
  #
  # Options:
  #
  # - (Symbol) :count => :singular, :plural (required)
  #
  # Raises Treat::Exception if :count is missing.
  def self.declense(entity, options)

    cat = entity.check_has(:category)
    unless [:noun, :adjective, :determiner].
      include?(cat)
      return
    end

    unless options[:count]
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    string = entity.to_s

    # pluralize/singularize live on the Inflector module itself;
    # Inflector::Inflections is the rule-set class and does not
    # respond to them. The leading :: makes sure the top-level gem
    # constant is used, not this class.
    if options[:count] == :plural
      ::ActiveSupport::Inflector.pluralize(string)
    elsif options[:count] == :singular
      ::ActiveSupport::Inflector.singularize(string)
    end

  end

end
@@ -0,0 +1,38 @@
1
+ # This class is a wrapper for the Inflect module,
2
+ # copied from the unmaintained 'english' ruby gem,
3
+ # created by Thomas Sawyer.
4
+ #
5
+ # Released under the MIT License.
6
+ #
7
+ # http://english.rubyforge.org
8
class Treat::Inflectors::Declensors::English

  require 'treat/inflectors/declensors/english/inflect'

  # Return the singular or plural declension of an English
  # word, delegating to the bundled Inflect module (lifted
  # from the 'english' ruby gem).
  #
  # Returns nil when the entity's category is not :noun,
  # :adjective or :determiner, or when :count is neither
  # :plural nor :singular.
  #
  # Options:
  #
  # - (Symbol) :count => :singular, :plural (required)
  #
  # Raises Treat::Exception if :count is missing.
  def self.declense(entity, options)

    category = entity.check_has(:category)
    declinable = [:noun, :adjective, :determiner]
    return unless declinable.include?(category)

    count = options[:count]
    if !count
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    word = entity.to_s

    case count
    when :plural then Inflect.plural(word)
    when :singular then Inflect.singular(word)
    end

  end

end
@@ -0,0 +1,288 @@
1
+ # This module is the Inflect module; it has been
2
+ # copied from the unmaintained 'english' ruby gem,
3
+ # created by Thomas Sawyer.
4
+ #
5
+ # Released under the MIT License.
6
+ #
7
+ # http://english.rubyforge.org
8
+ module Treat::Inflectors::Declensors::English::Inflect
9
+
10
+ @singular_of = {}
11
+ @plural_of = {}
12
+
13
+ @singular_rules = []
14
+ @plural_rules = []
15
+
16
+ # The methods below provide the DSL for creating inflections; you can add additional rules.
17
+ # Examples:
18
+ #
19
+ # word "ox", "oxen"
20
+ # word "octopus", "octopi"
21
+ # word "man", "men"
22
+ #
23
+ # rule "lf", "lves"
24
+ #
25
+ # word "equipment"
26
+ #
27
+ # Rules are evaluated by size, so rules you add to override specific cases should be longer than the rule
28
+ # it overrides. For instance, if you want "pta" to pluralize to "ptas", even though a general purpose rule
29
+ # for "ta" => "tum" already exists, simply add a new rule for "pta" => "ptas", and it will automatically win
30
+ # since it is longer than the old rule.
31
+ #
32
+ # Also, single-word exceptions win over general words ("ox" pluralizes to "oxen", because it's a single word
33
+ # exception, even though "fox" pluralizes to "foxes")
34
class << self
  # Define a general two-way exception.
  #
  # This also defines a general rule, so foo_child will correctly become
  # foo_children.
  #
  # Whole words also work if they are capitalized (Goose => Geese).
  # With a single argument the singular and plural are the same word.
  def word(singular, plural = nil)
    plural ||= singular
    singular_word(singular, plural)
    plural_word(singular, plural)
    rule(singular, plural)
  end

  # Define a singularization exception (plural => singular),
  # in both lower-case and capitalized form.
  def singular_word(singular, plural)
    @singular_of[plural] = singular
    @singular_of[plural.capitalize] = singular.capitalize
  end

  # Define a pluralization exception (singular => plural),
  # in both lower-case and capitalized form.
  def plural_word(singular, plural)
    @plural_of[singular] = plural
    @plural_of[singular.capitalize] = plural.capitalize
  end

  # Define a general (two-way) suffix rule.
  def rule(singular, plural)
    singular_rule(singular, plural)
    plural_rule(singular, plural)
  end

  # Define a singularization rule.
  def singular_rule(singular, plural)
    @singular_rules << [singular, plural]
    # Drop the memoized regex so a rule added after the first lookup
    # is honoured. Results already memoized per word in @singular_of
    # are not recomputed.
    @singularization_regex = nil
    @singularization_hash = nil
  end

  # Define a pluralization rule.
  def plural_rule(singular, plural)
    @plural_rules << [singular, plural]
    # See singular_rule: invalidate the memoized regex.
    @pluralization_regex = nil
    @pluralization_hash = nil
  end

  # Read prepared singularization rules.
  #
  # Returns [regex, hash]: regex matches any known plural suffix at
  # the end of a word, hash maps that plural suffix to its singular
  # suffix. Both are built once and memoized.
  def singularization_rules
    if defined?(@singularization_regex) && @singularization_regex
      return [@singularization_regex, @singularization_hash]
    end
    # No sorting needed: the match is anchored at the end of the word,
    # so the leftmost (i.e. longest) matching suffix wins.
    suffixes = @singular_rules.map { |_singular, plural| plural }
    @singularization_regex =
      Regexp.new("(" + suffixes.join("|") + ")$", Regexp::IGNORECASE)
    @singularization_hash = Hash[@singular_rules].invert
    [@singularization_regex, @singularization_hash]
  end

  # Read prepared pluralization rules.
  #
  # Returns [regex, hash]: regex matches any known singular suffix at
  # the end of a word, hash maps that singular suffix to its plural
  # suffix. Both are built once and memoized.
  def pluralization_rules
    if defined?(@pluralization_regex) && @pluralization_regex
      return [@pluralization_regex, @pluralization_hash]
    end
    suffixes = @plural_rules.map { |singular, _plural| singular }
    @pluralization_regex =
      Regexp.new("(" + suffixes.join("|") + ")$", Regexp::IGNORECASE)
    @pluralization_hash = Hash[@plural_rules]
    [@pluralization_regex, @pluralization_hash]
  end

  # Table of plural => singular exceptions; also used as the
  # per-word memo for #singular.
  def singular_of ; @singular_of ; end

  # Table of singular => plural exceptions; also used as the
  # per-word memo for #plural.
  def plural_of ; @plural_of ; end

  # Convert an English word from plural to singular.
  #
  #   "boys".singular      #=> boy
  #   "tomatoes".singular  #=> tomato
  #
  def singular(word)
    return "" if word == ""
    if result = singular_of[word]
      return result.dup
    end
    result = word.dup

    regex, hash = singularization_rules
    # The regex ignores case but the hash keys are lower-case: fall
    # back to the lower-cased suffix, and leave the suffix untouched
    # if no replacement is known, instead of deleting it.
    result.sub!(regex) { |m| hash[m] || hash[m.downcase] || m }
    # Memoize a copy so callers mutating the return value cannot
    # corrupt the cache.
    singular_of[word] = result.dup
    result
  end

  # Alias for #singular (a Railism).
  #
  alias_method(:singularize, :singular)

  # Convert an English word from singular to plural.
  #
  #   "boy".plural     #=> boys
  #   "tomato".plural  #=> tomatoes
  #
  def plural(word)
    return "" if word == ""
    if result = plural_of[word]
      return result.dup
    end
    result = word.dup

    regex, hash = pluralization_rules
    # Same case-insensitive fallback as in #singular.
    result.sub!(regex) { |m| hash[m] || hash[m.downcase] || m }
    plural_of[word] = result.dup
    result
  end

  # Alias for #plural (a Railism).
  alias_method(:pluralize, :plural)

  # Clear rules and exceptions.
  #
  # type is :singular, :plural or :all (default). Each side's
  # exception table, rule list and memoized regex/hash are reset.
  def clear(type = :all)
    if type == :singular || type == :all
      @singular_of = {}
      @singular_rules = []
      @singularization_regex = nil
      @singularization_hash = nil
    end
    if type == :plural || type == :all
      # This branch used to reset the singular tables again, leaving
      # the plural ones untouched.
      @plural_of = {}
      @plural_rules = []
      @pluralization_regex = nil
      @pluralization_hash = nil
    end
  end
end
187
+
188
+ # One argument means singular and plural are the same.
189
+
190
+ word 'equipment'
191
+ word 'information'
192
+ word 'money'
193
+ word 'species'
194
+ word 'series'
195
+ word 'fish'
196
+ word 'sheep'
197
+ word 'moose'
198
+ word 'hovercraft'
199
+ word 'news'
200
+ word 'rice'
201
+ word 'plurals'
202
+
203
+ # Two arguments defines a singular and plural exception.
204
+
205
+ word 'Swiss' , 'Swiss'
206
+ word 'alias' , 'aliases'
207
+ word 'analysis' , 'analyses'
208
+ #word 'axis' , 'axes'
209
+ word 'basis' , 'bases'
210
+ word 'buffalo' , 'buffaloes'
211
+ word 'child' , 'children'
212
+ #word 'cow' , 'kine'
213
+ word 'crisis' , 'crises'
214
+ word 'criterion' , 'criteria'
215
+ word 'datum' , 'data'
216
+ word 'goose' , 'geese'
217
+ word 'hive' , 'hives'
218
+ word 'index' , 'indices'
219
+ word 'life' , 'lives'
220
+ word 'louse' , 'lice'
221
+ word 'man' , 'men'
222
+ word 'matrix' , 'matrices'
223
+ word 'medium' , 'media'
224
+ word 'mouse' , 'mice'
225
+ word 'movie' , 'movies'
226
+ word 'octopus' , 'octopi'
227
+ word 'ox' , 'oxen'
228
+ word 'person' , 'people'
229
+ word 'potato' , 'potatoes'
230
+ word 'quiz' , 'quizzes'
231
+ word 'shoe' , 'shoes'
232
+ word 'status' , 'statuses'
233
+ word 'testis' , 'testes'
234
+ word 'thesis' , 'theses'
235
+ word 'thief' , 'thieves'
236
+ word 'tomato' , 'tomatoes'
237
+ word 'torpedo' , 'torpedoes'
238
+ word 'vertex' , 'vertices'
239
+ word 'virus' , 'viri'
240
+ word 'wife' , 'wives'
241
+
242
+ # One-way singularization exception (convert plural to singular).
243
+
244
+ singular_word 'cactus', 'cacti'
245
+
246
+ # One-way pluralization exception (convert singular to plural).
247
+
248
+ plural_word 'axis', 'axes'
249
+
250
+ # General rules.
251
+
252
+ rule 'rf' , 'rves'
253
+ rule 'ero' , 'eroes'
254
+ rule 'ch' , 'ches'
255
+ rule 'sh' , 'shes'
256
+ rule 'ss' , 'sses'
257
+ #rule 'ess' , 'esses'
258
+ rule 'ta' , 'tum'
259
+ rule 'ia' , 'ium'
260
+ rule 'ra' , 'rum'
261
+ rule 'ay' , 'ays'
262
+ rule 'ey' , 'eys'
263
+ rule 'oy' , 'oys'
264
+ rule 'uy' , 'uys'
265
+ rule 'y' , 'ies'
266
+ rule 'x' , 'xes'
267
+ rule 'lf' , 'lves'
268
+ rule 'ffe' , 'ffes'
269
+ rule 'af' , 'aves'
270
+ rule 'us' , 'uses'
271
+ rule 'ouse' , 'ouses'
272
+ rule 'osis' , 'oses'
273
+ rule 'ox' , 'oxes'
274
+ rule '' , 's'
275
+
276
+ # One-way singular rules.
277
+
278
+ singular_rule 'of' , 'ofs' # proof
279
+ singular_rule 'o' , 'oes' # hero, heroes
280
+ #singular_rule 'f' , 'ves'
281
+
282
+ # One-way plural rules.
283
+
284
+ plural_rule 's' , 'ses'
285
+ plural_rule 'ive' , 'ives' # don't want to snag wife
286
+ plural_rule 'fe' , 'ves' # don't want to snag perspectives
287
+
288
+ end
@@ -0,0 +1,49 @@
1
+ # This class is a wrapper for the functions included
2
+ # in the 'linguistics' gem that return the
3
+ # declensions of a word.
4
+ #
5
+ # Project website: http://deveiate.org/projects/Linguistics/
6
class Treat::Inflectors::Declensors::Linguistics

  require 'treat/loaders/linguistics'

  # Retrieve a declension of a word using the 'linguistics' gem.
  #
  # Returns nil when the entity's category is not :noun,
  # :adjective or :determiner.
  #
  # Options:
  #
  # - (Identifier) :count => :singular, :plural
  #
  # Raises Treat::Exception if :count is missing.
  #
  # NOTE(review): only :plural is implemented here; any other
  # :count value (including :singular) returns nil.
  def self.declense(entity, options = {})

    category = entity.check_has(:category)
    return unless [:noun, :adjective, :determiner].include?(category)

    if !options[:count]
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    klass = Treat::Loaders::Linguistics.load(entity.language)
    string = entity.to_s

    return unless options[:count] == :plural

    # NOTE(review): :verb looks unreachable given the category
    # guard above -- confirm that entity.category always equals
    # the value returned by check_has(:category).
    specialised = entity.has?(:category) &&
      [:noun, :adjective, :verb].include?(entity.category)

    # Generic pluralization for categories without a dedicated
    # plural_<category> method (e.g. determiners).
    return klass.plural(string) unless specialised

    silence_warnings do
      klass.send(:"plural_#{entity.category}", string)
    end

  end

end