treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
data/lib/treat/feature.rb DELETED
@@ -1,58 +0,0 @@
1
- module Treat
2
- # This class represents a probabilistic feature;
3
- # it is currently not used, because its
4
- # behaviour is non-deterministic. Perhaps at
5
- # some point this will be of value for specific
6
- # algorithms and so I'm keeping it here.
7
- class Feature
8
- # Undefine all methods, except those that
9
- # create any problems (e.g. with serializing).
10
- instance_methods.each do |meth|
11
- undef_method(meth) if meth !~
12
- /^(__|object_id|class|instance_variables|instance_variable_get)/
13
- end
14
- # Allows to read the probability hash,
15
- # the possible values of the feature,
16
- # and the best value (with highest P).
17
- attr_reader :p_hash, :values, :best
18
- # Initialize the feature with a hash
19
- # of features => probabilities.
20
- def initialize(p_hash)
21
- @p_hash = p_hash
22
- normalize
23
- max = @p_hash.values.max
24
- @best = @p_hash.select { |i,j| j == max }.keys.sample
25
- @values = @p_hash.keys
26
- type = @values[0].class
27
- if type == ::Symbol || type == ::NilClass
28
- @object = @best
29
- else
30
- @object = type.new(@best)
31
- end
32
- end
33
- # Normalize the probabilities, so that
34
- # the sum of all probabilities is 1,
35
- # except if the sum of all probabilities
36
- # is already below one (in which case we
37
- # assume that the feature is intentionally
38
- # incomplete).
39
- def normalize
40
- sum = @p_hash.inject(0.0) { |r, e| r + e[1] }
41
- return if sum <= 1.0
42
- p = {}
43
- @p_hash.each { |k,v| p[k] = v.to_f/sum.to_f }
44
- @p_hash = p
45
- end
46
- # Find the probability of value x.
47
- def probability(x)
48
- @p_hash[x] ? @p_hash[x] : 0
49
- end
50
- # Alias for probability: p(x).
51
- alias :p :probability
52
- # Catch all other methods than the ones
53
- # explicitly defined.
54
- def method_missing(sym, *args, &block)
55
- @object.send(sym, *args, &block)
56
- end
57
- end
58
- end
data/lib/treat/features.rb DELETED
@@ -1,7 +0,0 @@
1
- module Treat
2
- module Features
3
- Time = Struct.new(:start, :end, :recurrence, :recurrence_interval)
4
- Roles = Struct.new(:subject, :verb, :object, :patient, :agent)
5
- Date = Struct.new(:year, :month, :day)
6
- end
7
- end
data/lib/treat/formatters/visualizers/short_value.rb DELETED
@@ -1,29 +0,0 @@
1
- module Treat
2
- module Formatters
3
- module Visualizers
4
- class ShortValue
5
- # Default options for the visualizer.
6
- DefaultOptions = { :max_words => 6, :max_length => 30 }
7
- # Returns the text value of an entity, shortend
8
- # with [..] if the value is longer than :max_words
9
- # or longer than :max_length.
10
- #
11
- # Options:
12
- # - (Integer) :max_words => the maximum number
13
- # of words in an entity before it is shortened.
14
- # - (Integer) :max_length => the maximum number
15
- # of characters in an entity before it is shortened.s
16
- def self.visualize(entity, options = {})
17
- options = DefaultOptions.merge(options)
18
- words = entity.to_s.split(' ')
19
- if words.size < options[:max_words] ||
20
- entity.to_s.length < options[:max_length]
21
- entity.to_s
22
- else
23
- words[0..2].join(' ') + ' [...] ' + words[-3..-1].join(' ')
24
- end
25
- end
26
- end
27
- end
28
- end
29
- end
data/lib/treat/formatters/visualizers/txt.rb DELETED
@@ -1,45 +0,0 @@
1
- module Treat
2
- module Formatters
3
- module Visualizers
4
- # Creates a plain text visualization of an entity.
5
- class Txt
6
- # The default options for the visualizer.
7
- DefaultOptions = { :sep => ' ' }
8
- # Obtain a plain text visualization of the entity,
9
- # with no additional information.
10
- #
11
- # Options:
12
- # (String) :sep => the separator to use between words.
13
- def self.visualize(entity, options = {})
14
- options[:first] = true unless options[:first] == false
15
- first = options[:first]
16
- options = DefaultOptions.merge(options)
17
- return entity.value.dup if !entity.has_children?
18
- value = ''
19
- options[:first] = false
20
- entity.each do |child|
21
- value += "\n\n" if child.is_a?(Treat::Entities::Section)
22
- if child.is_a?(Treat::Entities::Token) || child.value != ''
23
- # Remove the trailing space for tokens that
24
- # 'stick' to the previous one, such
25
- # as punctuation symbols and clitics.
26
- if child.is_a?(Treat::Entities::Punctuation) ||
27
- child.is_a?(Treat::Entities::Clitic)
28
- value.strip!
29
- end
30
- value += child.value + options[:sep]
31
- else
32
- value += visualize(child, options)
33
- end
34
- if child.is_a?(Treat::Entities::Title) ||
35
- child.is_a?(Treat::Entities::Paragraph)
36
- value += "\n\n"
37
- end
38
- end
39
- value = value.strip if first
40
- value
41
- end
42
- end
43
- end
44
- end
45
- end
data/lib/treat/group.rb DELETED
@@ -1,106 +0,0 @@
1
- module Treat
2
- module Group
3
- # Modify the extended class.
4
- def self.extended(group)
5
- group.module_eval do
6
- class << self
7
- attr_accessor :type, :default, :targets
8
- attr_accessor :presets, :preprocessors, :postprocessors
9
- end
10
- self.presets = {}
11
- self.preprocessors = {}
12
- self.postprocessors = {}
13
- # Return the method corresponding to the group.
14
- # This method resolves the name of the method
15
- # that a group should provide based on the name
16
- # of the group. Basically, if the group ends in
17
- # -ers, the verb corresponding to the group is
18
- # returned (tokenizers -> tokenize, inflectors ->
19
- # inflect). Otherwise, the name of the method
20
- # is the same as that of the group (encoding ->
21
- # encoding, tag -> tag).
22
- @method = nil
23
- def self.method
24
- return @method if @method
25
- m = ucc(cl(self))
26
- if m[-3..-1] == 'ers'
27
- if ['k', 't', 'm', 'd', 'g', 'n', 'x', 'h'].include? m[-4]
28
- n = m[0..-4]
29
- n = n[0..-2] if n[-1] == n[-2]
30
- else
31
- n = m[0..-3]
32
- end
33
- elsif m[-3..-1] == 'ors'
34
- n = m[0..-4] + 'e'
35
- else
36
- n = m
37
- end
38
- @method = n.intern
39
- end
40
- end
41
- group.list
42
- end
43
- # Create a new algorithm within the group. Once
44
- # the algorithm is added, it will be automatically
45
- # installed on all the targets of the group.
46
- def add(class_name, &block)
47
- klass = self.const_set(cc(class_name).intern, Class.new)
48
- method = self.method
49
- @@list[ucc(cl(self))] << class_name
50
- klass.send(:define_singleton_method, method) do |entity, options={}|
51
- block.call(entity, options)
52
- end
53
- end
54
- # Boolean - does the group have the supplied class
55
- # included in its targets?
56
- def has_target?(target, strict = false)
57
- is_target = false
58
- self.targets.each do |entity_type|
59
- entity_type = Entities.const_get(cc(entity_type))
60
- if target < entity_type || entity_type == target
61
- is_target = true; break
62
- end
63
- end
64
- is_target
65
- end
66
- # Cache the list of adaptors to improve performance.
67
- @@list = {}
68
- # Populates once the list of the adaptors in the group
69
- # by crawling the filesystem.
70
- def list
71
- mod = ucc(cl(self))
72
- if @@list[mod].nil?
73
- @@list[mod] = []
74
- dirs = Dir.glob("#{Treat.lib}/treat/*/#{mod}/*.rb")
75
- dirs.each do |file|
76
- @@list[mod] <<
77
- file.split('/')[-1][0..-4].intern
78
- end
79
- end
80
- @@list[mod]
81
- end
82
- # Get constants in this module, excluding those
83
- # defined by parent modules.
84
- def const_get(const)
85
- super(const, false)
86
- end
87
- # Lazy load the classes in the group.
88
- def const_missing(const)
89
- bits = self.ancestors[0].to_s.split('::')
90
- bits.collect! { |bit| ucc(bit) }
91
- file = bits.join('/') + "/#{ucc(const)}"
92
- if not File.readable?("#{Treat.lib}/#{file}.rb")
93
- raise Treat::Exception,
94
- "File '#{file}.rb' corresponding to requested worker "+
95
- "#{self}::#{const} does not exist."
96
- else
97
- require file
98
- if not const_defined?(const)
99
- raise Treat::Exception,
100
- "File #{file} does not define #{self}::#{const}."
101
- end
102
- const_get(const)
103
- end
104
- end
105
- end
106
- end
data/lib/treat/helpers/linguistics_loader.rb DELETED
@@ -1,18 +0,0 @@
1
- module Treat
2
- module Helpers
3
- class LinguisticsLoader
4
- silence_warnings { require 'linguistics' }
5
- def self.load(language)
6
- begin
7
- l = language.to_s.upcase
8
- klass = nil
9
- silence_warnings { klass = ::Linguistics.const_get(l) }
10
- klass
11
- rescue RuntimeError
12
- raise "Ruby Linguistics does not have a module " +
13
- " installed for the #{language} language."
14
- end
15
- end
16
- end
17
- end
18
- end
data/lib/treat/inflectors/cardinal_words/linguistics.rb DELETED
@@ -1,42 +0,0 @@
1
- module Treat
2
- module Inflectors
3
- module CardinalWords
4
- # This class is a wrapper for the functions included
5
- # in the 'linguistics' gem that allow to describe a
6
- # number in words in cardinal form.
7
- #
8
- # Project website: http://deveiate.org/projects/Linguistics/
9
- class Linguistics
10
- require 'treat/helpers/linguistics_loader'
11
- # Return the description of a cardinal number in words.
12
- #
13
- # Options:
14
- #
15
- # - :group => Controls how many numbers at a time are
16
- # grouped together. Valid values are 0 (normal grouping),
17
- # 1 (single-digit grouping, e.g., “one, two, three, four”),
18
- # 2 (double-digit grouping, e.g., “twelve, thirty-four”, or
19
- # 3 (triple-digit grouping, e.g., “one twenty-three, four”).
20
- # - :comma => Set the character/s used to separate word groups.
21
- # Defaults to ", ".
22
- # - :and => Set the word and/or characters used where ' and '
23
- # (the default) is normally used. Setting :and to ' ', for
24
- # example, will cause 2556 to be returned as “two-thousand,
25
- # five hundred fifty-six” instead of “two-thousand, five
26
- # hundred and fifty-six”.
27
- # - :zero => Set the word used to represent the numeral 0 in
28
- # the result. 'zero' is the default.
29
- # - :decimal => Set the translation of any decimal points in
30
- # the number; the default is 'point'.
31
- # - :asArray If set to a true value, the number will be returned
32
- # as an array of word groups instead of a String.
33
- #
34
- # More specific options when using :type => :ordinal:
35
- def self.cardinal_words(entity, options = {})
36
- klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
37
- klass.numwords(entity.to_s, options)
38
- end
39
- end
40
- end
41
- end
42
- end
data/lib/treat/inflectors/conjugations/linguistics.rb DELETED
@@ -1,36 +0,0 @@
1
- module Treat
2
- module Inflectors
3
- module Conjugations
4
- # This class is a wrapper for the functions included
5
- # in the 'linguistics' gem that allow to conjugate verbs.
6
- #
7
- # Project website: http://deveiate.org/projects/Linguistics/
8
- class Linguistics
9
- require 'treat/helpers/linguistics_loader'
10
- # Conjugate a verb using ruby linguistics with the specified
11
- # mode, tense, count and person.
12
- #
13
- # Options:
14
- #
15
- # - (Symbol) :mode => :infinitive, :indicative, :subjunctive, :participle
16
- # - (Symbol) :tense => :past, :present, :future
17
- # - (Symbol) :count => :singular, :plural
18
- # - (Symbol) :person => :first, :second, :third
19
- def self.conjugations(entity, parameters)
20
- klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
21
- if parameters[:mode] == :infinitive
22
- silence_warnings { klass.infinitive(entity.to_s) }
23
- elsif parameters[:mode] == :participle && parameters[:tense] == :present
24
- silence_warnings { klass.present_participle(entity.to_s) }
25
- elsif parameters[:count] == :plural && parameters.size == 1
26
- silence_warnings { klass.plural_verb(entity.to_s) }
27
- else
28
- raise Treat::Exception,
29
- 'This combination of modes, tenses, persons ' +
30
- 'and/or counts is not presently supported.'
31
- end
32
- end
33
- end
34
- end
35
- end
36
- end
data/lib/treat/inflectors/declensions/english.rb DELETED
@@ -1,319 +0,0 @@
1
- module Treat
2
- module Inflectors
3
- module Declensions
4
- # This class is a wrapper for the Inflect module,
5
- # copied from the unmaintained 'english' ruby gem,
6
- # created by Thomas Sawyer.
7
- #
8
- # Released under the MIT License.
9
- #
10
- # http://english.rubyforge.org
11
- class English
12
- # Retrieve the declensions (singular, plural)
13
- # of an english word using a class lifted from
14
- # the 'english' ruby gem.
15
- def self.declensions(entity, options)
16
- unless options[:count]
17
- raise Treat::Exception,
18
- "Must supply option count (:singular or :plural)."
19
- end
20
- string = entity.to_s
21
- if entity.category == :verb
22
- raise Treat::Exception,
23
- "Cannot retrieve the declensions of a verb. " +
24
- "Use #singular_verb and #plural_verb instead."
25
- elsif options[:count] == :plural
26
- Inflect.plural(string)
27
- elsif options[:count] == :singular
28
- Inflect.singular(string)
29
- else
30
- {:singular => Inflect.singular(string),
31
- :plural => Inflect.plural(string)}
32
- end
33
- end
34
-
35
- module Inflect
36
-
37
- @singular_of = {}
38
- @plural_of = {}
39
-
40
- @singular_rules = []
41
- @plural_rules = []
42
-
43
- # This class provides the DSL for creating inflections, you can add additional rules.
44
- # Examples:
45
- #
46
- # word "ox", "oxen"
47
- # word "octopus", "octopi"
48
- # word "man", "men"
49
- #
50
- # rule "lf", "lves"
51
- #
52
- # word "equipment"
53
- #
54
- # Rules are evaluated by size, so rules you add to override specific cases should be longer than the rule
55
- # it overrides. For instance, if you want "pta" to pluralize to "ptas", even though a general purpose rule
56
- # for "ta" => "tum" already exists, simply add a new rule for "pta" => "ptas", and it will automatically win
57
- # since it is longer than the old rule.
58
- #
59
- # Also, single-word exceptions win over general words ("ox" pluralizes to "oxen", because it's a single word
60
- # exception, even though "fox" pluralizes to "foxes")
61
- class << self
62
- # Define a general two-way exception.
63
- #
64
- # This also defines a general rule, so foo_child will correctly become
65
- # foo_children.
66
- #
67
- # Whole words also work if they are capitalized (Goose => Geese).
68
- def word(singular, plural=nil)
69
- plural = singular unless plural
70
- singular_word(singular, plural)
71
- plural_word(singular, plural)
72
- rule(singular, plural)
73
- end
74
-
75
- # Define a singularization exception.
76
- def singular_word(singular, plural)
77
- @singular_of[plural] = singular
78
- @singular_of[plural.capitalize] = singular.capitalize
79
- end
80
-
81
- # Define a pluralization exception.
82
- def plural_word(singular, plural)
83
- @plural_of[singular] = plural
84
- @plural_of[singular.capitalize] = plural.capitalize
85
- end
86
-
87
- # Define a general rule.
88
- def rule(singular, plural)
89
- singular_rule(singular, plural)
90
- plural_rule(singular, plural)
91
- end
92
-
93
- # Define a singularization rule.
94
- def singular_rule(singular, plural)
95
- @singular_rules << [singular, plural]
96
- end
97
-
98
- # Define a plurualization rule.
99
- def plural_rule(singular, plural)
100
- @plural_rules << [singular, plural]
101
- end
102
-
103
- # Read prepared singularization rules.
104
- def singularization_rules
105
- if defined?(@singularization_regex) && @singularization_regex
106
- return [@singularization_regex, @singularization_hash]
107
- end
108
- # No sorting needed: Regexen match on longest string
109
- @singularization_regex = Regexp.new("(" + @singular_rules.map {|s,p| p}.join("|") + ")$", "i")
110
- @singularization_hash = Hash[*@singular_rules.flatten].invert
111
- [@singularization_regex, @singularization_hash]
112
- end
113
-
114
- # Read prepared singularization rules.
115
- #def singularization_rules
116
- # return @singularization_rules if @singularization_rules
117
- # sorted = @singular_rules.sort_by{ |s, p| "#{p}".size }.reverse
118
- # @singularization_rules = sorted.collect do |s, p|
119
- # [ /#{p}$/, "#{s}" ]
120
- # end
121
- #end
122
-
123
- # Read prepared pluralization rules.
124
- def pluralization_rules
125
- if defined?(@pluralization_regex) && @pluralization_regex
126
- return [@pluralization_regex, @pluralization_hash]
127
- end
128
- @pluralization_regex = Regexp.new("(" + @plural_rules.map {|s,p| s}.join("|") + ")$", "i")
129
- @pluralization_hash = Hash[*@plural_rules.flatten]
130
- [@pluralization_regex, @pluralization_hash]
131
- end
132
-
133
- # Read prepared pluralization rules.
134
- #def pluralization_rules
135
- # return @pluralization_rules if @pluralization_rules
136
- # sorted = @plural_rules.sort_by{ |s, p| "#{s}".size }.reverse
137
- # @pluralization_rules = sorted.collect do |s, p|
138
- # [ /#{s}$/, "#{p}" ]
139
- # end
140
- #end
141
-
142
- #
143
- def singular_of ; @singular_of ; end
144
-
145
- #
146
- def plural_of ; @plural_of ; end
147
-
148
- # Convert an English word from plurel to singular.
149
- #
150
- # "boys".singular #=> boy
151
- # "tomatoes".singular #=> tomato
152
- #
153
- def singular(word)
154
- return "" if word == ""
155
- if result = singular_of[word]
156
- return result.dup
157
- end
158
- result = word.dup
159
-
160
- regex, hash = singularization_rules
161
- result.sub!(regex) {|m| hash[m]}
162
- singular_of[word] = result
163
- return result
164
- #singularization_rules.each do |(match, replacement)|
165
- # break if result.gsub!(match, replacement)
166
- #end
167
- #return result
168
- end
169
-
170
- # Alias for #singular (a Railism).
171
- #
172
- alias_method(:singularize, :singular)
173
-
174
- # Convert an English word from singular to plurel.
175
- #
176
- # "boy".plural #=> boys
177
- # "tomato".plural #=> tomatoes
178
- #
179
- def plural(word)
180
- return "" if word == ""
181
- if result = plural_of[word]
182
- return result.dup
183
- end
184
- #return self.dup if /s$/ =~ self # ???
185
- result = word.dup
186
-
187
- regex, hash = pluralization_rules
188
- result.sub!(regex) {|m| hash[m]}
189
- plural_of[word] = result
190
- return result
191
- #pluralization_rules.each do |(match, replacement)|
192
- # break if result.gsub!(match, replacement)
193
- #end
194
- #return result
195
- end
196
-
197
- # Alias for #plural (a Railism).
198
- alias_method(:pluralize, :plural)
199
-
200
- # Clear all rules.
201
- def clear(type = :all)
202
- if type == :singular || type == :all
203
- @singular_of = {}
204
- @singular_rules = []
205
- @singularization_rules, @singularization_regex = nil, nil
206
- end
207
- if type == :plural || type == :all
208
- @singular_of = {}
209
- @singular_rules = []
210
- @singularization_rules, @singularization_regex = nil, nil
211
- end
212
- end
213
- end
214
-
215
- # One argument means singular and plural are the same.
216
-
217
- word 'equipment'
218
- word 'information'
219
- word 'money'
220
- word 'species'
221
- word 'series'
222
- word 'fish'
223
- word 'sheep'
224
- word 'moose'
225
- word 'hovercraft'
226
- word 'news'
227
- word 'rice'
228
- word 'plurals'
229
-
230
- # Two arguments defines a singular and plural exception.
231
-
232
- word 'Swiss' , 'Swiss'
233
- word 'alias' , 'aliases'
234
- word 'analysis' , 'analyses'
235
- #word 'axis' , 'axes'
236
- word 'basis' , 'bases'
237
- word 'buffalo' , 'buffaloes'
238
- word 'child' , 'children'
239
- #word 'cow' , 'kine'
240
- word 'crisis' , 'crises'
241
- word 'criterion' , 'criteria'
242
- word 'datum' , 'data'
243
- word 'goose' , 'geese'
244
- word 'hive' , 'hives'
245
- word 'index' , 'indices'
246
- word 'life' , 'lives'
247
- word 'louse' , 'lice'
248
- word 'man' , 'men'
249
- word 'matrix' , 'matrices'
250
- word 'medium' , 'media'
251
- word 'mouse' , 'mice'
252
- word 'movie' , 'movies'
253
- word 'octopus' , 'octopi'
254
- word 'ox' , 'oxen'
255
- word 'person' , 'people'
256
- word 'potato' , 'potatoes'
257
- word 'quiz' , 'quizzes'
258
- word 'shoe' , 'shoes'
259
- word 'status' , 'statuses'
260
- word 'testis' , 'testes'
261
- word 'thesis' , 'theses'
262
- word 'thief' , 'thieves'
263
- word 'tomato' , 'tomatoes'
264
- word 'torpedo' , 'torpedoes'
265
- word 'vertex' , 'vertices'
266
- word 'virus' , 'viri'
267
- word 'wife' , 'wives'
268
-
269
- # One-way singularization exception (convert plural to singular).
270
-
271
- singular_word 'cactus', 'cacti'
272
-
273
- # One-way pluralizaton exception (convert singular to plural).
274
-
275
- plural_word 'axis', 'axes'
276
-
277
- # General rules.
278
-
279
- rule 'rf' , 'rves'
280
- rule 'ero' , 'eroes'
281
- rule 'ch' , 'ches'
282
- rule 'sh' , 'shes'
283
- rule 'ss' , 'sses'
284
- #rule 'ess' , 'esses'
285
- rule 'ta' , 'tum'
286
- rule 'ia' , 'ium'
287
- rule 'ra' , 'rum'
288
- rule 'ay' , 'ays'
289
- rule 'ey' , 'eys'
290
- rule 'oy' , 'oys'
291
- rule 'uy' , 'uys'
292
- rule 'y' , 'ies'
293
- rule 'x' , 'xes'
294
- rule 'lf' , 'lves'
295
- rule 'ffe' , 'ffes'
296
- rule 'af' , 'aves'
297
- rule 'us' , 'uses'
298
- rule 'ouse' , 'ouses'
299
- rule 'osis' , 'oses'
300
- rule 'ox' , 'oxes'
301
- rule '' , 's'
302
-
303
- # One-way singular rules.
304
-
305
- singular_rule 'of' , 'ofs' # proof
306
- singular_rule 'o' , 'oes' # hero, heroes
307
- #singular_rule 'f' , 'ves'
308
-
309
- # One-way plural rules.
310
-
311
- plural_rule 's' , 'ses'
312
- plural_rule 'ive' , 'ives' # don't want to snag wife
313
- plural_rule 'fe' , 'ves' # don't want to snag perspectives
314
-
315
- end
316
- end
317
- end
318
- end
319
- end