treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -1,6 +1,6 @@
1
1
  # This class is a wrapper for the Psych YAML
2
2
  # parser; it unserializes YAML files.
3
- class Treat::Formatters::Unserializers::YAML
3
+ class Treat::Workers::Formatters::Unserializers::YAML
4
4
 
5
5
  silence_warnings do
6
6
  # Require the Psych YAML parser.
@@ -1,4 +1,4 @@
1
- class Treat::Formatters::Visualizers::DOT
1
+ class Treat::Workers::Formatters::Visualizers::DOT
2
2
 
3
3
  require 'date'
4
4
  DefaultOptions = {
@@ -1,7 +1,7 @@
1
1
  # This class allows the visualization of
2
2
  # an entity in standoff format; for example:
3
3
  # (S (NP John) (VP has (VP come))).
4
- class Treat::Formatters::Visualizers::Standoff
4
+ class Treat::Workers::Formatters::Visualizers::Standoff
5
5
 
6
6
  # Start out with an indent of 0.
7
7
  DefaultOptions = { :indent => 0 }
@@ -44,8 +44,7 @@ class Treat::Formatters::Visualizers::Standoff
44
44
  end
45
45
 
46
46
  def self.ptb_escape(val)
47
- Treat::Universalisation::Tags::
48
- PTBEscapeCharacters.each do |char, esc|
47
+ Treat.tags.ptb.escape_characters.each do |char, esc|
49
48
  val.gsub!(char, val)
50
49
  end
51
50
 
@@ -1,6 +1,6 @@
1
1
  # This class generates an ASCII representation
2
2
  # of a tree of entities.
3
- class Treat::Formatters::Visualizers::Tree
3
+ class Treat::Workers::Formatters::Visualizers::Tree
4
4
 
5
5
  # Start out with an indent at 0.
6
6
  DefaultOptions = { :indent => 0 }
@@ -1,18 +1,18 @@
1
- module Treat::Groupable
1
+ module Treat::Workers::Group
2
2
 
3
3
  # Lazily load the worker classes in the group.
4
4
  def const_missing(const)
5
5
  bits = self.ancestors[0].to_s.split('::')
6
6
  bits.collect! { |bit| ucc(bit) }
7
7
  file = bits.join('/') + "/#{ucc(const)}"
8
- if not File.readable?(Treat.lib + "#{file}.rb")
8
+ if not File.readable?(Treat.paths.lib + "#{file}.rb")
9
9
  raise Treat::Exception,
10
10
  "File '#{file}.rb' corresponding to " +
11
11
  "requested worker #{self}::#{const} " +
12
12
  "does not exist."
13
13
  else
14
14
  require file
15
- if not const_defined?(const)
15
+ if not self.const_defined?(const)
16
16
  raise Treat::Exception,
17
17
  "File #{file} does not define " +
18
18
  "#{self}::#{const}."
@@ -29,7 +29,7 @@ module Treat::Groupable
29
29
  mod = ucc(cl(self))
30
30
  if @@list[mod].nil?
31
31
  @@list[mod] = []
32
- dirs = Dir[Treat.lib + "treat/*/#{mod}/*.rb"]
32
+ dirs = Dir[Treat.paths.lib + "treat/workers/*/#{mod}/*.rb"]
33
33
  dirs.each do |file|
34
34
  @@list[mod] <<
35
35
  file.split('/')[-1][0..-4].intern
@@ -100,6 +100,7 @@ module Treat::Groupable
100
100
  end
101
101
 
102
102
  self.recursive = false
103
+ self.list
103
104
 
104
105
  # Return the method corresponding to the group.
105
106
  # This method resolves the name of the method
@@ -116,11 +117,7 @@ module Treat::Groupable
116
117
  m = ucc(cl(self)).dup
117
118
  if m[-4..-1] == 'zers'
118
119
  if type == :annotator
119
- if m[-6] == 'l'
120
- m[-5..-1] = ''
121
- else
122
- m[-5..-1] = 'y'
123
- end
120
+ m[-5..-1] = m[-6] == 'l' ? '' : 'y'
124
121
  else
125
122
  m = m[0..-3]
126
123
  end
@@ -147,9 +144,6 @@ module Treat::Groupable
147
144
  @method = n.intern
148
145
  end
149
146
 
150
- # Populate the group's list.
151
- group.list
152
-
153
147
  end
154
148
 
155
149
  end
@@ -3,10 +3,14 @@
3
3
  # number in words in cardinal form.
4
4
  #
5
5
  # Project website: http://deveiate.org/projects/Linguistics/
6
- module Treat::Inflectors::Cardinalizers::Linguistics
6
+ module Treat::Workers::Inflectors::Cardinalizers::Linguistics
7
7
 
8
8
  require 'treat/loaders/linguistics'
9
9
 
10
+ DefaultOptions = {
11
+ :language => Treat.core.language.default
12
+ }
13
+
10
14
  # Return the description of a cardinal number in words.
11
15
  #
12
16
  # Options:
@@ -32,8 +36,9 @@ module Treat::Inflectors::Cardinalizers::Linguistics
32
36
  #
33
37
  # More specific options when using :type => :ordinal:
34
38
  def self.cardinal(entity, options = {})
39
+ options = DefaultOptions.merge(options)
35
40
  Treat::Loaders::Linguistics.
36
- load(entity.language).
41
+ load(options[:language]).
37
42
  numwords(entity.to_s, options)
38
43
  end
39
44
 
@@ -2,7 +2,7 @@
2
2
  # in the 'linguistics' gem that allow conjugating verbs.
3
3
  #
4
4
  # Project website: http://deveiate.org/projects/Linguistics/
5
- module Treat::Inflectors::Conjugators::Linguistics
5
+ module Treat::Workers::Inflectors::Conjugators::Linguistics
6
6
 
7
7
  require 'treat/loaders/linguistics'
8
8
 
@@ -12,10 +12,10 @@ module Treat::Inflectors::Conjugators::Linguistics
12
12
 
13
13
  Forms = {
14
14
  :present_participle =>
15
- {:mode => :participle, :tense => :present},
16
- :infinitive => {:mode => :infinitive},
17
- :plural_verb => {:count => :plural},
18
- :singular_verb => {:count => :singular}
15
+ {:mode => 'participle', :tense => 'present'},
16
+ :infinitive => {:mode => 'infinitive'},
17
+ :plural_verb => {:count => 'plural'},
18
+ :singular_verb => {:count => 'singular'}
19
19
  }
20
20
 
21
21
  # Conjugate a verb using ruby linguistics with the specified
@@ -33,16 +33,16 @@ module Treat::Inflectors::Conjugators::Linguistics
33
33
 
34
34
  options = DefaultOptions.merge(options)
35
35
  cat = entity.check_has(:category)
36
- return if cat != :verb && options[:strict]
37
-
36
+ return if cat != 'verb' && options[:strict]
37
+
38
38
  options = Forms[options[:form]] if options[:form]
39
-
39
+
40
40
  klass = Treat::Loaders::Linguistics.load(entity.language)
41
- if options[:mode] == :infinitive
41
+ if options[:mode] == 'infinitive'
42
42
  silence_warnings { klass.infinitive(entity.to_s) }
43
- elsif options[:mode] == :participle && options[:tense] == :present
43
+ elsif options[:mode] == 'participle' && options[:tense] == 'present'
44
44
  silence_warnings { klass.present_participle(entity.to_s) }
45
- elsif options[:count] == :plural && options.size == 1
45
+ elsif options[:count] == 'plural' && options.size == 1
46
46
  silence_warnings { klass.plural_verb(entity.to_s) }
47
47
  else
48
48
  raise Treat::Exception,
@@ -1,6 +1,6 @@
1
1
  # This class is a wrapper for the ActiveSupport
2
2
  # declension tools.
3
- class Treat::Inflectors::Declensors::English
3
+ class Treat::Workers::Inflectors::Declensors::English
4
4
 
5
5
  require 'active_support/inflector/inflections'
6
6
 
@@ -8,7 +8,7 @@ class Treat::Inflectors::Declensors::English
8
8
  def self.declense(entity, options)
9
9
 
10
10
  cat = entity.check_has(:category)
11
- unless [:noun, :adjective, :determiner].
11
+ unless ['noun', 'adjective', 'determiner'].
12
12
  include?(cat)
13
13
  return
14
14
  end
@@ -5,7 +5,7 @@
5
5
  # Released under the MIT License.
6
6
  #
7
7
  # http://english.rubyforge.org
8
- module Treat::Inflectors::Declensors::English::Inflect
8
+ module Treat::Workers::Inflectors::Declensors::English::Inflect
9
9
 
10
10
  @singular_of = {}
11
11
  @plural_of = {}
@@ -5,7 +5,7 @@
5
5
  # Released under the MIT License.
6
6
  #
7
7
  # http://english.rubyforge.org
8
- class Treat::Inflectors::Declensors::English
8
+ class Treat::Workers::Inflectors::Declensors::English
9
9
 
10
10
  require 'treat/inflectors/declensors/english/inflect'
11
11
 
@@ -15,7 +15,7 @@ class Treat::Inflectors::Declensors::English
15
15
  def self.declense(entity, options)
16
16
 
17
17
  cat = entity.check_has(:category)
18
- unless [:noun, :adjective, :determiner].
18
+ unless ['noun', 'adjective', 'determiner'].
19
19
  include?(cat)
20
20
  return
21
21
  end
@@ -3,7 +3,7 @@
3
3
  # declensions of a word.
4
4
  #
5
5
  # Project website: http://deveiate.org/projects/Linguistics/
6
- class Treat::Inflectors::Declensors::Linguistics
6
+ class Treat::Workers::Inflectors::Declensors::Linguistics
7
7
 
8
8
  require 'treat/loaders/linguistics'
9
9
 
@@ -15,7 +15,7 @@ class Treat::Inflectors::Declensors::Linguistics
15
15
  def self.declense(entity, options = {})
16
16
 
17
17
  cat = entity.check_has(:category)
18
- unless [:noun, :adjective, :determiner].
18
+ unless ['noun', 'adjective', 'determiner'].
19
19
  include?(cat)
20
20
  return
21
21
  end
@@ -28,10 +28,10 @@ class Treat::Inflectors::Declensors::Linguistics
28
28
  klass = Treat::Loaders::Linguistics.load(entity.language)
29
29
  string = entity.to_s
30
30
 
31
- if options[:count] == :plural
31
+ if options[:count] == 'plural'
32
32
 
33
33
  if entity.has?(:category) &&
34
- [:noun, :adjective, :verb].
34
+ ['noun', 'adjective', 'verb'].
35
35
  include?(entity.category)
36
36
  silence_warnings do
37
37
  klass.send(
@@ -3,14 +3,20 @@
3
3
  # number in words in ordinal form.
4
4
  #
5
5
  # Project website: http://deveiate.org/projects/Linguistics/
6
- class Treat::Inflectors::Ordinalizers::Linguistics
6
+ class Treat::Workers::Inflectors::Ordinalizers::Linguistics
7
7
 
8
8
  require 'treat/loaders/linguistics'
9
9
 
10
+ DefaultOptions = {
11
+ :language => Treat.core.language.default
12
+ }
13
+
10
14
  # Describe a number in words in ordinal form, using the
11
15
  # 'linguistics' gem.
12
16
  def self.ordinal(number, options = {})
13
- klass = Treat::Loaders::Linguistics.load(number.language)
17
+ options = DefaultOptions.merge(options)
18
+ klass = Treat::Loaders::
19
+ Linguistics.load(options[:language])
14
20
  klass.ordinate(number.to_s)
15
21
  end
16
22
 
@@ -2,7 +2,7 @@
2
2
  # Porter stemming algorithm, ported to Ruby from a
3
3
  # version coded up in Perl. This is a simplified
4
4
  # implementation; for a true and fast Porter stemmer,
5
- # see Treat::Inflectors::Stemmers::PorterC.
5
+ # see Treat::Workers::Inflectors::Stemmers::PorterC.
6
6
  #
7
7
  # Authored by Ray Pereda (raypereda@hotmail.com).
8
8
  # Unknown license.
@@ -10,7 +10,7 @@
10
10
  # Original paper: Porter, 1980. An algorithm for suffix stripping,
11
11
  # Program, Vol. 14, no. 3, pp 130-137,
12
12
  # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
13
- class Treat::Inflectors::Stemmers::Porter
13
+ class Treat::Workers::Inflectors::Stemmers::Porter
14
14
 
15
15
  # Returns the stem of a word using a native Porter stemmer.
16
16
  #
@@ -5,7 +5,7 @@
5
5
  # Original paper: Porter, 1980. An algorithm for suffix stripping,
6
6
  # Program, Vol. 14, no. 3, pp 130-137,
7
7
  # Original C implementation: http://www.tartarus.org/~martin/PorterStemmer.
8
- module Treat::Inflectors::Stemmers::PorterC
8
+ module Treat::Workers::Inflectors::Stemmers::PorterC
9
9
 
10
10
  # Require the 'ruby-stemmer' gem.
11
11
  silence_warnings { require 'lingua/stemmer' }
@@ -10,7 +10,7 @@
10
10
  # Original paper: Jenkins, Marie-Claire, Smith, Dan,
11
11
  # Conservative stemming for search and indexing, 2005.
12
12
  # http://www.uea.ac.uk/polopoly_fs/1.85493!stemmer25feb.pdf
13
- class Treat::Inflectors::Stemmers::UEA
13
+ class Treat::Workers::Inflectors::Stemmers::UEA
14
14
 
15
15
  # Require the 'uea-stemmer' gem.
16
16
  silence_warnings { require 'uea-stemmer' }
@@ -1,4 +1,4 @@
1
- class Treat::AI::Classifiers::ID3
1
+ class Treat::Workers::Learners::Classifiers::ID3
2
2
 
3
3
  require 'decisiontree'
4
4
 
@@ -1,5 +1,5 @@
1
1
  # Currently, this MLP is limited to 1 output.
2
- class Treat::AI::Classifiers::MLP
2
+ class Treat::Workers::Learners::Classifiers::MLP
3
3
 
4
4
  require 'ai4r'
5
5
 
@@ -1,20 +1,20 @@
1
1
  # Finds the general part of speech of an entity
2
2
  # (:sentence, :noun_phrase, :verb, :adverb, etc.)
3
3
  # from its tag (e.g. 'S', 'NP', 'VBZ', 'ADV', etc.).
4
- class Treat::Lexicalizers::Categorizers::FromTag
4
+ class Treat::Workers::Lexicalizers::Categorizers::FromTag
5
5
 
6
- Pttc = Treat::Universalisation::Tags::PhraseTagToCategory
7
- Wttc = Treat::Universalisation::Tags::WordTagToCategory
8
- Ptc = Treat::Universalisation::Tags::PunctuationToCategory
6
+ Pttc = Treat.tags.aligned.phrase_tags_to_category
7
+ Wttc = Treat.tags.aligned.word_tags_to_category
8
+ Ptc = Treat.linguistics.punctuation.punct_to_category
9
9
 
10
10
  # Find the category of the entity from its tag.
11
11
  def self.category(entity, options = {})
12
12
 
13
13
  tag = entity.check_has(:tag)
14
14
 
15
- return :unknown if tag.nil? || tag == '' || entity.type == :symbol
16
- return :sentence if tag == 'S' || entity.type == :sentence
17
- return :number if entity.type == :number
15
+ return 'unknown' if tag.nil? || tag == '' || entity.type == :symbol
16
+ return 'sentence' if tag == 'S' || entity.type == :sentence
17
+ return 'number' if entity.type == :number
18
18
 
19
19
  return Ptc[entity.to_s] if entity.type == :punctuation
20
20
 
@@ -32,7 +32,7 @@ class Treat::Lexicalizers::Categorizers::FromTag
32
32
  if entity.has?(:tag_set)
33
33
  ts = entity.get(:tag_set)
34
34
  else
35
- a = entity.ancestor_with_feature(:phrase, :tag_set)
35
+ a = entity.ancestor_with_feature(:tag_set)
36
36
  if a
37
37
  ts = a.get(:tag_set)
38
38
  else
@@ -51,7 +51,7 @@ class Treat::Lexicalizers::Categorizers::FromTag
51
51
  "for token #{entity.to_s}."
52
52
  end
53
53
 
54
- :unknown
54
+ 'unknown'
55
55
 
56
56
  end
57
57
 
@@ -1,5 +1,5 @@
1
1
  # An adaptor for synsets used by the Wordnet gem.
2
- class Treat::Lexicalizers::Sensers::Wordnet::Synset
2
+ class Treat::Workers::Lexicalizers::Sensers::Wordnet::Synset
3
3
 
4
4
  # The POS tag of the word.
5
5
  attr_accessor :pos
@@ -61,7 +61,7 @@ class Treat::Lexicalizers::Sensers::Wordnet::Synset
61
61
  # Respond to the missing method event.
62
62
  def method_missing(sym, *args, &block)
63
63
  ret = @original_synset.send(sym)
64
- if ret.is_a?(Treat::Lexicalizers::Sensers::Wordnet::Synset)
64
+ if ret.is_a?(Treat::Workers::Lexicalizers::Sensers::Wordnet::Synset)
65
65
  self.new(ret)
66
66
  else
67
67
  ret
@@ -1,6 +1,6 @@
1
1
  # Obtain lexical information about a word using the
2
2
  # ruby 'wordnet' gem.
3
- class Treat::Lexicalizers::Sensers::Wordnet
3
+ class Treat::Workers::Lexicalizers::Sensers::Wordnet
4
4
 
5
5
  # Require the 'wordnet' gem.
6
6
  require 'wordnet'
@@ -13,7 +13,7 @@ class Treat::Lexicalizers::Sensers::Wordnet
13
13
  end
14
14
 
15
15
  # Require an adaptor for Wordnet synsets.
16
- require 'treat/lexicalizers/sensers/wordnet/synset'
16
+ require 'treat/workers/lexicalizers/sensers/wordnet/synset'
17
17
 
18
18
  # Noun, adjective and verb indexes.
19
19
  @@indexes = {}
@@ -29,7 +29,7 @@ class Treat::Lexicalizers::Sensers::Wordnet
29
29
  "the :nym option (:synonym, :hypernym, etc.)"
30
30
  end
31
31
 
32
- unless [:noun, :adjective, :verb].
32
+ unless ['noun', 'adjective', 'verb'].
33
33
  include?(word.category)
34
34
  return []
35
35
  end
@@ -45,7 +45,7 @@ class Treat::Lexicalizers::Sensers::Wordnet
45
45
 
46
46
  lemma.synsets.each do |synset|
47
47
  synsets <<
48
- Treat::Lexicalizers::Sensers::Wordnet::Synset.new(synset)
48
+ Treat::Workers::Lexicalizers::Sensers::Wordnet::Synset.new(synset)
49
49
  end
50
50
 
51
51
  ((synsets.collect do |ss|
@@ -7,7 +7,7 @@ begin
7
7
  # will clash with the top-level class 'Word'
8
8
  # we define when syntactic sugar is enabled.
9
9
  rescue TypeError
10
- if Treat.sweetened?
10
+ if Treat.core.syntax.sweetened
11
11
  patch = true
12
12
  # Unset the class Word for the duration
13
13
  # of loading the tagger.
@@ -19,7 +19,7 @@ rescue TypeError
19
19
  end
20
20
  ensure
21
21
  # Reset the class Word if using syntactic sugar.
22
- if Treat.sweetened? && patch
22
+ if Treat.core.syntax.sweetened && patch
23
23
  Object.const_set(:Word, Treat::Entities::Word)
24
24
  end
25
25
  end
@@ -13,11 +13,11 @@
13
13
  # Project website:
14
14
  #
15
15
  # http://rbtagger.rubyforge.org/
16
- module Treat::Lexicalizers::Taggers::Brill
16
+ module Treat::Workers::Lexicalizers::Taggers::Brill
17
17
 
18
18
  require 'rbtagger'
19
19
 
20
- require 'treat/lexicalizers/taggers/brill/patch'
20
+ require 'treat/workers/lexicalizers/taggers/brill/patch'
21
21
 
22
22
  # Hold one instance of the tagger.
23
23
  @@tagger = nil
@@ -32,12 +32,6 @@ module Treat::Lexicalizers::Taggers::Brill
32
32
  # :contextual_rules => String (Contextual rules file to use)
33
33
  def self.tag(entity, options = {})
34
34
 
35
- # Tokenize the sentence/phrase.
36
- if !entity.has_children? &&
37
- !entity.is_a?(Treat::Entities::Token)
38
- entity.tokenize(options)
39
- end
40
-
41
35
  # Create the tagger if necessary
42
36
  @@tagger ||= ::Brill::Tagger.new(options[:lexicon],
43
37
  options[:lexical_rules], options[:contextual_rules])
@@ -12,7 +12,7 @@
12
12
  # Project website: http://engtagger.rubyforge.org/
13
13
  # Original Perl module site:
14
14
  # http://cpansearch.perl.org/src/ACOBURN/Lingua-EN-Tagger-0.15/
15
- class Treat::Lexicalizers::Taggers::Lingua
15
+ class Treat::Workers::Lexicalizers::Taggers::Lingua
16
16
 
17
17
  # Require the 'engtagger' gem.
18
18
  silence_warnings { require 'engtagger' }
@@ -48,11 +48,6 @@ class Treat::Lexicalizers::Taggers::Lingua
48
48
  # particularly words used polysemously.
49
49
  def self.tag(entity, options = {})
50
50
 
51
- if !entity.has_children? &&
52
- !entity.is_a?(Treat::Entities::Token)
53
- entity.tokenize
54
- end
55
-
56
51
  options = DefaultOptions.merge(options)
57
52
 
58
53
  @@tagger ||= ::EngTagger.new(options)
@@ -1,11 +1,11 @@
1
1
  # Wrapper for the Stanford POS tagger.
2
- class Treat::Lexicalizers::Taggers::Stanford
2
+ class Treat::Workers::Lexicalizers::Taggers::Stanford
3
3
 
4
4
  require 'treat/loaders/stanford'
5
5
 
6
6
  # Hold one tagger per language.
7
7
  @@taggers = {}
8
-
8
+
9
9
  # Hold the default options.
10
10
  DefaultOptions = {
11
11
  :tagger_model => nil
@@ -13,75 +13,64 @@ class Treat::Lexicalizers::Taggers::Stanford
13
13
 
14
14
  # Tag the word using one of the Stanford taggers.
15
15
  def self.tag(entity, options = {})
16
-
17
- # Tokenize the sentence/phrase.
18
- if !entity.has_children? &&
19
- !entity.is_a?(Treat::Entities::Token)
20
- entity.tokenize(:stanford, options)
16
+
17
+ # Handle tags for sentences and phrases.
18
+ if entity.is_a?(Treat::Entities::Sentence) ||
19
+ (entity.is_a?(Treat::Entities::Phrase) &&
20
+ !entity.parent_sentence)
21
+
22
+ tag_set = options[:tag_set]
23
+ entity.set :tag_set, tag_set
24
+ end
25
+
26
+ if entity.is_a?(Treat::Entities::Sentence)
27
+ return 'S'
28
+ elsif entity.is_a?(Treat::Entities::Phrase)
29
+ return 'P'
21
30
  end
22
31
 
23
32
  # Handle options and initialize the tagger.
24
33
  lang = entity.language
25
34
  options = get_options(options, lang)
26
- init_tagger(lang)
35
+ init_tagger(lang) unless @@taggers[lang]
27
36
  tokens, list = get_token_list(entity)
28
-
37
+
29
38
  # Do the tagging.
30
39
  i = 0
31
- isolated_token = entity.is_a?(Treat::Entities::Token)
40
+ isolated_token = entity.is_a?(Treat::Entities::Token)
41
+
32
42
  @@taggers[lang].apply(list).each do |tok|
33
43
  tokens[i].set :tag, tok.tag
34
- tokens[i].set :tag_set,
44
+ tokens[i].set :tag_set,
35
45
  options[:tag_set] if isolated_token
36
46
  return tok.tag if isolated_token
37
47
  i += 1
38
48
  end
39
49
 
40
- # Handle tags for sentences and phrases.
41
- if entity.is_a?(Treat::Entities::Sentence) ||
42
- (entity.is_a?(Treat::Entities::Phrase) &&
43
- !entity.parent_sentence)
44
-
45
- tag_set = Treat::Universalisation::Tags::
46
- StanfordTagSetForLanguage[
47
- Treat::Languages.describe(lang)]
48
- entity.set :tag_set, tag_set
49
- end
50
-
51
- if entity.is_a?(Treat::Entities::Sentence)
52
- return 'S'
53
- elsif entity.is_a?(Treat::Entities::Phrase)
54
- return 'P'
55
- end
56
-
57
50
  end
58
-
51
+
59
52
  # Initialize the tagger for a language.
60
- def self.init_tagger(lang)
61
- language = Treat::Languages.describe(lang)
53
+ def self.init_tagger(language)
62
54
  Treat::Loaders::Stanford.load(language)
63
55
  model = StanfordCoreNLP::Config::Models[:pos][language]
64
- model = Treat::Loaders::Stanford.model_path +
56
+ model = Treat.paths.models + 'stanford/' +
65
57
  StanfordCoreNLP::Config::ModelFolders[:pos] + model
66
- @@taggers[lang] ||=
58
+ @@taggers[language] ||=
67
59
  StanfordCoreNLP::MaxentTagger.new(model)
68
60
  end
69
-
61
+
70
62
  # Handle the options for the tagger.
71
- def self.get_options(options, lang)
72
- language = Treat::Languages.describe(lang)
63
+ def self.get_options(options, language)
73
64
  options = DefaultOptions.merge(options)
74
- options[:tag_set] =
75
- StanfordCoreNLP::Config::TagSets[language]
76
65
  if options[:tagger_model]
77
66
  ::StanfordCoreNLP.set_model('pos.model',
78
67
  options[:tagger_model])
79
68
  end
80
- options[:tag_set] =
69
+ options[:tag_set] =
81
70
  StanfordCoreNLP::Config::TagSets[language]
82
71
  options
83
72
  end
84
-
73
+
85
74
  # Retrieve a Java ArrayList object.
86
75
  def self.get_token_list(entity)
87
76
  list = StanfordCoreNLP::ArrayList.new
@@ -95,5 +84,5 @@ class Treat::Lexicalizers::Taggers::Stanford
95
84
  end
96
85
  return tokens, list
97
86
  end
98
-
99
- end
87
+
88
+ end
@@ -0,0 +1,19 @@
1
+ class Treat::Workers::Processors::Chunkers::Autoselect
2
+
3
+ def self.chunk(entity, options = {})
4
+ unless entity.has?(:format)
5
+ raise Treat::Exception,
6
+ "Must have a format to autoselect chunker."
7
+ end
8
+ begin
9
+ k = Treat::Workers::Processors::
10
+ Chunkers.const_get(cc(entity.format))
11
+ k.chunk(entity, options)
12
+ rescue Treat::Exception
13
+ Treat::Workers::Processors::
14
+ Chunkers::TXT.chunk(entity, options)
15
+ end
16
+
17
+ end
18
+
19
+ end