treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
@@ -0,0 +1,31 @@
1
+ # This module implements methods that are used
2
+ # by workers to determine if an entity is properly
3
+ # formatted before working on it.
4
+ module Treat::Entities::Abilities::Checkable
5
+
6
# Return the value of the given feature for this entity.
#
# A feature that is already present is returned straight from
# @features. A missing feature is computed by calling the method
# named after it when do_it is true (the default); when do_it is
# false, a Treat::Exception naming the requesting task is raised.
def check_has(feature, do_it = true)
  return @features[feature] if has?(feature)
  return send(feature) if do_it
  # This is dangerous: the requesting task is obtained through
  # caller_method(2) (presumably read off the call stack, so keep
  # this call directly inside this method).
  task = caller_method(2)
  g1 = Treat::Categories.lookup(task)
  g2 = Treat::Categories.lookup(feature)
  # A lookup can come back empty for a name that is not a registered
  # task; fall back to generic wording instead of failing with a
  # NoMethodError on nil while building the message.
  requester = g1 ? "#{g1.type.to_s.capitalize} #{task}" : "Task #{task}"
  requirement = g2 ? "#{g2.type} #{g2.method}" : "feature #{feature}"
  raise Treat::Exception, "#{requester} requires #{requirement}."
end
21
+
22
# Raise a Treat::Exception if the entity already has children;
# return nil otherwise. Nothing is removed from the entity.
def check_hasnt_children
  return unless has_children?
  # caller_method(2) names the operation that is being refused
  # (presumably read off the call stack, so keep the call here).
  raise Treat::Exception,
    "Can't #{caller_method(2)} an entity that " +
    "already has children (text: \"#{short_value}\")."
end
30
+
31
+ end
@@ -0,0 +1,45 @@
1
+ module Treat::Entities::Abilities::Copyable
2
+
3
+ require 'fileutils'
4
+
5
# Copy this entity into the given collection and return whatever
# the type-specific helper returns. Only documents and collections
# can be copied, and only into a Treat::Entities::Collection;
# anything else raises a Treat::Exception.
def copy_into(collection)
  target_ok = collection.is_a?(Treat::Entities::Collection)
  unless target_ok
    raise Treat::Exception,
      "Cannot copy an entity into " +
      "something else than a collection."
  end
  case type
  when :document
    copy_document_into(collection)
  when :collection
    copy_collection_into(collection)
  else
    raise Treat::Exception,
      "Can only copy a document " +
      "or collection into a collection."
  end
end
22
+
23
# Copy this collection's folder on disk into the target
# collection's folder. Returns a duplicate of this entity on
# which :folder has been set to the destination directory.
def copy_collection_into(collection)
  copy = dup
  # NOTE(review): the destination directory is named after the
  # *parent* of this collection's folder (File.dirname), not the
  # folder itself - confirm this is intended before changing it.
  name = File.dirname(folder).split(File::SEPARATOR)[-1]
  destination = File.join(collection.folder, name)
  # The predicate is File.directory? - FileTest.directory (without
  # the question mark) does not exist and raised NoMethodError.
  FileUtils.mkdir(destination) unless File.directory?(destination)
  FileUtils.cp_r(folder, destination)
  copy.set :folder, destination
  copy
end
34
+
35
# Duplicate this document and, when it is backed by a file, copy
# that file into the target collection's folder and set :file on
# the duplicate to the new path. Returns the duplicate.
def copy_document_into(collection)
  duplicate = dup
  if file
    destination = File.join(collection.folder, File.basename(file))
    FileUtils.cp(file, destination)
    duplicate.set :file, destination
  end
  duplicate
end
44
+
45
+ end
@@ -0,0 +1,51 @@
1
+ module Treat::Entities::Abilities::Countable
2
+
3
# Return the 1-based index of this entity among its parent's
# children. Raises a Treat::Exception when there is no parent.
def position
  raise Treat::Exception,
    "No parent to get position in." unless has_parent?
  siblings = parent.children
  siblings.index(self) + 1
end
15
+
16
# Find the frequency of this entity's value in the registry of
# the ancestor of the supplied type, or in the root node's
# registry when no parent type is given.
def frequency_in(parent_type = nil)
  # Without a parent type, answer from the root node right away.
  # Previously this lookup was computed and thrown away, and it was
  # keyed by id although the :value registry is read by value
  # everywhere else in this module.
  return root.registry[:value][value] unless parent_type
  registry(parent_type)[:value][value]
end
28
+
29
+ # Get the frequency of this entity's
30
+ # value in the root node.
31
+ alias :frequency :frequency_in
32
+
33
# Return the size of this entity's registry entry for the
# given type.
def count(type)
  by_type = @registry[:type]
  by_type[type].size
end
38
+
39
# Return what this entity's registry holds for the given value,
# or 0 when it holds nothing. Tokens are leaves and raise a
# Treat::Exception instead.
def frequency_of(value)
  raise Treat::Exception,
    "Cannot get the frequency " +
    "of something within a leaf." if is_a?(Treat::Entities::Token)
  @registry[:value][value] || 0
end
50
+
51
+ end
@@ -0,0 +1,83 @@
1
+ # When Treat.debug is set to true, each call to
2
+ # #call_worker will result in a debug message being
3
+ # printed by the #print_debug function.
4
+ module Treat::Entities::Abilities::Debuggable
5
+
6
+ @@prev = nil
7
+ @@i = 0
8
+
9
# Explains what Treat is currently doing.
#
# Builds a one-sentence description of the task and buffers it in
# @@prev. A buffered sentence is only printed once a different one
# arrives, so consecutive identical calls collapse into a single
# pluralized line carrying their count. As a consequence the last
# buffered sentence never gets shown.
#
# entity  - not used by this method.
# task    - name of the task being performed.
# worker  - name of the worker performing it.
# group   - worker group; targets, type and preset_option are read.
# options - options hash; only options[group.preset_option] is read.
def print_debug(entity, task, worker, group, options)

  targs = group.targets.map { |target| target.to_s }

  t = if targs.size == 1
    targs[0]
  else
    targs[0..-2].join(', ') + ' and/or ' + targs[-1]
  end

  genitive = targs.size > 1 ? 'their' : 'its'
  human_task = task.to_s.gsub('_', ' ')

  doing = ''

  if group.type == :transformer || group.type == :computer

    # Crude past tense: drop a final "e", then append "ed"
    # unless the stem already ends in "d".
    tt = human_task
    tt = tt[0..-2] if tt[-1] == 'e'
    ed = tt[-1] == 'd' ? '' : 'ed'
    doing = "#{tt.capitalize}#{ed} #{t}"

  elsif group.type == :annotator

    if group.preset_option
      opt = options[group.preset_option]
      form = opt.to_s.gsub('_', ' ')
      # Strip the last character of the task name before
      # prefixing it with the preset's name.
      human_task[-1] = ''
      human_task = form + ' ' + human_task
    end

    doing = "Annotated #{t} with " +
            "#{genitive} #{human_task}"
  end

  if group.to_s.index('Formatters')
    curr = doing + ' in format ' + worker.to_s
  else
    curr = doing + ' using ' + worker.to_s.gsub('_', ' ')
  end

  curr.gsub!('ss', 's')
  curr += '.'

  if curr == @@prev
    @@i += 1
  else
    # @@i counts the repeats seen after the first occurrence, so
    # the buffered sentence occurred @@i + 1 times in total.
    occurrences = @@i + 1
    if occurrences > 1
      Treat::Entities.list.each do |e|
        @@prev.gsub!(e.to_s, e.to_s + 's')
      end
      @@prev.gsub!('its', 'their')
      @@prev = @@prev.split(' ').
        insert(1, occurrences.to_s).join(' ')
    end
    @@i = 0
    # Nothing is buffered on the very first call; skip the
    # empty line that `puts nil` would print.
    puts @@prev if @@prev
  end

  @@prev = curr

end
82
+
83
+ end
@@ -0,0 +1,123 @@
1
+ # Makes a class delegatable, allowing calls
2
+ # on it to be forwarded to a worker class
3
+ # able to perform the appropriate task.
4
+ module Treat::Entities::Abilities::Delegatable
5
+
6
# Add preset methods to an entity class.
#
# For every preset of the group, defines an instance method that
# runs the group's task with the group's preset option set to
# that preset, then moves the resulting feature from the task's
# name to the preset's name. Does nothing when the group has no
# preset option.
def add_presets(group)

  opt = group.preset_option
  return unless opt

  group.presets.each do |preset|
    define_method(preset) do |worker=nil, options={}|
      return get(preset) if has?(preset)
      # Accept preset(options) as shorthand for preset(nil, options).
      # Without this, the task method would treat the hash as its
      # options and silently drop the preset option added below.
      options, worker = worker, nil if worker.is_a?(Hash)
      options = {opt => preset}.merge(options)
      m = group.method
      send(m, worker, options)
      f = unset(m)
      features[preset] = f if f
    end
  end

end
24
+
25
# Define, on the receiving class, the instance method that runs
# the group's task (plus the group's preset methods).
#
# The generated method takes (worker = nil, options = {}); a Hash
# passed first is taken as the options. It returns the cached
# feature when one is stored under the task's name (any non-nil
# value), and otherwise delegates to the class's call_worker.
def add_workers(group)

  class_eval do
    task = group.method
    add_presets(group)
    define_method(task) do |worker=nil, options={}|
      options, worker = worker, nil if worker.is_a?(Hash)
      cached = @features[task]
      next cached unless cached.nil?
      self.class.call_worker(
        self, task, worker, group, options)
    end
  end

end
48
+
49
# Run a task on an entity through one of the group's workers.
#
# A nil or :default worker is resolved with find_worker. A worker
# that is not in group.list raises a Treat::Exception. The worker
# class is called with the group's method, the entity and the
# options. For annotator groups a truthy result is stored in the
# entity's features under the task name. Transformer groups return
# the entity; every other group returns the worker's result.
def call_worker(entity, task, worker, group, options)

  worker = find_worker(entity, group) if worker.nil? || worker == :default

  print_debug(entity, task, worker, group, options) if Treat.debug

  unless group.list.include?(worker)
    raise Treat::Exception, worker_not_found(worker, group)
  end

  worker_class = group.const_get(cc(worker.to_s).intern)
  result = worker_class.send(group.method, entity, options)

  entity.features[task] = result if group.type == :annotator && result

  group.type == :transformer ? entity : result

end
83
+
84
# Pick the worker to use when the caller supplied none: the
# group's default if it has one (anything but nil), otherwise the
# worker registered for the entity's language.
def find_worker(entity, group)
  default = group.default
  return default unless default.nil?
  find_worker_for_language(entity.language, group)
end
91
+
92
# Return the first worker listed for the group under the given
# language.
#
# The language's module is looked up in Treat::Languages, then the
# constant named after the group's enclosing category, and finally
# the entry keyed by the group's own (un-camel-cased) name. Raises
# a Treat::Exception when that entry is missing or empty.
def find_worker_for_language(language, group)

  lang = Treat::Languages.describe(language)
  language_module = Treat::Languages.const_get(cc(lang).intern)
  category = group.to_s.split('::')[-2].intern
  workers_by_group = language_module.const_get(category)

  group_key = ucc(cl(group)).intern
  candidates = workers_by_group[group_key]

  if !candidates || !candidates[0]
    label = ucc(cl(group)).gsub('_', ' ')
    raise Treat::Exception,
      "No worker to find \"#{label}" +
      "\" is available for the " +
      "#{lang.to_s.capitalize} language."
  end

  candidates[0]

end
115
+
116
# Build the error message for an unknown worker, followed by
# whatever did_you_mean? suggests from the group's worker list.
def worker_not_found(klass, group)
  requested = ucc(cl(klass))
  message = "Algorithm '#{requested}' couldn't be " +
            "found in group #{group}."
  known = group.list.map { |c| ucc(c) }
  message + did_you_mean?(known, ucc(klass))
end
122
+
123
+ end
@@ -0,0 +1,62 @@
1
+ # Implement support for the functions #do and #do_task.
2
+ module Treat::Entities::Abilities::Doable
3
+
4
# Perform the supplied tasks on the entity, in order, and return
# the entity itself.
#
# Each task is either a task name, a [name, worker] pair, a
# [name, [worker, options]] pair, or a Hash mapping names to a
# worker or a [worker, options] pair. Missing options default
# to an empty Hash.
def do(*tasks)
  tasks.each do |task|

    # Normalize every accepted form to [name, worker spec] pairs.
    pairs = case task
            when Hash  then task.to_a
            when Array then [[task[0], task[1]]]
            else            [[task, nil]]
            end

    pairs.each do |name, worker|
      options = nil
      worker, options = *worker if worker.is_a?(Array)
      do_task(name, worker, options || {})
    end

  end
  self
end
28
+
29
# Perform a single task with the given worker and options.
#
# When this entity's type matches one of the group's targets (or
# the group targets :entity), the task is run on the entity itself
# and its result returned. Otherwise the task is run on every
# contained entity of a targeted type, any feature stored under
# the task's name is dropped from this entity unless its own type
# is targeted, and nil is returned.
def do_task(task, worker, options, group = nil)
  group ||= get_group(task)
  targets = group.targets
  # Every target is checked (no short-circuit) before deciding.
  applies_here = targets.map { |t|
    Treat::Entities.match_types[t][type]
  }.any?
  if applies_here || targets.include?(:entity)
    send(task, worker, options)
  else
    each_entity(*targets) do |entity|
      entity.do_task(task, worker, options, group)
    end
    features.delete(task) unless targets.include?(type)
    nil
  end
end
50
+
51
# Look up the group that a task belongs to; raises a
# Treat::Exception when the lookup finds nothing.
def get_group(task)
  group = Treat::Categories.lookup(task)
  return group if group
  raise Treat::Exception,
    "Task #{task} does not exist."
end
60
+
61
+
62
+ end
@@ -0,0 +1,11 @@
1
+ module Treat::Entities::Abilities::Exportable
2
+
3
# Build a Treat::DataSet for the classification, append every
# entity of the classification's types to it, and return it.
def export(classification)
  data_set = Treat::DataSet.new(classification)
  each_entity(*classification.types) { |entity| data_set << entity }
  data_set
end
10
+
11
+ end
@@ -0,0 +1,115 @@
1
+ module Treat::Entities::Abilities::Iterable
2
+
3
# Yields, depth first, every entity in this entity's tree whose
# type matches one of the supplied types (any entity when no type
# is given). Unlike #each this recurses through all descendants;
# the receiver itself is yielded first when its own type matches.
#
# This function NEEDS to be ported to C.
def each_entity(*types)
  types = [:entity] if types.size == 0
  matches_self = types.any? do |wanted|
    Treat::Entities.match_types[wanted][type]
  end
  yield self if matches_self
  @children.each do |child|
    child.each_entity(*types) { |found| yield found }
  end
end
26
+
27
# Collect the entities of the given type (any entity by default)
# for which the feature equals the value. The feature is read from
# the entity's features when present there; :id, :value and :type
# are additionally compared through the entity's own accessors.
def entities_with_feature(feature, value, type = nil)
  type ||= :entity
  intrinsic = [:id, :value, :type].include?(feature)
  found = []
  each_entity(type) do |e|
    if e.has?(feature) && e.features[feature] == value
      found << e
    elsif intrinsic && e.send(feature) == value
      found << e
    end
  end
  found
end
40
+
41
# Gather everything each_entity yields for the supplied types
# into an array.
def entities_with_types(*types)
  [].tap do |found|
    each_entity(*types) { |e| found << e }
  end
end
48
+
49
+ alias :entities_with_type :entities_with_types
50
+
51
# Returns an array of the entities of the given type (any entity
# when type is nil) whose :category feature equals category.
def entities_with_category(category, type = nil)
  # The category is the value to match and the type only narrows
  # the search; previously the type was passed as the value and
  # the category was ignored.
  entities_with_feature(:category, category, type)
end
56
+
57
# Walk up from the parent and return the closest ancestor whose
# type matches one of the given types, or nil when there is no
# parent or no ancestor matches.
def ancestor_with_types(*types)
  candidate = @parent
  return nil unless candidate
  until types.any? { |wanted|
          Treat::Entities.match_types[wanted][candidate.type] }
    return nil unless candidate.has_parent?
    candidate = candidate.parent
  end
  candidate
end
78
+
79
+ alias :ancestor_with_type :ancestor_with_types
80
+
81
# Yields each ancestor of this entity that has one of the given
# types (any ancestor when no type is given), closest first.
# May skip levels; every matching ancestor is yielded once.
def each_ancestor(*types)
  types = [:entity] if types.empty?
  ancestor = self
  while (a = ancestor.ancestor_with_types(*types))
    yield a
    # Continue from the ancestor just found. Stepping up a single
    # level instead would find, and yield, the same ancestor again
    # whenever the match skipped intermediate levels.
    ancestor = a
  end
end
91
+
92
# Gather everything each_ancestor yields for the given types
# into an array.
def ancestors_with_types(*types)
  [].tap do |found|
    each_ancestor(*types) { |a| found << a }
  end
end
99
+
100
+ alias :ancestors_with_type :ancestors_with_types
101
+
102
# Return the first element of the array, emitting a warning when
# the array holds more than one. Backs the singular magic methods:
# asking a sentence with many words for its "word" returns the
# first word and warns the user.
def first_but_warn(array, type)
  warn "Warning: requested one #{type}, but" +
       " there are many #{type}s in this entity." if array.size > 1
  array[0]
end
113
+
114
+
115
+ end