treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210)
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -2,6 +2,8 @@ module Treat::Entities::Abilities::Copyable
2
2
 
3
3
  require 'fileutils'
4
4
 
5
+ # What happens when it is a database-stored
6
+ # collection or document?
5
7
  def copy_into(collection)
6
8
  unless collection.is_a?(
7
9
  Treat::Entities::Collection)
@@ -7,7 +7,7 @@ module Treat::Entities::Abilities::Countable
7
7
  raise Treat::Exception,
8
8
  "No parent to get position in."
9
9
  end
10
- parent.children.index(self) + 1
10
+ parent.children.index(self)
11
11
  end
12
12
 
13
13
  # Find the position of this entity from
@@ -65,7 +65,7 @@ module Treat::Entities::Abilities::Debuggable
65
65
  @@i += 1
66
66
  else
67
67
  if @@i > 1
68
- Treat::Entities.list.each do |e|
68
+ Treat.core.entities.list.each do |e|
69
69
  @@prev.gsub!(e.to_s, e.to_s + 's')
70
70
  end
71
71
  @@prev.gsub!('its', 'their')
@@ -9,6 +9,7 @@ module Treat::Entities::Abilities::Delegatable
9
9
  opt = group.preset_option
10
10
  return unless opt
11
11
 
12
+ self.class_eval do
12
13
  group.presets.each do |preset|
13
14
  define_method(preset) do |worker=nil, options={}|
14
15
  return get(preset) if has?(preset)
@@ -19,15 +20,17 @@ module Treat::Entities::Abilities::Delegatable
19
20
  features[preset] = f if f
20
21
  end
21
22
  end
23
+ end
22
24
 
23
25
  end
24
26
 
25
27
  # Add the workers to perform a task on an entity class.
26
28
  def add_workers(group)
27
-
28
29
  self.class_eval do
30
+
29
31
  task = group.method
30
32
  add_presets(group)
33
+
31
34
  define_method(task) do |worker=nil, options={}|
32
35
  if worker.is_a?(Hash)
33
36
  options, worker =
@@ -37,8 +40,8 @@ module Treat::Entities::Abilities::Delegatable
37
40
  @features[task]
38
41
  else
39
42
  self.class.call_worker(
40
- self, task, worker,
41
- group, options
43
+ self, task, worker,
44
+ group, options
42
45
  )
43
46
  end
44
47
  end
@@ -55,30 +58,25 @@ module Treat::Entities::Abilities::Delegatable
55
58
  end
56
59
 
57
60
  print_debug(entity, task, worker,
58
- group, options) if Treat.debug
59
-
61
+ group, options) if Treat.core.verbosity.debug
60
62
  if not group.list.include?(worker)
61
63
  raise Treat::Exception,
62
64
  worker_not_found(worker, group)
63
- else
64
-
65
- worker = group.const_get(
66
- cc(worker.to_s).intern
67
- )
68
-
69
- result = worker.send(group.method, entity, options)
65
+ end
70
66
 
71
- if group.type == :annotator && result
72
- entity.features[task] = result
73
- end
67
+ worker = group.const_get(cc(worker.to_s).intern)
68
+ result = worker.send(group.method, entity, options)
74
69
 
75
- if group.type == :transformer
76
- entity
77
- else
78
- result
79
- end
70
+ if group.type == :annotator && result
71
+ entity.features[task] = result
72
+ end
80
73
 
74
+ if group.type == :transformer
75
+ entity
76
+ else
77
+ result
81
78
  end
79
+
82
80
  end
83
81
 
84
82
  # Find which worker to use if none has been supplied.
@@ -93,23 +91,31 @@ module Treat::Entities::Abilities::Delegatable
93
91
  # inside the given group.
94
92
  def find_worker_for_language(language, group)
95
93
 
96
- lang = Treat::Languages.describe(language)
97
- klass = cc(lang).intern
98
- lclass = Treat::Languages.const_get(klass)
99
- cat = group.to_s.split('::')[-2].intern
100
- klass = lclass.const_get(cat)
101
-
102
- g = ucc(cl(group)).intern
103
-
104
- if !klass[g] || !klass[g][0]
105
- d = ucc(cl(group))
106
- d.gsub!('_', ' ')
107
- d = d[0..-2]
108
- raise Treat::Exception, "No #{d}" +
109
- " is available for the " +
110
- "#{lang.to_s.capitalize} language."
94
+ lang = Treat.languages[language]
95
+ cat = group.to_s.split('::')[2].downcase.intern
96
+ group = ucc(cl(group)).intern
97
+
98
+ if lang.nil?
99
+ raise Treat::Exception,
100
+ "No configuration file loaded for language #{language}."
101
+ end
102
+
103
+ workers = lang.workers
104
+
105
+ if !workers.respond_to?(cat) ||
106
+ !workers[cat].respond_to?(group)
107
+ workers = Treat.languages.agnostic.workers
108
+ end
109
+
110
+ if !workers.respond_to?(cat) ||
111
+ !workers[cat].respond_to?(group)
112
+ raise Treat::Exception,
113
+ "No #{group} is/are available for the " +
114
+ "#{language.to_s.capitalize} language."
111
115
  end
112
- return klass[g][0]
116
+
117
+
118
+ workers[cat][group].first
113
119
 
114
120
  end
115
121
 
@@ -33,7 +33,7 @@ module Treat::Entities::Abilities::Doable
33
33
  entity_types = group.targets
34
34
  f = nil
35
35
  entity_types.each do |t|
36
- f = true if Treat::Entities.match_types[t][type]
36
+ f = true if is_a?(Treat::Entities.const_get(cc(t)))
37
37
  end
38
38
  if f || entity_types.include?(:entity)
39
39
  send(task, worker, options)
@@ -55,7 +55,7 @@ module Treat::Entities::Abilities::Doable
55
55
 
56
56
  # Get the group of a task.
57
57
  def get_group(task)
58
- g = Treat::Categories.lookup(task)
58
+ g = Treat::Workers.lookup(task)
59
59
  unless g
60
60
  raise Treat::Exception,
61
61
  "Task #{task} does not exist."
@@ -1,7 +1,7 @@
1
1
  module Treat::Entities::Abilities::Exportable
2
2
 
3
3
  def export(classification)
4
- ds = Treat::DataSet.new(classification)
4
+ ds = Treat::Core::DataSet.new(classification)
5
5
  each_entity(*classification.types) do |e|
6
6
  ds << e
7
7
  end
@@ -11,7 +11,7 @@ module Treat::Entities::Abilities::Iterable
11
11
  types = [:entity] if types.size == 0
12
12
  f = false
13
13
  types.each do |t2|
14
- if Treat::Entities.match_types[t2][type]
14
+ if is_a?(Treat::Entities.const_get(cc(t2)))
15
15
  f = true; break
16
16
  end
17
17
  end
@@ -54,57 +54,45 @@ module Treat::Entities::Abilities::Iterable
54
54
 
55
55
  # Returns the first ancestor of this entity
56
56
  # that has the given type.
57
- def ancestor_with_types(*types)
57
+ def ancestor_with_type(type)
58
+ return unless has_parent?
58
59
  ancestor = @parent
59
- match_types = lambda do |t1, t2|
60
- f = false
61
- types.each do |t2|
62
- if Treat::Entities.match_types[t2][t1]
63
- f = true; break
64
- end
65
- end
66
- f
67
- end
68
- if ancestor
69
- while not match_types.call(ancestor.type, type)
70
- return nil unless (ancestor && ancestor.has_parent?)
71
- ancestor = ancestor.parent
72
- end
73
- match_types.call(ancestor.type, types) ? ancestor : nil
60
+ type_klass = Treat::Entities.const_get(cc(type))
61
+ while not ancestor.is_a?(type_klass)
62
+ return nil unless (ancestor && ancestor.has_parent?)
63
+ ancestor = ancestor.parent
74
64
  end
65
+ ancestor
75
66
  end
76
67
 
77
- alias :ancestor_with_type :ancestor_with_types
78
-
79
68
  # Yields each ancestor of this entity that
80
- # has one of the the given types. May skip levels.
81
- def each_ancestor(*types)
82
- types = [:entity] if types.empty?
69
+ # has the given type.
70
+ def each_ancestor(type = :entity)
83
71
  ancestor = self
84
- while (a = ancestor.ancestor_with_types(*types))
72
+ while (a = ancestor.ancestor_with_type(type))
85
73
  yield a
86
74
  ancestor = ancestor.parent
87
75
  end
88
76
  end
89
77
 
90
- # Returns an array of ancestors of this entity that
91
- # has one of the the given types. May skip levels.
92
- def ancestors_with_types(*types)
93
- as = []
94
- each_ancestor(*types) { |a| as << a }
95
- as
78
+ # Returns an array of ancestors of this
79
+ # entity that have the given type.
80
+ def ancestors_with_type(type)
81
+ ancestors = []
82
+ each_ancestor(type) do |a|
83
+ ancestors << a
84
+ end
85
+ ancestors
96
86
  end
97
87
 
98
88
  # Returns the first ancestor that has a feature
99
89
  # with the given name, otherwise nil.
100
- def ancestor_with_feature(type, feature)
101
- each_ancestor(type) do |ancestor|
90
+ def ancestor_with_feature(feature)
91
+ each_ancestor do |ancestor|
102
92
  return ancestor if ancestor.has?(feature)
103
93
  end
104
94
  end
105
95
 
106
- alias :ancestors_with_type :ancestors_with_types
107
-
108
96
  # Number of children that have a given feature.
109
97
  def num_children_with_feature(feature)
110
98
  i = 0
@@ -24,8 +24,9 @@ module Treat::Entities::Abilities::Magical
24
24
  #
25
25
  def magic(sym, *args)
26
26
 
27
- @@entities_regexp ||= "(#{Treat::Entities.list.join('|')})"
28
- @@cats_regexp ||= "(#{Treat::Languages::Language::WordCategories.join('|')})"
27
+ # Cache this for performance.
28
+ @@entities_regexp ||= "(#{Treat.core.entities.list.join('|')})"
29
+ @@cats_regexp ||= "(#{Treat.linguistics.categories.join('|')})"
29
30
 
30
31
  method = sym.to_s =~ /entities/ ?
31
32
  sym.to_s.gsub('entities', 'entitys') :
@@ -56,17 +57,16 @@ module Treat::Entities::Abilities::Magical
56
57
  entities_with_feature($2.intern,
57
58
  args[0], $1.intern).each { |e| yield e }
58
59
  elsif method =~ /^each_#{@@cats_regexp}$/
59
- entities_with_category($1.intern
60
- ).each { |e| yield e }
60
+ entities_with_category($1).each { |e| yield e }
61
61
  elsif method =~ /^#{@@cats_regexp}s$/
62
- entities_with_category($1.intern)
62
+ entities_with_category($1)
63
63
  elsif method =~ /^#{@@cats_regexp}$/
64
- first_but_warn(entities_with_category($1.intern), $1)
64
+ first_but_warn(entities_with_category($1), $1)
65
65
  elsif method =~ /^first_#{@@cats_regexp}$/
66
- e = entities_with_category($1.intern)
66
+ e = entities_with_category($1)
67
67
  e ? e[0] : nil
68
68
  elsif method =~ /^#{@@cats_regexp}_count$/
69
- entities_with_category($1.intern).size
69
+ entities_with_category($1).size
70
70
  elsif method =~ /^(.*)_count$/
71
71
  num_children_with_feature($1.intern)
72
72
  elsif method =~ /^#{@@cats_regexp}s_with_([a-z]*)$/
@@ -43,42 +43,4 @@ module Treat::Entities::Abilities::Registrable
43
43
  end
44
44
  end
45
45
 
46
- def contains_id?(id)
47
-
48
- @registry[:id][id]
49
-
50
- end
51
-
52
- def contains_value?(val)
53
-
54
- @registry[:value][val] ?
55
- true : false
56
-
57
- end
58
-
59
- def contains_type?(type1)
60
-
61
- return true if @registry[:type][type1]
62
-
63
- @registry[:type].each do |type2, count|
64
- if Treat::Entities.
65
- match_types[type1][type2]
66
- return true
67
- end
68
- end
69
-
70
- false
71
-
72
- end
73
-
74
- def contains_types?(types)
75
-
76
- types.each do |type|
77
- return true if contains_type?(type)
78
- end
79
-
80
- false
81
-
82
- end
83
-
84
46
  end
@@ -32,28 +32,28 @@ module Treat::Entities::Abilities::Stringable
32
32
  end
33
33
  end
34
34
 
35
- # Return an informative string representation
36
- # of the entity.
37
- def inspect
38
- s = "#{cl(self.class)} (#{@id.to_s})"
39
- if caller_method(2) == :inspect
40
- @id.to_s
41
- else
42
- dependencies = []
43
- @dependencies.each do |dependency|
44
- dependencies <<
45
- "#{dependency.target}#{dependency.type}"
46
- end
47
- s += " --- #{short_value.inspect}" +
48
- " --- #{@features.inspect} " +
49
- " --- #{dependencies.inspect} "
50
- end
51
- s
52
- end
53
-
54
35
  # Print out an ASCII representation of the tree.
55
36
  def print_tree; puts visualize(:tree); end
56
37
 
38
+ # Return an informative string representation
39
+ # of the entity.
40
+ def inspect
41
+ s = "#{cl(self.class)} (#{@id.to_s})"
42
+ if caller_method(2) == :inspect
43
+ @id.to_s
44
+ else
45
+ dependencies = []
46
+ @dependencies.each do |dependency|
47
+ dependencies <<
48
+ "#{dependency.target}#{dependency.type}"
49
+ end
50
+ s += " --- #{short_value.inspect}" +
51
+ " --- #{@features.inspect} " +
52
+ " --- #{dependencies.inspect} "
53
+ end
54
+ s
55
+ end
56
+
57
57
  # Helper method to implode the string value of the subtree.
58
58
  def implode
59
59
 
@@ -0,0 +1,31 @@
1
+ module Treat::Entities
2
+ # Represents a collection of texts.
3
+ class Collection < Treat::Entities::Entity
4
+
5
+ # Initialize the collection with a folder
6
+ # containing the texts of the collection.
7
+ def initialize(folder = nil, id = nil)
8
+ super('', id)
9
+ set :folder, folder
10
+ i = folder + '/.index'
11
+ set :index, i if FileTest.directory?(i)
12
+ end
13
+
14
+ # Works like the default <<, but if the
15
+ # file being added is a collection or a
16
+ # document, then copy that collection or
17
+ # document into this collection's folder.
18
+ def <<(entities, copy = true)
19
+ unless entities.is_a? Array
20
+ entities = [entities]
21
+ end
22
+ entities.each do |entity|
23
+ if [:document, :collection].
24
+ include?(entity.type) && copy
25
+ entity = entity.copy_into(self)
26
+ end
27
+ end
28
+ super(entities)
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,10 @@
1
+ module Treat::Entities
2
+ # Represents a document.
3
+ class Document < Treat::Entities::Entity
4
+ # Initialize a document with a file name.
5
+ def initialize(file = nil, id = nil)
6
+ super('', id)
7
+ set :file, file
8
+ end
9
+ end
10
+ end
@@ -1,17 +1,20 @@
1
1
  module Treat::Entities
2
2
 
3
- # Require base class for Entity.
4
- require 'treat/tree'
3
+ module Abilities; end
5
4
 
6
- class Entity < Treat::Tree::Node
5
+ # Require abilities.
6
+ p = Treat.paths.lib +
7
+ 'treat/entities/abilities/*.rb'
8
+ Dir.glob(p).each { |f| require f }
9
+
10
+ require 'birch'
11
+
12
+ class Entity < Treat::Core::Node
7
13
 
8
14
  # A Symbol representing the lowercase
9
15
  # version of the class name.
10
16
  attr_accessor :type
11
17
 
12
- # Require abilities.
13
- require 'treat/entities/abilities'
14
-
15
18
  # Implements support for #register,
16
19
  # #registry, and #contains_* methods.
17
20
  include Abilities::Registrable
@@ -48,14 +51,17 @@ module Treat::Entities
48
51
  # #entities_with_type, #ancestors_with_type,
49
52
  # #entities_with_feature, #entities_with_category.
50
53
  include Abilities::Iterable
51
-
54
+
52
55
  # Implement support for #export to export
53
56
  # a line of a data set based on a classification.
54
57
  include Abilities::Exportable
55
-
58
+
56
59
  # Implement support for #copy_into.
57
60
  include Abilities::Copyable
58
-
61
+
62
+ # Implement support for #self.compare_with
63
+ extend Abilities::Comparable
64
+
59
65
  # Initialize the entity with its value and
60
66
  # (optionally) a unique identifier. By default,
61
67
  # the object_id will be used as id.
@@ -65,7 +71,7 @@ module Treat::Entities
65
71
  @type = :entity if self == Entity
66
72
  @type ||= ucc(cl(self.class)).intern
67
73
  end
68
-
74
+
69
75
  # Add an entity to the current entity.
70
76
  # Registers the entity in the root node
71
77
  # token registry if the entity is a leaf.
@@ -83,7 +89,6 @@ module Treat::Entities
83
89
  entities[0]
84
90
  end
85
91
 
86
-
87
92
  # Catch missing methods to support method-like
88
93
  # access to features (e.g. entity.category
89
94
  # instead of entity.features[:category]) and to
@@ -105,13 +110,13 @@ module Treat::Entities
105
110
  super(sym, *args, &block)
106
111
  rescue NoMethodError
107
112
  raise Treat::Exception,
108
- if Treat::Categories.lookup(sym)
113
+ if Treat::Workers.lookup(sym)
109
114
  msg = "Method #{sym} cannot " +
110
115
  "be called on a #{type}."
111
116
  else
112
117
  msg = "Method #{sym} does not exist."
113
118
  msg += did_you_mean?(
114
- Treat::Categories.methods, sym)
119
+ Treat::Workers.methods, sym)
115
120
  end
116
121
  end
117
122
  else
@@ -0,0 +1,15 @@
1
+ module Treat::Entities
2
+
3
+ # Any kind of grouped entities.
4
+ class Group < Treat::Entities::Entity; end
5
+
6
+ # Represents a group of words with a sentence ender.
7
+ class Sentence < Group; end
8
+
9
+ # Represents a group of words.
10
+ class Phrase < Group; end
11
+
12
+ # Represents a non-linguistic fragment
13
+ class Fragment < Group; end
14
+
15
+ end
@@ -0,0 +1,13 @@
1
+ module Treat::Entities
2
+ # Represents a section.
3
+ class Section < Treat::Entities::Entity; end
4
+
5
+ # Represents a page of text.
6
+ class Page < Section; end
7
+
8
+ # Represents a block of text
9
+ class Block < Section; end
10
+
11
+ # Represents a list.
12
+ class List < Section; end
13
+ end
@@ -0,0 +1,35 @@
1
+ module Treat::Entities
2
+ # Represents a terminal element in the text structure.
3
+ class Token < Treat::Entities::Entity; end
4
+
5
+ # Represents a word.
6
+ class Word < Token; end
7
+
8
+ # Represents a clitic ('s).
9
+ class Enclitic < Token; end
10
+
11
+ # Represents a number.
12
+ class Number < Token
13
+ def to_i; to_s.to_i; end
14
+ def to_f; to_s.to_f; end
15
+ end
16
+
17
+ # Represents a punctuation sign.
18
+ class Punctuation < Token; end
19
+
20
+ # Represents a character that is neither
21
+ # alphabetical, numerical, nor a punctuation
22
+ # character (e.g. @#$%&*).
23
+ class Symbol < Token; end
24
+
25
+ # Represents a url.
26
+ class Url < Token; end
27
+
28
+ # Represents a valid RFC822 address.
29
+ class Email < Token; end
30
+
31
+ # Represents a token whose type
32
+ # cannot be identified.
33
+ class Unknown; end
34
+
35
+ end
@@ -0,0 +1,11 @@
1
+ module Treat::Entities
2
+ # Represents a zone of text
3
+ # (Title, Paragraph, List, Quote).
4
+ class Zone < Treat::Entities::Entity; end
5
+
6
+ # Represents a title, subtitle, logical header.
7
+ class Title < Zone; end
8
+
9
+ # Represents a paragraph.
10
+ class Paragraph < Zone; end
11
+ end
@@ -1,76 +1,6 @@
1
- # Entities are Tree structures that represent textual entities
2
- # (from a collection of texts down to an individual word) with
3
- # a unique identifier, a value, features, children and dependencies
4
- # linking them to other textual entities.
5
- #
6
- # - A Collection represents a folder containing documents (and folders).
7
- # - A Document represents a file with a textual content.
8
- # - A Zone represents a logical division of content in a document.
9
- # - A Phrase is a group of words; a Sentence is a Phrase with an ender.
10
- # - A Token represents a Word, a Number, a Punctuation or a Symbol.
1
+ # Contains the textual model used by Treat.
11
2
  module Treat::Entities
12
-
13
- # Variables for the singleton class.
14
- class << self
15
- # Provide a list of all entity types except Entity,
16
- # as non_camel_case identifiers.
17
- attr_accessor :list
18
- end
19
-
20
- # Require all entities.
21
- require 'treat/entities/entities'
22
-
23
- # Add each constant to the list, except Entity.
24
- self.list = []
25
- constants.each do |constant|
26
- unless constant == :Entity ||
27
- constant == :Abilities
28
- self.list << ucc(constant).intern
29
- end
30
- end
31
-
32
- # Make each Entity class buildable magically.
33
- # This enables to create Entities without calling
34
- # #new (e.g. Word 'hello').
35
- constants.each do |entity|
36
- define_singleton_method(entity) do |value='', id=nil|
37
- const_get(entity).build(value, id)
38
- end
39
- end
40
-
41
- # Create entity lookup table.
42
- @@match_types = nil
43
- def self.match_types
44
- return @@match_types if @@match_types
45
- list = (Treat::Entities.list + [:entity])
46
- @@match_types = {}
47
- list.each do |type1|
48
- list.each do |type2|
49
- @@match_types[type2] ||= {}
50
- if (type1 == type2) ||
51
- (Treat::Entities.const_get(cc(type1)) <
52
- Treat::Entities.const_get(cc(type2)))
53
- @@match_types[type2][type1] = true
54
- end
55
- end
56
- end
57
- @@match_types
58
- end
59
-
60
- # A bottom-up ordering of general types of entities.
61
- @@order = [Token, Phrase, Zone, Section, Document, Collection]
62
-
63
- # Return the hierarchy level of the entity
64
- # class, the minimum being a Token and the
65
- # maximum being a Collection.
66
- #
67
- # Implement as true comparison functions.
68
- def self.rank(type)
69
- klass = Treat::Entities.const_get(cc(type))
70
- compare = lambda { |a,b| a == b || a < b }
71
- 1.upto(@@order.size) do |i|
72
- return i if compare.call(klass, @@order[i])
73
- end
74
- end
75
-
76
- end
3
+ require 'treat/entities/entity'
4
+ p = Treat.paths.lib + 'treat/entities/*.rb'
5
+ Dir.glob(p).each { |f| require f }
6
+ end