treat 1.2.0 → 2.0.0rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (217) hide show
  1. data/LICENSE +2 -2
  2. data/README.md +12 -21
  3. data/lib/treat/autoload.rb +44 -0
  4. data/lib/treat/config/config.rb +38 -0
  5. data/lib/treat/config/configurable.rb +51 -0
  6. data/lib/treat/config/data/config.rb +50 -0
  7. data/lib/treat/config/data/core.rb +52 -0
  8. data/lib/treat/config/data/databases.rb +10 -0
  9. data/lib/treat/config/data/entities.rb +15 -0
  10. data/lib/treat/config/data/languages/agnostic.rb +31 -0
  11. data/lib/treat/config/{languages → data/languages}/arabic.rb +0 -0
  12. data/lib/treat/config/{languages → data/languages}/chinese.rb +0 -0
  13. data/lib/treat/config/{languages → data/languages}/dutch.rb +1 -1
  14. data/lib/treat/config/data/languages/english.rb +95 -0
  15. data/lib/treat/config/data/languages/french.rb +148 -0
  16. data/lib/treat/config/data/languages/german.rb +135 -0
  17. data/lib/treat/config/{languages → data/languages}/greek.rb +1 -1
  18. data/lib/treat/config/data/languages/italian.rb +162 -0
  19. data/lib/treat/config/data/languages/polish.rb +11 -0
  20. data/lib/treat/config/{languages → data/languages}/portuguese.rb +1 -1
  21. data/lib/treat/config/{languages → data/languages}/russian.rb +1 -1
  22. data/lib/treat/config/data/languages/spanish.rb +291 -0
  23. data/lib/treat/config/data/languages/swedish.rb +289 -0
  24. data/lib/treat/config/data/libraries.rb +12 -0
  25. data/lib/treat/config/data/linguistics.rb +44 -0
  26. data/lib/treat/config/data/tags.rb +328 -0
  27. data/lib/treat/config/{workers → data/workers}/extractors.rb +2 -10
  28. data/lib/treat/config/{workers → data/workers}/formatters.rb +0 -0
  29. data/lib/treat/config/{workers → data/workers}/inflectors.rb +0 -0
  30. data/lib/treat/config/{workers → data/workers}/learners.rb +0 -0
  31. data/lib/treat/config/{workers → data/workers}/lexicalizers.rb +4 -3
  32. data/lib/treat/config/{workers → data/workers}/processors.rb +3 -3
  33. data/lib/treat/config/{workers → data/workers}/retrievers.rb +0 -0
  34. data/lib/treat/config/importable.rb +31 -0
  35. data/lib/treat/config/paths.rb +23 -0
  36. data/lib/treat/config/tags.rb +37 -0
  37. data/lib/treat/core/dsl.rb +55 -0
  38. data/lib/treat/{installer.rb → core/installer.rb} +10 -12
  39. data/lib/treat/core/server.rb +40 -0
  40. data/lib/treat/entities/entities.rb +101 -0
  41. data/lib/treat/entities/{abilities/doable.rb → entity/applicable.rb} +5 -3
  42. data/lib/treat/entities/{abilities → entity}/buildable.rb +118 -63
  43. data/lib/treat/entities/{abilities → entity}/checkable.rb +2 -2
  44. data/lib/treat/entities/{abilities → entity}/comparable.rb +6 -6
  45. data/lib/treat/entities/{abilities → entity}/countable.rb +2 -1
  46. data/lib/treat/entities/entity/debuggable.rb +86 -0
  47. data/lib/treat/entities/{abilities → entity}/delegatable.rb +16 -26
  48. data/lib/treat/entities/{abilities → entity}/exportable.rb +2 -2
  49. data/lib/treat/entities/{abilities → entity}/iterable.rb +4 -16
  50. data/lib/treat/entities/{abilities → entity}/magical.rb +22 -17
  51. data/lib/treat/entities/entity/registrable.rb +36 -0
  52. data/lib/treat/entities/{abilities → entity}/stringable.rb +18 -15
  53. data/lib/treat/entities/entity.rb +86 -77
  54. data/lib/treat/exception.rb +3 -0
  55. data/lib/treat/helpers/hash.rb +29 -0
  56. data/lib/treat/helpers/help.rb +35 -0
  57. data/lib/treat/helpers/object.rb +55 -0
  58. data/lib/treat/helpers/string.rb +124 -0
  59. data/lib/treat/{core → learning}/data_set.rb +11 -11
  60. data/lib/treat/{core → learning}/export.rb +3 -3
  61. data/lib/treat/{core → learning}/problem.rb +26 -16
  62. data/lib/treat/{core → learning}/question.rb +5 -9
  63. data/lib/treat/loaders/linguistics.rb +8 -9
  64. data/lib/treat/loaders/stanford.rb +5 -11
  65. data/lib/treat/modules.rb +33 -0
  66. data/lib/treat/proxies/array.rb +27 -0
  67. data/lib/treat/proxies/language.rb +47 -0
  68. data/lib/treat/proxies/number.rb +18 -0
  69. data/lib/treat/proxies/proxy.rb +25 -0
  70. data/lib/treat/proxies/string.rb +18 -0
  71. data/lib/treat/version.rb +10 -1
  72. data/lib/treat/{workers.rb → workers/categorizable.rb} +18 -19
  73. data/lib/treat/workers/extractors/keywords/tf_idf.rb +11 -11
  74. data/lib/treat/workers/extractors/language/what_language.rb +8 -6
  75. data/lib/treat/workers/extractors/name_tag/stanford.rb +10 -4
  76. data/lib/treat/workers/extractors/similarity/levenshtein.rb +36 -0
  77. data/lib/treat/workers/extractors/similarity/tf_idf.rb +27 -0
  78. data/lib/treat/workers/extractors/tf_idf/native.rb +4 -4
  79. data/lib/treat/workers/extractors/time/chronic.rb +2 -4
  80. data/lib/treat/workers/extractors/time/nickel.rb +19 -20
  81. data/lib/treat/workers/extractors/time/ruby.rb +2 -1
  82. data/lib/treat/workers/extractors/topic_words/lda.rb +12 -12
  83. data/lib/treat/workers/extractors/topics/reuters.rb +9 -13
  84. data/lib/treat/workers/formatters/readers/autoselect.rb +1 -1
  85. data/lib/treat/workers/formatters/readers/image.rb +19 -9
  86. data/lib/treat/workers/formatters/readers/odt.rb +2 -1
  87. data/lib/treat/workers/formatters/readers/pdf.rb +20 -3
  88. data/lib/treat/workers/formatters/readers/xml.rb +0 -1
  89. data/lib/treat/workers/formatters/serializers/mongo.rb +10 -20
  90. data/lib/treat/workers/formatters/serializers/xml.rb +17 -26
  91. data/lib/treat/workers/formatters/serializers/yaml.rb +5 -4
  92. data/lib/treat/workers/formatters/unserializers/mongo.rb +4 -4
  93. data/lib/treat/workers/formatters/unserializers/xml.rb +3 -4
  94. data/lib/treat/workers/formatters/unserializers/yaml.rb +3 -4
  95. data/lib/treat/workers/formatters/visualizers/dot.rb +1 -0
  96. data/lib/treat/workers/formatters/visualizers/standoff.rb +2 -3
  97. data/lib/treat/workers/formatters/visualizers/tree.rb +2 -3
  98. data/lib/treat/workers/{group.rb → groupable.rb} +9 -9
  99. data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +1 -3
  100. data/lib/treat/workers/inflectors/conjugators/linguistics.rb +5 -7
  101. data/lib/treat/workers/inflectors/declensors/english.rb +13 -20
  102. data/lib/treat/workers/inflectors/declensors/linguistics.rb +29 -28
  103. data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +0 -2
  104. data/lib/treat/workers/inflectors/stemmers/porter.rb +8 -10
  105. data/lib/treat/workers/inflectors/stemmers/porter_c.rb +7 -7
  106. data/lib/treat/workers/inflectors/stemmers/uea.rb +3 -8
  107. data/lib/treat/workers/learners/classifiers/id3.rb +17 -14
  108. data/lib/treat/workers/learners/classifiers/linear.rb +15 -27
  109. data/lib/treat/workers/learners/classifiers/mlp.rb +32 -19
  110. data/lib/treat/workers/learners/classifiers/svm.rb +28 -21
  111. data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +19 -3
  112. data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +15 -7
  113. data/lib/treat/workers/lexicalizers/taggers/brill/patch.rb +4 -1
  114. data/lib/treat/workers/lexicalizers/taggers/brill.rb +8 -19
  115. data/lib/treat/workers/lexicalizers/taggers/lingua.rb +4 -15
  116. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +22 -13
  117. data/lib/treat/workers/processors/chunkers/autoselect.rb +2 -3
  118. data/lib/treat/workers/processors/chunkers/html.rb +1 -6
  119. data/lib/treat/workers/processors/parsers/enju.rb +2 -4
  120. data/lib/treat/workers/processors/parsers/stanford.rb +13 -7
  121. data/lib/treat/workers/processors/segmenters/punkt.rb +25 -11
  122. data/lib/treat/workers/processors/segmenters/scalpel.rb +20 -0
  123. data/lib/treat/workers/processors/segmenters/srx.rb +42 -0
  124. data/lib/treat/workers/processors/segmenters/stanford.rb +5 -5
  125. data/lib/treat/workers/processors/segmenters/tactful.rb +21 -11
  126. data/lib/treat/workers/processors/tokenizers/ptb.rb +40 -30
  127. data/lib/treat/workers/processors/tokenizers/punkt.rb +14 -19
  128. data/lib/treat/workers/processors/tokenizers/stanford.rb +38 -22
  129. data/lib/treat/workers/retrievers/indexers/ferret.rb +6 -3
  130. data/lib/treat/workers/retrievers/searchers/ferret.rb +2 -2
  131. data/lib/treat/workers/workers.rb +6 -0
  132. data/lib/treat.rb +18 -32
  133. data/models/MANIFEST +1 -0
  134. data/spec/core/data_set.rb +174 -0
  135. data/spec/core/export.rb +52 -0
  136. data/spec/core/problem.rb +144 -0
  137. data/spec/core/question.rb +52 -0
  138. data/spec/{collection.rb → entities/collection.rb} +20 -35
  139. data/spec/{document.rb → entities/document.rb} +3 -54
  140. data/spec/{entity.rb → entities/entity.rb} +10 -9
  141. data/spec/entities/phrase.rb +33 -0
  142. data/spec/{token.rb → entities/token.rb} +0 -57
  143. data/spec/entities/word.rb +3 -0
  144. data/spec/{zone.rb → entities/zone.rb} +0 -26
  145. data/spec/helper.rb +116 -32
  146. data/spec/sandbox.rb +258 -25
  147. data/spec/treat.rb +26 -34
  148. data/spec/workers/agnostic.rb +137 -0
  149. data/spec/workers/english.rb +194 -0
  150. data/spec/workers/examples/english/economist/hungarys_troubles.txt +46 -0
  151. data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
  152. data/spec/{samples → workers/examples/english}/mathematicians/archimedes.abw +0 -0
  153. data/spec/{samples → workers/examples/english}/mathematicians/euler.html +0 -0
  154. data/spec/{samples → workers/examples/english}/mathematicians/gauss.pdf +0 -0
  155. data/spec/{samples → workers/examples/english}/mathematicians/leibniz.txt +0 -0
  156. data/spec/{samples → workers/examples/english}/mathematicians/newton.doc +0 -0
  157. data/spec/workers/examples/english/phrase.xml +5 -0
  158. data/spec/workers/examples/english/test.txt +1 -0
  159. data/spec/workers/language.rb +280 -0
  160. data/spec/workers.rb +28 -0
  161. metadata +122 -105
  162. data/lib/treat/config/core/acronyms.rb +0 -5
  163. data/lib/treat/config/core/encodings.rb +0 -8
  164. data/lib/treat/config/core/entities.rb +0 -2
  165. data/lib/treat/config/core/language.rb +0 -3
  166. data/lib/treat/config/core/paths.rb +0 -8
  167. data/lib/treat/config/core/syntax.rb +0 -1
  168. data/lib/treat/config/core/verbosity.rb +0 -1
  169. data/lib/treat/config/databases/default.rb +0 -1
  170. data/lib/treat/config/databases/mongo.rb +0 -1
  171. data/lib/treat/config/languages/agnostic.rb +0 -34
  172. data/lib/treat/config/languages/english.rb +0 -60
  173. data/lib/treat/config/languages/french.rb +0 -18
  174. data/lib/treat/config/languages/german.rb +0 -18
  175. data/lib/treat/config/languages/italian.rb +0 -12
  176. data/lib/treat/config/languages/polish.rb +0 -12
  177. data/lib/treat/config/languages/spanish.rb +0 -12
  178. data/lib/treat/config/languages/swedish.rb +0 -12
  179. data/lib/treat/config/libraries/punkt.rb +0 -1
  180. data/lib/treat/config/libraries/reuters.rb +0 -1
  181. data/lib/treat/config/libraries/stanford.rb +0 -1
  182. data/lib/treat/config/linguistics/categories.rb +0 -4
  183. data/lib/treat/config/linguistics/punctuation.rb +0 -33
  184. data/lib/treat/config/tags/aligned.rb +0 -221
  185. data/lib/treat/config/tags/enju.rb +0 -71
  186. data/lib/treat/config/tags/paris7.rb +0 -17
  187. data/lib/treat/config/tags/ptb.rb +0 -15
  188. data/lib/treat/config/workers/list.rb +0 -1
  189. data/lib/treat/config.rb +0 -135
  190. data/lib/treat/core.rb +0 -5
  191. data/lib/treat/entities/abilities/copyable.rb +0 -47
  192. data/lib/treat/entities/abilities/debuggable.rb +0 -83
  193. data/lib/treat/entities/abilities/registrable.rb +0 -46
  194. data/lib/treat/entities/collection.rb +0 -40
  195. data/lib/treat/entities/document.rb +0 -10
  196. data/lib/treat/entities/group.rb +0 -18
  197. data/lib/treat/entities/section.rb +0 -13
  198. data/lib/treat/entities/token.rb +0 -47
  199. data/lib/treat/entities/zone.rb +0 -12
  200. data/lib/treat/entities.rb +0 -6
  201. data/lib/treat/helpers/didyoumean.rb +0 -57
  202. data/lib/treat/helpers/escaping.rb +0 -15
  203. data/lib/treat/helpers/formatting.rb +0 -41
  204. data/lib/treat/helpers/objtohash.rb +0 -8
  205. data/lib/treat/helpers/platform.rb +0 -15
  206. data/lib/treat/helpers/reflection.rb +0 -17
  207. data/lib/treat/helpers/temporary.rb +0 -27
  208. data/lib/treat/helpers/verbosity.rb +0 -19
  209. data/lib/treat/helpers.rb +0 -5
  210. data/lib/treat/loaders.rb +0 -10
  211. data/lib/treat/proxies.rb +0 -106
  212. data/lib/treat/workers/formatters/unserializers/autoselect.rb +0 -17
  213. data/lib/treat/workers/inflectors/declensors/active_support.rb +0 -31
  214. data/lib/treat/workers/processors/tokenizers/tactful.rb +0 -68
  215. data/spec/core.rb +0 -441
  216. data/spec/phrase.rb +0 -112
  217. data/spec/word.rb +0 -111
@@ -1,7 +1,7 @@
1
- module Treat::Entities::Abilities::Exportable
1
+ module Treat::Entities::Entity::Exportable
2
2
 
3
3
  def export(problem)
4
- ds = Treat::Core::DataSet.new(problem)
4
+ ds = Treat::Learning::DataSet.new(problem)
5
5
  each_entity(problem.question.target) do |e|
6
6
  ds << e
7
7
  end
@@ -1,4 +1,4 @@
1
- module Treat::Entities::Abilities::Iterable
1
+ module Treat::Entities::Entity::Iterable
2
2
 
3
3
  # Yields each entity of any of the supplied
4
4
  # types in the children tree of this Entity.
@@ -6,12 +6,12 @@ module Treat::Entities::Abilities::Iterable
6
6
  # #each. It does not yield the top element being
7
7
  # recursed.
8
8
  #
9
- # This function NEEDS to be ported to C.
9
+ # This function NEEDS to be ported to C. #FIXME
10
10
  def each_entity(*types)
11
11
  types = [:entity] if types.size == 0
12
12
  f = false
13
13
  types.each do |t2|
14
- if is_a?(Treat::Entities.const_get(cc(t2)))
14
+ if is_a?(Treat::Entities.const_get(t2.cc))
15
15
  f = true; break
16
16
  end
17
17
  end
@@ -57,7 +57,7 @@ module Treat::Entities::Abilities::Iterable
57
57
  def ancestor_with_type(type)
58
58
  return unless has_parent?
59
59
  ancestor = @parent
60
- type_klass = Treat::Entities.const_get(cc(type))
60
+ type_klass = Treat::Entities.const_get(type.cc)
61
61
  while not ancestor.is_a?(type_klass)
62
62
  return nil unless (ancestor && ancestor.has_parent?)
63
63
  ancestor = ancestor.parent
@@ -105,18 +105,6 @@ module Treat::Entities::Abilities::Iterable
105
105
  end
106
106
  i
107
107
  end
108
-
109
- # Return the first element in the array, warning if not
110
- # the only one in the array. Used for magic methods: e.g.,
111
- # the magic method "word" if called on a sentence with many
112
- # words, Treat will return the first word, but warn the user.
113
- def first_but_warn(array, type)
114
- if array.size > 1
115
- warn "Warning: requested one #{type}, but" +
116
- " there are many #{type}s in this entity."
117
- end
118
- array[0]
119
- end
120
108
 
121
109
 
122
110
  end
@@ -1,27 +1,20 @@
1
- module Treat::Entities::Abilities::Magical
1
+ module Treat::Entities::Entity::Magical
2
2
 
3
3
  # Parse "magic methods", which allow the following
4
4
  # syntaxes to be used (where 'word' can be replaced
5
5
  # by any entity type, e.g. token, zone, etc.):
6
6
  #
7
- # - each_word : iterate over each entity of type word.
8
- # - words: return an array of words in the entity.
7
+ # - each_word : iterate over each child of type word.
8
+ # - words: return an array of children words.
9
9
  # - word: return the first word in the entity.
10
10
  # - word_count: return the number of words in the entity.
11
- # - words_with_*(value) (where is an arbitrary feature):
12
- # return the words that have the given feature.
13
- # - word_with_*(value) : return the first word with
14
- # the feature specified by * in value.
15
- #
16
- # Also provides magical methods for types of words:
17
- #
18
- # - each_noun:
19
- # - nouns:
20
- # - noun:
21
- # - noun_count:
22
- # - nouns_with_*(value)
23
- # - noun_with_*(value)
11
+ # - words_with_*(value) (where * is an arbitrary feature):
12
+ # return the words that have the given feature set to value.
24
13
  #
14
+ # Also provides magical methods for types of words (each_noun,
15
+ # nouns, noun_count, nouns_with_*(value), noun_with_*(value), etc.)
16
+ # For this to be used, the words in the text must have been
17
+ # tokenized and categorized in the first place.
25
18
  def magic(sym, *args)
26
19
 
27
20
  # Cache this for performance.
@@ -80,9 +73,21 @@ module Treat::Entities::Abilities::Magical
80
73
  elsif method =~ /^frequency_in_#{@@entities_regexp}$/
81
74
  frequency_in($1.intern)
82
75
  else
83
- return :no_magic
76
+ return :no_magic # :-(
84
77
  end
78
+
85
79
  end
86
80
 
81
+ # Return the first element in the array, warning if not
82
+ # the only one in the array. Used for magic methods: e.g.,
83
+ # the magic method "word" if called on a sentence with many
84
+ # words, Treat will return the first word, but warn the user.
85
+ def first_but_warn(array, type)
86
+ if array.size > 1
87
+ warn "Warning: requested one #{type}, but" +
88
+ " there are many #{type}s in this entity."
89
+ end
90
+ array[0]
91
+ end
87
92
 
88
93
  end
@@ -0,0 +1,36 @@
1
+ # Registers the entities occurring in the subtree of
2
+ # a node as children are added. Also registers text
3
+ # occurrences for word groups and tokens (n grams).
4
+ module Treat::Entities::Entity::Registrable
5
+
6
+ # Registers a token or phrase in the registry.
7
+ # The registry keeps track of children by id,
8
+ # by entity type, and also keeps the position
9
+ # of the entity in its parent entity.
10
+ def register(entity)
11
+ unless @registry
12
+ @count, @registry = 0,
13
+ {id: {}, value: {}, position:{}, type: {}}
14
+ end
15
+ if entity.is_a?(Treat::Entities::Token) ||
16
+ entity.is_a?(Treat::Entities::Group)
17
+ val = entity.to_s.downcase
18
+ @registry[:value][val] ||= 0
19
+ @registry[:value][val] += 1
20
+ end
21
+ @registry[:id][entity.id] = true
22
+ @registry[:type][entity.type] ||= 0
23
+ @registry[:type][entity.type] += 1
24
+ @registry[:position][entity.id] = @count
25
+ @count += 1
26
+ @parent.register(entity) if has_parent?
27
+ end
28
+
29
+ # Backtrack up the tree to find a token registry,
30
+ # by default the one in the root node of the tree.
31
+ def registry(type = nil)
32
+ (has_parent? && type != self.type) ?
33
+ @parent.registry(type) : @registry
34
+ end
35
+
36
+ end
@@ -1,18 +1,22 @@
1
1
  # Gives entities the ability to be converted
2
2
  # to string representations (#to_string, #to_s,
3
3
  # #to_str, #inspect, #print_tree).
4
- module Treat::Entities::Abilities::Stringable
5
-
6
- # Return the entity's true string value in
7
- # plain text format. Non-terminal entities
8
- # will normally have an empty value.
9
- def to_string; @value; end
10
-
4
+ module Treat::Entities::Entity::Stringable
5
+
6
+ # Returns the entity's true string value.
7
+ def to_string; @value.dup; end
8
+
9
+ # Returns an array of the children's string
10
+ # values, found by calling #to_s on them.
11
+ def to_a; @children.map { |c| c.to_s }; end
12
+
13
+ alias :to_ary :to_a
14
+
11
15
  # Returns the entity's string value by
12
16
  # imploding the value of all terminal
13
17
  # entities in the subtree of that entity.
14
18
  def to_s
15
- @value != '' ? @value : implode.strip
19
+ has_children? ? implode.strip : @value.dup
16
20
  end
17
21
 
18
22
  # #to_str is the same as #to_s.
@@ -24,12 +28,10 @@ module Treat::Entities::Abilities::Stringable
24
28
  def short_value(max_length = 30)
25
29
  s = to_s
26
30
  words = s.split(' ')
27
- if s.length < max_length
28
- s
29
- else
30
- words[0..2].join(' ') + ' [...] ' +
31
- words[-2..-1].join(' ')
32
- end
31
+ return s if (s.length < max_length) ||
32
+ !(words[0..2] && words[-2..-1])
33
+ words[0..2].join(' ') + ' [...] ' +
34
+ words[-2..-1].join(' ')
33
35
  end
34
36
 
35
37
  # Print out an ASCII representation of the tree.
@@ -38,7 +40,8 @@ module Treat::Entities::Abilities::Stringable
38
40
  # Return an informative string representation
39
41
  # of the entity.
40
42
  def inspect
41
- s = "#{cl(self.class)} (#{@id.to_s})"
43
+ name = self.class.mn
44
+ s = "#{name} (#{@id.to_s})"
42
45
  if caller_method(2) == :inspect
43
46
  @id.to_s
44
47
  else
@@ -1,94 +1,106 @@
1
1
  module Treat::Entities
2
2
 
3
- module Abilities; end
4
-
5
- # Require abilities.
6
- p = Treat.paths.lib +
7
- 'treat/entities/abilities/*.rb'
8
- Dir.glob(p).each { |f| require f }
9
-
3
+ # Basic tree structure.
10
4
  require 'birch'
11
-
5
+
6
+ # The Entity class extends a basic tree structure
7
+ # (written in C for optimal speed) and represents
8
+ # any form of textual entity in a processing task
9
+ # (this could be a collection of documents, a
10
+ # single document, a single paragraph, etc.)
11
+ #
12
+ # Classes that extend Entity provide the concrete
13
+ # behavior corresponding to the relevant entity type.
14
+ # See entities.rb for a full list and description of
15
+ # the different entity types in the document model.
12
16
  class Entity < ::Birch::Tree
13
17
 
14
- # A Symbol representing the lowercase
15
- # version of the class name.
18
+ # A symbol representing the lowercase
19
+ # version of the class name. This is
20
+ # the only attribute that the Entity
21
+ # class adds to the Birch::Tree class.
16
22
  attr_accessor :type
23
+
24
+ # Require all the mixin files in /entity.
25
+ path = File.expand_path(__FILE__)
26
+ patt = File.dirname(path) + '/entity/*.rb'
27
+ Dir.glob(patt).each { |f| require f }
28
+
29
+ # Implements support for #register, #registry.
30
+ include Registrable
17
31
 
18
- # Implements support for #register,
19
- # #registry, and #contains_* methods.
20
- include Abilities::Registrable
32
+ # Implement support for #self.call_worker, etc.
33
+ extend Delegatable
21
34
 
22
- # Implement support for #self.add_workers
23
- extend Abilities::Delegatable
35
+ # Implement support for #self.print_debug, etc.
36
+ extend Debuggable
24
37
 
25
- # Implement support for #self.print_debug and
26
- # #self.invalid_call_msg
27
- extend Abilities::Debuggable
38
+ # Implement support for #self.build and #self.from_*
39
+ extend Buildable
28
40
 
29
- # Implement support for #self.build
30
- # and #self.from_*
31
- extend Abilities::Buildable
41
+ # Implement support for #apply (previously #do).
42
+ include Applicable
32
43
 
33
- # Implement support for #do.
34
- include Abilities::Doable
44
+ # Implement support for #frequency, #frequency_in,
45
+ # #frequency_of, #position, #position_from_end, etc.
46
+ include Countable
35
47
 
36
- # Implement support for #frequency,
37
- # #frequency_in_parent and #position_in_parent.
38
- include Abilities::Countable
39
-
40
- # Implement support for #magic.
41
- include Abilities::Magical
48
+ # Implement support for over 100 #magic methods!
49
+ include Magical
42
50
 
43
51
  # Implement support for #to_s, #inspect, etc.
44
- include Abilities::Stringable
52
+ include Stringable
45
53
 
46
- # Implement support for #check_has
47
- # and #check_hasnt_children?
48
- include Abilities::Checkable
54
+ # Implement support for #check_has and others.
55
+ include Checkable
49
56
 
50
57
  # Implement support for #each_entity, as well as
51
58
  # #entities_with_type, #ancestors_with_type,
52
- # #entities_with_feature, #entities_with_category.
53
- include Abilities::Iterable
54
-
55
- # Implement support for #export to export
56
- # a line of a data set based on a classification.
57
- include Abilities::Exportable
59
+ # #entities_with_feature, #entities_with_category, etc.
60
+ include Iterable
58
61
 
59
- # Implement support for #copy_into.
60
- include Abilities::Copyable
62
+ # Implement support for #export, allowing to export
63
+ # a data set row from the receiving entity.
64
+ include Exportable
61
65
 
62
66
  # Implement support for #self.compare_with
63
- extend Abilities::Comparable
67
+ extend Comparable
64
68
 
65
69
  # Initialize the entity with its value and
66
70
  # (optionally) a unique identifier. By default,
67
71
  # the object_id will be used as id.
68
72
  def initialize(value = '', id = nil)
69
- id ||= object_id
70
- super(value, id)
73
+ id ||= object_id; super(value, id)
71
74
  @type = :entity if self == Entity
72
- @type ||= ucc(cl(self.class)).intern
75
+ @type ||= self.class.mn.ucc.intern
73
76
  end
74
77
 
75
78
  # Add an entity to the current entity.
76
79
  # Registers the entity in the root node
77
80
  # token registry if the entity is a leaf.
78
- #
79
- # @see Treat::Registrable
81
+ # Unsets the parent node's value; in order
82
+ # to keep the tree clean, only the leaf
83
+ # values are stored.
84
+ #
85
+ # Takes in a single entity or an array of
86
+ # entities. Returns the first child supplied.
87
+ # If a string or a number is supplied, it is first converted with #to_entity.
80
88
  def <<(entities, clear_parent = true)
81
- unless entities.is_a? Array
82
- entities = [entities]
83
- end
84
- entities.each do |entity|
85
- register(entity)
86
- end
89
+ entities = (entities.is_a?(::String) ||
90
+ entities.is_a?(::Numeric)) ?
91
+ entities.to_entity : entities
92
+ entities = entities.is_a?(::Array) ?
93
+ entities : [entities]
94
+ # Register each entity in this node.
95
+ entities.each { |e| register(e) }
96
+ # Pass to the <<() method in Birch.
87
97
  super(entities)
98
+ # Unset the parent value if necessary.
88
99
  @parent.value = '' if has_parent?
89
- entities[0]
100
+ # Return the first child.
101
+ return entities[0]
90
102
  end
91
-
103
+
92
104
  # Catch missing methods to support method-like
93
105
  # access to features (e.g. entity.category
94
106
  # instead of entity.features[:category]) and to
@@ -102,29 +114,26 @@ module Treat::Entities
102
114
  # sugar for the #self.build method.
103
115
  def method_missing(sym, *args, &block)
104
116
  return self.build(*args) if sym == nil
105
-
106
- if !@features.has_key?(sym)
107
- r = magic(sym, *args, &block)
108
- return r unless r == :no_magic
109
- begin
110
- super(sym, *args, &block)
111
- rescue NoMethodError
112
- raise Treat::Exception,
113
- if Treat::Workers.lookup(sym)
114
- msg = "Method #{sym} cannot " +
115
- "be called on a #{type}."
116
- else
117
- msg = "Method #{sym} does not exist."
118
- msg += did_you_mean?(
119
- Treat::Workers.methods, sym)
120
- end
121
- end
122
- else
123
- @features[sym]
124
- end
125
-
117
+ return @features[sym] if @features.has_key?(sym)
118
+ result = magic(sym, *args, &block)
119
+ return result unless result == :no_magic
120
+ begin; super(sym, *args, &block)
121
+ rescue NoMethodError; invalid_call(sym); end
126
122
  end
127
-
123
+
124
+ # Raises a Treat::Exception saying that the
125
+ # method called was invalid, and that the
126
+ # requested method does not exist. Also
127
+ # provides suggestions for misspellings.
128
+ def invalid_call(sym)
129
+ msg = Treat::Workers.lookup(sym) ?
130
+ "Method #{sym} can't be called on a #{type}." :
131
+ "Method #{sym} is not defined by Treat." +
132
+ Treat::Helpers::Help.did_you_mean?(
133
+ Treat::Workers.methods, sym)
134
+ raise Treat::Exception, msg
135
+ end
136
+
128
137
  end
129
138
 
130
139
  end
@@ -0,0 +1,3 @@
1
+ # Custom exception class allowing to differentiate
2
+ # Treat errors from library/Ruby exceptions.
3
+ class Treat::Exception < ::Exception; end
@@ -0,0 +1,29 @@
1
+ # Helper methods to manipulate hashes.
2
+ class Treat::Helpers::Hash
3
+
4
+ # Mixin to allow conversion of hashes to
5
+ # nested structs with the keys as attributes.
6
+ module ToStruct
7
+ # Converts a hash to nested structs.
8
+ def to_struct
9
+ hash = self
10
+ symbols = hash.keys.select { |k|
11
+ !k.is_a?(Symbol) }.size
12
+ return hash if symbols > 0
13
+ klass = Struct.new(*hash.keys)
14
+ struct = klass.new(*hash.values)
15
+ hash.each do |key, value|
16
+ if value.is_a?(Hash)
17
+ v = value.to_struct
18
+ struct[key] = v
19
+ end
20
+ end; return struct
21
+ end
22
+ end
23
+
24
+ # Include the mixins on the core Hash class.
25
+ Hash.class_eval do
26
+ include Treat::Helpers::Hash::ToStruct
27
+ end
28
+
29
+ end
@@ -0,0 +1,35 @@
1
+ # Helper methods to detect misspellings
2
+ # and suggest alternatives to the user.
3
+ class Treat::Helpers::Help
4
+
5
+ # Search the list to see if there are
6
+ # words similar to #name in the #list
7
+ # If yes, return a string saying
8
+ # "Did you mean ... ?" with the names.
9
+ def self.did_you_mean?(list, name)
10
+ return '' # Fix
11
+ list = list.map { |e| e.to_s }
12
+ name = name.to_s
13
+ sugg = []
14
+ list.each do |element|
15
+ l = self.levenshtein(element,name)
16
+ if l > 0 && l < 2
17
+ sugg << element
18
+ end
19
+ end
20
+ unless sugg.size == 0
21
+ if sugg.size == 1
22
+ msg += " Perhaps you meant '#{sugg[0]}' ?"
23
+ else
24
+ sugg_quote = sugg[0..-2].map do
25
+ |x| '\'' + x + '\''
26
+ end
27
+ msg += " Perhaps you meant " +
28
+ "#{sugg_quote.join(', ')}," +
29
+ " or '#{sugg[-1]}' ?"
30
+ end
31
+ end
32
+ msg
33
+ end
34
+
35
+ end
@@ -0,0 +1,55 @@
1
+ # Methods related to object reflection.
2
+ class Treat::Helpers::Object
3
+
4
+ # Allow introspection onto what method called
5
+ # another one at runtime (useful for debugging).
6
+ module CallerMethod
7
+ # Pattern to match method from trace.
8
+ CMPattern = /^(.+?):(\d+)(?::in `(.*)')?/
9
+ # Return the name of the method that
10
+ # called the method that calls this method.
11
+ def caller_method(n = 3)
12
+ at = caller(n).first
13
+ CMPattern =~ at
14
+ Regexp.last_match[3].
15
+ gsub('block in ', '').intern
16
+ end
17
+ end
18
+
19
+ # Retrieve the last name of a class/module
20
+ # (i.e. the part after the last "::").
21
+ module ModuleName
22
+ def module_name; self.to_s.split('::')[-1]; end
23
+ alias :mn :module_name
24
+ end
25
+
26
+ module Verbosity
27
+ # Runs a block of code without warnings.
28
+ def silence_warnings(&block)
29
+ warn_level = $VERBOSE; $VERBOSE = nil
30
+ result = block.call; $VERBOSE = warn_level
31
+ result
32
+ end
33
+ # Runs a block of code while blocking stdout.
34
+ def silence_stdout(log = '/dev/null')
35
+ unless Treat.core.verbosity.silence
36
+ yield; return
37
+ end
38
+ file, old, ret = File.new(log, 'w'),
39
+ $stdout.dup, nil; $stdout.reopen(file)
40
+ ret = yield; $stdout = old; return ret
41
+ end
42
+ end
43
+
44
+ # Allow getting the caller method in any context.
45
+ Object.class_eval do
46
+ include Treat::Helpers::Object::CallerMethod
47
+ include Treat::Helpers::Object::Verbosity
48
+ end
49
+
50
+ # Allow getting the last name of any module/class.
51
+ Module.class_eval do
52
+ include Treat::Helpers::Object::ModuleName
53
+ end
54
+
55
+ end
@@ -0,0 +1,124 @@
1
+ # Helper methods for camel casing and
2
+ # escaping standard strings and symbols.
3
+ class Treat::Helpers::String
4
+
5
+ # Utility to escape floating point numbers
6
+ # from strings (useful for a variety of
7
+ # applications, including chunking, segmenting
8
+ # and tokenizing, to exclude periods that
9
+ # are not sentence terminators).
10
+ module Escapable
11
+
12
+ # Escape char to use.
13
+ EscapeChar = '^^^'
14
+ # Regex for escape.
15
+ Regex = /([0-9]+)\.([0-9]+)/
16
+
17
+ # Escape float periods with EscapeChar.
18
+ def escape_floats!
19
+ to_s.gsub!(Regex) { $1 + EscapeChar + $2 }
20
+ end
21
+
22
+ end
23
+
24
+ # Counterpart to Treat::Helpers::String::Escapable;
25
+ # unescapes floats, restoring the original text.
26
+ module Unescapable
27
+
28
+ # Escaped for regex.
29
+ EscapedEscapeChar = '\^\^\^'
30
+ # Regex for unescape.
31
+ Regex = /([0-9]+)#{EscapedEscapeChar}([0-9]+)/
32
+
33
+ # Unescape float periods (restore text).
34
+ def unescape_floats!
35
+ to_s.gsub!(Regex) { $1 + '.' + $2 }
36
+ end
37
+
38
+ end
39
+
40
+ # Transform an un_camel_cased string
41
+ # into a CamelCased string. This is
42
+ # available on String and Symbol.
43
+ module CamelCaseable
44
+
45
+ # A cache to optimize camel casing.
46
+ @@cc_cache = {}
47
+
48
+ # Regex for camel casing.
49
+ Regex = /^[a-z]|_[a-z]/
50
+
51
+ # Convert un_camel_case to CamelCase.
52
+ def camel_case
53
+ o_phrase, phrase = to_s, to_s.dup
54
+ if @@cc_cache[o_phrase]
55
+ return @@cc_cache[o_phrase]
56
+ end
57
+ if Treat.core.acronyms.include?(phrase)
58
+ phrase = phrase.upcase
59
+ else
60
+ phrase.gsub!(Regex) { |a| a.upcase }
61
+ phrase.gsub!('_', '')
62
+ end
63
+ @@cc_cache[o_phrase] = phrase
64
+ end
65
+
66
+ alias :cc :camel_case
67
+
68
+ end
69
+
70
+ # Counterpart of Treat::Helpers::String::CamelCaseable;
71
+ # transforms a CamelCase string to its un_camel_
72
+ # case corresponding form.
73
+ module UnCamelCaseable
74
+
75
+ # A cache to optimize un camel casing.
76
+ @@ucc_cache = {}
77
+
78
+ # Convert CamelCase to un_camel_case.
79
+ def un_camel_case
80
+ o_phrase, phrase = to_s, to_s.dup
81
+ if @@ucc_cache[o_phrase]
82
+ return @@ucc_cache[o_phrase]
83
+ end
84
+ acros = Treat.core.acronyms
85
+ if !acros.include?(phrase.downcase)
86
+ phrase.gsub!(/[A-Z]/) do |p|
87
+ '_' + p.downcase
88
+ end
89
+ if phrase[0] == '_'
90
+ return phrase = phrase[1..-1]
91
+ end
92
+ else
93
+ phrase = phrase.downcase
94
+ end
95
+ @@ucc_cache[o_phrase] = phrase
96
+ end
97
+
98
+ alias :ucc :un_camel_case
99
+
100
+ end
101
+
102
+ # Determines whether module is
103
+ # an "-able" mixin kind of thing.
104
+ module IsMixin
105
+ def is_mixin?; to_s[-4..-1] == 'able'; end
106
+ end
107
+
108
+ # Graft the helpers onto the string module.
109
+ String.class_eval do
110
+ include Treat::Helpers::String::CamelCaseable
111
+ include Treat::Helpers::String::UnCamelCaseable
112
+ include Treat::Helpers::String::Escapable
113
+ include Treat::Helpers::String::Unescapable
114
+ include Treat::Helpers::String::IsMixin
115
+ end
116
+
117
+ # Graft camel casing onto symbols.
118
+ Symbol.class_eval do
119
+ include Treat::Helpers::String::CamelCaseable
120
+ include Treat::Helpers::String::UnCamelCaseable
121
+ end
122
+
123
+
124
+ end