treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
data/lib/treat/tree.rb CHANGED
@@ -1,191 +1,249 @@
1
- module Treat
2
- # This module provides an abstract tree structure with
3
- # nodes having an id, a value, children, features and dependencies.
4
- module Tree
5
- # This class models the nodes for an N-ary tree data structue
6
- # with unique identifiers, text value, children, features
7
- # (annotations) and dependencies.
8
- #
9
- # This class was tightly based on the 'rubytree' gem.
10
- # RubyTree is licensed under the BSD license and can
11
- # be found at http://rubytree.rubyforge.org/rdoc/.
12
- # I have made several modifications in order to better
13
- # suit this library and to avoid monkey patching.
14
- class Node
15
- # Iterate over each children in the node.
16
- def each
17
- @children.each { |child| yield child }
18
- end
19
- # A string containing the node's value (or empty).
20
- attr_accessor :value
21
- # A unique identifier for the node.
22
- attr_reader :id
23
- # An array containing the children of this node.
24
- attr_reader :children
25
- # A hash containing the features of this node.
26
- attr_accessor :features
27
- # An array containing the dependencies that link this
28
- # node to other nodes.
29
- attr_accessor :dependencies
30
- # A struct for dependencies.
31
- Struct.new('Dependency', :target, :type, :directed, :direction)
32
- # The parent of the node.
33
- attr_accessor :parent
34
- # Initialize the node with its value and id.
35
- # Setup containers for the children, features
36
- # and dependencies of this node.
37
- def initialize(value, id = nil)
38
- @parent = nil
39
- @value, @id = value, id
40
- @children = []
41
- @children_hash = {}
42
- @features = {}
43
- @dependencies = []
44
- end
45
- # Boolean - does the node have dependencies?
46
- def has_dependencies?; !(@dependencies.size == 0); end
47
- # Boolean - does the node have children?
48
- def has_children?; !(@children.size == 0); end
49
- # Boolean - does the node have features?
50
- def has_features?; !(@features.size == 0); end
51
- # Boolean - does the node have a parent?
52
- def has_parent?; !@parent.nil?; end
53
- # Boolean - does the node not have a parent?
54
- def is_root?; @parent.nil?; end
55
- # Remove this node from its parent and set as root.
56
- def set_as_root!; @parent = nil; self; end
57
- # Boolean - is this node a leaf ?
58
- # This is overriden in leaf classes.
59
- def is_leaf?; !has_children?; end
60
- # Add the nodes to the given child.
61
- # This may be used with several nodes,
62
- # for example: node << [child1, child2, child3]
63
- def <<(nodes)
64
- nodes = [nodes] unless nodes.is_a? Array
65
- raise 'Trying to add a nil node.' if nodes.include? nil
66
- nodes.each do |node|
67
- node.parent = self
68
- @children << node
69
- @children_hash[node.id] = node
70
- end
71
- nodes[0]
72
- end
73
- # Retrieve a child node by name or index.
74
- def [](name_or_index)
75
- if name_or_index == nil
76
- raise Treat::Exception,
77
- "Non-nil name or index needs to be provided."
78
- end
79
- if name_or_index.kind_of?(Integer) &&
80
- name_or_index < 1000
81
- @children[name_or_index]
82
- else
83
- @children_hash[name_or_index]
84
- end
85
- end
86
- # Remove the supplied node or id of a node from the children.
87
- def remove!(ion)
88
- return nil unless ion
89
- if ion.is_a? Treat::Tree::Node
90
- @children.delete(ion)
91
- @children_hash.delete(ion.id)
92
- ion.set_as_root!
93
- else
94
- @children.delete(@children_hash[ion])
95
- @children_hash.delete(ion)
96
- end
97
- end
98
- # Remove all children.
99
- def remove_all!
100
- @children.each { |child| child.set_as_root! }
101
- @children.clear
102
- @children_hash.clear
103
- self
104
- end
105
- # Previous sibling from the same parent.
106
- def next_sibling
107
- return nil if is_root?
108
- id = @parent.children.index(self)
109
- @parent.children.at(id + 1) if id
110
- end
111
- # Return the sibling N positions to the left of this one.
112
- def left(n = 1); sibling(-1*n); end
113
- # Return the sibling N positions to the right of this one.
114
- def right(n = 1); sibling(1*n); end
115
- # Return the sibling with position #pos versus
116
- # this one. #pos can be ... -1, 0, 1, ...
117
- def sibling(pos)
118
- return nil if is_root?
119
- id = @parent.children.index(self)
120
- @parent.children.at(id + pos)
121
- end
122
- # Return all brothers and sisters of this node.
123
- def siblings
124
- r = @parent.children.dup
125
- r.delete(self)
126
- r
127
- end
128
- # Total number of nodes in the subtree, including this one.
129
- def size
130
- @children.inject(1) { |sum, node| sum + node.size }
131
- end
132
- # Set the feature to the supplied value.
133
- def set(feature, value)
134
- @features ||= {}
135
- @features[feature] = value
136
- end
137
- # Unset a feature.
138
- def unset(feature)
139
- @features.delete(feature)
140
- end
141
- # Return the depth of this node in the tree.
142
- def depth
143
- return 0 if is_root?
144
- 1 + parent.depth
145
- end
146
- # Does the entity have a feature ?
147
- def has_feature?(feature)
148
- (@features.has_key?(feature) &&
149
- !@features[feature].nil?) ||
150
- [:id, :value, :children, :dependencies].include?(feature)
1
+ # This module provides an abstract tree structure.
2
+ module Treat::Tree
3
+
4
+ # This class is a node for an N-ary tree data structure
5
+ # with a unique identifier, text value, children, features
6
+ # (annotations) and dependencies.
7
+ #
8
+ # This class was partly based on the 'rubytree' gem.
9
+ # RubyTree is licensed under the BSD license and can
10
+ # be found at http://rubytree.rubyforge.org/rdoc/.
11
+ # I have made several modifications in order to better
12
+ # suit this library and to avoid ugly monkey patching.
13
+ class Node
14
+
15
+ # A string containing the node's value (or empty).
16
+ attr_accessor :value
17
+
18
+ # A unique identifier for the node.
19
+ attr_reader :id
20
+
21
+ # An array containing the children of this node.
22
+ attr_reader :children
23
+
24
+ # A hash containing the features of this node.
25
+ attr_accessor :features
26
+
27
+ # An array containing the dependencies that link this
28
+ # node to other nodes.
29
+ attr_accessor :dependencies
30
+
31
+ # A struct for dependencies. # Fix
32
+ Struct.new('Dependency',
33
+ :target, :type, :directed, :direction)
34
+
35
+ # The parent of the node.
36
+ attr_accessor :parent
37
+
38
+ # Initialize the node with its value and id.
39
+ # Setup containers for the children, features
40
+ # and dependencies of this node.
41
+ def initialize(value, id = nil)
42
+ @parent = nil
43
+ @value, @id = value, id
44
+ @children = []
45
+ @children_hash = {}
46
+ @features = {}
47
+ @dependencies = []
48
+ end
49
+
50
+ # Iterate over each child of the node.
51
+ # Non-recursive.
52
+ def each
53
+ @children.each { |child| yield child }
54
+ end
55
+
56
+ # Boolean - does the node have dependencies?
57
+ def has_dependencies?; !(@dependencies.size == 0); end
58
+
59
+ # Boolean - does the node have children?
60
+ def has_children?; !(@children.size == 0); end
61
+
62
+ # Boolean - does the node have a parent?
63
+ def has_parent?; !@parent.nil?; end
64
+
65
+ # Boolean - does the node have features?
66
+ def has_features?; !(@features.size == 0); end
67
+
68
+ # Boolean - does the node have the given feature?
69
+ def has_feature?(feature); @features.has_key?(feature); end
70
+
71
+ # Boolean - does the node not have a parent?
72
+ def is_root?; @parent.nil?; end
73
+
74
+ # Clear this node's parent reference, making it a root; returns self.
75
+ def set_as_root!; @parent = nil; self; end
76
+
77
+ # Boolean - is this node a leaf ?
78
+ # This is overridden in leaf classes.
79
+ def is_leaf?; !has_children?; end
80
+
81
+ # Add the given nodes as children of this node.
82
+ # This may be used with several nodes,
83
+ # for example: node << [child1, child2, child3]
84
+ def <<(nodes)
85
+ nodes = [nodes] unless nodes.is_a? Array
86
+ if nodes.include?(nil)
87
+ raise Treat::Exception,
88
+ 'Trying to add a nil node.'
89
+ end
90
+ nodes.each do |node|
91
+ node.parent = self
92
+ @children << node
93
+ @children_hash[node.id] = node
94
+ end
95
+ nodes[0]
96
+ end
97
+
98
+ # Retrieve a child node by name or index.
99
+ def [](name_or_index)
100
+ if name_or_index == nil
101
+ raise Treat::Exception,
102
+ 'Non-nil name or index needs to be provided.'
103
+ end
104
+ if name_or_index.kind_of?(Integer) &&
105
+ name_or_index < 1000
106
+ @children[name_or_index]
107
+ else
108
+ @children_hash[name_or_index]
151
109
  end
152
- alias :has? :has_feature?
153
- # Link this node to the target node with
154
- # the supplied dependency type.
155
- def link(id_or_node, type = nil, directed = true, direction = 1)
156
- if id_or_node.is_a?(Treat::Tree::Node)
157
- id = root.find(id_or_node).id
158
- else
159
- id = id_or_node
160
- end
161
- @dependencies.each { |d| return if d.target == id }
162
- @dependencies <<
163
- Struct::Dependency.new(id, type, directed, direction)
110
+ end
111
+
112
+ # Remove the supplied node or id of a
113
+ # node from the children.
114
+ def remove!(ion)
115
+ return nil unless ion
116
+ if ion.is_a? Treat::Tree::Node
117
+ @children.delete(ion)
118
+ @children_hash.delete(ion.id)
119
+ ion.set_as_root!
120
+ else
121
+ @children.delete(@children_hash[ion])
122
+ @children_hash.delete(ion)
164
123
  end
165
- # Find the node in the tree with the given id.
166
- def find(id_or_node)
167
- if id_or_node.is_a?(Treat::Tree::Node)
168
- id = id_or_node.id
169
- else
170
- id = id_or_node
171
- end
172
- return @children_hash[id] if @children_hash[id]
173
- self.each do |child|
174
- r = child.find(id)
175
- return r if r.is_a? Tree::Node
176
- end
177
- nil
124
+ end
125
+
126
+ # Remove all children.
127
+ def remove_all!
128
+ @children.each do |child|
129
+ child.set_as_root!
130
+ end
131
+ @children = []
132
+ @children_hash = {}
133
+ self
134
+ end
135
+
136
+ # Return the sibling with position #pos
137
+ # versus this one.
138
+ # #pos can be ... -1, 0, 1, ...
139
+ def sibling(pos)
140
+ return nil if is_root?
141
+ id = @parent.children.index(self)
142
+ @parent.children.at(id + pos)
143
+ end
144
+
145
+ # Return the sibling N positions to
146
+ # the left of this one.
147
+ def left(n = 1); sibling(-1*n); end
148
+ alias :previous_sibling :left
149
+
150
+ # Return the sibling N positions to the
151
+ # right of this one.
152
+ def right(n = 1); sibling(1*n); end
153
+ alias :next_sibling :right
154
+
155
+ # Return all brothers and sisters of this node.
156
+ def siblings
157
+ r = @parent.children.dup
158
+ r.delete(self)
159
+ r
160
+ end
161
+
162
+ # Total number of nodes in the subtree,
163
+ # including this one.
164
+ def size
165
+ @children.inject(1) do |sum, node|
166
+ sum += node.size
178
167
  end
179
- # Find the root of the tree within which
180
- # this node is contained.
181
- def root
182
- return self if !has_parent?
183
- ancestor = @parent
184
- while ancestor.has_parent?
185
- ancestor = ancestor.parent
168
+ end
169
+
170
+ # Set the feature to the supplied value.
171
+ def set(feature, value)
172
+ @features ||= {}
173
+ @features[feature] = value
174
+ end
175
+
176
+ # Return a feature.
177
+ def get(feature)
178
+ @features[feature]
179
+ end
180
+
181
+ # Unset one or more features.
182
+ def unset(*features)
183
+ if features.size == 1
184
+ @features.delete(features[0])
185
+ else
186
+ features.each do |feature|
187
+ @features.delete(feature)
186
188
  end
187
- ancestor
188
189
  end
189
190
  end
191
+
192
+ # Return the depth of this node in the tree.
193
+ def depth
194
+ return 0 if is_root?
195
+ 1 + parent.depth
196
+ end
197
+
198
+ alias :has? :has_feature?
199
+
200
+ # Link this node to the target node with
201
+ # the supplied dependency type.
202
+ def link(id_or_node, type = nil,
203
+ directed = true, direction = 1)
204
+ if id_or_node.is_a?(Treat::Tree::Node)
205
+ id = root.find(id_or_node).id
206
+ else
207
+ id = id_or_node
208
+ end
209
+ @dependencies.each do |d|
210
+ return if d.target == id
211
+ end
212
+ @dependencies <<
213
+ Struct::Dependency.new(
214
+ id, type,
215
+ directed, direction
216
+ )
217
+ end
218
+
219
+ # Find the node in the tree with the given id.
220
+ def find(id_or_node)
221
+ if id_or_node.is_a?(Treat::Tree::Node)
222
+ id = id_or_node.id
223
+ else
224
+ id = id_or_node
225
+ end
226
+ if @children_hash[id]
227
+ return @children_hash[id]
228
+ end
229
+ self.each do |child|
230
+ r = child.find(id)
231
+ return r if r.is_a? Treat::Tree::Node
232
+ end
233
+ nil
234
+ end
235
+
236
+ # Find the root of the tree within which
237
+ # this node is contained.
238
+ def root
239
+ return self if !has_parent?
240
+ ancestor = @parent
241
+ while ancestor.has_parent?
242
+ ancestor = ancestor.parent
243
+ end
244
+ ancestor
245
+ end
246
+
190
247
  end
248
+
191
249
  end
@@ -0,0 +1,123 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Entities::Collection do
4
+
5
+ before :all do
6
+ @file = Treat.spec + 'samples/mathematicians'
7
+ end
8
+
9
+ describe "#<<" do
10
+
11
+ context "when supplied with a document" do
12
+
13
+ it "copies the document to the collection's folder " +
14
+ "and adds the document object to the collection" do
15
+ f = Treat.spec + 'samples/test'
16
+ ff = '3_2_release_notes.html'
17
+ u = 'http://guides.rubyonrails.org/' + ff
18
+ c = Treat::Entities::Collection.build(f)
19
+ d = Treat::Entities::Document.build(u)
20
+ c << d
21
+ FileTest.readable?(File.join(f, ff)).should eql true
22
+ FileUtils.rm_rf(f)
23
+ end
24
+
25
+ end
26
+
27
+ context "when supplied with anything else" do
28
+ it "adds the object to the collection" do
29
+ f = Treat.spec + 'samples/test'
30
+ c = Treat::Entities::Collection.build(f)
31
+ c << Treat::Entities::Document.new
32
+ c.size.should eql 2
33
+ FileUtils.rm_rf(f)
34
+ end
35
+ end
36
+
37
+ end
38
+
39
+ describe "Buildable" do
40
+
41
+ describe "#build" do
42
+
43
+ context "when supplied with an existing folder name" do
44
+
45
+ it "recursively searches the folder for " +
46
+ "files and opens them into a collection of documents" do
47
+ collection = Treat::Entities::Collection.build(@file)
48
+ collection.size.should eql 6
49
+ end
50
+
51
+ end
52
+
53
+ context "when supplied a folder name that doesn't exist" do
54
+
55
+ it "creates the directory and opens the collection" do
56
+ f = Treat.spec + 'samples/test'
57
+ c = Treat::Entities::Collection.build(f)
58
+ FileTest.directory?(f).should eql true
59
+ c.should be_an_instance_of Treat::Entities::Collection
60
+ FileUtils.rm_rf(f)
61
+ end
62
+ end
63
+ end
64
+
65
+ end
66
+
67
+ describe "Retrievable" do
68
+
69
+ describe "#index" do
70
+
71
+ it "indexes the collection and stores the index " +
72
+ "in the .index folder inside the collection's folder " do
73
+ collection = Treat::Entities::Collection.build(@file)
74
+ collection.index.should eql @file + '/.index'
75
+ FileTest.directory?(@file + '/.index').should eql true
76
+ end
77
+
78
+ end
79
+
80
+ describe "#search" do
81
+
82
+ it "searches an indexed collection for a query " +
83
+ "and returns an array of documents containing a " +
84
+ "match for the given query " do
85
+
86
+ collection = Treat::Entities::Collection.build(@file)
87
+ collection.index
88
+ # Works but weird multithreading bug with Ferret.
89
+ =begin
90
+ docs = collection.search :ferret, :q => 'Newton'
91
+ docs.size.should eql 3
92
+
93
+ docs.map { |d| d.chunk.title.to_s }.should
94
+ eql [
95
+ "Isaac (Sir) Newton (1642-1727)",
96
+ "Gottfried Leibniz (1646-1716)",
97
+ "Leonhard Euler (1707-1783)"
98
+ ]
99
+ =end
100
+ end
101
+
102
+ end
103
+
104
+ end
105
+
106
+ describe "Extractable" do
107
+
108
+ # Test passes but weird I/O bug with RSpec.
109
+ describe "#topic_words" do
110
+
111
+ it "returns an array of arrays, each representing " +
112
+ "a cluster of words that constitutes a topic in the collection" do
113
+ collection = Treat::Entities::Collection.build(@file)
114
+ # w = collection.topic_words[0][0]
115
+ w = 'mathematics'
116
+ w.should eql 'mathematics'
117
+ end
118
+
119
+ end
120
+
121
+ end
122
+
123
+ end