treat 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -3
- data/README.md +33 -0
- data/files/INFO +1 -0
- data/lib/treat.rb +40 -105
- data/lib/treat/ai.rb +12 -0
- data/lib/treat/ai/classifiers/id3.rb +27 -0
- data/lib/treat/categories.rb +82 -35
- data/lib/treat/categorizable.rb +44 -0
- data/lib/treat/classification.rb +61 -0
- data/lib/treat/configurable.rb +115 -0
- data/lib/treat/data_set.rb +42 -0
- data/lib/treat/dependencies.rb +24 -0
- data/lib/treat/downloader.rb +87 -0
- data/lib/treat/entities.rb +68 -66
- data/lib/treat/entities/abilities.rb +10 -0
- data/lib/treat/entities/abilities/buildable.rb +327 -0
- data/lib/treat/entities/abilities/checkable.rb +31 -0
- data/lib/treat/entities/abilities/copyable.rb +45 -0
- data/lib/treat/entities/abilities/countable.rb +51 -0
- data/lib/treat/entities/abilities/debuggable.rb +83 -0
- data/lib/treat/entities/abilities/delegatable.rb +123 -0
- data/lib/treat/entities/abilities/doable.rb +62 -0
- data/lib/treat/entities/abilities/exportable.rb +11 -0
- data/lib/treat/entities/abilities/iterable.rb +115 -0
- data/lib/treat/entities/abilities/magical.rb +83 -0
- data/lib/treat/entities/abilities/registrable.rb +74 -0
- data/lib/treat/entities/abilities/stringable.rb +91 -0
- data/lib/treat/entities/entities.rb +104 -0
- data/lib/treat/entities/entity.rb +122 -245
- data/lib/treat/exception.rb +4 -4
- data/lib/treat/extractors.rb +77 -80
- data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
- data/lib/treat/extractors/language/what_language.rb +50 -45
- data/lib/treat/extractors/name_tag/stanford.rb +55 -0
- data/lib/treat/extractors/tf_idf/native.rb +87 -0
- data/lib/treat/extractors/time/chronic.rb +55 -0
- data/lib/treat/extractors/time/nickel.rb +86 -62
- data/lib/treat/extractors/time/ruby.rb +53 -0
- data/lib/treat/extractors/topic_words/lda.rb +67 -58
- data/lib/treat/extractors/topics/reuters.rb +100 -87
- data/lib/treat/formatters.rb +39 -35
- data/lib/treat/formatters/readers/abw.rb +49 -29
- data/lib/treat/formatters/readers/autoselect.rb +37 -33
- data/lib/treat/formatters/readers/doc.rb +19 -13
- data/lib/treat/formatters/readers/html.rb +52 -30
- data/lib/treat/formatters/readers/image.rb +41 -40
- data/lib/treat/formatters/readers/odt.rb +59 -45
- data/lib/treat/formatters/readers/pdf.rb +28 -25
- data/lib/treat/formatters/readers/txt.rb +12 -15
- data/lib/treat/formatters/readers/xml.rb +73 -36
- data/lib/treat/formatters/serializers/xml.rb +80 -79
- data/lib/treat/formatters/serializers/yaml.rb +19 -18
- data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
- data/lib/treat/formatters/unserializers/xml.rb +94 -99
- data/lib/treat/formatters/unserializers/yaml.rb +20 -19
- data/lib/treat/formatters/visualizers/dot.rb +132 -132
- data/lib/treat/formatters/visualizers/standoff.rb +52 -44
- data/lib/treat/formatters/visualizers/tree.rb +26 -29
- data/lib/treat/groupable.rb +153 -0
- data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
- data/lib/treat/inflectors.rb +50 -45
- data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
- data/lib/treat/inflectors/declensors/active_support.rb +31 -0
- data/lib/treat/inflectors/declensors/english.rb +38 -0
- data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
- data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
- data/lib/treat/inflectors/stemmers/porter.rb +160 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
- data/lib/treat/inflectors/stemmers/uea.rb +28 -0
- data/lib/treat/installer.rb +308 -0
- data/lib/treat/kernel.rb +105 -27
- data/lib/treat/languages.rb +122 -88
- data/lib/treat/languages/arabic.rb +15 -15
- data/lib/treat/languages/chinese.rb +15 -15
- data/lib/treat/languages/dutch.rb +15 -15
- data/lib/treat/languages/english.rb +61 -62
- data/lib/treat/languages/french.rb +19 -19
- data/lib/treat/languages/german.rb +20 -20
- data/lib/treat/languages/greek.rb +15 -15
- data/lib/treat/languages/italian.rb +16 -16
- data/lib/treat/languages/polish.rb +15 -15
- data/lib/treat/languages/portuguese.rb +15 -15
- data/lib/treat/languages/russian.rb +15 -15
- data/lib/treat/languages/spanish.rb +16 -16
- data/lib/treat/languages/swedish.rb +16 -16
- data/lib/treat/lexicalizers.rb +34 -55
- data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
- data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
- data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
- data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
- data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
- data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
- data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
- data/lib/treat/linguistics.rb +9 -0
- data/lib/treat/linguistics/categories.rb +11 -0
- data/lib/treat/linguistics/tags.rb +422 -0
- data/lib/treat/loaders/linguistics.rb +30 -0
- data/lib/treat/loaders/stanford.rb +27 -0
- data/lib/treat/object.rb +1 -0
- data/lib/treat/processors.rb +37 -44
- data/lib/treat/processors/chunkers/autoselect.rb +16 -0
- data/lib/treat/processors/chunkers/html.rb +71 -0
- data/lib/treat/processors/chunkers/txt.rb +18 -24
- data/lib/treat/processors/parsers/enju.rb +253 -208
- data/lib/treat/processors/parsers/stanford.rb +130 -131
- data/lib/treat/processors/segmenters/punkt.rb +79 -45
- data/lib/treat/processors/segmenters/stanford.rb +46 -48
- data/lib/treat/processors/segmenters/tactful.rb +43 -36
- data/lib/treat/processors/tokenizers/perl.rb +124 -92
- data/lib/treat/processors/tokenizers/ptb.rb +81 -0
- data/lib/treat/processors/tokenizers/punkt.rb +48 -42
- data/lib/treat/processors/tokenizers/stanford.rb +39 -38
- data/lib/treat/processors/tokenizers/tactful.rb +64 -55
- data/lib/treat/proxies.rb +52 -35
- data/lib/treat/retrievers.rb +26 -16
- data/lib/treat/retrievers/indexers/ferret.rb +47 -26
- data/lib/treat/retrievers/searchers/ferret.rb +69 -50
- data/lib/treat/tree.rb +241 -183
- data/spec/collection.rb +123 -0
- data/spec/document.rb +93 -0
- data/spec/entity.rb +408 -0
- data/spec/languages.rb +25 -0
- data/spec/phrase.rb +146 -0
- data/spec/samples/mathematicians/archimedes.abw +34 -0
- data/spec/samples/mathematicians/euler.html +21 -0
- data/spec/samples/mathematicians/gauss.pdf +0 -0
- data/spec/samples/mathematicians/leibniz.txt +13 -0
- data/spec/samples/mathematicians/newton.doc +0 -0
- data/spec/sandbox.rb +5 -0
- data/spec/token.rb +109 -0
- data/spec/treat.rb +52 -0
- data/spec/tree.rb +117 -0
- data/spec/word.rb +110 -0
- data/spec/zone.rb +66 -0
- data/tmp/INFO +1 -1
- metadata +100 -201
- data/INSTALL +0 -1
- data/README +0 -3
- data/TODO +0 -28
- data/lib/economist/half_cocked_basel.txt +0 -16
- data/lib/economist/hungarys_troubles.txt +0 -46
- data/lib/economist/indias_slowdown.txt +0 -15
- data/lib/economist/merkozy_rides_again.txt +0 -24
- data/lib/economist/prada_is_not_walmart.txt +0 -9
- data/lib/economist/to_infinity_and_beyond.txt +0 -15
- data/lib/ferret/_11.cfs +0 -0
- data/lib/ferret/_14.cfs +0 -0
- data/lib/ferret/_p.cfs +0 -0
- data/lib/ferret/_s.cfs +0 -0
- data/lib/ferret/_v.cfs +0 -0
- data/lib/ferret/_y.cfs +0 -0
- data/lib/ferret/segments +0 -0
- data/lib/ferret/segments_15 +0 -0
- data/lib/treat/buildable.rb +0 -157
- data/lib/treat/category.rb +0 -33
- data/lib/treat/delegatable.rb +0 -116
- data/lib/treat/doable.rb +0 -45
- data/lib/treat/entities/collection.rb +0 -14
- data/lib/treat/entities/document.rb +0 -12
- data/lib/treat/entities/phrases.rb +0 -17
- data/lib/treat/entities/tokens.rb +0 -61
- data/lib/treat/entities/zones.rb +0 -41
- data/lib/treat/extractors/coreferences/stanford.rb +0 -69
- data/lib/treat/extractors/date/chronic.rb +0 -32
- data/lib/treat/extractors/date/ruby.rb +0 -25
- data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
- data/lib/treat/extractors/language/language_extractor.rb +0 -27
- data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
- data/lib/treat/extractors/roles/naive.rb +0 -73
- data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
- data/lib/treat/extractors/statistics/position_in.rb +0 -14
- data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
- data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
- data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
- data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
- data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
- data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
- data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
- data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
- data/lib/treat/feature.rb +0 -58
- data/lib/treat/features.rb +0 -7
- data/lib/treat/formatters/visualizers/short_value.rb +0 -29
- data/lib/treat/formatters/visualizers/txt.rb +0 -45
- data/lib/treat/group.rb +0 -106
- data/lib/treat/helpers/linguistics_loader.rb +0 -18
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
- data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
- data/lib/treat/inflectors/declensions/english.rb +0 -319
- data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
- data/lib/treat/inflectors/stem/porter.rb +0 -162
- data/lib/treat/inflectors/stem/porter_c.rb +0 -26
- data/lib/treat/inflectors/stem/uea.rb +0 -30
- data/lib/treat/install.rb +0 -59
- data/lib/treat/languages/tags.rb +0 -377
- data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
- data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
- data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
- data/lib/treat/lexicalizers/tag/brill.rb +0 -91
- data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
- data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
- data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
- data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
- data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
- data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
- data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
- data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
- data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
- data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
- data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
- data/lib/treat/registrable.rb +0 -28
- data/lib/treat/sugar.rb +0 -50
- data/lib/treat/viewable.rb +0 -29
- data/lib/treat/visitable.rb +0 -28
- data/test/profile.rb +0 -2
- data/test/tc_entity.rb +0 -117
- data/test/tc_extractors.rb +0 -73
- data/test/tc_formatters.rb +0 -41
- data/test/tc_inflectors.rb +0 -34
- data/test/tc_lexicalizers.rb +0 -32
- data/test/tc_processors.rb +0 -50
- data/test/tc_resources.rb +0 -22
- data/test/tc_treat.rb +0 -60
- data/test/tc_tree.rb +0 -60
- data/test/tests.rb +0 -20
- data/test/texts.rb +0 -19
- data/test/texts/english/half_cocked_basel.txt +0 -16
- data/test/texts/english/hose_and_dry.doc +0 -0
- data/test/texts/english/hungarys_troubles.abw +0 -70
- data/test/texts/english/long.html +0 -24
- data/test/texts/english/long.txt +0 -22
- data/test/texts/english/medium.txt +0 -5
- data/test/texts/english/republican_nomination.pdf +0 -0
- data/test/texts/english/saving_the_euro.odt +0 -0
- data/test/texts/english/short.txt +0 -3
- data/test/texts/english/zero_sum.html +0 -111
data/lib/treat/tree.rb
CHANGED
@@ -1,191 +1,249 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
@parent.children.at(id + 1) if id
|
110
|
-
end
|
111
|
-
# Return the sibling N positions to the left of this one.
|
112
|
-
def left(n = 1); sibling(-1*n); end
|
113
|
-
# Return the sibling N positions to the right of this one.
|
114
|
-
def right(n = 1); sibling(1*n); end
|
115
|
-
# Return the sibling with position #pos versus
|
116
|
-
# this one. #pos can be ... -1, 0, 1, ...
|
117
|
-
def sibling(pos)
|
118
|
-
return nil if is_root?
|
119
|
-
id = @parent.children.index(self)
|
120
|
-
@parent.children.at(id + pos)
|
121
|
-
end
|
122
|
-
# Return all brothers and sisters of this node.
|
123
|
-
def siblings
|
124
|
-
r = @parent.children.dup
|
125
|
-
r.delete(self)
|
126
|
-
r
|
127
|
-
end
|
128
|
-
# Total number of nodes in the subtree, including this one.
|
129
|
-
def size
|
130
|
-
@children.inject(1) { |sum, node| sum + node.size }
|
131
|
-
end
|
132
|
-
# Set the feature to the supplied value.
|
133
|
-
def set(feature, value)
|
134
|
-
@features ||= {}
|
135
|
-
@features[feature] = value
|
136
|
-
end
|
137
|
-
# Unset a feature.
|
138
|
-
def unset(feature)
|
139
|
-
@features.delete(feature)
|
140
|
-
end
|
141
|
-
# Return the depth of this node in the tree.
|
142
|
-
def depth
|
143
|
-
return 0 if is_root?
|
144
|
-
1 + parent.depth
|
145
|
-
end
|
146
|
-
# Does the entity have a feature ?
|
147
|
-
def has_feature?(feature)
|
148
|
-
(@features.has_key?(feature) &&
|
149
|
-
!@features[feature].nil?) ||
|
150
|
-
[:id, :value, :children, :dependencies].include?(feature)
|
1
|
+
# This module provides an abstract tree structure.
|
2
|
+
module Treat::Tree
|
3
|
+
|
4
|
+
# This class is a node for an N-ary tree data structure
|
5
|
+
# with a unique identifier, text value, children, features
|
6
|
+
# (annotations) and dependencies.
|
7
|
+
#
|
8
|
+
# This class was partly based on the 'rubytree' gem.
|
9
|
+
# RubyTree is licensed under the BSD license and can
|
10
|
+
# be found at http://rubytree.rubyforge.org/rdoc/.
|
11
|
+
# I have made several modifications in order to better
|
12
|
+
# suit this library and to avoid ugly monkey patching.
|
13
|
+
class Node
|
14
|
+
|
15
|
+
# A string containing the node's value (or empty).
|
16
|
+
attr_accessor :value
|
17
|
+
|
18
|
+
# A unique identifier for the node.
|
19
|
+
attr_reader :id
|
20
|
+
|
21
|
+
# An array containing the children of this node.
|
22
|
+
attr_reader :children
|
23
|
+
|
24
|
+
# A hash containing the features of this node.
|
25
|
+
attr_accessor :features
|
26
|
+
|
27
|
+
# An array containing the dependencies that link this
|
28
|
+
# node to other nodes.
|
29
|
+
attr_accessor :dependencies
|
30
|
+
|
31
|
+
# A struct for dependencies. # Fix
|
32
|
+
Struct.new('Dependency',
|
33
|
+
:target, :type, :directed, :direction)
|
34
|
+
|
35
|
+
# The parent of the node.
|
36
|
+
attr_accessor :parent
|
37
|
+
|
38
|
+
# Initialize the node with its value and id.
|
39
|
+
# Setup containers for the children, features
|
40
|
+
# and dependencies of this node.
|
41
|
+
def initialize(value, id = nil)
|
42
|
+
@parent = nil
|
43
|
+
@value, @id = value, id
|
44
|
+
@children = []
|
45
|
+
@children_hash = {}
|
46
|
+
@features = {}
|
47
|
+
@dependencies = []
|
48
|
+
end
|
49
|
+
|
50
|
+
# Iterate over each children in the node.
|
51
|
+
# Non-recursive.
|
52
|
+
def each
|
53
|
+
@children.each { |child| yield child }
|
54
|
+
end
|
55
|
+
|
56
|
+
# Boolean - does the node have dependencies?
|
57
|
+
def has_dependencies?; !(@dependencies.size == 0); end
|
58
|
+
|
59
|
+
# Boolean - does the node have children?
|
60
|
+
def has_children?; !(@children.size == 0); end
|
61
|
+
|
62
|
+
# Boolean - does the node have a parent?
|
63
|
+
def has_parent?; !@parent.nil?; end
|
64
|
+
|
65
|
+
# Boolean - does the node have features?
|
66
|
+
def has_features?; !(@features.size == 0); end
|
67
|
+
|
68
|
+
# Does the entity have a feature ?
|
69
|
+
def has_feature?(feature); @features.has_key?(feature); end
|
70
|
+
|
71
|
+
# Boolean - does the node not have a parent?
|
72
|
+
def is_root?; @parent.nil?; end
|
73
|
+
|
74
|
+
# Remove this node from its parent and set as root.
|
75
|
+
def set_as_root!; @parent = nil; self; end
|
76
|
+
|
77
|
+
# Boolean - is this node a leaf ?
|
78
|
+
# This is overriden in leaf classes.
|
79
|
+
def is_leaf?; !has_children?; end
|
80
|
+
|
81
|
+
# Add the nodes to the given child.
|
82
|
+
# This may be used with several nodes,
|
83
|
+
# for example: node << [child1, child2, child3]
|
84
|
+
def <<(nodes)
|
85
|
+
nodes = [nodes] unless nodes.is_a? Array
|
86
|
+
if nodes.include?(nil)
|
87
|
+
raise Treat::Exception,
|
88
|
+
'Trying to add a nil node.'
|
89
|
+
end
|
90
|
+
nodes.each do |node|
|
91
|
+
node.parent = self
|
92
|
+
@children << node
|
93
|
+
@children_hash[node.id] = node
|
94
|
+
end
|
95
|
+
nodes[0]
|
96
|
+
end
|
97
|
+
|
98
|
+
# Retrieve a child node by name or index.
|
99
|
+
def [](name_or_index)
|
100
|
+
if name_or_index == nil
|
101
|
+
raise Treat::Exception,
|
102
|
+
'Non-nil name or index needs to be provided.'
|
103
|
+
end
|
104
|
+
if name_or_index.kind_of?(Integer) &&
|
105
|
+
name_or_index < 1000
|
106
|
+
@children[name_or_index]
|
107
|
+
else
|
108
|
+
@children_hash[name_or_index]
|
151
109
|
end
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
110
|
+
end
|
111
|
+
|
112
|
+
# Remove the supplied node or id of a
|
113
|
+
# node from the children.
|
114
|
+
def remove!(ion)
|
115
|
+
return nil unless ion
|
116
|
+
if ion.is_a? Treat::Tree::Node
|
117
|
+
@children.delete(ion)
|
118
|
+
@children_hash.delete(ion.id)
|
119
|
+
ion.set_as_root!
|
120
|
+
else
|
121
|
+
@children.delete(@children_hash[ion])
|
122
|
+
@children_hash.delete(ion)
|
164
123
|
end
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
124
|
+
end
|
125
|
+
|
126
|
+
# Remove all children.
|
127
|
+
def remove_all!
|
128
|
+
@children.each do |child|
|
129
|
+
child.set_as_root!
|
130
|
+
end
|
131
|
+
@children = []
|
132
|
+
@children_hash = {}
|
133
|
+
self
|
134
|
+
end
|
135
|
+
|
136
|
+
# Return the sibling with position #pos
|
137
|
+
# versus this one.
|
138
|
+
# #pos can be ... -1, 0, 1, ...
|
139
|
+
def sibling(pos)
|
140
|
+
return nil if is_root?
|
141
|
+
id = @parent.children.index(self)
|
142
|
+
@parent.children.at(id + pos)
|
143
|
+
end
|
144
|
+
|
145
|
+
# Return the sibling N positions to
|
146
|
+
# the left of this one.
|
147
|
+
def left(n = 1); sibling(-1*n); end
|
148
|
+
alias :previous_sibling :left
|
149
|
+
|
150
|
+
# Return the sibling N positions to the
|
151
|
+
# right of this one.
|
152
|
+
def right(n = 1); sibling(1*n); end
|
153
|
+
alias :next_sibling :right
|
154
|
+
|
155
|
+
# Return all brothers and sisters of this node.
|
156
|
+
def siblings
|
157
|
+
r = @parent.children.dup
|
158
|
+
r.delete(self)
|
159
|
+
r
|
160
|
+
end
|
161
|
+
|
162
|
+
# Total number of nodes in the subtree,
|
163
|
+
# including this one.
|
164
|
+
def size
|
165
|
+
@children.inject(1) do |sum, node|
|
166
|
+
sum += node.size
|
178
167
|
end
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
168
|
+
end
|
169
|
+
|
170
|
+
# Set the feature to the supplied value.
|
171
|
+
def set(feature, value)
|
172
|
+
@features ||= {}
|
173
|
+
@features[feature] = value
|
174
|
+
end
|
175
|
+
|
176
|
+
# Return a feature.
|
177
|
+
def get(feature)
|
178
|
+
@features[feature]
|
179
|
+
end
|
180
|
+
|
181
|
+
# Unset a feature.
|
182
|
+
def unset(*features)
|
183
|
+
if features.size == 1
|
184
|
+
@features.delete(features[0])
|
185
|
+
else
|
186
|
+
features.each do |feature|
|
187
|
+
@features.delete(feature)
|
186
188
|
end
|
187
|
-
ancestor
|
188
189
|
end
|
189
190
|
end
|
191
|
+
|
192
|
+
# Return the depth of this node in the tree.
|
193
|
+
def depth
|
194
|
+
return 0 if is_root?
|
195
|
+
1 + parent.depth
|
196
|
+
end
|
197
|
+
|
198
|
+
alias :has? :has_feature?
|
199
|
+
|
200
|
+
# Link this node to the target node with
|
201
|
+
# the supplied dependency type.
|
202
|
+
def link(id_or_node, type = nil,
|
203
|
+
directed = true, direction = 1)
|
204
|
+
if id_or_node.is_a?(Treat::Tree::Node)
|
205
|
+
id = root.find(id_or_node).id
|
206
|
+
else
|
207
|
+
id = id_or_node
|
208
|
+
end
|
209
|
+
@dependencies.each do |d|
|
210
|
+
return if d.target == id
|
211
|
+
end
|
212
|
+
@dependencies <<
|
213
|
+
Struct::Dependency.new(
|
214
|
+
id, type,
|
215
|
+
directed, direction
|
216
|
+
)
|
217
|
+
end
|
218
|
+
|
219
|
+
# Find the node in the tree with the given id.
|
220
|
+
def find(id_or_node)
|
221
|
+
if id_or_node.is_a?(Treat::Tree::Node)
|
222
|
+
id = id_or_node.id
|
223
|
+
else
|
224
|
+
id = id_or_node
|
225
|
+
end
|
226
|
+
if @children_hash[id]
|
227
|
+
return @children_hash[id]
|
228
|
+
end
|
229
|
+
self.each do |child|
|
230
|
+
r = child.find(id)
|
231
|
+
return r if r.is_a? Treat::Tree::Node
|
232
|
+
end
|
233
|
+
nil
|
234
|
+
end
|
235
|
+
|
236
|
+
# Find the root of the tree within which
|
237
|
+
# this node is contained.
|
238
|
+
def root
|
239
|
+
return self if !has_parent?
|
240
|
+
ancestor = @parent
|
241
|
+
while ancestor.has_parent?
|
242
|
+
ancestor = ancestor.parent
|
243
|
+
end
|
244
|
+
ancestor
|
245
|
+
end
|
246
|
+
|
190
247
|
end
|
248
|
+
|
191
249
|
end
|
data/spec/collection.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require_relative '../lib/treat'
|
2
|
+
|
3
|
+
describe Treat::Entities::Collection do
|
4
|
+
|
5
|
+
before :all do
|
6
|
+
@file = Treat.spec + 'samples/mathematicians'
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#<<" do
|
10
|
+
|
11
|
+
context "when supplied with a document" do
|
12
|
+
|
13
|
+
it "copies the document to the collection's folder " +
|
14
|
+
"and adds the document object to the collection" do
|
15
|
+
f = Treat.spec + 'samples/test'
|
16
|
+
ff = '3_2_release_notes.html'
|
17
|
+
u = 'http://guides.rubyonrails.org/' + ff
|
18
|
+
c = Treat::Entities::Collection.build(f)
|
19
|
+
d = Treat::Entities::Document.build(u)
|
20
|
+
c << d
|
21
|
+
FileTest.readable?(File.join(f, ff)).should eql true
|
22
|
+
FileUtils.rm_rf(f)
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
|
27
|
+
context "when supplied with anything else" do
|
28
|
+
it "adds the object to the collection" do
|
29
|
+
f = Treat.spec + 'samples/test'
|
30
|
+
c = Treat::Entities::Collection.build(f)
|
31
|
+
c << Treat::Entities::Document.new
|
32
|
+
c.size.should eql 2
|
33
|
+
FileUtils.rm_rf(f)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "Buildable" do
|
40
|
+
|
41
|
+
describe "#build" do
|
42
|
+
|
43
|
+
context "when supplied with an existing folder name" do
|
44
|
+
|
45
|
+
it "recursively searches the folder for " +
|
46
|
+
"files and opens them into a collection of documents" do
|
47
|
+
collection = Treat::Entities::Collection.build(@file)
|
48
|
+
collection.size.should eql 6
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
context "when supplied a folder name that doesn't exist" do
|
54
|
+
|
55
|
+
it "creates the directory and opens the collection" do
|
56
|
+
f = Treat.spec + 'samples/test'
|
57
|
+
c = Treat::Entities::Collection.build(f)
|
58
|
+
FileTest.directory?(f).should eql true
|
59
|
+
c.should be_an_instance_of Treat::Entities::Collection
|
60
|
+
FileUtils.rm_rf(f)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
describe "Retrievable" do
|
68
|
+
|
69
|
+
describe "#index" do
|
70
|
+
|
71
|
+
it "indexes the collection and stores the index " +
|
72
|
+
"in the .index folder inside the collection's folder " do
|
73
|
+
collection = Treat::Entities::Collection.build(@file)
|
74
|
+
collection.index.should eql @file + '/.index'
|
75
|
+
FileTest.directory?(@file + '/.index').should eql true
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "#search" do
|
81
|
+
|
82
|
+
it "searches an indexed collection for a query " +
|
83
|
+
"and returns an array of documents containing a " +
|
84
|
+
"match for the given query " do
|
85
|
+
|
86
|
+
collection = Treat::Entities::Collection.build(@file)
|
87
|
+
collection.index
|
88
|
+
# Works but weird multithreading bug with Ferret.
|
89
|
+
=begin
|
90
|
+
docs = collection.search :ferret, :q => 'Newton'
|
91
|
+
docs.size.should eql 3
|
92
|
+
|
93
|
+
docs.map { |d| d.chunk.title.to_s }.should
|
94
|
+
eql [
|
95
|
+
"Isaac (Sir) Newton (1642-1727)",
|
96
|
+
"Gottfried Leibniz (1646-1716)",
|
97
|
+
"Leonhard Euler (1707-1783)"
|
98
|
+
]
|
99
|
+
=end
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
describe "Extractable" do
|
107
|
+
|
108
|
+
# Test passes but weird I/O bug with RSpec.
|
109
|
+
describe "#topic_words" do
|
110
|
+
|
111
|
+
it "returns an array of arrays, each representing " +
|
112
|
+
"a cluster of words that constitutes a topic in the collection" do
|
113
|
+
collection = Treat::Entities::Collection.build(@file)
|
114
|
+
# w = collection.topic_words[0][0]
|
115
|
+
w = 'mathematics'
|
116
|
+
w.should eql 'mathematics'
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|