treebank 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,59 @@
1
+ = Tree Module
2
+
3
+ This module supports the creation, search, manipulation, and
4
+ serialization of tree structures.
5
+
6
+ Trees are implemented with Node objects. Each Node has a writable
7
+ _label_ that may be any arbitrary object and a list of other child
8
+ Node objects. Node objects support breadth and depth first iteration.
9
+
10
+ irb(main):001:0> require 'treebank'
11
+ => true
12
+ irb(main):002:0> p = Treebank::Node.new('parent')
13
+ => <Treebank::Node parent []>
14
+ irb(main):003:0> p.create_child!('child1')
15
+ => <Treebank::Node child1 []>
16
+ irb(main):004:0> p.create_child!('child2')
17
+ => <Treebank::Node child2 []>
18
+
19
+ Node has a subclass ParentedNode that keeps track of the parent of the
20
+ given node and has methods for iterating up the ancestor tree.
21
+
22
+ The default stringification method writes a node and all its children
23
+ in a bracketed tree format.
24
+
25
+ irb(main):005:0> puts p
26
+ (parent (child1) (child2))
27
+ => nil
28
+
29
+ Bracketed tree strings can be used to create Node trees.
30
+
31
+ irb(main):006:0> t = Treebank::Node.new.from_s('(parent (child1) (child2))')
32
+ => <Treebank::Node parent [child1 child2]>
33
+ irb(main):007:0> puts t
34
+ (parent (child1) (child2))
35
+ => nil
36
+
37
+ The bracketed tree format is the one used by the Penn
38
+ Treebank[http://www.cis.upenn.edu/~treebank/] Project to annotate
39
+ linguistic structure.
40
+
41
+ = History
42
+
43
+ * 1-0-0 ... First release
44
+
45
+ = See Also
46
+
47
+ Lingua::Treebank[http://search.cpan.org/~kahn/Lingua-Treebank-0.14/Treebank.pm]
48
+ implements similar functionality in Perl.
49
+
50
+ = Copyright
51
+
52
+ Copyright 2006, William Patrick McNeill
53
+
54
+ This program is distributed under the GNU General Public License.
55
+
56
+ = Author
57
+
58
+ W.P. McNeill mailto:billmcn@u.washington.edu
59
+
@@ -0,0 +1,49 @@
1
+ #!/bin/env ruby
2
+
3
+ #--
4
+ # Copyright 2006 William Patrick McNeill
5
+ #
6
+ # This file is part of Treebank.
7
+ #
8
+ # Treebank is free software; you can redistribute it and/or modify it
9
+ # under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # Treebank is distributed in the hope that it will be useful, but
14
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with Treebank; if not, write to the Free Software Foundation,
20
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
+ #
22
+ #++
23
+
24
+ # Print all the text in Penn Treebank parse files.
25
+
26
+ require 'treebank'
27
+
28
# A Penn Treebank file
#
# Line enumeration removes comment text: everything from a <tt>*x</tt>
# marker to the end of the line is deleted from the line in place.  A
# line that is left completely empty by that deletion is not yielded.
class TreebankFile < File
  # Enumerate the lines of the file with comment text removed
  def each
    super do |text|
      text.gsub!(/\*x.*/, '')
      next if text.empty?
      yield text
    end
  end
end
40
+
41
# Treat every command line argument as the name of a Penn Treebank parse
# file.  Each file is opened in turn and, for every tree parsed from it,
# the labels of the tree's leaves are printed on one line separated by
# single spaces.
ARGV.each do |path|
  TreebankFile.open(path) do |treebank|
    Treebank::Parser.new(treebank).each do |tree|
      words = tree.leaves { |leaf| leaf.label }
      puts words.join(' ')
    end
  end
end
data/lib/treebank.rb ADDED
@@ -0,0 +1,446 @@
1
+ # Copyright 2006 William Patrick McNeill
2
+ #
3
+ # Treebank is free software; you can redistribute it and/or modify it
4
+ # under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation; either version 2 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # Treebank is distributed in the hope that it will be useful, but
9
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with Treebank; if not, write to the Free Software Foundation,
15
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+
18
+ # Treebank is the namespace that contains all tree-related functions.
19
+ module Treebank
20
+
21
# An enumerable list of tokens in a string representation of a tree
#
# This class provides a way of enumerating over a source to produce
# tokens that can be used in parsing a string representation of a
# tree.  The source is either an object whose _each_ method yields a
# sequence of String objects (for example a file or an Array of
# lines) or a single String, which is read line by line.  Each
# returned token is delimited by left and right brackets and
# whitespace.  The default brackets are '(' and ')', but different
# single-character delimiters may be specified in the constructor.
#
#   Treebank::TokenStream.new('(A (B c) (D))').to_a
#   => ["(", "A", "(", "B", "c", ")", "(", "D", ")", ")"]
class TokenStream
  include Enumerable

  # The left delimiter
  attr_reader :left

  # The right delimiter
  attr_reader :right

  # Constructor
  #
  # * source ... the string stream to tokenize
  # * left ... left bracket symbol
  # * right ... right bracket symbol
  def initialize(source, left = '(', right = ')')
    @source = source
    @left = left
    @right = right
    # Regexp.escape makes the delimiters safe both as stand-alone
    # alternatives and inside the negated character class, whatever
    # punctuation is chosen.
    escaped_left = Regexp.escape(left)
    escaped_right = Regexp.escape(right)
    # Delimit by left and right brackets, e.g. /\(|\)|[^\(\)]+/
    @s_regex = Regexp.new("#{escaped_left}|#{escaped_right}|[^#{escaped_left}#{escaped_right}]+")
  end

  # Enumerate the tokens in the source
  #
  # Returns an Enumerator when no block is given.
  def each(&block)
    return enum_for(:each) unless block
    # Prefer the source's own each so that sources which customize it
    # are honoured.  A String has no each on Ruby 1.9 and later, so it
    # is read with each_line instead.
    if @source.respond_to?(:each)
      @source.each { |string| tokenize_string(string, &block) }
    else
      @source.each_line { |string| tokenize_string(string, &block) }
    end
  end

  # Tokenize the source string
  #
  # * string ... the string to tokenize
  #
  # Each regular expression match is either a single bracket or a run
  # of non-bracket text; the latter is further split on whitespace.
  def tokenize_string(string)
    string.scan(@s_regex) do |bracket_delimited|
      bracket_delimited.split.each { |token| yield token }
    end
  end

  protected :tokenize_string

end # TokenStream
78
+
79
# A parser for string representations of trees
#
# This class uses a simplified shift-reduce parser to convert a
# string into a list of tree structures.
#
#   Treebank::Parser.new('(A) (B (C) (D))').to_a
#   => [<Treebank::Node A []>, <Treebank::Node B [C D]>]
#
# The string representation of a list of trees has the following BNF
# definition
#
# * trees -> node*
# * node -> (label? children)
# * label -> word
# * children -> node*|word
# * word -> any run of characters that are neither whitespace nor brackets
#
# Note that the BNF definition of children allows a shortcut in
# which the label of a single terminal node may be specified without
# brackets.  So, for example, <tt>(A (B))</tt> and <tt>(A B)</tt>
# are equivalent.
#
# The trees returned by this class are caller-defined node objects,
# where each node has a list of child nodes.  The node class must
# provide a no-argument constructor, a _label=_ writer, and the
# _create_child!_ and _attach_child!_ methods.
class Parser
  include Enumerable

  # Constructor
  #
  # * tokens ... stream of tokens to be converted into trees
  # * node_class ... class of node to create
  #
  # If _tokens_ is not a kind of TokenStream object it will be used
  # as the source stream of one.
  def initialize(tokens, node_class = Node)
    tokens = TokenStream.new(tokens) unless tokens.kind_of?(TokenStream)
    @tokens = tokens
    @node_class = node_class
  end

  # Enumerate the tokens yielding trees
  #
  # Returns an Enumerator when no block is given.  Raises a
  # RuntimeError if the brackets in the token stream do not balance.
  def each # :yields: tree
    return enum_for(:each) unless block_given?
    parse = []
    @tokens.each do |token|
      case token
      when @tokens.left
        parse << :left
      when @tokens.right
        # Reduce the end of the parse stack.
        left_index = parse.rindex(:left)
        raise "Extra #{@tokens.right}" if left_index.nil?
        # Wrap the node in an array so the slice assignment always
        # inserts exactly one element, whatever the node class is.
        parse[left_index..-1] = [reduce(parse[left_index + 1..-1])]
        # If the reduced stack consists of a single node, it must be
        # a complete tree.
        yield parse.pop if parse.length == 1
      else
        parse << token
      end # case
    end # do
    raise "Extra #{@tokens.left}: #{parse}" unless parse.empty?
  end

  # Convert the end of the parse list into a single node
  #
  # * node_parse ... a list of labels and nodes
  #
  # Returns the newly created node.
  def reduce(node_parse)
    node = @node_class.new
    # The first item in the list may be a label.
    if node_parse.first.is_a?(String)
      node.label = node_parse.shift
      # Special case: terminals without brackets, e.g. '(V ran)'
      if node_parse.length == 1 && node_parse.last.is_a?(String)
        node.create_child!(node_parse.last)
        return node
      end
    end
    # The remaining items are child nodes.
    node_parse.each { |child| node.attach_child!(child) }
    node
  end

  protected :reduce

end # Parser
163
+
164
+
165
# A node in a tree
#
# A Node consists of a label, which may be any arbitrary Object, and
# a list of children, which are also Node objects.  Enumerating a Node
# with _each_ visits its immediate children only; use
# _each_breadth_first_ or _each_depth_first_ to walk the whole tree.
class Node
  include Enumerable

  # Iterates a tree breadth-first
  class BFSIterator
    include Enumerable

    # Constructor
    #
    # * node ... the start node of the enumeration
    # * visit ... optional enumeration control procedure
    #
    # The optional _visit_ argument can be used to control which
    # children are visited by this iterator.  If specified, it is
    # called for every child node, and only those children returning
    # +true+ will be visited.  The start node is always visited.
    def initialize(node, visit = nil)
      @node = node
      @visit = visit
    end

    # Enumerate the nodes
    #
    # Returns an Enumerator when no block is given.
    def each
      return enum_for(:each) unless block_given?
      @agenda = [@node]
      while (node = @agenda.shift)
        yield node
        # to_a is used rather than a block-less collect because the
        # agenda arithmetic in recurse needs a real Array.
        children = @visit ? node.find_all { |n| @visit.call(n) } : node.to_a
        recurse(children)
      end
    end

    # Function that controls enumeration recursion
    #
    # * children ... a list of child nodes of the current node
    #
    # The only difference between the breadth-first and depth-first
    # searches is this function.  Breadth-first appends the children
    # to the end of the agenda.
    def recurse(children)
      @agenda += children
    end
  end # BFSIterator

  # Iterates a tree depth-first
  class DFSIterator < BFSIterator

    # Function that controls enumeration recursion
    #
    # * children ... a list of child nodes of the current node
    #
    # The only difference between the breadth-first and depth-first
    # searches is this function.  Depth-first puts the children at
    # the front of the agenda.
    def recurse(children)
      @agenda = children + @agenda
    end
  end # DFSIterator

  # This node's label
  attr_accessor :label

  # Constructor
  #
  # * label ... the label of this node
  # * child_labels ... list of labels for children of this node
  def initialize(label = nil, child_labels = [])
    @label = label
    @children = []
    child_labels.each { |child_label| create_child!(child_label) }
  end

  # Read the tree from a bracketed string
  #
  # * s ... bracketed string
  # * left ... left bracket symbol
  # * right ... right bracket symbol
  #
  # This function uses a Treebank::Parser object to create the tree from
  # _s_.  This raises an error if this node is not empty or if _s_ does
  # not define exactly one tree.  Returns this node.
  def from_s(s, left = '(', right = ')')
    raise 'This node is not empty.' unless empty?
    nodes = Parser.new(TokenStream.new(s, left, right), self.class).to_a
    raise "#{s} defines multiple trees" if nodes.length != 1
    root = nodes.first
    @label = root.label
    # Adopt the children through attach_child! so that subclasses which
    # track extra per-child state see this node, not the temporary
    # parsed root, as the one the children were attached to.
    root.to_a.each { |child| attach_child!(child) }
    self
  end

  # Stringify
  #
  # This writes to a bracketed string representation that can be
  # read by the Parser object.
  def to_s
    space = leaf? ? '' : ' '
    "(#{label}#{space}#{@children.join(' ')})"
  end

  # Interactive stringification
  def inspect
    child_labels = @children.collect { |n| n.label }
    "<#{self.class} #{@label} [#{child_labels.join(' ')}]>"
  end

  # Tree equivalence operator
  #
  # Two trees are equivalent if the other object is a kind of this
  # node's class, the labels match, and the child lists are
  # pairwise equivalent.  Comparing structure as well as labels
  # keeps, for example, <tt>(a (b (c)))</tt> and <tt>(a (b) (c))</tt>
  # distinct even though their depth first label sequences agree.
  def ==(other)
    return false unless other.kind_of?(self.class)
    label == other.label && to_a == other.to_a
  end

  # Create a new node and add it as a child of this node
  #
  # * label ... the label of a node to create
  # * index ... optional insertion index
  #
  # If _index_ is not specified, the node is added to the end of the
  # child list.
  #
  # This function returns the added Node object.
  def create_child!(label, index = nil)
    attach_child!(self.class.new(label), index)
  end

  # Attach an existing node as the child of this node
  #
  # * node ... the node to add
  # * index ... optional insertion index
  #
  # _node_ must be the same type as this node, otherwise an error is
  # raised.
  #
  # If _index_ is not specified, the node is added to the end of the
  # child list.
  #
  # This function returns the added Node object.
  def attach_child!(node, index = nil)
    raise "#{node} is not class #{self.class}" if node.class != self.class
    if index.nil?
      @children << node
    else
      @children[index, 0] = [node]
    end
    node
  end

  # Detach a child node
  #
  # * node ... the node to detach
  #
  # This removes the specified node from this node's child list.  The
  # child is located by object identity, so a sibling that merely
  # compares equal to _node_ is left in place.  An error is raised if
  # _node_ is not a child of this node.
  def detach_child!(node)
    index = @children.index { |child| child.equal?(node) }
    raise "#{node} is not a child of #{self}" if index.nil?
    @children.delete_at(index)
    nil
  end

  # Enumerate the children of this node.
  #
  # Returns an Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?
    @children.each { |node| yield node }
  end

  # Enumerate all the nodes beneath this one breadth-first
  #
  # * visit ... optional enumeration control procedure
  #
  # The _visit_ parameter is passed down to the BFSIterator.
  def each_breadth_first(visit = nil)
    BFSIterator.new(self, visit)
  end

  # Enumerate all the nodes beneath this one depth-first
  #
  # * visit ... optional enumeration control procedure
  #
  # The _visit_ parameter is passed down to the DFSIterator.
  def each_depth_first(visit = nil)
    DFSIterator.new(self, visit)
  end

  # Is this a leaf node?
  def leaf?
    @children.empty?
  end

  # Is this node empty?
  #
  # A node is empty when it has neither a label nor children.
  def empty?
    @label.nil? && @children.empty?
  end

  # All the leaf nodes beneath this node
  #
  # * block ... an optional block to run on each leaf
  #
  # Without a block this returns the leaf nodes in depth first order.
  # With a block it returns the block's result for each leaf.
  def leaves(&block)
    leaves = each_depth_first.find_all { |node| node.leaf? }
    leaves = leaves.collect { |leaf| block.call(leaf) } unless block.nil?
    leaves
  end

end # Node
371
+
372
+
373
# A Node in a Tree that can locate its parent
#
# ParentedNode extends Node with a reference back to the node it is
# attached to.  The reference is set when the node is attached as a
# child and cleared when it is detached.
class ParentedNode < Node

  # This node's parent
  attr_reader :parent

  # Set the parent of this node.  This is a protected utility writer.
  # It does not change the child list of the parent.
  attr_writer :parent
  protected :parent=

  # Iterates up a tree
  class ParentIterator
    include Enumerable

    # Constructor
    #
    # * node ... the start node of the enumeration
    def initialize(node)
      @node = node
    end

    # Enumerate the ancestor chain
    #
    # The start node itself is yielded first, then its parent, and so
    # on until a node without a parent has been yielded.
    def each
      current = @node
      until current.nil?
        yield current
        current = current.parent
      end
    end

  end # ParentIterator

  # Constructor
  #
  # * label ... the label of this node
  # * child_labels ... list of labels for children of this node
  # * parent ... the parent of this node
  def initialize(label = nil, child_labels = [], parent = nil)
    super(label, child_labels)
    @parent = parent
  end

  # See Treebank::Node.attach_child!
  #
  # In addition the attached node's parent is set to this node.
  def attach_child!(node, index = nil)
    attached = super(node, index)
    attached.parent = self
    attached
  end

  # See Treebank::Node.detach_child!
  #
  # In addition the detached node's parent is cleared.
  def detach_child!(node)
    super(node)
    node.parent = nil
  end

  # Enumerate this node followed by its ancestors
  def each_parent
    ParentIterator.new(self)
  end

end # ParentedNode
445
+
446
+ end
@@ -0,0 +1,238 @@
1
+ #!/bin/env ruby
2
+
3
+ #--
4
+ # Copyright 2006 William Patrick McNeill
5
+ #
6
+ # This file is part of Treebank.
7
+ #
8
+ # Treebank is free software; you can redistribute it and/or modify it
9
+ # under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # Treebank is distributed in the hope that it will be useful, but
14
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with Treebank; if not, write to the Free Software Foundation,
20
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
+ #
22
+ #++
23
+
24
+ # Test cases for the Treebank module
25
+
26
+ require 'test/unit'
27
+ require 'treebank'
28
+
29
# Tests of Treebank::TokenStream tokenization with the default and with
# caller-supplied delimiters.
#
# The token lists are materialized with to_a: a block-less collect
# returns an Enumerator rather than an Array on Ruby 1.9 and later.
class TokenStreamTest < Test::Unit::TestCase
  # Default round brackets
  def test_basic_token_stream
    t = Treebank::TokenStream.new('(A (B c) (D))')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '(', t.left
    assert_equal ')', t.right
    assert_equal ['(', 'A', '(', 'B', 'c', ')', '(', 'D', ')', ')'], t.to_a
  end

  # Tokens need not be alphanumeric
  def test_non_alphanum_token_stream
    t = Treebank::TokenStream.new('(A!Node (B!Node .) (14))')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '(', t.left
    assert_equal ')', t.right
    assert_equal ['(', 'A!Node', '(', 'B!Node', '.', ')', '(', '14', ')', ')'], t.to_a
  end

  # Angle brackets as delimiters
  def test_different_delimiter
    t = Treebank::TokenStream.new('<A <B c> <D>>', '<', '>')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '<', t.left
    assert_equal '>', t.right
    assert_equal ['<', 'A', '<', 'B', 'c', '>', '<', 'D', '>', '>'], t.to_a
  end

  # Square brackets need escaping inside a regular expression
  def test_bracket_delimiter
    t = Treebank::TokenStream.new('[A [B c] [D]]', '[', ']')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '[', t.left
    assert_equal ']', t.right
    assert_equal ['[', 'A', '[', 'B', 'c', ']', '[', 'D', ']', ']'], t.to_a
  end

end
63
+
64
+
65
# Parser tests shared by every node class.  The including test case is
# expected to set @node_class in its setup method.
#
# Results are materialized with to_a: a block-less collect returns an
# Enumerator rather than an Array on Ruby 1.9 and later.
module ParseTreeMixin

  # Parse tree string
  def test_tree_parse
    p = Treebank::Parser.new(Treebank::TokenStream.new('(A) (B)'), @node_class)
    trees = p.to_a
    assert_equal [@node_class.new.from_s('(A)'), @node_class.new.from_s('(B)')], trees
    assert_kind_of @node_class, trees[0]
    assert_kind_of @node_class, trees[1]
    t = Treebank::Parser.new(Treebank::TokenStream.new('(A (B) (C))'), @node_class).to_a.first
    # Expected value first, so failure messages read correctly.
    assert_equal 'A', t.label
    assert_equal 'B', t.to_a.first.label
    assert_equal 'C', t.to_a.last.label
    assert_equal [@node_class.new], Treebank::Parser.new(Treebank::TokenStream.new('()'), @node_class).to_a
  end

  # A plain string may be handed to the parser instead of a TokenStream
  def test_string_in_constructor
    expected = [@node_class.new.from_s('(A)'), @node_class.new.from_s('(B)')]
    assert_equal expected, Treebank::Parser.new('(A) (B)', @node_class).to_a
  end

end
86
+
87
+
88
# Runs the shared parser tests with Treebank::Node as the node class.
class TreeParserTest < Test::Unit::TestCase
  include ParseTreeMixin

  # Select the node class used by the mixed-in tests
  def setup
    @node_class = Treebank::Node
  end
end
97
+
98
+
99
# Runs the shared parser tests with Treebank::ParentedNode as the node
# class.
class ParentedTreeParserTest < Test::Unit::TestCase
  include ParseTreeMixin

  # Select the node class used by the mixed-in tests
  def setup
    @node_class = Treebank::ParentedNode
  end
end
108
+
109
+
110
# Node tests shared by every node class.  The including test case is
# expected to set @node_class in its setup method.
#
# Child and traversal lists are materialized with to_a: a block-less
# collect returns an Enumerator rather than an Array on Ruby 1.9 and
# later.
module NodeTestMixin

  # Empty tree
  def test_empty_tree
    t = @node_class.new
    assert_kind_of @node_class, t, 'Empty tree type'
    assert_nil t.label, 'Empty tree nil head'
    assert t.empty?, 'Empty empty?'
    assert_equal [], t.to_a, 'Empty child list'
    assert_equal [t], t.each_breadth_first.to_a, 'Empty breadth first'
    assert_equal [t], t.each_depth_first.to_a, 'Empty depth first'
  end

  # Test a single node tree
  def test_single_node_tree
    t = @node_class.new('a')
    assert_equal 'a', t.label, 'Single node label'
    assert !t.empty?, 'Single node not empty?'
    assert_equal [], t.to_a, 'Empty child list'
    assert_equal [t], t.each_breadth_first.to_a, 'Single node breadth first'
    assert_equal [t], t.each_depth_first.to_a, 'Single depth first'
  end

  # Test adding children in the constructor
  def test_constructor_children
    t = @node_class.new('a', ['b', 'c', 'd'])
    assert_equal @node_class.new.from_s('(a (b) (c) (d) )'), t, 'Children in constructor'
  end

  # Add children
  def test_add_children
    # Add a child of the head node.
    a = @node_class.new('a')
    b = a.create_child!('b')
    assert_kind_of @node_class, b, 'create_child! return value type'
    assert_equal 'b', b.label, 'create_child! return value label'
    # Expected value first, so failure messages read correctly.
    assert_equal [b], a.to_a, 'a children'
    assert_equal [], b.to_a, 'b children'
    assert !a.empty?, 'create_child! not empty?'
    # Insert children at explicit positions.
    a = @node_class.new('a', ['b', 'c', 'd'])
    a.create_child!('z', 0)
    assert_equal ['z', 'b', 'c', 'd'], a.collect {|n| n.label}
    a.create_child!('x', 2)
    assert_equal ['z', 'b', 'x', 'c', 'd'], a.collect {|n| n.label}
  end

  # Read from/to a string
  def test_stringify
    s = '(S (NP (D (the)) (N (boy))) (VP (V (ran))))'
    multiline_s = \
'(S
   (NP
     (D (the))
     (N (boy)))
   (VP
     (V (ran))))'
    t = @node_class.new.from_s(s)
    assert_kind_of @node_class, t, 'from_s'
    assert_equal s, "#{t}", 'to_s'
    m = @node_class.new.from_s(multiline_s)
    assert_equal t, m, 'Single-/multi-line equal'
  end

  # Simple enumeration
  def test_enumeration
    # Enumerate all children.
    t = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    assert_equal ['a', 'b', 'c', 'R', 'S', 'T', 'U'], t.each_breadth_first.collect {|node| node.label}, 'Full breadth first'
    assert_equal ['a', 'b', 'R', 'S', 'c', 'T', 'U'], t.each_depth_first.collect {|node| node.label}, 'Full depth first'
    # Enumerate children beneath a node.
    b = t.find {|node| node.label == 'b'}
    assert_equal ['b', 'R', 'S'], b.each_breadth_first.collect {|node| node.label}, 'Partial breadth first'
    assert_equal ['b', 'R', 'S'], b.each_depth_first.collect {|node| node.label}, 'Partial depth first'
    # Customize visitation.
    visit = proc{|n| n.label != 'c' and n.label != 'S'}
    assert_equal ['a', 'b', 'R'], t.each_breadth_first(visit).collect {|node| node.label}, 'Full breadth first'
    assert_equal ['a', 'b', 'R'], t.each_depth_first(visit).collect {|node| node.label}, 'Full depth first'
  end

  # Tree equivalence
  def test_equivalence
    t1 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    t2 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    s1 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) ) )')
    s2 = @node_class.new.from_s('(a (c (R) (S) ) (b (T) (U) ) )')
    assert_equal t1, t2, 'Tree equivalence'
    assert_not_equal t1, s1, 'Tree non-equivalence: different terminals'
    assert_not_equal t1, s2, 'Tree non-equivalence: reversed non-terminal labels'
    assert_not_equal t1, 'non-tree', 'Tree non-equivalence: not a tree'
  end

  # Leaf collection with and without a block
  def test_leaves
    t = @node_class.new.from_s('(a (b c) (d e))')
    # Depth first order is a, b, c, d, e, so c and e sit at 2 and 4.
    nodes = t.each_depth_first.to_a
    c = nodes[2]
    e = nodes[4]
    assert_equal [c, e], t.leaves, 'Tree leaves'
    assert_equal ['c', 'e'], t.leaves {|n| n.label}, 'Tree leaves with block'
  end
end
210
+
211
+
212
# Runs the shared node tests with Treebank::Node as the node class.
class NodeTest < Test::Unit::TestCase
  include NodeTestMixin

  # Select the node class used by the mixed-in tests
  def setup
    @node_class = Treebank::Node
  end
end
221
+
222
+
223
# Runs the shared node tests with Treebank::ParentedNode as the node
# class and adds a test of ancestor enumeration.
class ParentedNodeTest < Test::Unit::TestCase

  include NodeTestMixin

  # Select the node class used by the mixed-in tests
  def setup
    @node_class = Treebank::ParentedNode
  end

  # each_parent yields the start node first and then walks upwards
  def test_ancestor_enumeration
    t = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    # to_a rather than a block-less collect, which returns an
    # Enumerator instead of an Array on Ruby 1.9 and later.
    assert_equal [t], t.each_parent.to_a, 'Ancestors from head'
    u = t.each_depth_first.find {|node| node.label == 'U'}
    assert_equal ['U', 'c', 'a'], u.each_parent.collect {|node| node.label}, 'Ancestors from leaf'
  end

end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: treebank
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2006-06-08 00:00:00 -07:00
8
+ summary: Treebank implements support for ordered n-ary branching tree structures
9
+ require_paths:
10
+ - lib
11
+ email: billmcn@gmail.com
12
+ homepage: http://rubyforge.org/projects/treebank
13
+ rubyforge_project:
14
+ description: This module implements ordered n-ary branching tree structures. It includes support for breadth- and depth- first iteration, and serialization to and from a bracketed tree string.
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - W.P. McNeill
30
+ files:
31
+ - test/test_treebank.rb
32
+ - lib/treebank.rb
33
+ - examples/penntb-words
34
+ - README
35
+ test_files:
36
+ - test/test_treebank.rb
37
+ rdoc_options:
38
+ - --title
39
+ - Treebank -- Ruby Tree
40
+ - --main
41
+ - README
42
+ - --line-numbers
43
+ - --inline-source
44
+ extra_rdoc_files:
45
+ - README
46
+ executables: []
47
+
48
+ extensions: []
49
+
50
+ requirements: []
51
+
52
+ dependencies:
53
+ - !ruby/object:Gem::Dependency
54
+ name: fsa
55
+ version_requirement:
56
+ version_requirements: !ruby/object:Gem::Version::Requirement
57
+ requirements:
58
+ - - ">"
59
+ - !ruby/object:Gem::Version
60
+ version: 0.0.0
61
+ version: