treebank 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,59 @@
1
+ = Tree Module
2
+
3
+ This module supports the creation, search, manipulation, and
4
+ serialization of tree structures.
5
+
6
+ Trees are implemented with Node objects. Each Node has a writable
7
+ _label_ that may be any arbitrary object and a list of other child
8
+ Node objects. Node objects support breadth and depth first iteration.
9
+
10
+ irb(main):001:0> require 'treebank'
11
+ => true
12
+ irb(main):002:0> p = Treebank::Node.new('parent')
13
+ => <Treebank::Node parent []>
14
+ irb(main):003:0> p.create_child!('child1')
15
+ => <Treebank::Node child1 []>
16
+ irb(main):004:0> p.create_child!('child2')
17
+ => <Treebank::Node child2 []>
18
+
19
+ Node has a subclass ParentedNode that keeps track of the parent of the
20
+ given node and has methods for iterating up the ancestor tree.
21
+
22
+ The default stringification method writes a node and all its children
23
+ in a bracketed tree format.
24
+
25
+ irb(main):005:0> puts p
26
+ (parent (child1) (child2))
27
+ => nil
28
+
29
+ Bracketed tree strings can be used to create Node trees.
30
+
31
+ irb(main):006:0> t = Treebank::Node.new.from_s('(parent (child1) (child2))')
32
+ => <Treebank::Node parent [child1 child2]>
33
+ irb(main):007:0> puts t
34
+ (parent (child1) (child2))
35
+ => nil
36
+
37
+ The bracketed tree format is the one used by the Penn
38
+ Treebank[http://www.cis.upenn.edu/~treebank/] Project to annotate
39
+ linguistic structure.
40
+
41
+ = History
42
+
43
+ * 1-0-0 ... First release
44
+
45
+ = See Also
46
+
47
+ Lingua::Treebank[http://search.cpan.org/~kahn/Lingua-Treebank-0.14/Treebank.pm]
48
+ implements similar functionality in Perl.
49
+
50
+ = Copyright
51
+
52
+ Copyright 2006, William Patrick McNeill
53
+
54
+ This program is distributed under the GNU General Public License.
55
+
56
+ = Author
57
+
58
+ W.P. McNeill mailto:billmcn@u.washington.edu
59
+
@@ -0,0 +1,49 @@
1
+ #!/bin/env ruby
2
+
3
+ #--
4
+ # Copyright 2006 William Patrick McNeill
5
+ #
6
+ # This file is part of Treebank.
7
+ #
8
+ # Treebank is free software; you can redistribute it and/or modify it
9
+ # under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # Treebank is distributed in the hope that it will be useful, but
14
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with Treebank; if not, write to the Free Software Foundation,
20
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
+ #
22
+ #++
23
+
24
+ # Print all the text in Penn Treebank parse files.
25
+
26
+ require 'treebank'
27
+
28
+ # A Penn Treebank File
29
+ #
30
+ # This class omits any comment lines when enumerating the lines in the
31
+ # file.
32
class TreebankFile < File
  # Matches a Penn Treebank comment: "*x" and everything after it on the line.
  COMMENT = /\*x.*/

  # Enumerate the lines of the file with comments removed.
  #
  # Any text from "*x" to the end of a line is stripped.  A line that
  # consisted only of a comment (nothing but whitespace is left after
  # stripping) is omitted entirely; lines that never contained a comment,
  # including blank ones, are yielded unchanged.
  #
  # Returns an enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    super() do |line|
      stripped = line.gsub(COMMENT, '')
      # Only drop lines that actually held a comment and nothing else.
      next if stripped != line && stripped.strip.empty?
      yield stripped
    end
  end
end
40
+
41
+ # Enumerate all the file names specified on the command line, opening
42
+ # each one and printing the strings in the trees it contains.
43
# For every path given on the command line, parse the file as a sequence
# of bracketed trees and print each tree's leaf labels on one line,
# separated by single spaces.
ARGV.each do |path|
  TreebankFile.open(path) do |treebank_file|
    parser = Treebank::Parser.new(treebank_file)
    parser.each do |tree|
      words = tree.leaves { |leaf| leaf.label }
      puts words.join(' ')
    end
  end
end
data/lib/treebank.rb ADDED
@@ -0,0 +1,446 @@
1
+ # Copyright 2006 William Patrick McNeill
2
+ #
3
+ # Treebank is free software; you can redistribute it and/or modify it
4
+ # under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation; either version 2 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # Treebank is distributed in the hope that it will be useful, but
9
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with Treebank; if not, write to the Free Software Foundation,
15
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+
18
+ # Treebank is the namespace that contains all tree-related functions.
19
+ module Treebank
20
+
21
+ # An enumerable list of tokens in a string representation of a tree
22
+ #
23
+ # This class provides a way of enumerating over a source to produce
24
+ # tokens that can be used in parsing a string representation of a
25
+ # tree. The source is an enumerable object whose _each_ function
26
+ # returns a sequence of String objects, for example a file or a
27
+ # single String. Each returned string is delimited by left and
28
+ # right brackets and whitespace. The default brackets are '(' and
29
+ # ')', but different delimiters may be specified in the constructor.
30
+ #
31
+ # Treebank::TokenStream.new('(A (B c) (D))').collect
32
+ # => ["(", "A", "(", "B", "c", ")", "(", "D", ")", ")"]
33
class TokenStream
  include Enumerable

  # The left delimiter
  attr_reader :left

  # The right delimiter
  attr_reader :right

  # Constructor
  #
  # * source ... the string stream to tokenize
  # * left ... left bracket symbol
  # * right ... right bracket symbol
  #
  # _source_ may be a single String or any object whose _each_ method
  # yields String objects (an open file, an Array of lines, ...).
  #
  # Both delimiters are placed inside a regular expression character
  # class, so each one is expected to be a single character.
  def initialize(source, left = '(', right = ')')
    @source = source
    @left = left
    @right = right
    # Regexp.escape keeps every delimiter literal, both as a standalone
    # alternative and inside the character class (a hand-written "\\x"
    # would turn letters such as 'b' or 'd' into regexp escapes).
    left_pattern = Regexp.escape(left)
    right_pattern = Regexp.escape(right)
    # Delimit by left and right brackets, e.g. /\(|\)|[^\(\)]+/
    @s_regex = Regexp.new("#{left_pattern}|#{right_pattern}|[^#{left_pattern}#{right_pattern}]+")
  end

  # Enumerate the tokens in the source
  #
  # Returns an enumerator when called without a block.
  def each(&block)
    return enum_for(:each) unless block_given?

    # Sources with their own each (files, arrays, custom line filters)
    # are enumerated through it.  String lost its each method in Ruby
    # 1.9, so a String source is enumerated line by line instead.
    reader = @source.respond_to?(:each) ? :each : :each_line
    @source.send(reader) { |string| tokenize_string(string, &block) }
  end

  # Tokenize the source string
  #
  # * string ... the string to tokenize
  #
  # Each regular expression match is either a single bracket or a run of
  # non-bracket text; the latter is further split on whitespace.
  def tokenize_string(string, &block)
    string.scan(@s_regex) do |bracket_delimited|
      bracket_delimited.split.each(&block)
    end
  end

  protected :tokenize_string

end # TokenStream
78
+
79
+ # A parser for string representations of trees
80
+ #
81
+ # This class uses a simplified shift-reduce parser to convert a
82
+ # string into a list of tree structures.
83
+ #
84
+ # Treebank::Parser.new('(A) (B (C) (D))').collect
85
+ # => [<Treebank::Node A []>, <Treebank::Node B [C D]>]
86
+ #
87
+ # The string representation of a list of trees has the following BNF
88
+ # definition
89
+ #
90
+ # * trees -> node*
91
+ # * node -> (label? children)
92
+ # * label -> word
93
+ # * children -> node*|word
94
+ # * word -> one or more characters that are neither whitespace nor brackets
95
+ #
96
+ # Note that the BNF definition of children allows a shortcut in
97
+ # which the labels of terminal nodes may be specified without
98
+ # brackets. So, for example, <tt>(A (B))</tt> and <tt>(A B)</tt>
99
+ # are equivalent.
100
+ #
101
+ # The trees returned by this class are caller-defined node objects,
102
+ # where each node has a list of child nodes.
103
class Parser
  include Enumerable

  # Constructor
  #
  # * tokens ... stream of tokens to be converted into trees
  # * node_class ... class of node to create
  #
  # If _tokens_ is not a kind of TokenStream object it will be used
  # as the source stream of one.
  def initialize(tokens, node_class = Node)
    tokens = TokenStream.new(tokens) unless tokens.kind_of?(TokenStream)
    @tokens = tokens
    @node_class = node_class
  end

  # Enumerate the tokens yielding trees
  #
  # Returns an enumerator when called without a block.
  #
  # Raises a RuntimeError for an unmatched right bracket, for a left
  # bracket that is never closed, and for tokens that appear outside of
  # any bracket.
  def each # :yields: tree
    return enum_for(:each) unless block_given?

    parse = []
    @tokens.each do |token|
      case token
      when @tokens.left
        parse << :left
      when @tokens.right
        # Reduce the end of the parse stack.
        left_index = parse.rindex(:left)
        raise "Extra #{@tokens.right}" if left_index.nil?
        parse[left_index..-1] = reduce(parse[left_index + 1..-1])
        # If the reduced stack consists of a single node, it must be
        # a complete tree.
        yield parse.pop if parse.length == 1
      else
        parse << token
      end # case
    end # do
    return if parse.empty?

    # Anything left over is either an unclosed bracket or material that
    # was never inside a bracket; report which one it actually is.
    raise "Extra #{@tokens.left}: #{parse}" if parse.include?(:left)
    raise "Unbracketed tokens: #{parse}"
  end

  # Convert the end of the parse list into a single node
  #
  # * node_parse ... a list of labels and nodes
  #
  # Returns a new node of the class given to the constructor.
  def reduce(node_parse)
    node = @node_class.new
    # The first item in the list may be a label.
    if node_parse.first.instance_of?(String)
      node.label = node_parse.shift
      # Special case: terminals without brackets, e.g. '(V ran)'
      if node_parse.length == 1 && node_parse.last.instance_of?(String)
        node.create_child!(node_parse.last)
        return node
      end
    end
    # The remaining items are child nodes.
    node_parse.each { |child| node.attach_child!(child) }
    node
  end

  protected :reduce

end # Parser
163
+
164
+
165
+ # A node in a tree
166
+ #
167
+ # A Node consists of a label, which may be any arbitrary Object, and
168
+ # a list of children, which are also Node objects.
169
class Node
  include Enumerable

  # Iterates a tree breadth-first
  class BFSIterator
    include Enumerable

    # Constructor
    #
    # * node ... the start node of the enumeration
    # * visit ... optional enumeration control procedure
    #
    # The optional _visit_ argument can be used to control which
    # children are visited by this iterator.  If specified, it is
    # called for every child node, and only those children for which
    # it returns a true value will be visited.
    def initialize(node, visit = nil)
      @node = node
      @visit = visit
    end

    # Enumerate the nodes
    #
    # The start node is always yielded first.  Returns an enumerator
    # when called without a block.
    def each
      return enum_for(:each) unless block_given?

      @agenda = [@node]
      while (node = @agenda.shift)
        yield node
        # to_a (not a block-less collect, which returns an Enumerator on
        # Ruby 1.8.7 and later) so the agenda arithmetic gets an Array.
        children = @visit ? node.find_all { |n| @visit.call(n) } : node.to_a
        recurse(children)
      end
    end

    # Function that controls enumeration recursion
    #
    # * children ... a list of child nodes of the current node
    #
    # The only difference between the breadth-first and depth-first
    # searches is this function.  Here the children are queued behind
    # everything already on the agenda.
    def recurse(children)
      @agenda += children
    end
  end # BFSIterator

  # Iterates a tree depth-first
  class DFSIterator < BFSIterator

    # Function that controls enumeration recursion
    #
    # * children ... a list of child nodes of the current node
    #
    # The only difference between the breadth-first and depth-first
    # searches is this function.  Here the children are placed ahead of
    # everything already on the agenda.
    def recurse(children)
      @agenda = children + @agenda
    end
  end # DFSIterator

  # This node's label
  attr_accessor :label

  # Constructor
  #
  # * label ... the label of this node
  # * child_labels ... list of labels for children of this node
  def initialize(label = nil, child_labels = [])
    @label = label
    @children = []
    child_labels.each { |child_label| create_child!(child_label) }
  end

  # Read the tree from a bracketed string
  #
  # * s ... bracketed string
  # * left ... left bracket symbol
  # * right ... right bracket symbol
  #
  # This function uses a Treebank::Parser object to create the tree from
  # _s_.  This raises an error if this node is not empty, or if _s_ does
  # not define exactly one tree.
  #
  # The parsed root's children are adopted through attach_child! so that
  # any bookkeeping a subclass performs there applies to this node.
  #
  # This function returns this node.
  def from_s(s, left = '(', right = ')')
    raise 'This node is not empty.' unless empty?
    nodes = Parser.new(TokenStream.new(s, left, right), self.class).to_a
    raise "#{s} defines no trees" if nodes.empty?
    raise "#{s} defines multiple trees" if nodes.length != 1
    root = nodes.first
    @label = root.label
    root.to_a.each { |child| attach_child!(child) }
    self
  end

  # Stringify
  #
  # This writes to a bracketed string representation that can be
  # read by the Parser object.
  def to_s
    space = leaf? ? '' : ' '
    "(#{label}#{space}#{@children.join(' ')})"
  end

  # Interactive stringification
  #
  # Shows the class, this node's label and the labels of its children.
  def inspect
    child_labels = @children.collect { |n| n.label }
    "<#{self.class} #{@label} [#{child_labels.join(' ')}]>"
  end

  # Tree equivalence operator
  #
  # If the other object is a tree and the depth first enumerations of
  # the two trees produce the same sequence of node labels, the trees
  # are equivalent.  Only labels are compared, so two trees with
  # different shapes but the same depth first label sequence are
  # equivalent under this operator.
  def ==(other)
    return false unless other.kind_of?(self.class)
    # Comparing whole sequences (rather than zipping, which truncates to
    # the receiver's length) keeps the comparison symmetric.
    own_labels = each_depth_first.map { |node| node.label }
    own_labels == other.each_depth_first.map { |node| node.label }
  end

  # Create a new node and add it as a child of this node
  #
  # * label ... the label of a node to create
  # * index ... optional insertion index
  #
  # If _index_ is not specified, the node is added to the end of the
  # child list.
  #
  # This function returns the added Node object.
  def create_child!(label, index = nil)
    attach_child!(self.class.new(label), index)
  end

  # Attach an existing node as the child of this node
  #
  # * node ... the node to add
  # * index ... optional insertion index
  #
  # _node_ must be the same type as this node.
  #
  # If _index_ is not specified, the node is added to the end of the
  # child list.
  #
  # This function returns the added Node object.
  def attach_child!(node, index = nil)
    raise "#{node} is not class #{self.class}" if node.class != self.class
    if index.nil?
      @children << node
    else
      @children[index, 0] = node
    end
    node
  end

  # Detach a child node
  #
  # * node ... the node to detach
  #
  # This removes the specified node from this node's child list.  The
  # child is located by object identity, so siblings that merely compare
  # equal to it are left in place.  This raises an error if _node_ is
  # not a child of this node.
  def detach_child!(node)
    position = @children.index { |child| child.equal?(node) }
    raise "#{node} is not a child of #{self}" if position.nil?
    @children.delete_at(position)
    nil
  end

  # Enumerate the children of this node.
  #
  # Returns an enumerator when called without a block.
  def each(&block)
    return enum_for(:each) unless block_given?
    @children.each(&block)
  end

  # Enumerate all the nodes beneath this one breadth-first
  #
  # * visit ... optional enumeration control procedure
  #
  # The _visit_ parameter is passed down to the BFSIterator.
  def each_breadth_first(visit = nil)
    BFSIterator.new(self, visit)
  end

  # Enumerate all the nodes beneath this one depth-first
  #
  # * visit ... optional enumeration control procedure
  #
  # The _visit_ parameter is passed down to the DFSIterator.
  def each_depth_first(visit = nil)
    DFSIterator.new(self, visit)
  end

  # Is this a leaf node?
  def leaf?
    @children.empty?
  end

  # Is this node empty?
  #
  # A node is empty when it has neither a label nor children.
  def empty?
    @label.nil? && @children.empty?
  end

  # All the leaf nodes beneath this node
  #
  # * block ... an optional block to run on each leaf
  #
  # Without a block this returns the leaf nodes in depth first order;
  # with a block it returns the block's result for each leaf.
  def leaves(&block)
    found = each_depth_first.find_all { |node| node.leaf? }
    block.nil? ? found : found.collect { |leaf| block.call(leaf) }
  end

end # Node
371
+
372
+
373
+ # A Node in a Tree that can locate its parent
374
+ #
375
+ # The ParentedNode adds a pointer back to the parent node to
376
+ # the Node class.
377
class ParentedNode < Node

  # Walks from a node up through its chain of parents
  class ParentIterator
    include Enumerable

    # Constructor
    #
    # * node ... the node at which the walk starts
    def initialize(node)
      @node = node
    end

    # Yield the start node, then its parent, then that node's parent,
    # and so on until a node without a parent has been yielded.
    def each
      current = @node
      until current.nil?
        yield current
        current = current.parent
      end
    end

  end # ParentIterator

  # This node's parent
  attr_reader :parent

  # Parent setter
  #
  # This is a protected utility. It only records the pointer; the
  # child list of the new parent is left untouched.
  attr_writer :parent
  protected :parent=

  # Constructor
  #
  # * label ... the label of this node
  # * child_labels ... list of labels for children of this node
  # * parent ... the parent of this node
  def initialize(label = nil, child_labels = [], parent = nil)
    super(label, child_labels)
    @parent = parent
  end

  # Attach _node_ as described in Treebank::Node.attach_child! and
  # point its parent at this node.  Returns the attached node.
  def attach_child!(node, index = nil)
    attached = super(node, index)
    attached.parent = self
    attached
  end

  # Detach _node_ as described in Treebank::Node.detach_child! and
  # clear its parent pointer.
  def detach_child!(node)
    super(node)
    node.parent = nil
  end

  # Enumerate this node followed by its ancestors
  def each_parent
    ParentIterator.new(self)
  end

end # ParentedNode
445
+
446
+ end
@@ -0,0 +1,238 @@
1
+ #!/bin/env ruby
2
+
3
+ #--
4
+ # Copyright 2006 William Patrick McNeill
5
+ #
6
+ # This file is part of Treebank.
7
+ #
8
+ # Treebank is free software; you can redistribute it and/or modify it
9
+ # under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # Treebank is distributed in the hope that it will be useful, but
14
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with Treebank; if not, write to the Free Software Foundation,
20
+ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
+ #
22
+ #++
23
+
24
+ # Test cases for the Treebank module
25
+
26
+ require 'test/unit'
27
+ require 'treebank'
28
+
29
# Tokenization of bracketed strings with default and custom delimiters.
#
# Token lists are collected with to_a: a block-less collect returns an
# Enumerator on Ruby 1.8.7 and later, which never equals an Array.
class TokenStreamTest < Test::Unit::TestCase
  def test_basic_token_stream
    t = Treebank::TokenStream.new('(A (B c) (D))')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '(', t.left
    assert_equal ')', t.right
    assert_equal ['(', 'A', '(', 'B', 'c', ')', '(', 'D', ')', ')'], t.to_a
  end

  # Labels may contain punctuation and digits.
  def test_non_alphanum_token_stream
    t = Treebank::TokenStream.new('(A!Node (B!Node .) (14))')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '(', t.left
    assert_equal ')', t.right
    assert_equal ['(', 'A!Node', '(', 'B!Node', '.', ')', '(', '14', ')', ')'], t.to_a
  end

  def test_different_delimiter
    t = Treebank::TokenStream.new('<A <B c> <D>>', '<', '>')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '<', t.left
    assert_equal '>', t.right
    assert_equal ['<', 'A', '<', 'B', 'c', '>', '<', 'D', '>', '>'], t.to_a
  end

  # Square brackets need escaping inside a regexp character class.
  def test_bracket_delimiter
    t = Treebank::TokenStream.new('[A [B c] [D]]', '[', ']')
    assert_kind_of Treebank::TokenStream, t
    assert_equal '[', t.left
    assert_equal ']', t.right
    assert_equal ['[', 'A', '[', 'B', 'c', ']', '[', 'D', ']', ']'], t.to_a
  end

end
63
+
64
+
65
# Parser tests shared by every node class.
#
# The including test case must set @node_class in its setup method.
# Results are collected with to_a because a block-less collect returns
# an Enumerator on Ruby 1.8.7 and later.
module ParseTreeMixin

  # Parse tree string
  def test_tree_parse
    p = Treebank::Parser.new(Treebank::TokenStream.new('(A) (B)'), @node_class)
    trees = p.to_a
    assert_equal [@node_class.new.from_s('(A)'), @node_class.new.from_s('(B)')], trees
    assert_kind_of @node_class, trees[0]
    assert_kind_of @node_class, trees[1]
    t = Treebank::Parser.new(Treebank::TokenStream.new('(A (B) (C))'), @node_class).to_a.first
    # Expected value first, so failure messages read correctly.
    assert_equal 'A', t.label
    assert_equal 'B', t.to_a.first.label
    assert_equal 'C', t.to_a.last.label
    assert_equal [@node_class.new], Treebank::Parser.new(Treebank::TokenStream.new('()'), @node_class).to_a
  end

  # A plain string is wrapped in a TokenStream by the Parser constructor.
  def test_string_in_constructor
    assert_equal [@node_class.new.from_s('(A)'), @node_class.new.from_s('(B)')], Treebank::Parser.new('(A) (B)', @node_class).to_a
  end

end
86
+
87
+
88
# Runs the shared parser tests with Treebank::Node as the node class.
class TreeParserTest < Test::Unit::TestCase
  include ParseTreeMixin

  # Choose the node class used by the tests in ParseTreeMixin.
  def setup
    @node_class = Treebank::Node
  end
end
97
+
98
+
99
# Runs the shared parser tests with Treebank::ParentedNode as the node class.
class ParentedTreeParserTest < Test::Unit::TestCase
  include ParseTreeMixin

  # Choose the node class used by the tests in ParseTreeMixin.
  def setup
    @node_class = Treebank::ParentedNode
  end
end
108
+
109
+
110
# Node tests shared by every node class.
#
# The including test case must set @node_class in its setup method.
# Node lists are collected with to_a because a block-less collect
# returns an Enumerator on Ruby 1.8.7 and later.
module NodeTestMixin

  # Empty tree
  def test_empty_tree
    t = @node_class.new
    assert_kind_of @node_class, t, 'Empty tree type'
    assert_nil t.label, 'Empty tree nil head'
    assert t.empty?, 'Empty empty?'
    assert_equal [], t.to_a, 'Empty child list'
    assert_equal [t], t.each_breadth_first.to_a, 'Empty breadth first'
    assert_equal [t], t.each_depth_first.to_a, 'Empty depth first'
  end

  # Test a single node tree
  def test_single_node_tree
    t = @node_class.new('a')
    assert_equal 'a', t.label, 'Single node label'
    assert !t.empty?, 'Single node not empty?'
    assert_equal [], t.to_a, 'Empty child list'
    assert_equal [t], t.each_breadth_first.to_a, 'Single node breadth first'
    assert_equal [t], t.each_depth_first.to_a, 'Single depth first'
  end

  # Test adding children in the constructor
  def test_constructor_children
    t = @node_class.new('a', ['b', 'c', 'd'])
    assert_equal @node_class.new.from_s('(a (b) (c) (d) )'), t, 'Children in constructor'
  end

  # Add children
  def test_add_children
    # Add a child of the head node.
    a = @node_class.new('a')
    b = a.create_child!('b')
    assert_kind_of @node_class, b, 'create_child! return value type'
    assert_equal 'b', b.label, 'create_child! return value label'
    # Expected value first, so failure messages read correctly.
    assert_equal [b], a.to_a, 'a children'
    assert_equal [], b.to_a, 'b children'
    assert !a.empty?, 'create_child! not empty?'
    # Insert children at explicit positions.
    a = @node_class.new('a', ['b', 'c', 'd'])
    a.create_child!('z', 0)
    assert_equal ['z', 'b', 'c', 'd'], a.collect { |n| n.label }
    a.create_child!('x', 2)
    assert_equal ['z', 'b', 'x', 'c', 'd'], a.collect { |n| n.label }
  end

  # Read from/to a string
  def test_stringify
    s = '(S (NP (D (the)) (N (boy))) (VP (V (ran))))'
    multiline_s = \
    '(S
       (NP
         (D (the))
         (N (boy)))
       (VP
         (V (ran))))'
    t = @node_class.new.from_s(s)
    assert_kind_of @node_class, t, 'from_s'
    assert_equal s, "#{t}", 'to_s'
    m = @node_class.new.from_s(multiline_s)
    assert_equal t, m, 'Single-/multi-line equal'
  end

  # Simple enumeration
  def test_enumeration
    # Enumerate all children.
    t = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    assert_equal ['a', 'b', 'c', 'R', 'S', 'T', 'U'], t.each_breadth_first.collect { |node| node.label }, 'Full breadth first'
    assert_equal ['a', 'b', 'R', 'S', 'c', 'T', 'U'], t.each_depth_first.collect { |node| node.label }, 'Full depth first'
    # Enumerate children beneath a node.
    b = t.find { |node| node.label == 'b' }
    assert_equal ['b', 'R', 'S'], b.each_breadth_first.collect { |node| node.label }, 'Partial breadth first'
    assert_equal ['b', 'R', 'S'], b.each_depth_first.collect { |node| node.label }, 'Partial depth first'
    # Customize visitation.
    visit = proc { |n| n.label != 'c' and n.label != 'S' }
    assert_equal ['a', 'b', 'R'], t.each_breadth_first(visit).collect { |node| node.label }, 'Full breadth first'
    assert_equal ['a', 'b', 'R'], t.each_depth_first(visit).collect { |node| node.label }, 'Full depth first'
  end

  # Tree equivalence
  def test_equivalence
    t1 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    t2 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    s1 = @node_class.new.from_s('(a (b (R) (S) ) (c (T) ) )')
    s2 = @node_class.new.from_s('(a (c (R) (S) ) (b (T) (U) ) )')
    assert_equal t1, t2, 'Tree equivalence'
    assert_not_equal t1, s1, 'Tree non-equivalence: different terminals'
    assert_not_equal t1, s2, 'Tree non-equivalence: reversed non-terminal labels'
    assert_not_equal t1, 'non-tree', 'Tree non-equivalence: not a tree'
  end

  # Leaves are returned in depth first order.
  def test_leaves
    t = @node_class.new.from_s('(a (b c) (d e))')
    # Depth first order is a, b, c, d, e.
    nodes = t.each_depth_first.to_a
    c = nodes[2]
    e = nodes[4]
    assert_equal [c, e], t.leaves, 'Tree leaves'
    assert_equal ['c', 'e'], t.leaves { |n| n.label }, 'Tree leaves with block'
  end
end
210
+
211
+
212
# Runs the shared node tests with Treebank::Node as the node class.
class NodeTest < Test::Unit::TestCase
  include NodeTestMixin

  # Choose the node class used by the tests in NodeTestMixin.
  def setup
    @node_class = Treebank::Node
  end
end
221
+
222
+
223
# Runs the shared node tests with Treebank::ParentedNode as the node
# class and adds the tests specific to parent tracking.
class ParentedNodeTest < Test::Unit::TestCase

  include NodeTestMixin

  # Choose the node class used by the tests in NodeTestMixin.
  def setup
    @node_class = Treebank::ParentedNode
  end

  # each_parent starts at the node itself and walks up to the head.
  def test_ancestor_enumeration
    t = @node_class.new.from_s('(a (b (R) (S) ) (c (T) (U)) )')
    # to_a rather than a block-less collect, which returns an Enumerator
    # on Ruby 1.8.7 and later.
    assert_equal [t], t.each_parent.to_a, 'Ancestors from head'
    u = t.each_depth_first.find { |node| node.label == 'U' }
    assert_equal ['U', 'c', 'a'], u.each_parent.collect { |node| node.label }, 'Ancestors from leaf'
  end

end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: treebank
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2006-06-08 00:00:00 -07:00
8
+ summary: Treebank implements support for ordered n-ary branching tree structures
9
+ require_paths:
10
+ - lib
11
+ email: billmcn@gmail.com
12
+ homepage: http://rubyforge.org/projects/treebank
13
+ rubyforge_project:
14
+ description: This module implements ordered n-ary branching tree structures. It includes support for breadth- and depth- first iteration, and serialization to and from a bracketed tree string.
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - W.P. McNeill
30
+ files:
31
+ - test/test_treebank.rb
32
+ - lib/treebank.rb
33
+ - examples/penntb-words
34
+ - README
35
+ test_files:
36
+ - test/test_treebank.rb
37
+ rdoc_options:
38
+ - --title
39
+ - Treebank -- Ruby Tree
40
+ - --main
41
+ - README
42
+ - --line-numbers
43
+ - --inline-source
44
+ extra_rdoc_files:
45
+ - README
46
+ executables: []
47
+
48
+ extensions: []
49
+
50
+ requirements: []
51
+
52
+ dependencies:
53
+ - !ruby/object:Gem::Dependency
54
+ name: fsa
55
+ version_requirement:
56
+ version_requirements: !ruby/object:Gem::Version::Requirement
57
+ requirements:
58
+ - - ">"
59
+ - !ruby/object:Gem::Version
60
+ version: 0.0.0
61
+ version: