bracket_notation 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
module BracketNotation # :nodoc:
  # This class represents a parser for sentences annotated with the kind of
  # bracket notation that is commonly used by linguists. After being checked for
  # obvious problems, the input string is scanned for tokens, which are
  # evaluated to produce an expression tree.
  class Parser
    # The original, unmodified input string handed to the constructor.
    attr_reader :input

    # This class is an Exception subclass that reports errors in the input
    # validation process.
    class ValidationError < RuntimeError; end

    # Saves the input string, as well as a copy of the input string that has
    # been normalized and validated.
    #
    # Raises ValidationError if +input+ is nil, blank, contains an unlabeled
    # opening bracket, or has unbalanced/misnested brackets.
    def initialize(input)
      validation_error "Parser input cannot be nil" if input.nil?

      @input = input
      @data = scrub(input)
      validate
    end

    # Scans and evaluates the input string, returning an expression tree.
    def parse
      scanner = Scanner.new(@data)
      Evaluator.new(scanner.scan).evaluate
    end

    private

    # Normalizes the input string to make it easier to parse: surrounding
    # white space is dropped, every run of white space (tabs and newlines
    # included) collapses to a single space, and spaces directly in front of an
    # opening bracket are removed.
    def scrub(str)
      # Tabs are treated like any other white space. Deleting them outright
      # would glue neighbouring names together ("[N\tboy]" => "[Nboy]").
      output = str.strip.gsub(/\s+/, " ")
      output.gsub!(/\] \[/, "][")
      output.gsub!(/ \[/, "[")

      output
    end

    # Checks to see if the input is valid, i.e. it has a length, no unnamed
    # nodes, and the bracket-nesting is balanced.
    def validate
      validation_error("Input string can't be empty.") if @data.empty?
      validation_error("All opening brackets must have a label.") if @data =~ /\[\s*\[/

      # Only the brackets matter for the balance check.
      brackets = @data.scan(/[\[\]]/)
      validation_error("Opening and closing brackets must be balanced.") if brackets.length.odd?

      # Track the nesting depth; a negative depth means a closing bracket
      # appeared before its opening partner, so there is no point going on.
      depth = 0
      brackets.each do |bracket|
        depth += (bracket == "[" ? 1 : -1)
        break if depth < 0
      end

      validation_error("Opening and closing brackets must be properly nested.") unless depth.zero?
    end

    # Raises a validation exception with the given message
    def validation_error(message)
      raise ValidationError, message
    end
  end
end
@@ -0,0 +1,129 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
module BracketNotation # :nodoc:
  # This class represents a scanner for sentences annotated with the kind of
  # bracket notation that is commonly used by linguists. The scanner reads the
  # input string and generates a list of Token instances.
  class Scanner
    include Enumerable

    # The string being scanned.
    attr_reader :input

    # Matches a single character that may be part of a name.
    UNRESERVED_CHARACTER = /^[^\[\]\s]$/
    LBRACKET_CHARACTER = "["
    RBRACKET_CHARACTER = "]"
    # read_char yields nil once the input is exhausted.
    EOL_CHARACTER = nil

    # Saves the input string.
    def initialize(input)
      @input = input
      @chunk_size = 1
      reset
    end

    # Returns an array of all the tokens produced by the scanner. The input is
    # scanned only once; later calls return the same cached array.
    def scan
      return @tokens unless @tokens.nil?

      @tokens = []
      while (token = next_token)
        @tokens << token
      end

      @tokens
    end

    # Enumerates the list of tokens, passing each in turn to the provided block.
    def each(&block)
      scan.each(&block)
    end

    private

    # Generate and return the next token in the token stream, or nil once the
    # end-of-line token has already been produced.
    def next_token
      return nil if @last_read.nil?

      # Scan the input string for the next token, ignoring white space (and
      # anything else that isn't a recognized character)
      token = nil
      while token.nil?
        token = case read_char
                when UNRESERVED_CHARACTER then name_token
                when LBRACKET_CHARACTER then Token.LBRACKET
                when RBRACKET_CHARACTER then Token.RBRACKET
                when EOL_CHARACTER then Token.EOL
                end
      end

      token
    end

    # Read a single character and update the position pointer. Returns nil
    # when the end of the input has been reached.
    def read_char
      return nil if @last_read.nil? # Already at end of line

      chunk = input[@pos, @chunk_size]
      @pos += @chunk_size

      # String#[] yields "" exactly at the end of the string and nil past it;
      # both mean there is nothing left to read.
      @last_read = (chunk.nil? || chunk.empty?) ? nil : chunk
    end

    # Look ahead to see what the next char will be, without updating @last_read
    # or the position pointer.
    def peek_char
      return nil if @last_read.nil?

      input[@pos, @chunk_size]
    end

    # Gobble up the string of unreserved characters to make a name token.
    def name_token
      value = String.new(@last_read)

      # Read through the subsequent unreserved characters to build the name token.
      while (upcoming = peek_char) && upcoming =~ UNRESERVED_CHARACTER
        value << read_char
      end

      Token.NAME(value)
    end

    # Go back to the beginning of the input string and prepare to generate the
    # tokens again. The cached token list is dropped so that the next call to
    # scan really rescans the input.
    def reset
      @pos = 0
      @last_read = "\n" # any non-nil value marks "not at end of input yet"
      @tokens = nil
    end
  end
end
@@ -0,0 +1,79 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
module BracketNotation # :nodoc:
  # This class represents a token in a stream of characters. All tokens have a
  # type, and some of them (e.g. +NAME+ tokens) have corresponding values.
  class Token
    # One of the type constants below.
    attr_reader :type
    # The token's value, or nil for tokens that carry none.
    attr_reader :value

    # Constants that identify the different types of tokens
    LBRACKET = "LBRACKET".freeze
    RBRACKET = "RBRACKET".freeze
    NAME = "NAME".freeze
    EOL = "EOL".freeze

    # Initializes and returns a new token of type +LBRACKET+.
    def self.LBRACKET; new(LBRACKET); end

    # Initializes and returns a new token of type +RBRACKET+.
    def self.RBRACKET; new(RBRACKET); end

    # Initializes and returns a new token of type +NAME+ with the given value.
    def self.NAME(value); new(NAME, value); end

    # Initializes and returns a new token of type +EOL+.
    def self.EOL; new(EOL); end

    # Saves the token type, as well as an optional value.
    def initialize(type, value = nil)
      @type = type
      @value = value
    end

    # Provides a human-friendly string representation of a token instance.
    def inspect # :nodoc:
      output = "#{@type}"
      output << " \"#{@value}\"" unless @value.nil?

      output
    end

    # Compares the receiver with another object, returning true only if the
    # other object is also an instance of Token, and only if the two tokens
    # share the same +type+ and +value+.
    def ==(rvalue)
      return super unless rvalue.class == self.class

      @type == rvalue.type && @value == rvalue.value
    end

    # Hash-key equality; kept in step with #== so that equal tokens are
    # interchangeable as Hash keys and in Array#uniq.
    def eql?(other)
      self == other
    end

    # Hash code derived from the same fields that #== compares.
    def hash
      [self.class, @type, @value].hash
    end
  end
end
@@ -0,0 +1,40 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
module BracketNotation # :nodoc:
  # Version information for the library, split into its numeric components.
  module Version # :nodoc:
    MAJOR = 1
    MINOR = 0
    MAINT = 3

    # Returns the current version string in "MAJOR.MINOR.MAINT" form.
    def self.to_s
      [MAJOR, MINOR, MAINT].join(".")
    end
  end
end
@@ -0,0 +1,48 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
+ require 'test/test_helper'
30
+
31
class EvaluatorTest < Test::Unit::TestCase
  include BracketNotation

  context "the evaluator" do
    setup do
      # Token stream for the sentence
      #   [S [DP [D the] [NP [N boy]]] [VP [V ate] [DP [D the] [NP [N bread]]]]]
      # laid out so that the indentation mirrors the bracket nesting.
      @tokens = [
        Token.LBRACKET, Token.NAME("S"),
          Token.LBRACKET, Token.NAME("DP"),
            Token.LBRACKET, Token.NAME("D"), Token.NAME("the"), Token.RBRACKET,
            Token.LBRACKET, Token.NAME("NP"),
              Token.LBRACKET, Token.NAME("N"), Token.NAME("boy"), Token.RBRACKET,
            Token.RBRACKET,
          Token.RBRACKET,
          Token.LBRACKET, Token.NAME("VP"),
            Token.LBRACKET, Token.NAME("V"), Token.NAME("ate"), Token.RBRACKET,
            Token.LBRACKET, Token.NAME("DP"),
              Token.LBRACKET, Token.NAME("D"), Token.NAME("the"), Token.RBRACKET,
              Token.LBRACKET, Token.NAME("NP"),
                Token.LBRACKET, Token.NAME("N"), Token.NAME("bread"), Token.RBRACKET,
              Token.RBRACKET,
            Token.RBRACKET,
          Token.RBRACKET,
        Token.RBRACKET,
        Token.EOL
      ]
      @evaluator = Evaluator.new(@tokens)
    end

    # Evaluating the token stream must yield an Expression.
    should "produce an expression" do
      assert_kind_of Expression, @evaluator.evaluate
    end

    # The evaluation result must have no parent, i.e. be the root of the tree.
    should "produce a root node" do
      assert_nil @evaluator.evaluate.parent
    end
  end
end
@@ -0,0 +1,79 @@
1
+ #--
2
+ # This file is part of BracketNotation.
3
+ #
4
+ # BracketNotation is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+ #
9
+ # BracketNotation is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU General Public License
15
+ # along with BracketNotation. If not, see <http://www.gnu.org/licenses/>.
16
+ #++
17
+ # BracketNotation is a parser for generating syntax trees from sentences
18
+ # annotated with the kind of bracket notation that is commonly used by
19
+ # linguists. The result is a tree structure with nodes that describe the phrases
20
+ # and constituents of the sentence.
21
+ #
22
+ # BracketNotation was inspired by Yoichiro Hasebe's RSyntaxTree[http://yohasebe.com/rsyntaxtree/],
23
+ # and small portions of his code have been incorporated in the parser.
24
+ #
25
+ # Author:: Cody Brimhall (mailto:brimhall@somuchwit.com)
26
+ # Copyright:: Copyright (c) 2010 Cody Brimhall
27
+ # License:: Distributed under the terms of the GNU General Public License, v. 3
28
+
29
+ require 'test/test_helper'
30
+
31
class ParserTest < Test::Unit::TestCase
  include BracketNotation

  context "the parser" do
    setup do
      # One well-formed sentence plus three variants, each broken in one way.
      @valid_input = "[S [DP [D the] [NP [N boy]]] [VP [V ate] [DP [D the] [NP [N bread]]]]]"
      @input_with_unnamed_phrase = "[ [DP [D the] [NP [N boy]]] [VP [V ate] [DP [D the] [NP [N bread]]]]]"
      @input_with_too_many_brackets = "[S [DP [D the] [NP [N boy]]] [VP [V ate] [DP [D the] [NP [N bread]]]]]]"
      @input_with_too_few_brackets = "[S [DP [D the] [NP [N boy]]] [VP [V ate] [DP [D the] [NP [N bread]]]]"
      @parser = nil
    end

    # Constructing a parser runs validation, so well-formed input must not raise.
    should "validate good input" do
      assert_nothing_raised { @parser = Parser.new(@valid_input) }
    end

    # An opening bracket without a label is rejected.
    should "not validate unnamed phrases" do
      assert_raise(Parser::ValidationError) { @parser = Parser.new(@input_with_unnamed_phrase) }
    end

    # A surplus closing bracket is rejected.
    should "not validate too many brackets" do
      assert_raise(Parser::ValidationError) { @parser = Parser.new(@input_with_too_many_brackets) }
    end

    # A missing closing bracket is rejected.
    should "not validate too few brackets" do
      assert_raise(Parser::ValidationError) { @parser = Parser.new(@input_with_too_few_brackets) }
    end

    should "not validate the empty string" do
      assert_raise(Parser::ValidationError) { @parser = Parser.new("") }
    end

    should "not validate nil" do
      assert_raise(Parser::ValidationError) { @parser = Parser.new(nil) }
    end
  end
end