dhaka 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. data/lib/evaluator/evaluator.rb +18 -17
  2. data/lib/grammar/grammar.rb +4 -5
  3. data/lib/lexer/dfa.rb +63 -13
  4. data/lib/lexer/lexeme.rb +3 -4
  5. data/lib/lexer/lexer.rb +12 -3
  6. data/lib/lexer/lexer_run.rb +22 -10
  7. data/lib/lexer/regex_grammar.rb +88 -14
  8. data/lib/lexer/regex_parser.rb +1523 -1401
  9. data/lib/lexer/specification.rb +29 -3
  10. data/lib/lexer/state.rb +32 -9
  11. data/lib/lexer/state_machine.rb +2 -2
  12. data/lib/parser/channel.rb +4 -4
  13. data/lib/parser/parser.rb +17 -12
  14. data/lib/parser/parser_state.rb +3 -1
  15. data/test/chittagong/chittagong_lexer.rb +63 -63
  16. data/test/chittagong/chittagong_lexer.rb.rej +189 -0
  17. data/test/chittagong/chittagong_lexer_specification.rb +6 -8
  18. data/test/chittagong/chittagong_parser.rb +659 -659
  19. data/test/chittagong/chittagong_parser.rb.rej +1623 -0
  20. data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
  21. data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
  22. data/test/core/dfa_test.rb +170 -0
  23. data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
  24. data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
  25. data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
  26. data/test/core/lexer_test.rb +139 -0
  27. data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
  28. data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
  29. data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
  30. data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
  31. data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
  32. data/test/{parser_test.rb → core/parser_test.rb} +2 -2
  33. data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
  34. data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
  35. data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
  36. data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
  37. data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
  38. metadata +25 -22
  39. data/test/lexer_test.rb +0 -215
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__)+'/../lib/dhaka'
1
+ require File.dirname(__FILE__)+'/../dhaka_test_helper'
2
2
 
3
3
  class AnotherLALRButNotSLRGrammar < Dhaka::Grammar
4
4
 
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + "/simple_grammar"
3
3
  eval(Dhaka::Parser.new(SimpleGrammar).compile_to_ruby_source_as(:SimpleParser))
4
4
 
@@ -0,0 +1,170 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+
3
+ class TestDFA < Test::Unit::TestCase
4
+ def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
5
+ root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
6
+ star_node = root.left.left.left.left
7
+ or_node = star_node.child
8
+ first_a = or_node.children[0]
9
+ first_b = or_node.children[1]
10
+ second_a = root.left.left.left.right
11
+ second_b = root.left.left.right
12
+ last_b = root.left.right
13
+ sentinel = root.right
14
+
15
+ assert(!root.nullable)
16
+ assert(!root.left.nullable)
17
+ assert(!root.left.left.nullable)
18
+ assert(star_node.nullable)
19
+
20
+ assert_equal(Set.new([first_a, first_b, second_a]), root.first)
21
+ assert_equal(Set.new([last_b]), root.left.last)
22
+
23
+ root.calculate_follow_sets
24
+
25
+ assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
26
+ assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
27
+ assert_equal(Set.new([second_b]), second_a.follow_set)
28
+ assert_equal(Set.new([last_b]), second_b.follow_set)
29
+ assert_equal(Set.new([sentinel]), last_b.follow_set)
30
+ end
31
+
32
+ def test_DFA_raises_exception_if_empty_regex
33
+ machine = Dhaka::LexerSupport::DFA.new("")
34
+ flunk "Should have thrown an unexpected end of regex exception"
35
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
36
+ assert_equal("Unexpected end of regex.", e.message)
37
+ end
38
+
39
+ def test_DFA_raises_exception_if_error_parsing_regex
40
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
41
+ flunk "Should have thrown an unexpected token exception"
42
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
43
+ assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
44
+ end
45
+
46
+ def test_match_a_regex
47
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
48
+ assert_full_match(machine, "abababb")
49
+ assert_full_match(machine, "ababaabb")
50
+ assert_empty(machine.match("abababab"))
51
+ assert_equal("abababb", machine.match("abababbc"))
52
+ assert_equal("abababb", machine.match("abababbaa"))
53
+ end
54
+
55
+ def test_match_a_regex_with_optional_characters_at_the_end
56
+ machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
57
+ assert_full_match(machine, "badccddabab")
58
+ assert_full_match(machine, "baddcc")
59
+ assert_empty(machine.match("badab"))
60
+ assert_empty(machine.match("bacdab"))
61
+ end
62
+
63
+ def test_match_a_nullable_regex
64
+ machine = Dhaka::LexerSupport::DFA.new("(ab)*")
65
+ assert_full_match(machine, "abab")
66
+ assert_full_match(machine, "ab")
67
+ assert_full_match(machine, "")
68
+ assert_equal("", machine.match("b"))
69
+ end
70
+
71
+ def test_match_a_regex_with_the_dot_character
72
+ machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
73
+ assert_full_match(machine, "abacd")
74
+ assert_full_match(machine, "abcd")
75
+ assert_full_match(machine, "abAcd")
76
+ assert_empty(machine.match("ab999c"))
77
+ end
78
+
79
+ def test_match_a_regex_with_sets
80
+ machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
81
+ assert_empty(machine.match("abacd"))
82
+ assert_full_match(machine, "abcd")
83
+ assert_full_match(machine, "abjklucd")
84
+ assert_empty(machine.match("abijklucd"))
85
+ assert_empty(machine.match("ab999c"))
86
+ end
87
+
88
+ def test_match_a_regex_with_negative_sets
89
+ machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
90
+ assert_full_match(machine, "abcd")
91
+ assert_empty(machine.match("abjcd"))
92
+ assert_empty(machine.match("abrcd"))
93
+ assert_empty(machine.match("abijklucd"))
94
+ assert_full_match(machine, "abyqcd")
95
+ end
96
+
97
+ def test_match_a_regex_with_sets_containing_escaped_characters
98
+ machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
99
+ assert_full_match(machine, "abcd")
100
+ assert_empty(machine.match("abjcd"))
101
+ assert_full_match(machine, "ab^-.cd")
102
+ assert_empty(machine.match("abijklucd"))
103
+ assert_empty(machine.match("ab\\cd"))
104
+ end
105
+
106
+ def test_match_a_regex_using_unescaped_caret_and_dash_characters
107
+ machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
108
+ assert_full_match(machine, "^-")
109
+ assert_full_match(machine, "^-^-")
110
+ assert_empty(machine.match("?cd"))
111
+ end
112
+
113
+ def test_match_a_regex_using_escape_characters
114
+ machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
115
+ assert_full_match(machine, "-?()\\")
116
+ end
117
+
118
+ def test_match_a_regex_using_lt_and_gt
119
+ machine = Dhaka::LexerSupport::DFA.new('<.+>')
120
+ assert_full_match(machine, "<ab>")
121
+ assert_full_match(machine, "<absdf><sdg><sse>")
122
+ assert_empty(machine.match("ab>"))
123
+ end
124
+
125
+ def test_simulating_curly_brace_quantifiers
126
+ machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
127
+ assert_full_match(machine, "aa")
128
+ assert_full_match(machine, "aaa")
129
+ assert_full_match(machine, "aaaa")
130
+ assert_full_match(machine, "aaaaa")
131
+ assert_equal("aaaaa", machine.match("aaaaaa"))
132
+ assert_empty(machine.match("a"))
133
+ end
134
+
135
+ def test_matching_a_regex_with_lookahead
136
+ machine = Dhaka::LexerSupport::DFA.new('ab/cd')
137
+ assert_equal("ab", machine.match("abcd"))
138
+ assert_empty(machine.match("ab"))
139
+ assert_empty(machine.match("abef"))
140
+ end
141
+
142
+ def test_matching_a_regex_with_nullable_pre_lookahead_regex
143
+ machine = Dhaka::LexerSupport::DFA.new('(ab)*/cd')
144
+ assert_equal("ab", machine.match("abcd"))
145
+ assert_equal("ababab", machine.match("abababcd"))
146
+ assert_empty(machine.match("ababc"))
147
+ assert_empty(machine.match("abef"))
148
+ end
149
+
150
+ def test_matching_a_regex_with_post_lookahead_characters_in_common_with_pre_lookahead_characters
151
+ machine = Dhaka::LexerSupport::DFA.new('(ab)+/abcd')
152
+ assert_equal("ababab", machine.match("abababcd"))
153
+ assert_empty(machine.match("ab"))
154
+ assert_empty(machine.match("abef"))
155
+ end
156
+
157
+ def test_machine_with_nullable_lookahead
158
+ machine = Dhaka::LexerSupport::DFA.new(":/[aA\n\r\t]*")
159
+ assert_equal(":", machine.match(":"))
160
+ end
161
+
162
+ private
163
+ def assert_full_match(machine, input)
164
+ assert_equal(input, machine.match(input))
165
+ end
166
+
167
+ def assert_empty(input)
168
+ assert(input.empty?)
169
+ end
170
+ end
@@ -1,13 +1,13 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/simple_grammar'
3
3
 
4
4
  class TestEvaluator < Test::Unit::TestCase
5
- def test_throws_exception_if_evaluation_rules_not_completely_defined
5
+ def test_throws_exception_if_evaluation_rules_not_completely_defined_and_raise_error_option_set_to_true
6
6
  assert_raise(Dhaka::EvaluatorDefinitionError) do
7
7
  eval(
8
8
  "class IncompleteSimpleEvaluator < Dhaka::Evaluator
9
9
  self.grammar = SimpleGrammar
10
- define_evaluation_rules do
10
+ define_evaluation_rules(:raise_error => true) do
11
11
  for_start do
12
12
  something
13
13
  end
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/simple_grammar'
3
3
 
4
4
  class SimpleGrammarTest < Test::Unit::TestCase
@@ -43,7 +43,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
43
43
  start_production = @grammar.production_named('start')
44
44
  start_item = Dhaka::Item.new(start_production, 0)
45
45
  kernel = Set.new([start_item])
46
- channels, closure = @grammar.closure(kernel)
46
+ closure, channels = @grammar.closure(kernel)
47
47
  expected_items = Set.new(['_Start_ ::= -> S # []',
48
48
  'S ::= -> E []',
49
49
  'E ::= -> E - T []',
@@ -60,7 +60,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
60
60
  'Spontaneous Channel from _Start_ ::= -> S # [] to S ::= -> E []'
61
61
  ])
62
62
  assert_equal(expected_items, Set.new(closure.values.collect{|item| item.to_s}))
63
- assert_equal(expected_channels, Set.new(channels.collect{|item| item.to_s}))
63
+ assert_equal(expected_channels, Set.new(channels.values.collect{|set| set.to_a}.flatten.collect{|item| item.to_s}))
64
64
  end
65
65
 
66
66
  def test_export_grammar_to_bnf
@@ -0,0 +1,139 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+
3
+ class TestLexer < Test::Unit::TestCase
4
+ class LexerSpec < Dhaka::LexerSpecification
5
+
6
+ for_pattern 'zz' do
7
+ "two zs"
8
+ end
9
+
10
+ for_pattern '\w(\w|\d)*' do
11
+ "word #{current_lexeme.value}"
12
+ end
13
+
14
+ # can optionally use Regexps as well
15
+ for_pattern(/(\d)*(\.\d+)?/) do
16
+ "number #{current_lexeme.value}"
17
+ end
18
+
19
+ for_pattern '<.*>' do
20
+ "tag #{current_lexeme.value}"
21
+ end
22
+
23
+ for_pattern ' +' do
24
+ #ignores whitespace
25
+ end
26
+
27
+ for_pattern "\n+" do
28
+ "newline"
29
+ end
30
+
31
+ for_pattern "\r+" do
32
+ "carriage return"
33
+ end
34
+
35
+ end
36
+
37
+ def test_lexer_with_valid_input
38
+ lexer = Dhaka::Lexer.new(LexerSpec)
39
+ eval(lexer.compile_to_ruby_source_as(:SomeLexer))
40
+ input = "these are words a z zz caPITALIZED word \r
41
+ this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
42
+ results = SomeLexer.lex(input).collect
43
+ assert_equal(
44
+ ["word these",
45
+ "word are",
46
+ "word words",
47
+ "word a",
48
+ "word z",
49
+ "two zs",
50
+ "word caPITALIZED",
51
+ "word word",
52
+ "carriage return",
53
+ "newline",
54
+ "word this",
55
+ "word is",
56
+ "word a",
57
+ "word float",
58
+ "number 12.00",
59
+ "word an",
60
+ "word integer",
61
+ "number 134",
62
+ "word a",
63
+ "word float",
64
+ "word without",
65
+ "word a",
66
+ "word leading",
67
+ "word digit",
68
+ "number .2335",
69
+ "word another",
70
+ "word word1"], results[0..-2])
71
+ end
72
+
73
+ def test_lexer_with_invalid_input
74
+ lexer = Dhaka::Lexer.new(LexerSpec)
75
+ result = lexer.lex("this will cause an error here 123.").each do |result|
76
+ end
77
+ assert(result.has_error?)
78
+ assert_equal(34, result.unexpected_char_index)
79
+ end
80
+
81
+ def test_lexer_with_greedy_character_consumption
82
+ lexer = Dhaka::Lexer.new(LexerSpec)
83
+ results = lexer.lex("<html></html>this is a word").collect
84
+ assert_equal(["tag <html></html>",
85
+ "word this",
86
+ "word is",
87
+ "word a",
88
+ "word word"], results[0..-2])
89
+ end
90
+
91
+ class LexerWithLookaheadsSpec < Dhaka::LexerSpecification
92
+
93
+ for_pattern '\s+' do
94
+ # ignore whitespace
95
+ end
96
+
97
+ for_pattern ':/\w+' do
98
+ "a symbol qualifier"
99
+ end
100
+
101
+ for_pattern(":/[^a-zA-Z \n\r\t]*") do
102
+ "a colon"
103
+ end
104
+
105
+ for_pattern "ab/cd" do
106
+ "ab followed by cd: #{current_lexeme.value}"
107
+ end
108
+
109
+ for_pattern "abc/e" do
110
+ "abc followed by e: #{current_lexeme.value}"
111
+ end
112
+
113
+ for_pattern '\w+' do
114
+ "word #{current_lexeme.value}"
115
+ end
116
+
117
+ for_pattern '\d+' do
118
+ "number #{current_lexeme.value}"
119
+ end
120
+
121
+ end
122
+
123
+ def test_lexer_with_regexes_that_use_lookaheads
124
+ lexer = Dhaka::Lexer.new(LexerWithLookaheadsSpec)
125
+ eval(lexer.compile_to_ruby_source_as(:LexerWithTrickyLookaheads))
126
+ results = LexerWithTrickyLookaheads.lex("234 : :whatever :1934 abcd ::").collect
127
+ assert_equal(["number 234",
128
+ "a colon",
129
+ "a symbol qualifier",
130
+ "word whatever",
131
+ "a colon",
132
+ "number 1934",
133
+ "ab followed by cd: ab",
134
+ "word cd",
135
+ "a colon",
136
+ "a colon"], results[0..-2])
137
+ end
138
+
139
+ end
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + "/malformed_grammar"
3
3
 
4
4
  class TestMalformedGrammar < Test::Unit::TestCase
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/simple_grammar'
3
3
 
4
4
  class TestParseSuccessResult < Test::Unit::TestCase
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
 
3
3
  class TestParserState < Test::Unit::TestCase
4
4
  include Dhaka
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/simple_grammar'
3
3
  require File.dirname(__FILE__) + '/nullable_grammar'
4
4
  require File.dirname(__FILE__) + '/lalr_but_not_slr_grammar'
@@ -23,7 +23,7 @@ class ParserTest < Test::Unit::TestCase
23
23
  def assert_collection_equal(expected, actual)
24
24
  assert_equal(expected.size, actual.size)
25
25
  actual.each do |actual_member|
26
- assert(contains(actual_member, expected))
26
+ assert(contains(actual_member, expected), "Should have found #{actual_member} in expected set.")
27
27
  end
28
28
  end
29
29
 
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + "/precedence_grammar"
3
3
 
4
4
  class TestPrecedenceGrammar < Test::Unit::TestCase
File without changes
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: dhaka
5
5
  version: !ruby/object:Gem::Version
6
- version: 2.1.0
7
- date: 2007-03-11 00:00:00 -05:00
6
+ version: 2.2.0
7
+ date: 2007-05-29 00:00:00 -04:00
8
8
  summary: An LALR1 parser generator written in Ruby
9
9
  require_paths:
10
10
  - lib
@@ -62,25 +62,6 @@ files:
62
62
  - lib/parser/token.rb
63
63
  - lib/tokenizer/tokenizer.rb
64
64
  - test/all_tests.rb
65
- - test/another_lalr_but_not_slr_grammar.rb
66
- - test/compiled_parser_test.rb
67
- - test/dhaka_test_helper.rb
68
- - test/evaluator_test.rb
69
- - test/fake_logger.rb
70
- - test/grammar_test.rb
71
- - test/lalr_but_not_slr_grammar.rb
72
- - test/lexer_test.rb
73
- - test/malformed_grammar.rb
74
- - test/malformed_grammar_test.rb
75
- - test/nullable_grammar.rb
76
- - test/parse_result_test.rb
77
- - test/parser_state_test.rb
78
- - test/parser_test.rb
79
- - test/precedence_grammar.rb
80
- - test/precedence_grammar_test.rb
81
- - test/rr_conflict_grammar.rb
82
- - test/simple_grammar.rb
83
- - test/sr_conflict_grammar.rb
84
65
  - test/arithmetic/arithmetic_evaluator.rb
85
66
  - test/arithmetic/arithmetic_evaluator_test.rb
86
67
  - test/arithmetic/arithmetic_grammar.rb
@@ -102,11 +83,33 @@ files:
102
83
  - test/chittagong/chittagong_evaluator_test.rb
103
84
  - test/chittagong/chittagong_grammar.rb
104
85
  - test/chittagong/chittagong_lexer.rb
86
+ - test/chittagong/chittagong_lexer.rb.rej
105
87
  - test/chittagong/chittagong_lexer_specification.rb
106
88
  - test/chittagong/chittagong_lexer_test.rb
107
89
  - test/chittagong/chittagong_parser.rb
90
+ - test/chittagong/chittagong_parser.rb.rej
108
91
  - test/chittagong/chittagong_parser_test.rb
109
92
  - test/chittagong/chittagong_test.rb
93
+ - test/core/another_lalr_but_not_slr_grammar.rb
94
+ - test/core/compiled_parser_test.rb
95
+ - test/core/dfa_test.rb
96
+ - test/core/evaluator_test.rb
97
+ - test/core/grammar_test.rb
98
+ - test/core/lalr_but_not_slr_grammar.rb
99
+ - test/core/lexer_test.rb
100
+ - test/core/malformed_grammar.rb
101
+ - test/core/malformed_grammar_test.rb
102
+ - test/core/nullable_grammar.rb
103
+ - test/core/parse_result_test.rb
104
+ - test/core/parser_state_test.rb
105
+ - test/core/parser_test.rb
106
+ - test/core/precedence_grammar.rb
107
+ - test/core/precedence_grammar_test.rb
108
+ - test/core/rr_conflict_grammar.rb
109
+ - test/core/simple_grammar.rb
110
+ - test/core/sr_conflict_grammar.rb
111
+ - test/dhaka_test_helper.rb
112
+ - test/fake_logger.rb
110
113
  - Rakefile
111
114
  test_files: []
112
115