rley 0.3.08 → 0.3.09

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,71 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree
2
- require 'rley' # Load the gem
3
-
4
- # Steps to render a parse tree (of a valid parsed input):
5
- # 1. Define a grammar
6
- # 2. Create a tokenizer for the language
7
- # 3. Create a parser for that grammar
8
- # 4. Tokenize the input
9
- # 5. Let the parser process the input
10
- # 6. Generate a parse tree from the parse result
11
- # 7. Render the parse tree (in JSON)
12
-
13
- ########################################
14
- # Step 1. Define a grammar for a very simple language
15
- # It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
16
- # (based on example in N. Wirth's book "Compiler Construction", p. 6)
17
- # Let's create the grammar step-by-step with the grammar builder:
18
- builder = Rley::Syntax::GrammarBuilder.new
19
- builder.add_terminals('a', 'b', 'c')
20
- builder.add_production('S' => 'A')
21
- builder.add_production('A' => %w(a A c))
22
- builder.add_production('A' => 'b')
23
-
24
- # And now build the grammar...
25
- grammar_abc = builder.grammar
26
-
27
-
28
- ########################################
29
- # Step 2. Create a tokenizer for the language
30
- # The tokenizer transforms the input into an array of tokens
31
- def tokenizer(aText, aGrammar)
32
- tokens = aText.chars.map do |ch|
33
- terminal = aGrammar.name2symbol[ch]
34
- raise StandardError, "Unknown input character '#{ch}'" if terminal.nil?
35
- Rley::Parser::Token.new(ch, terminal)
36
- end
37
-
38
- return tokens
39
- end
40
-
41
- ########################################
42
- # Step 3. Create a parser for that grammar
43
- parser = Rley::Parser::EarleyParser.new(grammar_abc)
44
-
45
- ########################################
46
- # Step 4. Tokenize the input
47
- valid_input = 'aabcc'
48
- tokens = tokenizer(valid_input, grammar_abc)
49
-
50
- ########################################
51
- # Step 5. Let the parser process the input
52
- result = parser.parse(tokens)
53
-
54
-
55
- ########################################
56
- # Step 6. Generate a parse tree from the parse result
57
- ptree = result.parse_tree
58
-
59
-
60
- ########################################
61
- # Step 7. Render the parse tree (in JSON)
62
- # Let's create a parse tree visitor
63
- visitor = Rley::ParseTreeVisitor.new(ptree)
64
-
65
- # Here we create a renderer object...
66
- renderer = Rley::Formatter::Json.new(STDOUT)
67
-
68
- # Now emit the parse tree as JSON on the console output
69
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
70
- renderer.render(visitor)
71
- # End of file
@@ -1,92 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a very simple language
17
- # Grammar 3: An ambiguous arithmetic expression language
18
- # (based on example in article on Earley's algorithm in Wikipedia)
19
- # Let's create the grammar step-by-step with the grammar builder:
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('integer', '+', '*')
22
- builder.add_production('P' => 'S')
23
- builder.add_production('S' => %w(S + S))
24
- builder.add_production('S' => %w(S * S))
25
- builder.add_production('S' => 'L')
26
- builder.add_production('L' => 'integer')
27
-
28
- # And now build the grammar...
29
- grammar_amb = builder.grammar
30
-
31
-
32
- ########################################
33
- # Step 2. Create a tokenizer for the language
34
- # The tokenizer transforms the input into an array of tokens
35
- def tokenizer(aText, aGrammar)
36
- tokens = aText.scan(/\S+/).map do |lexeme|
37
- case lexeme
38
- when '+', '*'
39
- terminal = aGrammar.name2symbol[lexeme]
40
- when /^[-+]?\d+$/
41
- terminal = aGrammar.name2symbol['integer']
42
- else
43
- msg = "Unknown input text '#{lexeme}'"
44
- raise StandardError, msg
45
- end
46
- Rley::Parser::Token.new(lexeme, terminal)
47
- end
48
-
49
- return tokens
50
- end
51
-
52
- ########################################
53
- # Step 3. Create a parser for that grammar
54
- parser = Rley::Parser::EarleyParser.new(grammar_amb)
55
-
56
- ########################################
57
- # Step 4. Tokenize the input
58
- valid_input = '2 + 3 * 4'
59
- tokens = tokenizer(valid_input, grammar_amb)
60
-
61
- ########################################
62
- # Step 5. Let the parser process the input
63
- result = parser.parse(tokens)
64
- puts "Parsing success? #{result.success?}"
65
- puts "Ambiguous parse? #{result.ambiguous?}"
66
- # pp result
67
-
68
- result.chart.state_sets.each_with_index do |aStateSet, index|
69
- puts "State[#{index}]"
70
- puts "========"
71
- aStateSet.states.each { |aState| puts aState.to_s }
72
- end
73
-
74
- =begin
75
- ########################################
76
- # Step 6. Generate a parse tree from the parse result
77
- ptree = result.parse_tree
78
- pp ptree
79
-
80
- ########################################
81
- # Step 7. Render the parse tree (in JSON)
82
- # Let's create a parse tree visitor
83
- visitor = Rley::ParseTreeVisitor.new(ptree)
84
-
85
- # Here we create a renderer object...
86
- renderer = Rley::Formatter::Json.new(STDOUT)
87
-
88
- # Now emit the parse tree as JSON on the console output
89
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
90
- renderer.render(visitor)
91
- =end
92
- # End of file
@@ -1,70 +0,0 @@
1
- # Purpose: to demonstrate how to parse an input with a tricky grammar and dump the resulting chart
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a problematic grammar
17
- # Grammar Z: A highly ambiguous grammar with direct left recursion
18
- # (based on example 2 in the article by Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers",
19
- # Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
20
- # Let's create the grammar step-by-step with the grammar builder:
21
- builder = Rley::Syntax::GrammarBuilder.new
22
- builder.add_terminals('b')
23
- builder.add_production('S' => %w(S S))
24
- builder.add_production('S' => 'b')
25
-
26
- # And now build the grammar...
27
- grammar_tricky = builder.grammar
28
-
29
-
30
- ########################################
31
- # Step 2. Create a tokenizer for the language
32
- # The tokenizer transforms the input into an array of tokens
33
- def tokenizer(aText, aGrammar)
34
- tokens = aText.chars.map do |lexeme|
35
- case lexeme
36
- when 'b'
37
- terminal = aGrammar.name2symbol[lexeme]
38
- else
39
- msg = "Unknown input text '#{lexeme}'"
40
- raise StandardError, msg
41
- end
42
- Rley::Parser::Token.new(lexeme, terminal)
43
- end
44
-
45
- return tokens
46
- end
47
-
48
- ########################################
49
- # Step 3. Create a parser for that grammar
50
- parser = Rley::Parser::EarleyParser.new(grammar_tricky)
51
-
52
- ########################################
53
- # Step 4. Tokenize the input
54
- valid_input = 'bbb'
55
- tokens = tokenizer(valid_input, grammar_tricky)
56
-
57
- ########################################
58
- # Step 5. Let the parser process the input
59
- result = parser.parse(tokens)
60
- puts "Parsing success? #{result.success?}"
61
- puts "Parsing ambiguous? #{result.ambiguous?}"
62
- #pp result
63
-
64
- result.chart.state_sets.each_with_index do |aStateSet, index|
65
- puts "State[#{index}]"
66
- puts "========"
67
- aStateSet.states.each { |aState| puts aState.to_s }
68
- end
69
-
70
- # End of file
@@ -1,85 +0,0 @@
1
- # Purpose: to demonstrate how to parse basic arithmetic expressions
2
- # and render a parse tree
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a very simple arithmetic expression language
17
- # (based on example in article on Earley's algorithm in Wikipedia)
18
-
19
- # Let's create the grammar piece by piece
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('+', '*', 'integer')
22
- builder.add_production('P' => 'S')
23
- builder.add_production('S' => %w(S + M))
24
- builder.add_production('S' => 'M')
25
- builder.add_production('M' => %w(M * T))
26
- builder.add_production('M' => 'T')
27
- builder.add_production('T' => 'integer')
28
-
29
- # And now build the grammar...
30
- grammar_s_expr = builder.grammar
31
-
32
-
33
- ########################################
34
- # Step 2. Create a tokenizer for the language
35
- # The tokenizer transforms the input into an array of tokens
36
- def tokenizer(aText, aGrammar)
37
- tokens = aText.scan(/\S+/).map do |lexeme|
38
- case lexeme
39
- when '+', '*'
40
- terminal = aGrammar.name2symbol[lexeme]
41
- when /^[-+]?\d+$/
42
- terminal = aGrammar.name2symbol['integer']
43
- else
44
- msg = "Unknown input text '#{lexeme}'"
45
- raise StandardError, msg
46
- end
47
- Rley::Parser::Token.new(lexeme, terminal)
48
- end
49
-
50
- return tokens
51
- end
52
-
53
- ########################################
54
- # Step 3. Create a parser for that grammar
55
- parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
56
-
57
- ########################################
58
- # Step 4. Tokenize the input
59
- valid_input = '2 + 3 * 4'
60
- tokens = tokenizer(valid_input, grammar_s_expr)
61
-
62
- ########################################
63
- # Step 5. Let the parser process the input
64
- result = parser.parse(tokens)
65
- puts "Parse successful? #{result.success?}"
66
-
67
-
68
- ########################################
69
- # Step 6. Generate a parse tree from the parse result
70
- ptree = result.parse_tree
71
- pp ptree
72
-
73
- ########################################
74
- # Step 7. Render the parse tree (in JSON)
75
- # Let's create a parse tree visitor
76
- visitor = Rley::ParseTreeVisitor.new(ptree)
77
-
78
- # Here we create a renderer object...
79
- renderer = Rley::Formatter::Json.new(STDOUT)
80
-
81
- # Now emit the parse tree as JSON on the console output
82
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
83
- renderer.render(visitor)
84
-
85
- # End of file
@@ -1,74 +0,0 @@
1
- # Purpose: to demonstrate how to catch parsing errors
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to catch a parsing error (on an invalid input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the invalid input
12
-
13
-
14
- ########################################
15
- # Step 1. Define a grammar for a very simple arithmetic expression language
16
- # (based on example in article on Earley's algorithm in Wikipedia)
17
-
18
- # Let's create the grammar piece by piece
19
- builder = Rley::Syntax::GrammarBuilder.new
20
- builder.add_terminals('+', '*', 'integer')
21
- builder.add_production('P' => 'S')
22
- builder.add_production('S' => %w(S + M))
23
- builder.add_production('S' => 'M')
24
- builder.add_production('M' => %w(M * T))
25
- builder.add_production('M' => 'T')
26
- builder.add_production('T' => 'integer')
27
-
28
- # And now build the grammar...
29
- grammar_s_expr = builder.grammar
30
-
31
-
32
- ########################################
33
- # Step 2. Create a tokenizer for the language
34
- # The tokenizer transforms the input into an array of tokens
35
- def tokenizer(aText, aGrammar)
36
- tokens = aText.scan(/\S+/).map do |lexeme|
37
- case lexeme
38
- when '+', '*'
39
- terminal = aGrammar.name2symbol[lexeme]
40
- when /^[-+]?\d+$/
41
- terminal = aGrammar.name2symbol['integer']
42
- else
43
- msg = "Unknown input text '#{lexeme}'"
44
- raise StandardError, msg
45
- end
46
- Rley::Parser::Token.new(lexeme, terminal)
47
- end
48
-
49
- return tokens
50
- end
51
-
52
- ########################################
53
- # Step 3. Create a parser for that grammar
54
- parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
55
-
56
- ########################################
57
- # Step 4. Tokenize the invalid input
58
- invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
59
- puts "Invalid expression to parse: #{invalid_input}"
60
- puts ''
61
- tokens = tokenizer(invalid_input, grammar_s_expr)
62
-
63
- ########################################
64
- # Step 5. Let's catch the exception caused by a syntax error...
65
- # ... and display the error message
66
- begin
67
- parser.parse(tokens)
68
- rescue StandardError => exc
69
- puts exc.message
70
- end
71
-
72
-
73
-
74
- # End of file
@@ -1,97 +0,0 @@
1
- # Purpose: to demonstrate how to parse an emblematic ambiguous sentence
2
- # Based on example found at: http://www.nltk.org/book_1ed/ch08.html
3
-
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
-
14
- ########################################
15
- # Step 1. Define a grammar for a micro English-like language
16
- # based on Jurafsky & Martin L0 language (chapter 12 of the book).
17
- # It defines the syntax of a sentence in a language with a
18
- # very limited syntax and lexicon in the context of airline reservation.
19
- builder = Rley::Syntax::GrammarBuilder.new
20
- builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
21
- builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
22
- builder.add_production('S' => %w[NP VP])
23
- builder.add_production('NP' => %w[Det N])
24
- builder.add_production('NP' => %w[Det N PP])
25
- builder.add_production('NP' => 'Pro')
26
- builder.add_production('VP' => %w[V NP])
27
- builder.add_production('VP' => %w[VP PP])
28
- builder.add_production('PP' => %w[P NP])
29
-
30
- # And now build the grammar...
31
- groucho_grammar = builder.grammar
32
-
33
-
34
- ########################################
35
- # Step 2. Create a tokenizer for the language
36
- # The tokenizer transforms the input into an array of tokens
37
- # This is a very simplistic implementation for demo purposes.
38
-
39
- # The lexicon is just a Hash with pairs of the form:
40
- # word => terminal symbol name
41
- Groucho_lexicon = {
42
- 'elephant' => 'N',
43
- 'pajamas' => 'N',
44
- 'shot' => 'V',
45
- 'I' => 'Pro',
46
- 'an' => 'Det',
47
- 'my' => 'Det',
48
- 'in' => 'P',
49
- }
50
-
51
- # Highly simplified tokenizer implementation.
52
- def tokenizer(aText, aGrammar)
53
- tokens = aText.scan(/\S+/).map do |word|
54
- term_name = Groucho_lexicon[word]
55
- if term_name.nil?
56
- raise StandardError, "Word '#{word}' not found in lexicon"
57
- end
58
- terminal = aGrammar.name2symbol[term_name]
59
- Rley::Parser::Token.new(word, terminal)
60
- end
61
-
62
- return tokens
63
- end
64
-
65
- ########################################
66
- # Step 3. Create a parser for that grammar
67
- parser = Rley::Parser::EarleyParser.new(groucho_grammar)
68
-
69
- ########################################
70
- # Step 4. Tokenize the input
71
- valid_input = 'I shot an elephant in my pajamas'
72
- tokens = tokenizer(valid_input, groucho_grammar)
73
-
74
- ########################################
75
- # Step 5. Let the parser process the input
76
- result = parser.parse(tokens)
77
-
78
- puts "Parsing success? #{result.success?}"
79
-
80
- #=begin
81
- ########################################
82
- # Step 6. Generate a parse tree from the parse result
83
- ptree = result.parse_tree
84
-
85
- ########################################
86
- # Step 7. Render the parse tree (in JSON)
87
- # Let's create a parse tree visitor
88
- visitor = Rley::ParseTreeVisitor.new(ptree)
89
-
90
- # Here we create a renderer object...
91
- renderer = Rley::Formatter::Json.new(STDOUT)
92
-
93
- # Now emit the parse tree as JSON on the console output
94
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
95
- renderer.render(visitor)
96
- #=end
97
- # End of file