rley 0.3.08 → 0.3.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,71 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree
2
- require 'rley' # Load the gem
3
-
4
- # Steps to render a parse tree (of a valid parsed input):
5
- # 1. Define a grammar
6
- # 2. Create a tokenizer for the language
7
- # 3. Create a parser for that grammar
8
- # 4. Tokenize the input
9
- # 5. Let the parser process the input
10
- # 6. Generate a parse tree from the parse result
11
- # 7. Render the parse tree (in JSON)
12
-
13
- ########################################
14
- # Step 1. Define a grammar for a very simple language
15
- # It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
16
- # (based on example in N. Wirth's book "Compiler Construction", p. 6)
17
- # Let's create the grammar step-by-step with the grammar builder:
18
- builder = Rley::Syntax::GrammarBuilder.new
19
- builder.add_terminals('a', 'b', 'c')
20
- builder.add_production('S' => 'A')
21
- builder.add_production('A' => %w(a A c))
22
- builder.add_production('A' => 'b')
23
-
24
- # And now build the grammar...
25
- grammar_abc = builder.grammar
26
-
27
-
28
- ########################################
29
- # Step 2. Create a tokenizer for the language
30
- # The tokenizer transforms the input into an array of tokens
31
- def tokenizer(aText, aGrammar)
32
- tokens = aText.chars.map do |ch|
33
- terminal = aGrammar.name2symbol[ch]
34
- raise StandardError, "Unknown input character '#{ch}'" if terminal.nil?
35
- Rley::Parser::Token.new(ch, terminal)
36
- end
37
-
38
- return tokens
39
- end
40
-
41
- ########################################
42
- # Step 3. Create a parser for that grammar
43
- parser = Rley::Parser::EarleyParser.new(grammar_abc)
44
-
45
- ########################################
46
- # Step 4. Tokenize the input
47
- valid_input = 'aabcc'
48
- tokens = tokenizer(valid_input, grammar_abc)
49
-
50
- ########################################
51
- # Step 5. Let the parser process the input
52
- result = parser.parse(tokens)
53
-
54
-
55
- ########################################
56
- # Step 6. Generate a parse tree from the parse result
57
- ptree = result.parse_tree
58
-
59
-
60
- ########################################
61
- # Step 7. Render the parse tree (in JSON)
62
- # Let's create a parse tree visitor
63
- visitor = Rley::ParseTreeVisitor.new(ptree)
64
-
65
- #Here we create a renderer object...
66
- renderer = Rley::Formatter::Json.new(STDOUT)
67
-
68
- # Now emit the parse tree as JSON on the console output
69
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
70
- renderer.render(visitor)
71
- # End of file
@@ -1,92 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a very simple language
17
- # Grammar 3: An ambiguous arithmetic expression language
18
- # (based on example in article on Earley's algorithm in Wikipedia)
19
- # Let's create the grammar step-by-step with the grammar builder:
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('integer', '+', '*')
22
- builder.add_production('P' => 'S')
23
- builder.add_production('S' => %w(S + S))
24
- builder.add_production('S' => %w(S * S))
25
- builder.add_production('S' => 'L')
26
- builder.add_production('L' => 'integer')
27
-
28
- # And now build the grammar...
29
- grammar_amb = builder.grammar
30
-
31
-
32
- ########################################
33
- # Step 2. Create a tokenizer for the language
34
- # The tokenizer transforms the input into an array of tokens
35
- def tokenizer(aText, aGrammar)
36
- tokens = aText.scan(/\S+/).map do |lexeme|
37
- case lexeme
38
- when '+', '*'
39
- terminal = aGrammar.name2symbol[lexeme]
40
- when /^[-+]?\d+$/
41
- terminal = aGrammar.name2symbol['integer']
42
- else
43
- msg = "Unknown input text '#{lexeme}'"
44
- raise StandardError, msg
45
- end
46
- Rley::Parser::Token.new(lexeme, terminal)
47
- end
48
-
49
- return tokens
50
- end
51
-
52
- ########################################
53
- # Step 3. Create a parser for that grammar
54
- parser = Rley::Parser::EarleyParser.new(grammar_amb)
55
-
56
- ########################################
57
- # Step 4. Tokenize the input
58
- valid_input = '2 + 3 * 4'
59
- tokens = tokenizer(valid_input, grammar_amb)
60
-
61
- ########################################
62
- # Step 5. Let the parser process the input
63
- result = parser.parse(tokens)
64
- puts "Parsing success? #{result.success?}"
65
- puts "Ambiguous parse? #{result.ambiguous?}"
66
- # pp result
67
-
68
- result.chart.state_sets.each_with_index do |aStateSet, index|
69
- puts "State[#{index}]"
70
- puts "========"
71
- aStateSet.states.each { |aState| puts aState.to_s }
72
- end
73
-
74
- =begin
75
- ########################################
76
- # Step 6. Generate a parse tree from the parse result
77
- ptree = result.parse_tree
78
- pp ptree
79
-
80
- ########################################
81
- # Step 7. Render the parse tree (in JSON)
82
- # Let's create a parse tree visitor
83
- visitor = Rley::ParseTreeVisitor.new(ptree)
84
-
85
- #Here we create a renderer object...
86
- renderer = Rley::Formatter::Json.new(STDOUT)
87
-
88
- # Now emit the parse tree as JSON on the console output
89
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
90
- renderer.render(visitor)
91
- =end
92
- # End of file
@@ -1,70 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a problematic grammar
17
- # Grammar Z: A grammar with hidden left recursion and a cycle
18
- # (based on example 2 in article of Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers"
19
- # Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
20
- # Let's create the grammar step-by-step with the grammar builder:
21
- builder = Rley::Syntax::GrammarBuilder.new
22
- builder.add_terminals('b')
23
- builder.add_production('S' => %w(S S))
24
- builder.add_production('S' => 'b')
25
-
26
- # And now build the grammar...
27
- grammar_tricky = builder.grammar
28
-
29
-
30
- ########################################
31
- # Step 2. Create a tokenizer for the language
32
- # The tokenizer transforms the input into an array of tokens
33
- def tokenizer(aText, aGrammar)
34
- tokens = aText.chars.map do |lexeme|
35
- case lexeme
36
- when 'b'
37
- terminal = aGrammar.name2symbol[lexeme]
38
- else
39
- msg = "Unknown input text '#{lexeme}'"
40
- raise StandardError, msg
41
- end
42
- Rley::Parser::Token.new(lexeme, terminal)
43
- end
44
-
45
- return tokens
46
- end
47
-
48
- ########################################
49
- # Step 3. Create a parser for that grammar
50
- parser = Rley::Parser::EarleyParser.new(grammar_tricky)
51
-
52
- ########################################
53
- # Step 4. Tokenize the input
54
- valid_input = 'bbb'
55
- tokens = tokenizer(valid_input, grammar_tricky)
56
-
57
- ########################################
58
- # Step 5. Let the parser process the input
59
- result = parser.parse(tokens)
60
- puts "Parsing success? #{result.success?}"
61
- puts "Parsing ambiguous? #{result.ambiguous?}"
62
- #pp result
63
-
64
- result.chart.state_sets.each_with_index do |aStateSet, index|
65
- puts "State[#{index}]"
66
- puts "========"
67
- aStateSet.states.each { |aState| puts aState.to_s }
68
- end
69
-
70
- # End of file
@@ -1,85 +0,0 @@
1
- # Purpose: to demonstrate how to parse basic arithmetic expressions
2
- # and render a parse tree
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a very simple arithmetic expression language
17
- # (based on example in article on Earley's algorithm in Wikipedia)
18
-
19
- # Let's create the grammar piece by piece
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('+', '*', 'integer')
22
- builder.add_production('P' => 'S')
23
- builder.add_production('S' => %w(S + M))
24
- builder.add_production('S' => 'M')
25
- builder.add_production('M' => %w(M * T))
26
- builder.add_production('M' => 'T')
27
- builder.add_production('T' => 'integer')
28
-
29
- # And now build the grammar...
30
- grammar_s_expr = builder.grammar
31
-
32
-
33
- ########################################
34
- # Step 2. Create a tokenizer for the language
35
- # The tokenizer transforms the input into an array of tokens
36
- def tokenizer(aText, aGrammar)
37
- tokens = aText.scan(/\S+/).map do |lexeme|
38
- case lexeme
39
- when '+', '*'
40
- terminal = aGrammar.name2symbol[lexeme]
41
- when /^[-+]?\d+$/
42
- terminal = aGrammar.name2symbol['integer']
43
- else
44
- msg = "Unknown input text '#{lexeme}'"
45
- raise StandardError, msg
46
- end
47
- Rley::Parser::Token.new(lexeme, terminal)
48
- end
49
-
50
- return tokens
51
- end
52
-
53
- ########################################
54
- # Step 3. Create a parser for that grammar
55
- parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
56
-
57
- ########################################
58
- # Step 4. Tokenize the input
59
- valid_input = '2 + 3 * 4'
60
- tokens = tokenizer(valid_input, grammar_s_expr)
61
-
62
- ########################################
63
- # Step 5. Let the parser process the input
64
- result = parser.parse(tokens)
65
- puts "Parse successful? #{result.success?}"
66
-
67
-
68
- ########################################
69
- # Step 6. Generate a parse tree from the parse result
70
- ptree = result.parse_tree
71
- pp ptree
72
-
73
- ########################################
74
- # Step 7. Render the parse tree (in JSON)
75
- # Let's create a parse tree visitor
76
- visitor = Rley::ParseTreeVisitor.new(ptree)
77
-
78
- #Here we create a renderer object...
79
- renderer = Rley::Formatter::Json.new(STDOUT)
80
-
81
- # Now emit the parse tree as JSON on the console output
82
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
83
- renderer.render(visitor)
84
-
85
- # End of file
@@ -1,74 +0,0 @@
1
- # Purpose: to demonstrate how to catch parsing errors
2
-
3
- require 'pp' # TODO remove this dependency
4
- require 'rley' # Load the gem
5
-
6
- # Steps to catch a parsing error (caused by an invalid input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the invalid input
12
-
13
-
14
- ########################################
15
- # Step 1. Define a grammar for a very simple arithmetic expression language
16
- # (based on example in article on Earley's algorithm in Wikipedia)
17
-
18
- # Let's create the grammar piece by piece
19
- builder = Rley::Syntax::GrammarBuilder.new
20
- builder.add_terminals('+', '*', 'integer')
21
- builder.add_production('P' => 'S')
22
- builder.add_production('S' => %w(S + M))
23
- builder.add_production('S' => 'M')
24
- builder.add_production('M' => %w(M * T))
25
- builder.add_production('M' => 'T')
26
- builder.add_production('T' => 'integer')
27
-
28
- # And now build the grammar...
29
- grammar_s_expr = builder.grammar
30
-
31
-
32
- ########################################
33
- # Step 2. Create a tokenizer for the language
34
- # The tokenizer transforms the input into an array of tokens
35
- def tokenizer(aText, aGrammar)
36
- tokens = aText.scan(/\S+/).map do |lexeme|
37
- case lexeme
38
- when '+', '*'
39
- terminal = aGrammar.name2symbol[lexeme]
40
- when /^[-+]?\d+$/
41
- terminal = aGrammar.name2symbol['integer']
42
- else
43
- msg = "Unknown input text '#{lexeme}'"
44
- raise StandardError, msg
45
- end
46
- Rley::Parser::Token.new(lexeme, terminal)
47
- end
48
-
49
- return tokens
50
- end
51
-
52
- ########################################
53
- # Step 3. Create a parser for that grammar
54
- parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
55
-
56
- ########################################
57
- # Step 4. Tokenize the invalid input
58
- invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
59
- puts "Invalid expression to parse: #{invalid_input}"
60
- puts ''
61
- tokens = tokenizer(invalid_input, grammar_s_expr)
62
-
63
- ########################################
64
- # Step 5. Let's catch the exception caused by a syntax error...
65
- # ... and display the error message
66
- begin
67
- parser.parse(tokens)
68
- rescue StandardError => exc
69
- puts exc.message
70
- end
71
-
72
-
73
-
74
- # End of file
@@ -1,97 +0,0 @@
1
- # Purpose: to demonstrate how to parse an emblematic ambiguous sentence
2
- # Based on example found at: http://www.nltk.org/book_1ed/ch08.html
3
-
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
-
14
- ########################################
15
- # Step 1. Define a grammar for a micro English-like language
16
- # based on Jurafsky & Martin L0 language (chapter 12 of the book).
17
- # It defines the syntax of a sentence in a language with a
18
- # very limited syntax and lexicon in the context of airline reservation.
19
- builder = Rley::Syntax::GrammarBuilder.new
20
- builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
21
- builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
22
- builder.add_production('S' => %w[NP VP])
23
- builder.add_production('NP' => %w[Det N])
24
- builder.add_production('NP' => %w[Det N PP])
25
- builder.add_production('NP' => 'Pro')
26
- builder.add_production('VP' => %w[V NP])
27
- builder.add_production('VP' => %w[VP PP])
28
- builder.add_production('PP' => %w[P NP])
29
-
30
- # And now build the grammar...
31
- groucho_grammar = builder.grammar
32
-
33
-
34
- ########################################
35
- # Step 2. Create a tokenizer for the language
36
- # The tokenizer transforms the input into an array of tokens
37
- # This is a very simplistic implementation for demo purposes.
38
-
39
- # The lexicon is just a Hash with pairs of the form:
40
- # word => terminal symbol name
41
- Groucho_lexicon = {
42
- 'elephant' => 'N',
43
- 'pajamas' => 'N',
44
- 'shot' => 'V',
45
- 'I' => 'Pro',
46
- 'an' => 'Det',
47
- 'my' => 'Det',
48
- 'in' => 'P',
49
- }
50
-
51
- # Highly simplified tokenizer implementation.
52
- def tokenizer(aText, aGrammar)
53
- tokens = aText.scan(/\S+/).map do |word|
54
- term_name = Groucho_lexicon[word]
55
- if term_name.nil?
56
- raise StandardError, "Word '#{word}' not found in lexicon"
57
- end
58
- terminal = aGrammar.name2symbol[term_name]
59
- Rley::Parser::Token.new(word, terminal)
60
- end
61
-
62
- return tokens
63
- end
64
-
65
- ########################################
66
- # Step 3. Create a parser for that grammar
67
- parser = Rley::Parser::EarleyParser.new(groucho_grammar)
68
-
69
- ########################################
70
- # Step 4. Tokenize the input
71
- valid_input = 'I shot an elephant in my pajamas'
72
- tokens = tokenizer(valid_input, groucho_grammar)
73
-
74
- ########################################
75
- # Step 5. Let the parser process the input
76
- result = parser.parse(tokens)
77
-
78
- puts "Parsing success? #{result.success?}"
79
-
80
- #=begin
81
- ########################################
82
- # Step 6. Generate a parse tree from the parse result
83
- ptree = result.parse_tree
84
-
85
- ########################################
86
- # Step 7. Render the parse tree (in JSON)
87
- # Let's create a parse tree visitor
88
- visitor = Rley::ParseTreeVisitor.new(ptree)
89
-
90
- #Here we create a renderer object...
91
- renderer = Rley::Formatter::Json.new(STDOUT)
92
-
93
- # Now emit the parse tree as JSON on the console output
94
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
95
- renderer.render(visitor)
96
- #=end
97
- # End of file