rley 0.3.08 → 0.3.09
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +190 -35
- data/examples/NLP/mini_en_demo.rb +92 -0
- data/lib/rley/constants.rb +1 -1
- metadata +3 -20
- data/examples/grammars/grammar_L0.rb +0 -32
- data/examples/grammars/grammar_abc.rb +0 -26
- data/examples/parsers/demo-JSON/JSON_grammar.rb +0 -31
- data/examples/parsers/demo-JSON/JSON_lexer.rb +0 -114
- data/examples/parsers/demo-JSON/JSON_parser.rb +0 -89
- data/examples/parsers/demo-JSON/demo_json.rb +0 -42
- data/examples/parsers/parsing_L0.rb +0 -124
- data/examples/parsers/parsing_L1.rb +0 -137
- data/examples/parsers/parsing_abc.rb +0 -71
- data/examples/parsers/parsing_ambig.rb +0 -92
- data/examples/parsers/parsing_another.rb +0 -70
- data/examples/parsers/parsing_b_expr.rb +0 -85
- data/examples/parsers/parsing_err_expr.rb +0 -74
- data/examples/parsers/parsing_groucho.rb +0 -97
- data/examples/parsers/parsing_right_recursive.rb +0 -70
- data/examples/parsers/parsing_tricky.rb +0 -91
- data/examples/parsers/tracing_parser.rb +0 -54
- data/examples/recognizers/recognizer_abc.rb +0 -71
@@ -1,70 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
|
-
require 'pp' # TODO remove this dependency
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a problematic grammar
|
17
|
-
# A right-recursive grammar with an empty production: S => a S | (empty)
|
18
|
-
# (based on an example in the book by D. Grune and C. J. H. Jacobs,
|
19
|
-
# "Parsing Techniques: A Practical Guide"
|
20
|
-
# Springer, ISBN: 978-1-4419-1901-4, (2010) p. 224)
|
21
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
22
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
23
|
-
builder.add_terminals('a')
|
24
|
-
builder.add_production('S' => %w(a S))
|
25
|
-
builder.add_production('S' => []) # Empty RHS
|
26
|
-
|
27
|
-
# And now build the grammar...
|
28
|
-
right_recursive_gram = builder.grammar
|
29
|
-
|
30
|
-
|
31
|
-
########################################
|
32
|
-
# Step 2. Create a tokenizer for the language
|
33
|
-
# The tokenizer transforms the input into an array of tokens
|
34
|
-
def tokenizer(aText, aGrammar)
|
35
|
-
tokens = aText.chars.map do |lexeme|
|
36
|
-
case lexeme
|
37
|
-
when 'a'
|
38
|
-
terminal = aGrammar.name2symbol[lexeme]
|
39
|
-
else
|
40
|
-
msg = "Unknown input text '#{lexeme}'"
|
41
|
-
raise StandardError, msg
|
42
|
-
end
|
43
|
-
Rley::Parser::Token.new(lexeme, terminal)
|
44
|
-
end
|
45
|
-
|
46
|
-
return tokens
|
47
|
-
end
|
48
|
-
|
49
|
-
########################################
|
50
|
-
# Step 3. Create a parser for that grammar
|
51
|
-
parser = Rley::Parser::EarleyParser.new(right_recursive_gram)
|
52
|
-
|
53
|
-
########################################
|
54
|
-
# Step 4. Tokenize the input
|
55
|
-
valid_input = 'aaaa'
|
56
|
-
tokens = tokenizer(valid_input, right_recursive_gram)
|
57
|
-
|
58
|
-
########################################
|
59
|
-
# Step 5. Let the parser process the input
|
60
|
-
result = parser.parse(tokens)
|
61
|
-
puts "Parsing success? #{result.success?}"
|
62
|
-
#pp result
|
63
|
-
|
64
|
-
result.chart.state_sets.each_with_index do |aStateSet, index|
|
65
|
-
puts "State[#{index}]"
|
66
|
-
puts "========"
|
67
|
-
aStateSet.states.each { |aState| puts aState.to_s }
|
68
|
-
end
|
69
|
-
|
70
|
-
# End of file
|
@@ -1,91 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
|
-
require 'pp' # TODO remove this dependency
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a problematic grammar
|
17
|
-
# Grammar Z: A grammar with hidden left recursion and a cycle
|
18
|
-
# (based on example in article of Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers"
|
19
|
-
# Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
|
20
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
21
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
-
builder.add_terminals('a', 'b')
|
23
|
-
builder.add_production('S' => %w(A T))
|
24
|
-
builder.add_production('S' => %w(a T))
|
25
|
-
builder.add_production('A' => 'a')
|
26
|
-
builder.add_production('A' => %w(B A))
|
27
|
-
builder.add_production('B' => []) # Empty RHS
|
28
|
-
builder.add_production('T' => %w(b b b))
|
29
|
-
|
30
|
-
# And now build the grammar...
|
31
|
-
grammar_tricky = builder.grammar
|
32
|
-
|
33
|
-
|
34
|
-
########################################
|
35
|
-
# Step 2. Create a tokenizer for the language
|
36
|
-
# The tokenizer transforms the input into an array of tokens
|
37
|
-
def tokenizer(aText, aGrammar)
|
38
|
-
tokens = aText.chars.map do |lexeme|
|
39
|
-
case lexeme
|
40
|
-
when 'a', 'b'
|
41
|
-
terminal = aGrammar.name2symbol[lexeme]
|
42
|
-
else
|
43
|
-
msg = "Unknown input text '#{lexeme}'"
|
44
|
-
raise StandardError, msg
|
45
|
-
end
|
46
|
-
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
-
end
|
48
|
-
|
49
|
-
return tokens
|
50
|
-
end
|
51
|
-
|
52
|
-
########################################
|
53
|
-
# Step 3. Create a parser for that grammar
|
54
|
-
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
55
|
-
|
56
|
-
########################################
|
57
|
-
# Step 4. Tokenize the input
|
58
|
-
valid_input = 'abbb'
|
59
|
-
tokens = tokenizer(valid_input, grammar_tricky)
|
60
|
-
|
61
|
-
########################################
|
62
|
-
# Step 5. Let the parser process the input
|
63
|
-
result = parser.parse(tokens)
|
64
|
-
puts "Parsing success? #{result.success?}"
|
65
|
-
#pp result
|
66
|
-
|
67
|
-
result.chart.state_sets.each_with_index do |aStateSet, index|
|
68
|
-
puts "State[#{index}]"
|
69
|
-
puts "========"
|
70
|
-
aStateSet.states.each { |aState| puts aState.to_s }
|
71
|
-
end
|
72
|
-
|
73
|
-
=begin
|
74
|
-
########################################
|
75
|
-
# Step 6. Generate a parse tree from the parse result
|
76
|
-
ptree = result.parse_tree
|
77
|
-
pp ptree
|
78
|
-
#=begin
|
79
|
-
########################################
|
80
|
-
# Step 7. Render the parse tree (in JSON)
|
81
|
-
# Let's create a parse tree visitor
|
82
|
-
visitor = Rley::ParseTreeVisitor.new(ptree)
|
83
|
-
|
84
|
-
#Here we create a renderer object...
|
85
|
-
renderer = Rley::Formatter::Json.new(STDOUT)
|
86
|
-
|
87
|
-
# Now emit the parse tree as JSON on the console output
|
88
|
-
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
89
|
-
renderer.render(visitor)
|
90
|
-
=end
|
91
|
-
# End of file
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# Purpose: to use a grammar that causes some Earley parsers to fail.
|
2
|
-
# See: http://stackoverflow.com/questions/22311323/earley-parser-recursion
|
3
|
-
require 'rley' # Load the gem
|
4
|
-
|
5
|
-
# Steps to parse some valid input:
|
6
|
-
# 1. Define a grammar
|
7
|
-
# 2. Create a tokenizer for the language
|
8
|
-
# 3. Create a parser for that grammar
|
9
|
-
# 4. Tokenize the input
|
10
|
-
# 5. Let the parser process the input & trace its progress
|
11
|
-
|
12
|
-
|
13
|
-
########################################
|
14
|
-
# Step 1. Define a grammar that might cause infinite recursion
|
15
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
16
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
17
|
-
builder.add_terminals('ident')
|
18
|
-
builder.add_production('S' => 'E')
|
19
|
-
builder.add_production('E' => ['E', 'E'] )
|
20
|
-
builder.add_production('E' => 'ident')
|
21
|
-
|
22
|
-
# And now build the grammar...
|
23
|
-
grammar_tricky = builder.grammar
|
24
|
-
|
25
|
-
|
26
|
-
########################################
|
27
|
-
# Step 2. Create a tokenizer for the language
|
28
|
-
# The tokenizer transforms the input into an array of tokens
|
29
|
-
def tokenizer(aText, aGrammar)
|
30
|
-
terminal = aGrammar.name2symbol['ident']
|
31
|
-
|
32
|
-
tokens = aText.chars.map do |ch|
|
33
|
-
Rley::Parser::Token.new(ch, terminal)
|
34
|
-
end
|
35
|
-
|
36
|
-
return tokens
|
37
|
-
end
|
38
|
-
|
39
|
-
########################################
|
40
|
-
# Step 3. Create a parser for that grammar
|
41
|
-
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
42
|
-
|
43
|
-
########################################
|
44
|
-
# Step 4. Tokenize the input
|
45
|
-
valid_input = 'abcdefg'
|
46
|
-
tokens = tokenizer(valid_input, grammar_tricky)
|
47
|
-
|
48
|
-
########################################
|
49
|
-
# Step 5. Let the parser process the input
|
50
|
-
# Force the parser to trace its parsing progress.
|
51
|
-
result = parser.parse(tokens, 1)
|
52
|
-
puts "Parsing success? #{result.success?}"
|
53
|
-
|
54
|
-
# End of file
|
@@ -1,71 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build a recognizer
|
2
|
-
# A recognizer is a kind of parser that indicates whether the input
|
3
|
-
# complies with the grammar or not.
|
4
|
-
|
5
|
-
require 'rley' # Load the gem
|
6
|
-
|
7
|
-
# Steps to build a recognizer:
|
8
|
-
# 1. Define a grammar
|
9
|
-
# 2. Create a parser for that grammar
|
10
|
-
# 3. Build the input
|
11
|
-
# 4. Let the parser process the input
|
12
|
-
# 5. Check the parser's result to see whether the input was valid (=recognized)
|
13
|
-
|
14
|
-
########################################
|
15
|
-
# Step 1. Define a grammar for a very simple language
|
16
|
-
# It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
|
17
|
-
# (based on example in N. Wirth's book "Compiler Construction", p. 6)
|
18
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
19
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
20
|
-
builder.add_terminals('a', 'b', 'c')
|
21
|
-
builder.add_production('S' => 'A')
|
22
|
-
builder.add_production('A' => %w(a A c))
|
23
|
-
builder.add_production('A' => 'b')
|
24
|
-
|
25
|
-
# And now build the grammar...
|
26
|
-
grammar_abc = builder.grammar
|
27
|
-
|
28
|
-
# Keep track of the terminal symbols of the grammar:
|
29
|
-
term_a = grammar_abc.name2symbol['a']
|
30
|
-
term_b = grammar_abc.name2symbol['b']
|
31
|
-
term_c = grammar_abc.name2symbol['c']
|
32
|
-
|
33
|
-
########################################
|
34
|
-
# Step 2. Create a parser for that grammar
|
35
|
-
parser = Rley::Parser::EarleyParser.new(grammar_abc)
|
36
|
-
|
37
|
-
########################################
|
38
|
-
# Step 3. Build the input
|
39
|
-
# Mimicking the output of a tokenizer
|
40
|
-
valid_input = [
|
41
|
-
Rley::Parser::Token.new('a', term_a),
|
42
|
-
Rley::Parser::Token.new('a', term_a),
|
43
|
-
Rley::Parser::Token.new('b', term_b),
|
44
|
-
Rley::Parser::Token.new('c', term_c),
|
45
|
-
Rley::Parser::Token.new('c', term_c)
|
46
|
-
]
|
47
|
-
|
48
|
-
########################################
|
49
|
-
# Step 4. Let the parser process the input
|
50
|
-
result = parser.parse(valid_input)
|
51
|
-
|
52
|
-
|
53
|
-
########################################
|
54
|
-
# Step 5. Check the parser's result to see whether the input was valid
|
55
|
-
puts "Successful parse of 'aabcc'? #{result.success?}"
|
56
|
-
# Output: Successful parse of 'aabcc'? true
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# Let's redo steps 3, 4, 5 again with an invalid input.
|
61
|
-
invalid_input = [
|
62
|
-
Rley::Parser::Token.new('a', term_a),
|
63
|
-
Rley::Parser::Token.new('a', term_a),
|
64
|
-
Rley::Parser::Token.new('b', term_b),
|
65
|
-
Rley::Parser::Token.new('c', term_c)
|
66
|
-
]
|
67
|
-
result = parser.parse(invalid_input)
|
68
|
-
puts "Successful parse of 'aabc'? #{result.success?}"
|
69
|
-
# Output: Successful parse of 'aabc'? false
|
70
|
-
|
71
|
-
# End of file
|