rley 0.3.08 → 0.3.09
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +190 -35
- data/examples/NLP/mini_en_demo.rb +92 -0
- data/lib/rley/constants.rb +1 -1
- metadata +3 -20
- data/examples/grammars/grammar_L0.rb +0 -32
- data/examples/grammars/grammar_abc.rb +0 -26
- data/examples/parsers/demo-JSON/JSON_grammar.rb +0 -31
- data/examples/parsers/demo-JSON/JSON_lexer.rb +0 -114
- data/examples/parsers/demo-JSON/JSON_parser.rb +0 -89
- data/examples/parsers/demo-JSON/demo_json.rb +0 -42
- data/examples/parsers/parsing_L0.rb +0 -124
- data/examples/parsers/parsing_L1.rb +0 -137
- data/examples/parsers/parsing_abc.rb +0 -71
- data/examples/parsers/parsing_ambig.rb +0 -92
- data/examples/parsers/parsing_another.rb +0 -70
- data/examples/parsers/parsing_b_expr.rb +0 -85
- data/examples/parsers/parsing_err_expr.rb +0 -74
- data/examples/parsers/parsing_groucho.rb +0 -97
- data/examples/parsers/parsing_right_recursive.rb +0 -70
- data/examples/parsers/parsing_tricky.rb +0 -91
- data/examples/parsers/tracing_parser.rb +0 -54
- data/examples/recognizers/recognizer_abc.rb +0 -71
@@ -1,70 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
|
-
require 'pp' # TODO remove this dependency
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a problematic grammar
|
17
|
-
# A right-recursive grammar with an empty production
|
18
|
-
# (based on an example in the book by D. Grune and C. J. H. Jacobs,
|
19
|
-
# "Parsing Techniques: A Practical Guide"
|
20
|
-
# Springer, ISBN: 978-1-4419-1901-4, (2010), p. 224)
|
21
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
22
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
23
|
-
builder.add_terminals('a')
|
24
|
-
builder.add_production('S' => %w(a S))
|
25
|
-
builder.add_production('S' => []) # Empty RHS
|
26
|
-
|
27
|
-
# And now build the grammar...
|
28
|
-
right_recursive_gram = builder.grammar
|
29
|
-
|
30
|
-
|
31
|
-
########################################
|
32
|
-
# Step 2. Create a tokenizer for the language
|
33
|
-
# The tokenizer transforms the input into an array of tokens
|
34
|
-
def tokenizer(aText, aGrammar)
|
35
|
-
tokens = aText.chars.map do |lexeme|
|
36
|
-
case lexeme
|
37
|
-
when 'a'
|
38
|
-
terminal = aGrammar.name2symbol[lexeme]
|
39
|
-
else
|
40
|
-
msg = "Unknown input text '#{lexeme}'"
|
41
|
-
raise StandardError, msg
|
42
|
-
end
|
43
|
-
Rley::Parser::Token.new(lexeme, terminal)
|
44
|
-
end
|
45
|
-
|
46
|
-
return tokens
|
47
|
-
end
|
48
|
-
|
49
|
-
########################################
|
50
|
-
# Step 3. Create a parser for that grammar
|
51
|
-
parser = Rley::Parser::EarleyParser.new(right_recursive_gram)
|
52
|
-
|
53
|
-
########################################
|
54
|
-
# Step 4. Tokenize the input
|
55
|
-
valid_input = 'aaaa'
|
56
|
-
tokens = tokenizer(valid_input, right_recursive_gram)
|
57
|
-
|
58
|
-
########################################
|
59
|
-
# Step 5. Let the parser process the input
|
60
|
-
result = parser.parse(tokens)
|
61
|
-
puts "Parsing success? #{result.success?}"
|
62
|
-
#pp result
|
63
|
-
|
64
|
-
result.chart.state_sets.each_with_index do |aStateSet, index|
|
65
|
-
puts "State[#{index}]"
|
66
|
-
puts "========"
|
67
|
-
aStateSet.states.each { |aState| puts aState.to_s }
|
68
|
-
end
|
69
|
-
|
70
|
-
# End of file
|
@@ -1,91 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
|
-
require 'pp' # TODO remove this dependency
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a problematic grammar
|
17
|
-
# Grammar Z: A grammar with hidden left recursion and a cycle
|
18
|
-
# (based on an example in the article by Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers",
|
19
|
-
# Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
|
20
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
21
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
-
builder.add_terminals('a', 'b')
|
23
|
-
builder.add_production('S' => %w(A T))
|
24
|
-
builder.add_production('S' => %w(a T))
|
25
|
-
builder.add_production('A' => 'a')
|
26
|
-
builder.add_production('A' => %w(B A))
|
27
|
-
builder.add_production('B' => []) # Empty RHS
|
28
|
-
builder.add_production('T' => %w(b b b))
|
29
|
-
|
30
|
-
# And now build the grammar...
|
31
|
-
grammar_tricky = builder.grammar
|
32
|
-
|
33
|
-
|
34
|
-
########################################
|
35
|
-
# Step 2. Create a tokenizer for the language
|
36
|
-
# The tokenizer transforms the input into an array of tokens
|
37
|
-
def tokenizer(aText, aGrammar)
|
38
|
-
tokens = aText.chars.map do |lexeme|
|
39
|
-
case lexeme
|
40
|
-
when 'a', 'b'
|
41
|
-
terminal = aGrammar.name2symbol[lexeme]
|
42
|
-
else
|
43
|
-
msg = "Unknown input text '#{lexeme}'"
|
44
|
-
raise StandardError, msg
|
45
|
-
end
|
46
|
-
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
-
end
|
48
|
-
|
49
|
-
return tokens
|
50
|
-
end
|
51
|
-
|
52
|
-
########################################
|
53
|
-
# Step 3. Create a parser for that grammar
|
54
|
-
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
55
|
-
|
56
|
-
########################################
|
57
|
-
# Step 4. Tokenize the input
|
58
|
-
valid_input = 'abbb'
|
59
|
-
tokens = tokenizer(valid_input, grammar_tricky)
|
60
|
-
|
61
|
-
########################################
|
62
|
-
# Step 5. Let the parser process the input
|
63
|
-
result = parser.parse(tokens)
|
64
|
-
puts "Parsing success? #{result.success?}"
|
65
|
-
#pp result
|
66
|
-
|
67
|
-
result.chart.state_sets.each_with_index do |aStateSet, index|
|
68
|
-
puts "State[#{index}]"
|
69
|
-
puts "========"
|
70
|
-
aStateSet.states.each { |aState| puts aState.to_s }
|
71
|
-
end
|
72
|
-
|
73
|
-
=begin
|
74
|
-
########################################
|
75
|
-
# Step 6. Generate a parse tree from the parse result
|
76
|
-
ptree = result.parse_tree
|
77
|
-
pp ptree
|
78
|
-
#=begin
|
79
|
-
########################################
|
80
|
-
# Step 7. Render the parse tree (in JSON)
|
81
|
-
# Let's create a parse tree visitor
|
82
|
-
visitor = Rley::ParseTreeVisitor.new(ptree)
|
83
|
-
|
84
|
-
#Here we create a renderer object...
|
85
|
-
renderer = Rley::Formatter::Json.new(STDOUT)
|
86
|
-
|
87
|
-
# Now emit the parse tree as JSON on the console output
|
88
|
-
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
89
|
-
renderer.render(visitor)
|
90
|
-
=end
|
91
|
-
# End of file
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# Purpose: to use a grammar that causes some Earley parsers to fail.
|
2
|
-
# See: http://stackoverflow.com/questions/22311323/earley-parser-recursion
|
3
|
-
require 'rley' # Load the gem
|
4
|
-
|
5
|
-
# Steps to parse some valid input:
|
6
|
-
# 1. Define a grammar
|
7
|
-
# 2. Create a tokenizer for the language
|
8
|
-
# 3. Create a parser for that grammar
|
9
|
-
# 4. Tokenize the input
|
10
|
-
# 5. Let the parser process the input & trace its progress
|
11
|
-
|
12
|
-
|
13
|
-
########################################
|
14
|
-
# Step 1. Define a grammar that might cause infinite recursion
|
15
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
16
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
17
|
-
builder.add_terminals('ident')
|
18
|
-
builder.add_production('S' => 'E')
|
19
|
-
builder.add_production('E' => ['E', 'E'] )
|
20
|
-
builder.add_production('E' => 'ident')
|
21
|
-
|
22
|
-
# And now build the grammar...
|
23
|
-
grammar_tricky = builder.grammar
|
24
|
-
|
25
|
-
|
26
|
-
########################################
|
27
|
-
# Step 2. Create a tokenizer for the language
|
28
|
-
# The tokenizer transforms the input into an array of tokens
|
29
|
-
def tokenizer(aText, aGrammar)
|
30
|
-
terminal = aGrammar.name2symbol['ident']
|
31
|
-
|
32
|
-
tokens = aText.chars.map do |ch|
|
33
|
-
Rley::Parser::Token.new(ch, terminal)
|
34
|
-
end
|
35
|
-
|
36
|
-
return tokens
|
37
|
-
end
|
38
|
-
|
39
|
-
########################################
|
40
|
-
# Step 3. Create a parser for that grammar
|
41
|
-
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
42
|
-
|
43
|
-
########################################
|
44
|
-
# Step 4. Tokenize the input
|
45
|
-
valid_input = 'abcdefg'
|
46
|
-
tokens = tokenizer(valid_input, grammar_tricky)
|
47
|
-
|
48
|
-
########################################
|
49
|
-
# Step 5. Let the parser process the input
|
50
|
-
# Force the parser to trace its parsing progress.
|
51
|
-
result = parser.parse(tokens, 1)
|
52
|
-
puts "Parsing success? #{result.success?}"
|
53
|
-
|
54
|
-
# End of file
|
@@ -1,71 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build a recognizer
|
2
|
-
# A recognizer is a kind of parser that indicates whether the input
|
3
|
-
# complies with the grammar or not.
|
4
|
-
|
5
|
-
require 'rley' # Load the gem
|
6
|
-
|
7
|
-
# Steps to build a recognizer:
|
8
|
-
# 1. Define a grammar
|
9
|
-
# 2. Create a parser for that grammar
|
10
|
-
# 3. Build the input
|
11
|
-
# 4. Let the parser process the input
|
12
|
-
# 5. Check the parser's result to see whether the input was valid (=recognized)
|
13
|
-
|
14
|
-
########################################
|
15
|
-
# Step 1. Define a grammar for a very simple language
|
16
|
-
# It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
|
17
|
-
# (based on example in N. Wirth's book "Compiler Construction", p. 6)
|
18
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
19
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
20
|
-
builder.add_terminals('a', 'b', 'c')
|
21
|
-
builder.add_production('S' => 'A')
|
22
|
-
builder.add_production('A' => %w(a A c))
|
23
|
-
builder.add_production('A' => 'b')
|
24
|
-
|
25
|
-
# And now build the grammar...
|
26
|
-
grammar_abc = builder.grammar
|
27
|
-
|
28
|
-
# Keep track of the terminal symbols of the grammar:
|
29
|
-
term_a = grammar_abc.name2symbol['a']
|
30
|
-
term_b = grammar_abc.name2symbol['b']
|
31
|
-
term_c = grammar_abc.name2symbol['c']
|
32
|
-
|
33
|
-
########################################
|
34
|
-
# Step 2. Create a parser for that grammar
|
35
|
-
parser = Rley::Parser::EarleyParser.new(grammar_abc)
|
36
|
-
|
37
|
-
########################################
|
38
|
-
# Step 3. Build the input
|
39
|
-
# Mimicking the output of a tokenizer
|
40
|
-
valid_input = [
|
41
|
-
Rley::Parser::Token.new('a', term_a),
|
42
|
-
Rley::Parser::Token.new('a', term_a),
|
43
|
-
Rley::Parser::Token.new('b', term_b),
|
44
|
-
Rley::Parser::Token.new('c', term_c),
|
45
|
-
Rley::Parser::Token.new('c', term_c)
|
46
|
-
]
|
47
|
-
|
48
|
-
########################################
|
49
|
-
# Step 4. Let the parser process the input
|
50
|
-
result = parser.parse(valid_input)
|
51
|
-
|
52
|
-
|
53
|
-
########################################
|
54
|
-
# Step 5. Check the parser's result to see whether the input was valid
|
55
|
-
puts "Successful parse of 'aabcc'? #{result.success?}"
|
56
|
-
# Output: Successful parse of 'aabcc'? true
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# Let's repeat steps 3, 4 and 5 with an invalid input.
|
61
|
-
invalid_input = [
|
62
|
-
Rley::Parser::Token.new('a', term_a),
|
63
|
-
Rley::Parser::Token.new('a', term_a),
|
64
|
-
Rley::Parser::Token.new('b', term_b),
|
65
|
-
Rley::Parser::Token.new('c', term_c)
|
66
|
-
]
|
67
|
-
result = parser.parse(invalid_input)
|
68
|
-
puts "Successful parse of 'aabc'? #{result.success?}"
|
69
|
-
# Output: Successful parse of 'aabc'? false
|
70
|
-
|
71
|
-
# End of file
|