rley 0.2.11 → 0.2.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/examples/parsers/parsing_ambig.rb +1 -1
- data/examples/parsers/parsing_tricky.rb +91 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +8 -3
- data/lib/rley/parser/earley_parser.rb +8 -5
- data/lib/rley/parser/parsing.rb +2 -2
- data/spec/rley/parser/chart_spec.rb +4 -5
- data/spec/rley/parser/earley_parser_spec.rb +4 -4
- data/spec/rley/parser/parsing_spec.rb +5 -5
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d9b026414b1eb219f64ea54d5246bef7f38752a
|
4
|
+
data.tar.gz: 27f2cd60e08fbe83d51c40edc1cd32ff909ff14d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89bf91254a63bbdeeef9fe523da4662705beecb874f687415aa5e396b5ad244b6461a371b11357f651f014c3908942f05ecd226306279ce97b88fe1c2edf6dd5
|
7
|
+
data.tar.gz: d4e4cd201db5de2b04497726580b1b83773429fef206df81aa7040ef1e34b5df4b134e86bbb53cbb1fc84949224de6cfb6cfb6a42403cf7a1465bfbd3a519c51
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
### 0.2.12 / 2015-11-20
|
2
|
+
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
3
|
+
production rule for the start symbol of a grammar.
|
4
|
+
|
1
5
|
### 0.2.11 / 2015-09-05
|
2
6
|
* [CHANGE] Code re-formatted to please Rubocop 0.34.0
|
3
7
|
* [CHANGE] File `.travis.yml`: added new Rubies: MRI 2.2.0 and JRuby 9.0.
|
@@ -14,7 +14,7 @@ require 'rley' # Load the gem
|
|
14
14
|
|
15
15
|
########################################
|
16
16
|
# Step 1. Define a grammar for a very simple language
|
17
|
-
# Grammar 3:
|
17
|
+
# Grammar 3: An ambiguous arithmetic expression language
|
18
18
|
# (based on example in article on Earley's algorithm in Wikipedia)
|
19
19
|
# Let's create the grammar step-by-step with the grammar builder:
|
20
20
|
builder = Rley::Syntax::GrammarBuilder.new
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
+
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a problematic grammar
|
17
|
+
# Grammar Z: A grammar with hidden left recursion and a cycle
|
18
|
+
# (based on an example in the article by Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers",
|
19
|
+
# Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
|
20
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
21
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
+
builder.add_terminals('a', 'b')
|
23
|
+
builder.add_production('S' => %w(A T))
|
24
|
+
builder.add_production('S' => %w(a T))
|
25
|
+
builder.add_production('A' => 'a')
|
26
|
+
builder.add_production('A' => %w(B A))
|
27
|
+
builder.add_production('B' => []) # Empty RHS
|
28
|
+
builder.add_production('T' => %w(b b b))
|
29
|
+
|
30
|
+
# And now build the grammar...
|
31
|
+
grammar_tricky = builder.grammar
|
32
|
+
|
33
|
+
|
34
|
+
########################################
|
35
|
+
# Step 2. Create a tokenizer for the language
|
36
|
+
# The tokenizer transforms the input into an array of tokens
|
37
|
+
def tokenizer(aText, aGrammar)
|
38
|
+
tokens = aText.chars.map do |lexeme|
|
39
|
+
case lexeme
|
40
|
+
when 'a', 'b'
|
41
|
+
terminal = aGrammar.name2symbol[lexeme]
|
42
|
+
else
|
43
|
+
msg = "Unknown input text '#{lexeme}'"
|
44
|
+
fail StandardError, msg
|
45
|
+
end
|
46
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
+
end
|
48
|
+
|
49
|
+
return tokens
|
50
|
+
end
|
51
|
+
|
52
|
+
########################################
|
53
|
+
# Step 3. Create a parser for that grammar
|
54
|
+
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
55
|
+
|
56
|
+
########################################
|
57
|
+
# Step 4. Tokenize the input
|
58
|
+
valid_input = 'abbb'
|
59
|
+
tokens = tokenizer(valid_input, grammar_tricky)
|
60
|
+
|
61
|
+
########################################
|
62
|
+
# Step 5. Let the parser process the input
|
63
|
+
result = parser.parse(tokens)
|
64
|
+
puts "Parsing success? #{result.success?}"
|
65
|
+
#pp result
|
66
|
+
|
67
|
+
result.chart.state_sets.each_with_index do |aStateSet, index|
|
68
|
+
puts "State[#{index}]"
|
69
|
+
puts "========"
|
70
|
+
aStateSet.states.each { |aState| puts aState.to_s }
|
71
|
+
end
|
72
|
+
|
73
|
+
=begin
|
74
|
+
########################################
|
75
|
+
# Step 6. Generate a parse tree from the parse result
|
76
|
+
ptree = result.parse_tree
|
77
|
+
pp ptree
|
78
|
+
#=begin
|
79
|
+
########################################
|
80
|
+
# Step 7. Render the parse tree (in JSON)
|
81
|
+
# Let's create a parse tree visitor
|
82
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
83
|
+
|
84
|
+
#Here we create a renderer object...
|
85
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
86
|
+
|
87
|
+
# Now emit the parse tree as JSON on the console output
|
88
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
89
|
+
renderer.render(visitor)
|
90
|
+
=end
|
91
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -15,11 +15,16 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# 2: Same as 1, with the addition of the prediction rules
|
16
16
|
attr_reader(:tracer)
|
17
17
|
|
18
|
-
# @param
|
19
|
-
|
18
|
+
# @param startItems [Array] A non-empty Array of dotted items for
|
19
|
+
# the start symbol.
|
20
|
+
# @param tokenCount [Fixnum] The number of lexemes in the input to parse.
|
21
|
+
# @param aTracer [ParseTracer] A tracer object.
|
22
|
+
def initialize(startItems, tokenCount, aTracer)
|
20
23
|
@tracer = aTracer
|
21
24
|
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
22
|
-
|
25
|
+
startItems.each do |startDottedItem|
|
26
|
+
push_state(startDottedItem, 0, 0, :start_rule)
|
27
|
+
end
|
23
28
|
end
|
24
29
|
|
25
30
|
# The dotted item/rule used to seed the parse chart.
|
@@ -41,7 +41,7 @@ module Rley # This module is used as a namespace
|
|
41
41
|
# @return [Parsing] an object that embeds the parse results.
|
42
42
|
def parse(aTokenSequence, aTraceLevel = 0)
|
43
43
|
tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
|
44
|
-
result = Parsing.new(
|
44
|
+
result = Parsing.new(start_dotted_items, aTokenSequence, tracer)
|
45
45
|
last_token_index = aTokenSequence.size
|
46
46
|
(0..last_token_index).each do |i|
|
47
47
|
handle_error(result) if result.chart[i].empty?
|
@@ -115,10 +115,13 @@ module Rley # This module is used as a namespace
|
|
115
115
|
|
116
116
|
# The dotted items for every production of the start symbol, each
|
117
117
|
# with the dot at the beginning of the rhs
|
118
|
-
def
|
119
|
-
|
120
|
-
|
121
|
-
|
118
|
+
def start_dotted_items()
|
119
|
+
start_symbol = grammar.start_symbol
|
120
|
+
start_items = dotted_items.select do |anItem|
|
121
|
+
(anItem.lhs == start_symbol) && anItem.at_start?
|
122
|
+
end
|
123
|
+
|
124
|
+
return start_items
|
122
125
|
end
|
123
126
|
|
124
127
|
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -16,9 +16,9 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# 0: No trace output (default case)
|
17
17
|
# 1: Show trace of scanning and completion rules
|
18
18
|
# 2: Same as 1, with the addition of the prediction rules
|
19
|
-
def initialize(
|
19
|
+
def initialize(startDottedRules, theTokens, aTracer)
|
20
20
|
@tokens = theTokens.dup
|
21
|
-
@chart = Chart.new(
|
21
|
+
@chart = Chart.new(startDottedRules, tokens.size, aTracer)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Return true if the parse was successful (= input tokens
|
@@ -26,13 +26,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
26
26
|
end
|
27
27
|
|
28
28
|
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
|
+
|
30
|
+
# Default instantiation rule
|
31
|
+
subject { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
29
32
|
|
30
33
|
context 'Initialization:' do
|
31
|
-
# Default instantiation rule
|
32
|
-
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
33
|
-
|
34
34
|
it 'should be created with start dotted rule, token count, tracer' do
|
35
|
-
expect { Chart.new(dotted_rule, count_token, sample_tracer) }
|
35
|
+
expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
36
36
|
.not_to raise_error
|
37
37
|
end
|
38
38
|
|
@@ -82,7 +82,6 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
82
82
|
ParseState.new(aDottedRule, origin)
|
83
83
|
end
|
84
84
|
|
85
|
-
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
86
85
|
|
87
86
|
it 'should trace its initialization' do
|
88
87
|
subject[0] # Force constructor call here
|
@@ -270,9 +270,9 @@ SNIPPET
|
|
270
270
|
'P => . S | 0', # start rule
|
271
271
|
"S => . S '+' M | 0", # predict from (1)
|
272
272
|
'S => . M | 0', # predict from (1)
|
273
|
-
"M => . M '*' T | 0", # predict from (
|
274
|
-
'M => . T | 0', # predict from (
|
275
|
-
'T => . integer | 0' # predict from (
|
273
|
+
"M => . M '*' T | 0", # predict from (4)
|
274
|
+
'M => . T | 0', # predict from (4)
|
275
|
+
'T => . integer | 0' # predict from (4)
|
276
276
|
]
|
277
277
|
compare_state_texts(parse_result.chart[0], expected)
|
278
278
|
|
@@ -637,7 +637,7 @@ MSG
|
|
637
637
|
###################### S(0) == . a a / a
|
638
638
|
# Expectation chart[0]:
|
639
639
|
expected = [
|
640
|
-
'Z => . E | 0', # start rule
|
640
|
+
'Z => . E | 0', # start rule
|
641
641
|
'E => . E Q F | 0', # predict from (1)
|
642
642
|
'E => . F | 0', # predict from (1)
|
643
643
|
"F => . 'a' | 0" # predict from (3)
|
@@ -54,14 +54,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
54
54
|
let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
|
55
55
|
|
56
56
|
# Default instantiation rule
|
57
|
-
subject { Parsing.new(start_dotted_rule, grm1_tokens, sample_tracer) }
|
57
|
+
subject { Parsing.new([ start_dotted_rule ], grm1_tokens, sample_tracer) }
|
58
58
|
|
59
59
|
context 'Initialization:' do
|
60
|
-
it 'should be created with list of tokens, start dotted
|
61
|
-
|
60
|
+
it 'should be created with list of tokens, start dotted rules, trace' do
|
61
|
+
start_rules = [ start_dotted_rule ]
|
62
62
|
tokens = grm1_tokens
|
63
63
|
tracer = sample_tracer
|
64
|
-
expect { Parsing.new(
|
64
|
+
expect { Parsing.new(start_rules, tokens, tracer) }.not_to raise_error
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should know the input tokens' do
|
@@ -74,7 +74,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
74
74
|
|
75
75
|
it 'should emit trace level 1 info' do
|
76
76
|
tracer = ParseTracer.new(1, output, grm1_tokens)
|
77
|
-
Parsing.new(start_dotted_rule, grm1_tokens, tracer)
|
77
|
+
Parsing.new([ start_dotted_rule ], grm1_tokens, tracer)
|
78
78
|
expectations = <<-SNIPPET
|
79
79
|
['a', 'a', 'b', 'c', 'c']
|
80
80
|
|. a . a . b . c . c .|
|
@@ -8,9 +8,9 @@ module GrammarABCHelper
|
|
8
8
|
def grammar_abc_builder()
|
9
9
|
builder = Rley::Syntax::GrammarBuilder.new
|
10
10
|
builder.add_terminals('a', 'b', 'c')
|
11
|
-
builder.add_production('S' =>
|
11
|
+
builder.add_production('S' => 'A')
|
12
12
|
builder.add_production('A' => %w(a A c))
|
13
|
-
builder.add_production('A' =>
|
13
|
+
builder.add_production('A' => 'b')
|
14
14
|
|
15
15
|
return builder
|
16
16
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -130,6 +130,7 @@ files:
|
|
130
130
|
- examples/parsers/parsing_b_expr.rb
|
131
131
|
- examples/parsers/parsing_err_expr.rb
|
132
132
|
- examples/parsers/parsing_groucho.rb
|
133
|
+
- examples/parsers/parsing_tricky.rb
|
133
134
|
- examples/parsers/tracing_parser.rb
|
134
135
|
- examples/recognizers/recognizer_abc.rb
|
135
136
|
- lib/rley.rb
|