rley 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/examples/parsers/parsing_ambig.rb +1 -1
- data/examples/parsers/parsing_tricky.rb +91 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +8 -3
- data/lib/rley/parser/earley_parser.rb +8 -5
- data/lib/rley/parser/parsing.rb +2 -2
- data/spec/rley/parser/chart_spec.rb +4 -5
- data/spec/rley/parser/earley_parser_spec.rb +4 -4
- data/spec/rley/parser/parsing_spec.rb +5 -5
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d9b026414b1eb219f64ea54d5246bef7f38752a
|
4
|
+
data.tar.gz: 27f2cd60e08fbe83d51c40edc1cd32ff909ff14d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 89bf91254a63bbdeeef9fe523da4662705beecb874f687415aa5e396b5ad244b6461a371b11357f651f014c3908942f05ecd226306279ce97b88fe1c2edf6dd5
|
7
|
+
data.tar.gz: d4e4cd201db5de2b04497726580b1b83773429fef206df81aa7040ef1e34b5df4b134e86bbb53cbb1fc84949224de6cfb6cfb6a42403cf7a1465bfbd3a519c51
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
### 0.2.12 / 2015-11-20
|
2
|
+
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
3
|
+
production rule for the start symbol of a grammar.
|
4
|
+
|
1
5
|
### 0.2.11 / 2015-09-05
|
2
6
|
* [CHANGE] Code re-formatted to please Rubocop 0.34.0
|
3
7
|
* [CHANGE] File `.travis.yml`: added new Rubies: MRI 2.2.0 and JRuby 9.0.
|
@@ -14,7 +14,7 @@ require 'rley' # Load the gem
|
|
14
14
|
|
15
15
|
########################################
|
16
16
|
# Step 1. Define a grammar for a very simple language
|
17
|
-
# Grammar 3:
|
17
|
+
# Grammar 3: An ambiguous arithmetic expression language
|
18
18
|
# (based on example in article on Earley's algorithm in Wikipedia)
|
19
19
|
# Let's create the grammar step-by-step with the grammar builder:
|
20
20
|
builder = Rley::Syntax::GrammarBuilder.new
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
+
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a problematic grammar
|
17
|
+
# Grammar Z: A grammar with hidden left recursion and a cycle
|
18
|
+
# (based on example in article of Elizabeth Scott, "SPPF-Style Parsing From Earley Recognisers"
|
19
|
+
# Electronic Notes in Theoretical Computer Science 203 (2008) 53–67)
|
20
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
21
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
+
builder.add_terminals('a', 'b')
|
23
|
+
builder.add_production('S' => %w(A T))
|
24
|
+
builder.add_production('S' => %w(a T))
|
25
|
+
builder.add_production('A' => 'a')
|
26
|
+
builder.add_production('A' => %w(B A))
|
27
|
+
builder.add_production('B' => []) # Empty RHS
|
28
|
+
builder.add_production('T' => %w(b b b))
|
29
|
+
|
30
|
+
# And now build the grammar...
|
31
|
+
grammar_tricky = builder.grammar
|
32
|
+
|
33
|
+
|
34
|
+
########################################
|
35
|
+
# Step 2. Create a tokenizer for the language
|
36
|
+
# The tokenizer transforms the input into an array of tokens
|
37
|
+
def tokenizer(aText, aGrammar)
|
38
|
+
tokens = aText.chars.map do |lexeme|
|
39
|
+
case lexeme
|
40
|
+
when 'a', 'b'
|
41
|
+
terminal = aGrammar.name2symbol[lexeme]
|
42
|
+
else
|
43
|
+
msg = "Unknown input text '#{lexeme}'"
|
44
|
+
fail StandardError, msg
|
45
|
+
end
|
46
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
+
end
|
48
|
+
|
49
|
+
return tokens
|
50
|
+
end
|
51
|
+
|
52
|
+
########################################
|
53
|
+
# Step 3. Create a parser for that grammar
|
54
|
+
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
55
|
+
|
56
|
+
########################################
|
57
|
+
# Step 4. Tokenize the input
|
58
|
+
valid_input = 'abbb'
|
59
|
+
tokens = tokenizer(valid_input, grammar_tricky)
|
60
|
+
|
61
|
+
########################################
|
62
|
+
# Step 5. Let the parser process the input
|
63
|
+
result = parser.parse(tokens)
|
64
|
+
puts "Parsing success? #{result.success?}"
|
65
|
+
#pp result
|
66
|
+
|
67
|
+
result.chart.state_sets.each_with_index do |aStateSet, index|
|
68
|
+
puts "State[#{index}]"
|
69
|
+
puts "========"
|
70
|
+
aStateSet.states.each { |aState| puts aState.to_s }
|
71
|
+
end
|
72
|
+
|
73
|
+
=begin
|
74
|
+
########################################
|
75
|
+
# Step 6. Generate a parse tree from the parse result
|
76
|
+
ptree = result.parse_tree
|
77
|
+
pp ptree
|
78
|
+
#=begin
|
79
|
+
########################################
|
80
|
+
# Step 7. Render the parse tree (in JSON)
|
81
|
+
# Let's create a parse tree visitor
|
82
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
83
|
+
|
84
|
+
#Here we create a renderer object...
|
85
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
86
|
+
|
87
|
+
# Now emit the parse tree as JSON on the console output
|
88
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
89
|
+
renderer.render(visitor)
|
90
|
+
=end
|
91
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -15,11 +15,16 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# 2: Same as of 1 with the addition of the prediction rules
|
16
16
|
attr_reader(:tracer)
|
17
17
|
|
18
|
-
# @param
|
19
|
-
|
18
|
+
# @param startItems [Array] A non-empty Array of dotted items for
|
19
|
+
# the start symbol.
|
20
|
+
# @param tokenCount [Fixnum] The number of lexemes in the input to parse.
|
21
|
+
# @param aTracer [ParseTracer] A tracer object.
|
22
|
+
def initialize(startItems, tokenCount, aTracer)
|
20
23
|
@tracer = aTracer
|
21
24
|
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
22
|
-
|
25
|
+
startItems.each do |startDottedItem|
|
26
|
+
push_state(startDottedItem, 0, 0, :start_rule)
|
27
|
+
end
|
23
28
|
end
|
24
29
|
|
25
30
|
# The dotted item/rule used to seed the parse chart.
|
@@ -41,7 +41,7 @@ module Rley # This module is used as a namespace
|
|
41
41
|
# @return [Parsing] an object that embeds the parse results.
|
42
42
|
def parse(aTokenSequence, aTraceLevel = 0)
|
43
43
|
tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
|
44
|
-
result = Parsing.new(
|
44
|
+
result = Parsing.new(start_dotted_items, aTokenSequence, tracer)
|
45
45
|
last_token_index = aTokenSequence.size
|
46
46
|
(0..last_token_index).each do |i|
|
47
47
|
handle_error(result) if result.chart[i].empty?
|
@@ -115,10 +115,13 @@ module Rley # This module is used as a namespace
|
|
115
115
|
|
116
116
|
# The dotted item for the start production and
|
117
117
|
# with the dot at the beginning of the rhs
|
118
|
-
def
|
119
|
-
|
120
|
-
|
121
|
-
|
118
|
+
def start_dotted_items()
|
119
|
+
start_symbol = grammar.start_symbol
|
120
|
+
start_items = dotted_items.select do |anItem|
|
121
|
+
(anItem.lhs == start_symbol) && anItem.at_start?
|
122
|
+
end
|
123
|
+
|
124
|
+
return start_items
|
122
125
|
end
|
123
126
|
|
124
127
|
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -16,9 +16,9 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# 0: No trace output (default case)
|
17
17
|
# 1: Show trace of scanning and completion rules
|
18
18
|
# 2: Same as of 1 with the addition of the prediction rules
|
19
|
-
def initialize(
|
19
|
+
def initialize(startDottedRules, theTokens, aTracer)
|
20
20
|
@tokens = theTokens.dup
|
21
|
-
@chart = Chart.new(
|
21
|
+
@chart = Chart.new(startDottedRules, tokens.size, aTracer)
|
22
22
|
end
|
23
23
|
|
24
24
|
# Return true if the parse was successful (= input tokens
|
@@ -26,13 +26,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
26
26
|
end
|
27
27
|
|
28
28
|
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
|
+
|
30
|
+
# Default instantiation rule
|
31
|
+
subject { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
29
32
|
|
30
33
|
context 'Initialization:' do
|
31
|
-
# Default instantiation rule
|
32
|
-
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
33
|
-
|
34
34
|
it 'should be created with start dotted rule, token count, tracer' do
|
35
|
-
expect { Chart.new(dotted_rule, count_token, sample_tracer) }
|
35
|
+
expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
36
36
|
.not_to raise_error
|
37
37
|
end
|
38
38
|
|
@@ -82,7 +82,6 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
82
82
|
ParseState.new(aDottedRule, origin)
|
83
83
|
end
|
84
84
|
|
85
|
-
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
86
85
|
|
87
86
|
it 'should trace its initialization' do
|
88
87
|
subject[0] # Force constructor call here
|
@@ -270,9 +270,9 @@ SNIPPET
|
|
270
270
|
'P => . S | 0', # start rule
|
271
271
|
"S => . S '+' M | 0", # predict from (1)
|
272
272
|
'S => . M | 0', # predict from (1)
|
273
|
-
"M => . M '*' T | 0", # predict from (
|
274
|
-
'M => . T | 0', # predict from (
|
275
|
-
'T => . integer | 0' # predict from (
|
273
|
+
"M => . M '*' T | 0", # predict from (4)
|
274
|
+
'M => . T | 0', # predict from (4)
|
275
|
+
'T => . integer | 0' # predict from (4)
|
276
276
|
]
|
277
277
|
compare_state_texts(parse_result.chart[0], expected)
|
278
278
|
|
@@ -637,7 +637,7 @@ MSG
|
|
637
637
|
###################### S(0) == . a a / a
|
638
638
|
# Expectation chart[0]:
|
639
639
|
expected = [
|
640
|
-
'Z => . E | 0', # start rule
|
640
|
+
'Z => . E | 0', # start rule
|
641
641
|
'E => . E Q F | 0', # predict from (1)
|
642
642
|
'E => . F | 0', # predict from (1)
|
643
643
|
"F => . 'a' | 0" # predict from (3)
|
@@ -54,14 +54,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
54
54
|
let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
|
55
55
|
|
56
56
|
# Default instantiation rule
|
57
|
-
subject { Parsing.new(start_dotted_rule, grm1_tokens, sample_tracer) }
|
57
|
+
subject { Parsing.new([ start_dotted_rule ], grm1_tokens, sample_tracer) }
|
58
58
|
|
59
59
|
context 'Initialization:' do
|
60
|
-
it 'should be created with list of tokens, start dotted
|
61
|
-
|
60
|
+
it 'should be created with list of tokens, start dotted rules, trace' do
|
61
|
+
start_rules = [ start_dotted_rule ]
|
62
62
|
tokens = grm1_tokens
|
63
63
|
tracer = sample_tracer
|
64
|
-
expect { Parsing.new(
|
64
|
+
expect { Parsing.new(start_rules, tokens, tracer) }.not_to raise_error
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should know the input tokens' do
|
@@ -74,7 +74,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
74
74
|
|
75
75
|
it 'should emit trace level 1 info' do
|
76
76
|
tracer = ParseTracer.new(1, output, grm1_tokens)
|
77
|
-
Parsing.new(start_dotted_rule, grm1_tokens, tracer)
|
77
|
+
Parsing.new([ start_dotted_rule ], grm1_tokens, tracer)
|
78
78
|
expectations = <<-SNIPPET
|
79
79
|
['a', 'a', 'b', 'c', 'c']
|
80
80
|
|. a . a . b . c . c .|
|
@@ -8,9 +8,9 @@ module GrammarABCHelper
|
|
8
8
|
def grammar_abc_builder()
|
9
9
|
builder = Rley::Syntax::GrammarBuilder.new
|
10
10
|
builder.add_terminals('a', 'b', 'c')
|
11
|
-
builder.add_production('S' =>
|
11
|
+
builder.add_production('S' => 'A')
|
12
12
|
builder.add_production('A' => %w(a A c))
|
13
|
-
builder.add_production('A' =>
|
13
|
+
builder.add_production('A' => 'b')
|
14
14
|
|
15
15
|
return builder
|
16
16
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -130,6 +130,7 @@ files:
|
|
130
130
|
- examples/parsers/parsing_b_expr.rb
|
131
131
|
- examples/parsers/parsing_err_expr.rb
|
132
132
|
- examples/parsers/parsing_groucho.rb
|
133
|
+
- examples/parsers/parsing_tricky.rb
|
133
134
|
- examples/parsers/tracing_parser.rb
|
134
135
|
- examples/recognizers/recognizer_abc.rb
|
135
136
|
- lib/rley.rb
|