rley 0.2.12 → 0.2.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/parsers/parsing_ambig.rb +10 -3
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +7 -1
- data/lib/rley/parser/parsing.rb +28 -13
- data/lib/rley/parser/state_set.rb +20 -0
- data/spec/rley/parser/chart_spec.rb +10 -1
- data/spec/rley/parser/earley_parser_spec.rb +5 -3
- data/spec/rley/parser/parsing_spec.rb +1 -1
- data/spec/rley/parser/state_set_spec.rb +25 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3487e06363e33e6a4e0c0e5c05a74379913589ab
|
4
|
+
data.tar.gz: f0907aa318f914f1ae63082b4e7158f929b2e746
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdda4a8211470a1290befe63adfd65c278a3376a706ca367542f7ed1a7bbacc072a0167f08f539d94272ca3145e8b4821ac1f9912dc0d2d8824e01768689ea10
|
7
|
+
data.tar.gz: a23fe29e4573e412275e5ae8fcc5da35763954e130085628353cfce78651f30b0bc5a08ce0130c6edfd317ed952d57029fbfec1842c6fd46f895475061289750
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
### 0.2.14 / 2015-11-25
|
2
|
+
* [FIX] Method `StateSet#ambiguities` overlooked some ambiguities in parse sets.
|
3
|
+
|
4
|
+
### 0.2.13 / 2015-11-25
|
5
|
+
* [NEW] method `Parsing#ambiguous?` returns true if more than one successful parse tree can be retrieved from parse results.
|
6
|
+
* [CHANGED] method `Parsing#success?`. New implementation that relies on start symbol derivation.
|
7
|
+
* [NEW] New method `Chart#start_symbol` added. Returns the start symbol of the grammar.
|
8
|
+
* [NEW] New method `StateSet#ambiguities` added. Returns the parse sets that are ambiguous (= distinct derivations for the same input tokens).
|
9
|
+
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
10
|
+
production rule for the start symbol of a grammar.
|
11
|
+
|
1
12
|
### 0.2.12 / 2015-11-20
|
2
13
|
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
3
14
|
production rule for the start symbol of a grammar.
|
@@ -62,14 +62,21 @@ tokens = tokenizer(valid_input, grammar_amb)
|
|
62
62
|
# Step 5. Let the parser process the input
|
63
63
|
result = parser.parse(tokens)
|
64
64
|
puts "Parsing success? #{result.success?}"
|
65
|
-
|
65
|
+
puts "Ambiguous parse? #{result.ambiguous?}"
|
66
|
+
# pp result
|
66
67
|
|
68
|
+
result.chart.state_sets.each_with_index do |aStateSet, index|
|
69
|
+
puts "State[#{index}]"
|
70
|
+
puts "========"
|
71
|
+
aStateSet.states.each { |aState| puts aState.to_s }
|
72
|
+
end
|
67
73
|
|
74
|
+
=begin
|
68
75
|
########################################
|
69
76
|
# Step 6. Generate a parse tree from the parse result
|
70
77
|
ptree = result.parse_tree
|
71
78
|
pp ptree
|
72
|
-
|
79
|
+
|
73
80
|
########################################
|
74
81
|
# Step 7. Render the parse tree (in JSON)
|
75
82
|
# Let's create a parse tree visitor
|
@@ -81,5 +88,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
81
88
|
# Now emit the parse tree as JSON on the console output
|
82
89
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
90
|
renderer.render(visitor)
|
84
|
-
|
91
|
+
=end
|
85
92
|
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -6,8 +6,9 @@ module Rley # This module is used as a namespace
|
|
6
6
|
# Also called a parse table
|
7
7
|
# A one-dimensional array with n + 1 entries (n = number of input tokens).
|
8
8
|
class Chart
|
9
|
+
# An array of state sets (one per input token + 1)
|
9
10
|
attr_reader(:state_sets)
|
10
|
-
|
11
|
+
|
11
12
|
# The level of trace details reported on stdout during the parse.
|
12
13
|
# The possible values are:
|
13
14
|
# 0: No trace output (default case)
|
@@ -33,6 +34,11 @@ module Rley # This module is used as a namespace
|
|
33
34
|
def start_dotted_rule()
|
34
35
|
return self[0].states.first.dotted_rule
|
35
36
|
end
|
37
|
+
|
38
|
+
# Return the start (non-terminal) symbol of the grammar.
|
39
|
+
def start_symbol()
|
40
|
+
return state_sets.first.states[0].dotted_rule.lhs
|
41
|
+
end
|
36
42
|
|
37
43
|
# Access the state set at given position
|
38
44
|
def [](index)
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -25,10 +25,23 @@ module Rley # This module is used as a namespace
|
|
25
25
|
# followed the syntax specified by the grammar)
|
26
26
|
def success?()
|
27
27
|
# Success can be detected as follows:
|
28
|
-
# The last chart entry has
|
29
|
-
#
|
30
|
-
|
31
|
-
|
28
|
+
# The last chart entry has at least one complete parse state
|
29
|
+
# for the start symbol with an origin == 0
|
30
|
+
last_chart_entry = chart.state_sets[-1]
|
31
|
+
start_symbol = chart.start_symbol
|
32
|
+
|
33
|
+
# Retrieve all the complete states with start symbol in lhs
|
34
|
+
end_states = last_chart_entry.states_rewriting(start_symbol)
|
35
|
+
success_states = end_states.select { |st| st.origin == 0 }
|
36
|
+
|
37
|
+
return !success_states.empty?
|
38
|
+
end
|
39
|
+
|
40
|
+
# Return true if there is more than one complete state
|
41
|
+
# for the same lhs and same origin in any state set.
|
42
|
+
def ambiguous?()
|
43
|
+
found = chart.state_sets.find { |set| !set.ambiguities.empty? }
|
44
|
+
return ! found.nil?
|
32
45
|
end
|
33
46
|
|
34
47
|
# Factory method. Builds a ParseTree from the parse result.
|
@@ -48,7 +61,7 @@ module Rley # This module is used as a namespace
|
|
48
61
|
# puts "Matching symbol: #{match_symbol}"
|
49
62
|
# puts 'Parse tree:'
|
50
63
|
# puts builder.root.to_string(0)
|
51
|
-
|
64
|
+
|
52
65
|
# Place the symbol on left of the dot in the parse tree
|
53
66
|
done = insert_matched_symbol(state_tracker, builder)
|
54
67
|
break if done
|
@@ -130,16 +143,18 @@ module Rley # This module is used as a namespace
|
|
130
143
|
return predicted + others
|
131
144
|
end
|
132
145
|
|
133
|
-
# Retrieve the parse state that represents a complete, successful parse
|
146
|
+
# Retrieve the parse state(s) that represent a complete, successful parse
|
134
147
|
# After a successful parse, the last chart entry
|
135
|
-
# has a parse state that involves the start
|
148
|
+
# has a parse state that involves the start symbol and
|
136
149
|
# has a dot positioned at the end of its rhs.
|
137
|
-
def
|
138
|
-
start_dotted_rule = chart.start_dotted_rule
|
139
|
-
start_production = start_dotted_rule.production
|
150
|
+
def end_parse_states()
|
140
151
|
last_chart_entry = chart.state_sets[-1]
|
141
|
-
|
142
|
-
|
152
|
+
start_symbol = chart.start_symbol
|
153
|
+
|
154
|
+
# Retrieve all the complete states with start symbol in lhs
|
155
|
+
end_states = last_chart_entry.states_rewriting(start_symbol)
|
156
|
+
|
157
|
+
return end_states
|
143
158
|
end
|
144
159
|
|
145
160
|
|
@@ -179,7 +194,7 @@ module Rley # This module is used as a namespace
|
|
179
194
|
# Factory method. Creates and initializes a ParseStateTracker instance.
|
180
195
|
def new_state_tracker()
|
181
196
|
instance = ParseStateTracker.new(chart.last_index)
|
182
|
-
instance.parse_state =
|
197
|
+
instance.parse_state = end_parse_states.first
|
183
198
|
|
184
199
|
return instance
|
185
200
|
end
|
@@ -35,6 +35,7 @@ module Rley # This module is used as a namespace
|
|
35
35
|
def states_expecting(aSymbol)
|
36
36
|
return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
|
37
37
|
end
|
38
|
+
|
38
39
|
|
39
40
|
# The list of complete ParseState that have the given non-terminal
|
40
41
|
# symbol as the lhs of their production.
|
@@ -70,6 +71,25 @@ module Rley # This module is used as a namespace
|
|
70
71
|
terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
|
71
72
|
return terminals.uniq
|
72
73
|
end
|
74
|
+
|
75
|
+
# Return an Array of Arrays of ambiguous parse states.
|
76
|
+
def ambiguities()
|
77
|
+
complete_states = states.select { |st| st.complete? }
|
78
|
+
return [] if complete_states.size <= 1
|
79
|
+
|
80
|
+
# Group parse states by lhs symbol
|
81
|
+
groupings = complete_states.group_by do |st|
|
82
|
+
"#{st.dotted_rule.lhs.object_id}"
|
83
|
+
end
|
84
|
+
|
85
|
+
# Retain the groups having more than one element.
|
86
|
+
ambiguous_groups = []
|
87
|
+
groupings.each_value do |a_group|
|
88
|
+
ambiguous_groups << a_group if a_group.size > 1
|
89
|
+
end
|
90
|
+
|
91
|
+
return ambiguous_groups
|
92
|
+
end
|
73
93
|
|
74
94
|
private
|
75
95
|
|
@@ -16,6 +16,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
16
16
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
17
17
|
describe Chart do
|
18
18
|
let(:count_token) { 20 }
|
19
|
+
let(:sample_start_symbol) { double('fake_non-terminal') }
|
19
20
|
let(:dotted_rule) { double('fake-dotted-item') }
|
20
21
|
|
21
22
|
let(:output) { StringIO.new('', 'w') }
|
@@ -28,10 +29,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
28
29
|
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
30
|
|
30
31
|
# Default instantiation rule
|
31
|
-
subject
|
32
|
+
subject do
|
33
|
+
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
34
|
+
Chart.new([ dotted_rule ], count_token, sample_tracer)
|
35
|
+
end
|
32
36
|
|
33
37
|
context 'Initialization:' do
|
34
38
|
it 'should be created with start dotted rule, token count, tracer' do
|
39
|
+
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
35
40
|
expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
36
41
|
.not_to raise_error
|
37
42
|
end
|
@@ -51,6 +56,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
51
56
|
it 'should know the start dotted rule' do
|
52
57
|
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
53
58
|
end
|
59
|
+
|
60
|
+
it 'should know the start symbol' do
|
61
|
+
expect(subject.start_symbol).to eq(sample_start_symbol)
|
62
|
+
end
|
54
63
|
|
55
64
|
it 'should have at least one non-empty state set' do
|
56
65
|
expect(subject.last_index).to eq(0)
|
@@ -163,6 +163,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
163
163
|
it 'should parse a valid simple input' do
|
164
164
|
parse_result = subject.parse(grm1_tokens)
|
165
165
|
expect(parse_result.success?).to eq(true)
|
166
|
+
expect(parse_result.ambiguous?).to eq(false)
|
166
167
|
|
167
168
|
######################
|
168
169
|
# Expectation chart[0]:
|
@@ -263,6 +264,7 @@ SNIPPET
|
|
263
264
|
instance = EarleyParser.new(grammar_expr)
|
264
265
|
parse_result = instance.parse(grm2_tokens)
|
265
266
|
expect(parse_result.success?).to eq(true)
|
267
|
+
expect(parse_result.ambiguous?).to eq(false)
|
266
268
|
|
267
269
|
###################### S(0): . 2 + 3 * 4
|
268
270
|
# Expectation chart[0]:
|
@@ -397,6 +399,7 @@ SNIPPET
|
|
397
399
|
expect { instance.parse(tokens) }.not_to raise_error
|
398
400
|
parse_result = instance.parse(tokens)
|
399
401
|
expect(parse_result.success?).to eq(true)
|
402
|
+
expect(parse_result.ambiguous?).to eq(true)
|
400
403
|
|
401
404
|
###################### S(0): . 2 + 3 * 4
|
402
405
|
# Expectation chart[0]:
|
@@ -484,6 +487,7 @@ SNIPPET
|
|
484
487
|
expect { instance.parse(tokens) }.not_to raise_error
|
485
488
|
parse_result = instance.parse(tokens)
|
486
489
|
expect(parse_result.success?).to eq(true)
|
490
|
+
expect(parse_result.ambiguous?).to eq(true)
|
487
491
|
|
488
492
|
###################### S(0): . abc + def + ghi
|
489
493
|
# Expectation chart[0]:
|
@@ -565,8 +569,7 @@ MSG
|
|
565
569
|
expect { subject.parse(wrong) }
|
566
570
|
.to raise_error(err, err_msg.chomp)
|
567
571
|
=begin
|
568
|
-
|
569
|
-
|
572
|
+
# This code is never reached (because of exception)
|
570
573
|
###################### S(0) == . a a c c
|
571
574
|
# Expectation chart[0]:
|
572
575
|
expected = [
|
@@ -592,7 +595,6 @@ MSG
|
|
592
595
|
]
|
593
596
|
compare_state_texts(parse_result.chart[2], expected)
|
594
597
|
|
595
|
-
|
596
598
|
###################### S(3) == a a c? c
|
597
599
|
state_set_3 = parse_result.chart[3]
|
598
600
|
expect(state_set_3.states).to be_empty # This is an error symptom
|
@@ -71,6 +71,31 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
71
71
|
expect(subject.states_rewriting(non_term)).to eq([state2])
|
72
72
|
end
|
73
73
|
|
74
|
+
it 'should list of ambiguous states' do
|
75
|
+
prod1 = double('fake-production1')
|
76
|
+
prod2 = double('fake-production2')
|
77
|
+
expect(subject.ambiguities.size).to eq(0)
|
78
|
+
|
79
|
+
# Adding states
|
80
|
+
subject.push_state(state1)
|
81
|
+
allow(dotted_rule1).to receive(:production).and_return(prod1)
|
82
|
+
allow(dotted_rule1).to receive(:"reduce_item?").and_return(true)
|
83
|
+
allow(dotted_rule1).to receive(:lhs).and_return(:something)
|
84
|
+
expect(subject.ambiguities.size).to eq(0)
|
85
|
+
allow(dotted_rule2).to receive(:production).and_return(prod2)
|
86
|
+
allow(dotted_rule2).to receive(:"reduce_item?").and_return(true)
|
87
|
+
allow(dotted_rule2).to receive(:lhs).and_return(:something_else)
|
88
|
+
subject.push_state(state2)
|
89
|
+
expect(subject.ambiguities.size).to eq(0)
|
90
|
+
dotted_rule3 = double('fake_dotted_rule3')
|
91
|
+
allow(dotted_rule3).to receive(:production).and_return(prod2)
|
92
|
+
allow(dotted_rule3).to receive(:"reduce_item?").and_return(true)
|
93
|
+
allow(dotted_rule3).to receive(:lhs).and_return(:something_else)
|
94
|
+
state3 = ParseState.new(dotted_rule3, 5)
|
95
|
+
subject.push_state(state3)
|
96
|
+
expect(subject.ambiguities[0]).to eq([state2, state3])
|
97
|
+
end
|
98
|
+
|
74
99
|
it 'should complain when impossible predecessor of parse state' do
|
75
100
|
subject.push_state(state1)
|
76
101
|
subject.push_state(state2)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -54,6 +54,9 @@ dependencies:
|
|
54
54
|
name: simplecov
|
55
55
|
requirement: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.8'
|
57
60
|
- - ">="
|
58
61
|
- !ruby/object:Gem::Version
|
59
62
|
version: 0.8.0
|
@@ -61,6 +64,9 @@ dependencies:
|
|
61
64
|
prerelease: false
|
62
65
|
version_requirements: !ruby/object:Gem::Requirement
|
63
66
|
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.8'
|
64
70
|
- - ">="
|
65
71
|
- !ruby/object:Gem::Version
|
66
72
|
version: 0.8.0
|
@@ -68,6 +74,9 @@ dependencies:
|
|
68
74
|
name: coveralls
|
69
75
|
requirement: !ruby/object:Gem::Requirement
|
70
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0.7'
|
71
80
|
- - ">="
|
72
81
|
- !ruby/object:Gem::Version
|
73
82
|
version: 0.7.0
|
@@ -75,6 +84,9 @@ dependencies:
|
|
75
84
|
prerelease: false
|
76
85
|
version_requirements: !ruby/object:Gem::Requirement
|
77
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.7'
|
78
90
|
- - ">="
|
79
91
|
- !ruby/object:Gem::Version
|
80
92
|
version: 0.7.0
|