rley 0.2.12 → 0.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/parsers/parsing_ambig.rb +10 -3
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +7 -1
- data/lib/rley/parser/parsing.rb +28 -13
- data/lib/rley/parser/state_set.rb +20 -0
- data/spec/rley/parser/chart_spec.rb +10 -1
- data/spec/rley/parser/earley_parser_spec.rb +5 -3
- data/spec/rley/parser/parsing_spec.rb +1 -1
- data/spec/rley/parser/state_set_spec.rb +25 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3487e06363e33e6a4e0c0e5c05a74379913589ab
|
4
|
+
data.tar.gz: f0907aa318f914f1ae63082b4e7158f929b2e746
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdda4a8211470a1290befe63adfd65c278a3376a706ca367542f7ed1a7bbacc072a0167f08f539d94272ca3145e8b4821ac1f9912dc0d2d8824e01768689ea10
|
7
|
+
data.tar.gz: a23fe29e4573e412275e5ae8fcc5da35763954e130085628353cfce78651f30b0bc5a08ce0130c6edfd317ed952d57029fbfec1842c6fd46f895475061289750
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
### 0.2.14 / 2015-11-25
|
2
|
+
* [FIX] Method `StateSet#ambiguities` overlooked some ambiguities in parse sets.
|
3
|
+
|
4
|
+
### 0.2.13 / 2015-11-25
|
5
|
+
* [NEW] method `Parsing#ambiguous?` returns true if more than one successful parse tree can be retrieved from parse results.
|
6
|
+
* [CHANGED] method `Parsing#success?`. New implementation that relies on start symbol derivation.
|
7
|
+
* [NEW] New method `Chart#start_symbol` added. Returns the start symbol of the grammar.
|
8
|
+
* [NEW] New method `StateSet#ambiguities` added. Returns the parse sets that are ambiguous (= distinct derivation for same input tokens).
|
9
|
+
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
10
|
+
production rule for the start symbol of a grammar.
|
11
|
+
|
1
12
|
### 0.2.12 / 2015-11-20
|
2
13
|
* [FIX] In special cases the parsing didn't work correctly when there was more than one
|
3
14
|
production rule for the start symbol of a grammar.
|
@@ -62,14 +62,21 @@ tokens = tokenizer(valid_input, grammar_amb)
|
|
62
62
|
# Step 5. Let the parser process the input
|
63
63
|
result = parser.parse(tokens)
|
64
64
|
puts "Parsing success? #{result.success?}"
|
65
|
-
|
65
|
+
puts "Ambiguous parse? #{result.ambiguous?}"
|
66
|
+
# pp result
|
66
67
|
|
68
|
+
result.chart.state_sets.each_with_index do |aStateSet, index|
|
69
|
+
puts "State[#{index}]"
|
70
|
+
puts "========"
|
71
|
+
aStateSet.states.each { |aState| puts aState.to_s }
|
72
|
+
end
|
67
73
|
|
74
|
+
=begin
|
68
75
|
########################################
|
69
76
|
# Step 6. Generate a parse tree from the parse result
|
70
77
|
ptree = result.parse_tree
|
71
78
|
pp ptree
|
72
|
-
|
79
|
+
|
73
80
|
########################################
|
74
81
|
# Step 7. Render the parse tree (in JSON)
|
75
82
|
# Let's create a parse tree visitor
|
@@ -81,5 +88,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
81
88
|
# Now emit the parse tree as JSON on the console output
|
82
89
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
90
|
renderer.render(visitor)
|
84
|
-
|
91
|
+
=end
|
85
92
|
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -6,8 +6,9 @@ module Rley # This module is used as a namespace
|
|
6
6
|
# Also called a parse table
|
7
7
|
# A one-dimensional array with n + 1 entries (n = number of input tokens).
|
8
8
|
class Chart
|
9
|
+
# An array of state sets (one per input token + 1)
|
9
10
|
attr_reader(:state_sets)
|
10
|
-
|
11
|
+
|
11
12
|
# The level of trace details reported on stdout during the parse.
|
12
13
|
# The possible values are:
|
13
14
|
# 0: No trace output (default case)
|
@@ -33,6 +34,11 @@ module Rley # This module is used as a namespace
|
|
33
34
|
def start_dotted_rule()
|
34
35
|
return self[0].states.first.dotted_rule
|
35
36
|
end
|
37
|
+
|
38
|
+
# Return the start (non-terminal) symbol of the grammar.
|
39
|
+
def start_symbol()
|
40
|
+
return state_sets.first.states[0].dotted_rule.lhs
|
41
|
+
end
|
36
42
|
|
37
43
|
# Access the state set at given position
|
38
44
|
def [](index)
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -25,10 +25,23 @@ module Rley # This module is used as a namespace
|
|
25
25
|
# followed the syntax specified by the grammar)
|
26
26
|
def success?()
|
27
27
|
# Success can be detected as follows:
|
28
|
-
# The last chart entry has
|
29
|
-
#
|
30
|
-
|
31
|
-
|
28
|
+
# The last chart entry has at least one complete parse state
|
29
|
+
# for the start symbol with an origin == 0
|
30
|
+
last_chart_entry = chart.state_sets[-1]
|
31
|
+
start_symbol = chart.start_symbol
|
32
|
+
|
33
|
+
# Retrieve all the complete states with start symbol in lhs
|
34
|
+
end_states = last_chart_entry.states_rewriting(start_symbol)
|
35
|
+
success_states = end_states.select { |st| st.origin == 0 }
|
36
|
+
|
37
|
+
return !success_states.empty?
|
38
|
+
end
|
39
|
+
|
40
|
+
# Return true if there is more than one complete state
|
41
|
+
# for the same lhs and same origin in any state set.
|
42
|
+
def ambiguous?()
|
43
|
+
found = chart.state_sets.find { |set| !set.ambiguities.empty? }
|
44
|
+
return ! found.nil?
|
32
45
|
end
|
33
46
|
|
34
47
|
# Factory method. Builds a ParseTree from the parse result.
|
@@ -48,7 +61,7 @@ module Rley # This module is used as a namespace
|
|
48
61
|
# puts "Matching symbol: #{match_symbol}"
|
49
62
|
# puts 'Parse tree:'
|
50
63
|
# puts builder.root.to_string(0)
|
51
|
-
|
64
|
+
|
52
65
|
# Place the symbol on left of the dot in the parse tree
|
53
66
|
done = insert_matched_symbol(state_tracker, builder)
|
54
67
|
break if done
|
@@ -130,16 +143,18 @@ module Rley # This module is used as a namespace
|
|
130
143
|
return predicted + others
|
131
144
|
end
|
132
145
|
|
133
|
-
# Retrieve the parse state that represents a complete, successful parse
|
146
|
+
# Retrieve the parse state(s) that represents a complete, successful parse
|
134
147
|
# After a successful parse, the last chart entry
|
135
|
-
# has a parse state that involves the start
|
148
|
+
# has a parse state that involves the start symbol and
|
136
149
|
# has a dot positioned at the end of its rhs.
|
137
|
-
def
|
138
|
-
start_dotted_rule = chart.start_dotted_rule
|
139
|
-
start_production = start_dotted_rule.production
|
150
|
+
def end_parse_states()
|
140
151
|
last_chart_entry = chart.state_sets[-1]
|
141
|
-
|
142
|
-
|
152
|
+
start_symbol = chart.start_symbol
|
153
|
+
|
154
|
+
# Retrieve all the complete states with start symbol in lhs
|
155
|
+
end_states = last_chart_entry.states_rewriting(start_symbol)
|
156
|
+
|
157
|
+
return end_states
|
143
158
|
end
|
144
159
|
|
145
160
|
|
@@ -179,7 +194,7 @@ module Rley # This module is used as a namespace
|
|
179
194
|
# Factory method. Creates and initializes a ParseStateTracker instance.
|
180
195
|
def new_state_tracker()
|
181
196
|
instance = ParseStateTracker.new(chart.last_index)
|
182
|
-
instance.parse_state =
|
197
|
+
instance.parse_state = end_parse_states.first
|
183
198
|
|
184
199
|
return instance
|
185
200
|
end
|
@@ -35,6 +35,7 @@ module Rley # This module is used as a namespace
|
|
35
35
|
def states_expecting(aSymbol)
|
36
36
|
return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
|
37
37
|
end
|
38
|
+
|
38
39
|
|
39
40
|
# The list of complete ParseState that have the given non-terminal
|
40
41
|
# symbol as the lhs of their production.
|
@@ -70,6 +71,25 @@ module Rley # This module is used as a namespace
|
|
70
71
|
terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
|
71
72
|
return terminals.uniq
|
72
73
|
end
|
74
|
+
|
75
|
+
# Return an Array of Arrays of ambiguous parse states.
|
76
|
+
def ambiguities()
|
77
|
+
complete_states = states.select { |st| st.complete? }
|
78
|
+
return [] if complete_states.size <= 1
|
79
|
+
|
80
|
+
# Group parse states by lhs symbol
|
81
|
+
groupings = complete_states.group_by do |st|
|
82
|
+
"#{st.dotted_rule.lhs.object_id}"
|
83
|
+
end
|
84
|
+
|
85
|
+
# Retain the groups having more than one element.
|
86
|
+
ambiguous_groups = []
|
87
|
+
groupings.each_value do |a_group|
|
88
|
+
ambiguous_groups << a_group if a_group.size > 1
|
89
|
+
end
|
90
|
+
|
91
|
+
return ambiguous_groups
|
92
|
+
end
|
73
93
|
|
74
94
|
private
|
75
95
|
|
@@ -16,6 +16,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
16
16
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
17
17
|
describe Chart do
|
18
18
|
let(:count_token) { 20 }
|
19
|
+
let(:sample_start_symbol) { double('fake_non-terminal') }
|
19
20
|
let(:dotted_rule) { double('fake-dotted-item') }
|
20
21
|
|
21
22
|
let(:output) { StringIO.new('', 'w') }
|
@@ -28,10 +29,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
28
29
|
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
30
|
|
30
31
|
# Default instantiation rule
|
31
|
-
subject
|
32
|
+
subject do
|
33
|
+
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
34
|
+
Chart.new([ dotted_rule ], count_token, sample_tracer)
|
35
|
+
end
|
32
36
|
|
33
37
|
context 'Initialization:' do
|
34
38
|
it 'should be created with start dotted rule, token count, tracer' do
|
39
|
+
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
35
40
|
expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
36
41
|
.not_to raise_error
|
37
42
|
end
|
@@ -51,6 +56,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
51
56
|
it 'should know the start dotted rule' do
|
52
57
|
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
53
58
|
end
|
59
|
+
|
60
|
+
it 'should know the start symbol' do
|
61
|
+
expect(subject.start_symbol).to eq(sample_start_symbol)
|
62
|
+
end
|
54
63
|
|
55
64
|
it 'should have at least one non-empty state set' do
|
56
65
|
expect(subject.last_index).to eq(0)
|
@@ -163,6 +163,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
163
163
|
it 'should parse a valid simple input' do
|
164
164
|
parse_result = subject.parse(grm1_tokens)
|
165
165
|
expect(parse_result.success?).to eq(true)
|
166
|
+
expect(parse_result.ambiguous?).to eq(false)
|
166
167
|
|
167
168
|
######################
|
168
169
|
# Expectation chart[0]:
|
@@ -263,6 +264,7 @@ SNIPPET
|
|
263
264
|
instance = EarleyParser.new(grammar_expr)
|
264
265
|
parse_result = instance.parse(grm2_tokens)
|
265
266
|
expect(parse_result.success?).to eq(true)
|
267
|
+
expect(parse_result.ambiguous?).to eq(false)
|
266
268
|
|
267
269
|
###################### S(0): . 2 + 3 * 4
|
268
270
|
# Expectation chart[0]:
|
@@ -397,6 +399,7 @@ SNIPPET
|
|
397
399
|
expect { instance.parse(tokens) }.not_to raise_error
|
398
400
|
parse_result = instance.parse(tokens)
|
399
401
|
expect(parse_result.success?).to eq(true)
|
402
|
+
expect(parse_result.ambiguous?).to eq(true)
|
400
403
|
|
401
404
|
###################### S(0): . 2 + 3 * 4
|
402
405
|
# Expectation chart[0]:
|
@@ -484,6 +487,7 @@ SNIPPET
|
|
484
487
|
expect { instance.parse(tokens) }.not_to raise_error
|
485
488
|
parse_result = instance.parse(tokens)
|
486
489
|
expect(parse_result.success?).to eq(true)
|
490
|
+
expect(parse_result.ambiguous?).to eq(true)
|
487
491
|
|
488
492
|
###################### S(0): . abc + def + ghi
|
489
493
|
# Expectation chart[0]:
|
@@ -565,8 +569,7 @@ MSG
|
|
565
569
|
expect { subject.parse(wrong) }
|
566
570
|
.to raise_error(err, err_msg.chomp)
|
567
571
|
=begin
|
568
|
-
|
569
|
-
|
572
|
+
# This code is never reached (because of exception)
|
570
573
|
###################### S(0) == . a a c c
|
571
574
|
# Expectation chart[0]:
|
572
575
|
expected = [
|
@@ -592,7 +595,6 @@ MSG
|
|
592
595
|
]
|
593
596
|
compare_state_texts(parse_result.chart[2], expected)
|
594
597
|
|
595
|
-
|
596
598
|
###################### S(3) == a a c? c
|
597
599
|
state_set_3 = parse_result.chart[3]
|
598
600
|
expect(state_set_3.states).to be_empty # This is an error symptom
|
@@ -71,6 +71,31 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
71
71
|
expect(subject.states_rewriting(non_term)).to eq([state2])
|
72
72
|
end
|
73
73
|
|
74
|
+
it 'should list of ambiguous states' do
|
75
|
+
prod1 = double('fake-production1')
|
76
|
+
prod2 = double('fake-production2')
|
77
|
+
expect(subject.ambiguities.size).to eq(0)
|
78
|
+
|
79
|
+
# Adding states
|
80
|
+
subject.push_state(state1)
|
81
|
+
allow(dotted_rule1).to receive(:production).and_return(prod1)
|
82
|
+
allow(dotted_rule1).to receive(:"reduce_item?").and_return(true)
|
83
|
+
allow(dotted_rule1).to receive(:lhs).and_return(:something)
|
84
|
+
expect(subject.ambiguities.size).to eq(0)
|
85
|
+
allow(dotted_rule2).to receive(:production).and_return(prod2)
|
86
|
+
allow(dotted_rule2).to receive(:"reduce_item?").and_return(true)
|
87
|
+
allow(dotted_rule2).to receive(:lhs).and_return(:something_else)
|
88
|
+
subject.push_state(state2)
|
89
|
+
expect(subject.ambiguities.size).to eq(0)
|
90
|
+
dotted_rule3 = double('fake_dotted_rule3')
|
91
|
+
allow(dotted_rule3).to receive(:production).and_return(prod2)
|
92
|
+
allow(dotted_rule3).to receive(:"reduce_item?").and_return(true)
|
93
|
+
allow(dotted_rule3).to receive(:lhs).and_return(:something_else)
|
94
|
+
state3 = ParseState.new(dotted_rule3, 5)
|
95
|
+
subject.push_state(state3)
|
96
|
+
expect(subject.ambiguities[0]).to eq([state2, state3])
|
97
|
+
end
|
98
|
+
|
74
99
|
it 'should complain when impossible predecessor of parse state' do
|
75
100
|
subject.push_state(state1)
|
76
101
|
subject.push_state(state2)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -54,6 +54,9 @@ dependencies:
|
|
54
54
|
name: simplecov
|
55
55
|
requirement: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0.8'
|
57
60
|
- - ">="
|
58
61
|
- !ruby/object:Gem::Version
|
59
62
|
version: 0.8.0
|
@@ -61,6 +64,9 @@ dependencies:
|
|
61
64
|
prerelease: false
|
62
65
|
version_requirements: !ruby/object:Gem::Requirement
|
63
66
|
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0.8'
|
64
70
|
- - ">="
|
65
71
|
- !ruby/object:Gem::Version
|
66
72
|
version: 0.8.0
|
@@ -68,6 +74,9 @@ dependencies:
|
|
68
74
|
name: coveralls
|
69
75
|
requirement: !ruby/object:Gem::Requirement
|
70
76
|
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0.7'
|
71
80
|
- - ">="
|
72
81
|
- !ruby/object:Gem::Version
|
73
82
|
version: 0.7.0
|
@@ -75,6 +84,9 @@ dependencies:
|
|
75
84
|
prerelease: false
|
76
85
|
version_requirements: !ruby/object:Gem::Requirement
|
77
86
|
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.7'
|
78
90
|
- - ">="
|
79
91
|
- !ruby/object:Gem::Version
|
80
92
|
version: 0.7.0
|