rley 0.2.12 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d9b026414b1eb219f64ea54d5246bef7f38752a
4
- data.tar.gz: 27f2cd60e08fbe83d51c40edc1cd32ff909ff14d
3
+ metadata.gz: 3487e06363e33e6a4e0c0e5c05a74379913589ab
4
+ data.tar.gz: f0907aa318f914f1ae63082b4e7158f929b2e746
5
5
  SHA512:
6
- metadata.gz: 89bf91254a63bbdeeef9fe523da4662705beecb874f687415aa5e396b5ad244b6461a371b11357f651f014c3908942f05ecd226306279ce97b88fe1c2edf6dd5
7
- data.tar.gz: d4e4cd201db5de2b04497726580b1b83773429fef206df81aa7040ef1e34b5df4b134e86bbb53cbb1fc84949224de6cfb6cfb6a42403cf7a1465bfbd3a519c51
6
+ metadata.gz: bdda4a8211470a1290befe63adfd65c278a3376a706ca367542f7ed1a7bbacc072a0167f08f539d94272ca3145e8b4821ac1f9912dc0d2d8824e01768689ea10
7
+ data.tar.gz: a23fe29e4573e412275e5ae8fcc5da35763954e130085628353cfce78651f30b0bc5a08ce0130c6edfd317ed952d57029fbfec1842c6fd46f895475061289750
@@ -1,3 +1,14 @@
1
+ ### 0.2.14 / 2015-11-25
2
+ * [FIX] Method `StateSet#ambiguities` overlooked some ambiguities in parse sets.
3
+
4
+ ### 0.2.13 / 2015-11-25
5
+ * [NEW] method `Parsing#ambiguous?` returns true if more than one successful parse tree can be retrieved from parse results.
6
+ * [CHANGED] method `Parsing#success?`. New implementation that relies on start symbol derivation.
7
+ * [NEW] New method `Chart#start_symbol` added. Returns the start symbol of the grammar.
8
+ * [NEW] New method `StateSet#ambiguities` added. Returns the parse sets that are ambiguous (= distinct derivations for the same input tokens).
9
+ * [FIX] In special cases the parsing didn't work correctly when there was more than one
10
+ production rule for the start symbol of a grammar.
11
+
1
12
  ### 0.2.12 / 2015-11-20
2
13
  * [FIX] In special cases the parsing didn't work correctly when there was more than one
3
14
  production rule for the start symbol of a grammar.
@@ -62,14 +62,21 @@ tokens = tokenizer(valid_input, grammar_amb)
62
62
  # Step 5. Let the parser process the input
63
63
  result = parser.parse(tokens)
64
64
  puts "Parsing success? #{result.success?}"
65
- pp result
65
+ puts "Ambiguous parse? #{result.ambiguous?}"
66
+ # pp result
66
67
 
68
+ result.chart.state_sets.each_with_index do |aStateSet, index|
69
+ puts "State[#{index}]"
70
+ puts "========"
71
+ aStateSet.states.each { |aState| puts aState.to_s }
72
+ end
67
73
 
74
+ =begin
68
75
  ########################################
69
76
  # Step 6. Generate a parse tree from the parse result
70
77
  ptree = result.parse_tree
71
78
  pp ptree
72
- #=begin
79
+
73
80
  ########################################
74
81
  # Step 7. Render the parse tree (in JSON)
75
82
  # Let's create a parse tree visitor
@@ -81,5 +88,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
81
88
  # Now emit the parse tree as JSON on the console output
82
89
  puts "JSON rendering of the parse tree for '#{valid_input}' input:"
83
90
  renderer.render(visitor)
84
- #=end
91
+ =end
85
92
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.2.12'
6
+ Version = '0.2.14'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -6,8 +6,9 @@ module Rley # This module is used as a namespace
6
6
  # Also called a parse table
7
7
  # A one-dimensional array with n + 1 entries (n = number of input tokens).
8
8
  class Chart
9
+ # An array of state sets (one per input token + 1)
9
10
  attr_reader(:state_sets)
10
-
11
+
11
12
  # The level of trace details reported on stdout during the parse.
12
13
  # The possible values are:
13
14
  # 0: No trace output (default case)
@@ -33,6 +34,11 @@ module Rley # This module is used as a namespace
33
34
  def start_dotted_rule()
34
35
  return self[0].states.first.dotted_rule
35
36
  end
37
+
38
+ # Return the start (non-terminal) symbol of the grammar.
39
+ def start_symbol()
40
+ return state_sets.first.states[0].dotted_rule.lhs
41
+ end
36
42
 
37
43
  # Access the state set at given position
38
44
  def [](index)
@@ -25,10 +25,23 @@ module Rley # This module is used as a namespace
25
25
  # followed the syntax specified by the grammar)
26
26
  def success?()
27
27
  # Success can be detected as follows:
28
- # The last chart entry has a complete parse state
29
- # with the start symbol as lhs
30
- found = end_parse_state
31
- return !found.nil?
28
+ # The last chart entry has at least one complete parse state
29
+ # for the start symbol with an origin == 0
30
+ last_chart_entry = chart.state_sets[-1]
31
+ start_symbol = chart.start_symbol
32
+
33
+ # Retrieve all the complete states with start symbol in lhs
34
+ end_states = last_chart_entry.states_rewriting(start_symbol)
35
+ success_states = end_states.select { |st| st.origin == 0 }
36
+
37
+ return !success_states.empty?
38
+ end
39
+
40
+ # Return true if there is more than one complete state
41
+ # for the same lhs and same origin in any state set.
42
+ def ambiguous?()
43
+ found = chart.state_sets.find { |set| !set.ambiguities.empty? }
44
+ return ! found.nil?
32
45
  end
33
46
 
34
47
  # Factory method. Builds a ParseTree from the parse result.
@@ -48,7 +61,7 @@ module Rley # This module is used as a namespace
48
61
  # puts "Matching symbol: #{match_symbol}"
49
62
  # puts 'Parse tree:'
50
63
  # puts builder.root.to_string(0)
51
-
64
+
52
65
  # Place the symbol on left of the dot in the parse tree
53
66
  done = insert_matched_symbol(state_tracker, builder)
54
67
  break if done
@@ -130,16 +143,18 @@ module Rley # This module is used as a namespace
130
143
  return predicted + others
131
144
  end
132
145
 
133
- # Retrieve the parse state that represents a complete, successful parse
146
+ # Retrieve the parse state(s) that represent a complete, successful parse
134
147
  # After a successful parse, the last chart entry
135
- # has a parse state that involves the start production and
148
+ # has a parse state that involves the start symbol and
136
149
  # has a dot positioned at the end of its rhs.
137
- def end_parse_state()
138
- start_dotted_rule = chart.start_dotted_rule
139
- start_production = start_dotted_rule.production
150
+ def end_parse_states()
140
151
  last_chart_entry = chart.state_sets[-1]
141
- candidate_states = last_chart_entry.states_for(start_production)
142
- return candidate_states.find(&:complete?)
152
+ start_symbol = chart.start_symbol
153
+
154
+ # Retrieve all the complete states with start symbol in lhs
155
+ end_states = last_chart_entry.states_rewriting(start_symbol)
156
+
157
+ return end_states
143
158
  end
144
159
 
145
160
 
@@ -179,7 +194,7 @@ module Rley # This module is used as a namespace
179
194
  # Factory method. Creates and initializes a ParseStateTracker instance.
180
195
  def new_state_tracker()
181
196
  instance = ParseStateTracker.new(chart.last_index)
182
- instance.parse_state = end_parse_state
197
+ instance.parse_state = end_parse_states.first
183
198
 
184
199
  return instance
185
200
  end
@@ -35,6 +35,7 @@ module Rley # This module is used as a namespace
35
35
  def states_expecting(aSymbol)
36
36
  return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
37
37
  end
38
+
38
39
 
39
40
  # The list of complete ParseState that have the given non-terminal
40
41
  # symbol as the lhs of their production.
@@ -70,6 +71,25 @@ module Rley # This module is used as a namespace
70
71
  terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
71
72
  return terminals.uniq
72
73
  end
74
+
75
+ # Return an Array of Arrays of ambiguous parse states.
76
+ def ambiguities()
77
+ complete_states = states.select { |st| st.complete? }
78
+ return [] if complete_states.size <= 1
79
+
80
+ # Group parse states by the identity (object_id) of their lhs symbol
81
+ groupings = complete_states.group_by do |st|
82
+ "#{st.dotted_rule.lhs.object_id}"
83
+ end
84
+
85
+ # Retain the groups having more than one element.
86
+ ambiguous_groups = []
87
+ groupings.each_value do |a_group|
88
+ ambiguous_groups << a_group if a_group.size > 1
89
+ end
90
+
91
+ return ambiguous_groups
92
+ end
73
93
 
74
94
  private
75
95
 
@@ -16,6 +16,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
16
16
  module Parser # Open this namespace to avoid module qualifier prefixes
17
17
  describe Chart do
18
18
  let(:count_token) { 20 }
19
+ let(:sample_start_symbol) { double('fake_non-terminal') }
19
20
  let(:dotted_rule) { double('fake-dotted-item') }
20
21
 
21
22
  let(:output) { StringIO.new('', 'w') }
@@ -28,10 +29,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
28
29
  let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
29
30
 
30
31
  # Default instantiation rule
31
- subject { Chart.new([ dotted_rule ], count_token, sample_tracer) }
32
+ subject do
33
+ allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
34
+ Chart.new([ dotted_rule ], count_token, sample_tracer)
35
+ end
32
36
 
33
37
  context 'Initialization:' do
34
38
  it 'should be created with start dotted rule, token count, tracer' do
39
+ allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
35
40
  expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
36
41
  .not_to raise_error
37
42
  end
@@ -51,6 +56,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
51
56
  it 'should know the start dotted rule' do
52
57
  expect(subject.start_dotted_rule).to eq(dotted_rule)
53
58
  end
59
+
60
+ it 'should know the start symbol' do
61
+ expect(subject.start_symbol).to eq(sample_start_symbol)
62
+ end
54
63
 
55
64
  it 'should have at least one non-empty state set' do
56
65
  expect(subject.last_index).to eq(0)
@@ -163,6 +163,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
163
163
  it 'should parse a valid simple input' do
164
164
  parse_result = subject.parse(grm1_tokens)
165
165
  expect(parse_result.success?).to eq(true)
166
+ expect(parse_result.ambiguous?).to eq(false)
166
167
 
167
168
  ######################
168
169
  # Expectation chart[0]:
@@ -263,6 +264,7 @@ SNIPPET
263
264
  instance = EarleyParser.new(grammar_expr)
264
265
  parse_result = instance.parse(grm2_tokens)
265
266
  expect(parse_result.success?).to eq(true)
267
+ expect(parse_result.ambiguous?).to eq(false)
266
268
 
267
269
  ###################### S(0): . 2 + 3 * 4
268
270
  # Expectation chart[0]:
@@ -397,6 +399,7 @@ SNIPPET
397
399
  expect { instance.parse(tokens) }.not_to raise_error
398
400
  parse_result = instance.parse(tokens)
399
401
  expect(parse_result.success?).to eq(true)
402
+ expect(parse_result.ambiguous?).to eq(true)
400
403
 
401
404
  ###################### S(0): . 2 + 3 * 4
402
405
  # Expectation chart[0]:
@@ -484,6 +487,7 @@ SNIPPET
484
487
  expect { instance.parse(tokens) }.not_to raise_error
485
488
  parse_result = instance.parse(tokens)
486
489
  expect(parse_result.success?).to eq(true)
490
+ expect(parse_result.ambiguous?).to eq(true)
487
491
 
488
492
  ###################### S(0): . abc + def + ghi
489
493
  # Expectation chart[0]:
@@ -565,8 +569,7 @@ MSG
565
569
  expect { subject.parse(wrong) }
566
570
  .to raise_error(err, err_msg.chomp)
567
571
  =begin
568
- expect(parse_result.success?).to eq(false)
569
-
572
+ # This code is never reached (because of exception)
570
573
  ###################### S(0) == . a a c c
571
574
  # Expectation chart[0]:
572
575
  expected = [
@@ -592,7 +595,6 @@ MSG
592
595
  ]
593
596
  compare_state_texts(parse_result.chart[2], expected)
594
597
 
595
-
596
598
  ###################### S(3) == a a c? c
597
599
  state_set_3 = parse_result.chart[3]
598
600
  expect(state_set_3.states).to be_empty # This is an error symptom
@@ -170,7 +170,7 @@ SNIPPET
170
170
  return [state_tracker, builder]
171
171
  end
172
172
 
173
-
173
+ # Spiff
174
174
  it 'should create the root of a parse tree' do
175
175
  (state_tracker, builder) = prepare_parse_tree(subject)
176
176
  # The root node should correspond to the start symbol and
@@ -71,6 +71,31 @@ module Rley # Open this namespace to avoid module qualifier prefixes
71
71
  expect(subject.states_rewriting(non_term)).to eq([state2])
72
72
  end
73
73
 
74
+ it 'should list of ambiguous states' do
75
+ prod1 = double('fake-production1')
76
+ prod2 = double('fake-production2')
77
+ expect(subject.ambiguities.size).to eq(0)
78
+
79
+ # Adding states
80
+ subject.push_state(state1)
81
+ allow(dotted_rule1).to receive(:production).and_return(prod1)
82
+ allow(dotted_rule1).to receive(:"reduce_item?").and_return(true)
83
+ allow(dotted_rule1).to receive(:lhs).and_return(:something)
84
+ expect(subject.ambiguities.size).to eq(0)
85
+ allow(dotted_rule2).to receive(:production).and_return(prod2)
86
+ allow(dotted_rule2).to receive(:"reduce_item?").and_return(true)
87
+ allow(dotted_rule2).to receive(:lhs).and_return(:something_else)
88
+ subject.push_state(state2)
89
+ expect(subject.ambiguities.size).to eq(0)
90
+ dotted_rule3 = double('fake_dotted_rule3')
91
+ allow(dotted_rule3).to receive(:production).and_return(prod2)
92
+ allow(dotted_rule3).to receive(:"reduce_item?").and_return(true)
93
+ allow(dotted_rule3).to receive(:lhs).and_return(:something_else)
94
+ state3 = ParseState.new(dotted_rule3, 5)
95
+ subject.push_state(state3)
96
+ expect(subject.ambiguities[0]).to eq([state2, state3])
97
+ end
98
+
74
99
  it 'should complain when impossible predecessor of parse state' do
75
100
  subject.push_state(state1)
76
101
  subject.push_state(state2)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-20 00:00:00.000000000 Z
11
+ date: 2015-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -54,6 +54,9 @@ dependencies:
54
54
  name: simplecov
55
55
  requirement: !ruby/object:Gem::Requirement
56
56
  requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '0.8'
57
60
  - - ">="
58
61
  - !ruby/object:Gem::Version
59
62
  version: 0.8.0
@@ -61,6 +64,9 @@ dependencies:
61
64
  prerelease: false
62
65
  version_requirements: !ruby/object:Gem::Requirement
63
66
  requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '0.8'
64
70
  - - ">="
65
71
  - !ruby/object:Gem::Version
66
72
  version: 0.8.0
@@ -68,6 +74,9 @@ dependencies:
68
74
  name: coveralls
69
75
  requirement: !ruby/object:Gem::Requirement
70
76
  requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: '0.7'
71
80
  - - ">="
72
81
  - !ruby/object:Gem::Version
73
82
  version: 0.7.0
@@ -75,6 +84,9 @@ dependencies:
75
84
  prerelease: false
76
85
  version_requirements: !ruby/object:Gem::Requirement
77
86
  requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.7'
78
90
  - - ">="
79
91
  - !ruby/object:Gem::Version
80
92
  version: 0.7.0