rley 0.4.01 → 0.4.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +2 -2
  4. data/README.md +3 -3
  5. data/examples/NLP/mini_en_demo.rb +1 -1
  6. data/examples/data_formats/JSON/JSON_demo.rb +1 -0
  7. data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
  8. data/examples/general/calc/calc_lexer.rb +2 -2
  9. data/lib/rley.rb +1 -1
  10. data/lib/rley/constants.rb +1 -1
  11. data/lib/rley/formatter/debug.rb +2 -2
  12. data/lib/rley/formatter/json.rb +4 -4
  13. data/lib/rley/parse_tree_visitor.rb +9 -9
  14. data/lib/rley/parser/base_parser.rb +1 -1
  15. data/lib/rley/parser/gfg_parsing.rb +9 -0
  16. data/lib/rley/parser/parse_tree_builder.rb +176 -126
  17. data/lib/rley/parser/parse_tree_factory.rb +57 -0
  18. data/lib/rley/ptree/non_terminal_node.rb +10 -9
  19. data/lib/rley/ptree/parse_tree_node.rb +10 -5
  20. data/lib/rley/ptree/terminal_node.rb +14 -6
  21. data/lib/rley/sppf/sppf_node.rb +2 -2
  22. data/lib/rley/{parser → tokens}/token.rb +1 -4
  23. data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
  24. data/spec/rley/formatter/debug_spec.rb +16 -16
  25. data/spec/rley/formatter/json_spec.rb +8 -8
  26. data/spec/rley/parse_forest_visitor_spec.rb +1 -1
  27. data/spec/rley/parse_tree_visitor_spec.rb +28 -28
  28. data/spec/rley/parser/error_reason_spec.rb +3 -3
  29. data/spec/rley/parser/gfg_chart_spec.rb +2 -2
  30. data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
  31. data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
  32. data/spec/rley/parser/groucho_spec.rb +1 -1
  33. data/spec/rley/parser/parse_tracer_spec.rb +2 -2
  34. data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
  35. data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
  36. data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
  37. data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
  38. data/spec/rley/ptree/terminal_node_spec.rb +7 -12
  39. data/spec/rley/sppf/alternative_node_spec.rb +2 -2
  40. data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
  41. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  42. data/spec/rley/support/expectation_helper.rb +1 -1
  43. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  44. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  45. data/spec/rley/support/grammar_helper.rb +3 -3
  46. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  47. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  48. data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
  49. data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
  50. metadata +11 -17
  51. data/lib/rley/parser/chart.rb +0 -82
  52. data/lib/rley/parser/earley_parser.rb +0 -203
  53. data/lib/rley/parser/parsing.rb +0 -265
  54. data/spec/rley/parser/chart_spec.rb +0 -120
  55. data/spec/rley/parser/earley_parser_spec.rb +0 -710
  56. data/spec/rley/parser/parsing_spec.rb +0 -408
@@ -1,120 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/terminal'
5
- require_relative '../../../lib/rley/syntax/non_terminal'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/parser/token'
8
- require_relative '../../../lib/rley/parser/dotted_item'
9
- require_relative '../../../lib/rley/parser/parse_state'
10
- require_relative '../../../lib/rley/parser/parse_tracer'
11
-
12
- # Load the class under test
13
- require_relative '../../../lib/rley/parser/chart'
14
-
15
- module Rley # Open this namespace to avoid module qualifier prefixes
16
- module Parser # Open this namespace to avoid module qualifier prefixes
17
- describe Chart do
18
- let(:count_token) { 20 }
19
- let(:sample_start_symbol) { double('fake_non-terminal') }
20
- let(:dotted_rule) { double('fake-dotted-item') }
21
-
22
- let(:output) { StringIO.new('', 'w') }
23
-
24
- let(:token_seq) do
25
- literals = %w(I saw John with a dog)
26
- literals.map { |lexeme| Token.new(lexeme, nil) }
27
- end
28
-
29
- let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
30
-
31
- # Default instantiation rule
32
- subject do
33
- allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
34
- Chart.new([ dotted_rule ], count_token, sample_tracer)
35
- end
36
-
37
- context 'Initialization:' do
38
- it 'should be created with start dotted rule, token count, tracer' do
39
- allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
40
- expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
41
- .not_to raise_error
42
- end
43
-
44
- it 'should have a seed state in first state_set' do
45
- seed_state = ParseState.new(dotted_rule, 0)
46
- expect(subject[0].states).to eq([seed_state])
47
-
48
- # Shorthand syntax
49
- expect(subject[0].first).to eq(seed_state)
50
- end
51
-
52
- it 'should have the correct state_set count' do
53
- expect(subject.state_sets.size).to eq(count_token + 1)
54
- end
55
-
56
- it 'should know the start dotted rule' do
57
- expect(subject.start_dotted_rule).to eq(dotted_rule)
58
- end
59
-
60
- it 'should know the start symbol' do
61
- expect(subject.start_symbol).to eq(sample_start_symbol)
62
- end
63
-
64
- it 'should have at least one non-empty state set' do
65
- expect(subject.last_index).to eq(0)
66
- end
67
-
68
- it 'should reference a tracer' do
69
- expect(subject.tracer).to eq(sample_tracer)
70
- end
71
- end # context
72
-
73
- context 'Provided services:' do
74
- let(:t_a) { Syntax::Terminal.new('A') }
75
- let(:t_b) { Syntax::Terminal.new('B') }
76
- let(:t_c) { Syntax::Terminal.new('C') }
77
- let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
78
-
79
- let(:sample_prod) do
80
- Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
81
- end
82
-
83
- let(:origin_val) { 3 }
84
- let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
85
- let(:complete_rule) { DottedItem.new(sample_prod, 3) }
86
- let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
87
- let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
88
-
89
- # Factory method.
90
- def parse_state(origin, aDottedRule)
91
- ParseState.new(aDottedRule, origin)
92
- end
93
-
94
-
95
- it 'should trace its initialization' do
96
- subject[0] # Force constructor call here
97
- expectation = <<-SNIPPET
98
- ['I', 'saw', 'John', 'with', 'a', 'dog']
99
- |. I . saw . John . with . a . dog .|
100
- |> . . . . . .| [0:0] sentence => A B . C
101
- SNIPPET
102
- expect(output.string).to eq(expectation)
103
- end
104
-
105
- it 'should trace parse state pushing' do
106
- subject[0] # Force constructor call here
107
- output.string = ''
108
-
109
- subject.push_state(dotted_rule, 3, 5, :prediction)
110
- expectation = <<-SNIPPET
111
- |. . . > .| [3:5] sentence => A B . C
112
- SNIPPET
113
- expect(output.string).to eq(expectation)
114
- end
115
- end # context
116
- end # describe
117
- end # module
118
- end # module
119
-
120
- # End of file
@@ -1,710 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/verbatim_symbol'
5
- require_relative '../../../lib/rley/syntax/non_terminal'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
8
- require_relative '../../../lib/rley/parser/token'
9
- require_relative '../../../lib/rley/parser/dotted_item'
10
- require_relative '../support/ambiguous_grammar_helper'
11
- # Load the class under test
12
- require_relative '../../../lib/rley/parser/earley_parser'
13
-
14
- module Rley # Open this namespace to avoid module qualifier prefixes
15
- module Parser # Open this namespace to avoid module qualifier prefixes
16
- describe EarleyParser do
17
- =begin
18
- let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
19
- let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
20
- let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
21
- let(:number) do
22
- number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
23
- Syntax::Literal.new('number', number_pattern)
24
- end
25
- let(:string) do
26
- string_pattern = /"([^\\"]|\\.)*"/
27
- Syntax::Literal('string', string_pattern)
28
- end
29
- let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
30
- let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
31
- let(:comma) { Syntax::VerbatimSymbol.new(',') }
32
- let(:array) { Syntax::NonTerminal.new('Array') }
33
- let(:object) { Syntax::NonTerminal.new('Object') }
34
-
35
- let(:array_prod) do
36
- Production.new(array, )
37
- end
38
- =end
39
-
40
-
41
- # Grammar 1: A very simple language
42
- # (based on example in N. Wirth "Compiler Construction" book, p. 6)
43
- # S => A.
44
- # A => "a" A "c".
45
- # A => "b".
46
- # Let's create the grammar piece by piece
47
- let(:nt_S) { Syntax::NonTerminal.new('S') }
48
- let(:nt_A) { Syntax::NonTerminal.new('A') }
49
- let(:a_) { Syntax::VerbatimSymbol.new('a') }
50
- let(:b_) { Syntax::VerbatimSymbol.new('b') }
51
- let(:c_) { Syntax::VerbatimSymbol.new('c') }
52
- let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
53
- let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
54
- let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
55
- let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
56
-
57
- # Helper method that mimics the output of a tokenizer
58
- # for the language specified by grammar_abc
59
- def grm1_tokens()
60
- tokens = [
61
- Token.new('a', a_),
62
- Token.new('a', a_),
63
- Token.new('b', b_),
64
- Token.new('c', c_),
65
- Token.new('c', c_)
66
- ]
67
-
68
- return tokens
69
- end
70
-
71
-
72
- # Grammar 2: A simple arithmetic expression language
73
- # (based on example in article on Earley's algorithm in Wikipedia)
74
- # P ::= S.
75
- # S ::= S "+" M.
76
- # S ::= M.
77
- # M ::= M "*" M.
78
- # M ::= T.
79
- # T ::= an integer number token.
80
- # Let's create the grammar piece by piece
81
- let(:nt_P) { Syntax::NonTerminal.new('P') }
82
- let(:nt_M) { Syntax::NonTerminal.new('M') }
83
- let(:nt_T) { Syntax::NonTerminal.new('T') }
84
- let(:plus) { Syntax::VerbatimSymbol.new('+') }
85
- let(:star) { Syntax::VerbatimSymbol.new('*') }
86
- let(:integer) do
87
- integer_pattern = /[-+]?[0-9]+/ # Decimal notation
88
- Syntax::Literal.new('integer', integer_pattern)
89
- end
90
- let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
91
- let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
92
- let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
93
- let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_T]) }
94
- let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
95
- let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
96
- let(:grammar_expr) do
97
- all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
98
- Syntax::Grammar.new(all_prods)
99
- end
100
-
101
- # Helper method that mimics the output of a tokenizer
102
- # for the language specified by grammar_expr
103
- def grm2_tokens()
104
- tokens = [
105
- Token.new('2', integer),
106
- Token.new('+', plus),
107
- Token.new('3', integer),
108
- Token.new('*', star),
109
- Token.new('4', integer)
110
- ]
111
-
112
- return tokens
113
- end
114
-
115
-
116
- # Default instantiation rule
117
- subject { EarleyParser.new(grammar_abc) }
118
-
119
- context 'Initialization:' do
120
- it 'should be created with a grammar' do
121
- expect { EarleyParser.new(grammar_abc) }.not_to raise_error
122
- expect { EarleyParser.new(grammar_expr) }.not_to raise_error
123
- end
124
-
125
- it 'should know its grammar' do
126
- expect(subject.grammar).to eq(grammar_abc)
127
- end
128
-
129
- it 'should know its dotted items' do
130
- expect(subject.dotted_items.size).to eq(8)
131
- end
132
-
133
- it 'should have its start mapping initialized' do
134
- expect(subject.start_mapping.size).to eq(2)
135
-
136
- start_items_S = subject.start_mapping[nt_S]
137
- expect(start_items_S.size).to eq(1)
138
- expect(start_items_S[0].production).to eq(prod_S)
139
-
140
- start_items_A = subject.start_mapping[nt_A]
141
- expect(start_items_A.size).to eq(2)
142
-
143
- # Assuming that dotted_items are created in same order
144
- # as productions in the grammar.
145
- expect(start_items_A[0].production).to eq(prod_A1)
146
- expect(start_items_A[1].production).to eq(prod_A2)
147
- end
148
-
149
- it 'should have its next mapping initialized' do
150
- expect(subject.next_mapping.size).to eq(5)
151
- end
152
- end # context
153
-
154
- context 'Parsing: ' do
155
- # Helper method. Compare the data from all the parse states
156
- # of a given StateSet with an array of expectation string.
157
- def compare_state_texts(aStateSet, expectations)
158
- (0...expectations.size).each do |i|
159
- expect(aStateSet.states[i].to_s).to eq(expectations[i])
160
- end
161
- end
162
-
163
- it 'should parse a valid simple input' do
164
- parse_result = subject.parse(grm1_tokens)
165
- expect(parse_result.success?).to eq(true)
166
- expect(parse_result.ambiguous?).to eq(false)
167
-
168
- ######################
169
- # Expectation chart[0]:
170
- expected = [
171
- 'S => . A | 0', # start rule
172
- "A => . 'a' A 'c' | 0", # predict from 0
173
- "A => . 'b' | 0" # predict from 0
174
- ]
175
- compare_state_texts(parse_result.chart[0], expected)
176
-
177
- ######################
178
- # Expectation chart[1]:
179
- expected = [
180
- "A => 'a' . A 'c' | 0", # scan from S(0) 1
181
- "A => . 'a' A 'c' | 1", # predict from 0
182
- "A => . 'b' | 1" # predict from 0
183
- ]
184
- state_set_1 = parse_result.chart[1]
185
- expect(state_set_1.states.size).to eq(3)
186
- compare_state_texts(state_set_1, expected)
187
-
188
- ######################
189
- # Expectation chart[2]:
190
- expected = [
191
- "A => 'a' . A 'c' | 1", # scan from S(0) 1
192
- "A => . 'a' A 'c' | 2", # predict from 0
193
- "A => . 'b' | 2" # predict from 0
194
- ]
195
- state_set_2 = parse_result.chart[2]
196
- expect(state_set_2.states.size).to eq(3)
197
- compare_state_texts(state_set_2, expected)
198
-
199
- ######################
200
- # Expectation chart[3]:
201
- expected = [
202
- "A => 'b' . | 2", # scan from S(2) 2
203
- "A => 'a' A . 'c' | 1" # complete from 0 and S(2) 0
204
- ]
205
- state_set_3 = parse_result.chart[3]
206
- expect(state_set_3.states.size).to eq(2)
207
- compare_state_texts(state_set_3, expected)
208
-
209
-
210
- ######################
211
- # Expectation chart[4]:
212
- expected = [
213
- "A => 'a' A 'c' . | 1", # scan from S(3) 1
214
- "A => 'a' A . 'c' | 0" # complete from 0 and S(1) 0
215
- ]
216
- state_set_4 = parse_result.chart[4]
217
- expect(state_set_4.states.size).to eq(2)
218
- compare_state_texts(state_set_4, expected)
219
-
220
- ######################
221
- # Expectation chart[5]:
222
- expected = [
223
- "A => 'a' A 'c' . | 0", # scan from S(4) 1
224
- 'S => A . | 0' # complete from 0 and S(0) 0
225
- ]
226
- state_set_5 = parse_result.chart[5]
227
- expect(state_set_5.states.size).to eq(2)
228
- compare_state_texts(state_set_5, expected)
229
- end
230
-
231
- it 'should trace a parse with level 1' do
232
- # Substitute temporarily $stdout by a StringIO
233
- prev_ostream = $stdout
234
- $stdout = StringIO.new('', 'w')
235
-
236
- trace_level = 1
237
- subject.parse(grm1_tokens, trace_level)
238
- expectations = <<-SNIPPET
239
- ['a', 'a', 'b', 'c', 'c']
240
- |. a . a . b . c . c .|
241
- |> . . . . .| [0:0] S => . A
242
- |> . . . . .| [0:0] A => . 'a' A 'c'
243
- |> . . . . .| [0:0] A => . 'b'
244
- |[---] . . . .| [0:1] A => 'a' . A 'c'
245
- |. > . . . .| [1:1] A => . 'a' A 'c'
246
- |. > . . . .| [1:1] A => . 'b'
247
- |. [---] . . .| [1:2] A => 'a' . A 'c'
248
- |. . > . . .| [2:2] A => . 'a' A 'c'
249
- |. . > . . .| [2:2] A => . 'b'
250
- |. . [---] . .| [2:3] A => 'b' .
251
- |. [-------> . .| [1:3] A => 'a' A . 'c'
252
- |. . . [---] .| [3:4] A => 'a' A 'c' .
253
- |[---------------> .| [0:4] A => 'a' A . 'c'
254
- |. . . . [---]| [4:5] A => 'a' A 'c' .
255
- |[===================]| [0:5] S => A .
256
- SNIPPET
257
- expect($stdout.string).to eq(expectations)
258
-
259
- # Restore standard output stream
260
- $stdout = prev_ostream
261
- end
262
-
263
- it 'should parse a valid simple expression' do
264
- instance = EarleyParser.new(grammar_expr)
265
- parse_result = instance.parse(grm2_tokens)
266
- expect(parse_result.success?).to eq(true)
267
- expect(parse_result.ambiguous?).to eq(false)
268
-
269
- ###################### S(0): . 2 + 3 * 4
270
- # Expectation chart[0]:
271
- expected = [
272
- 'P => . S | 0', # start rule
273
- "S => . S '+' M | 0", # predict from (1)
274
- 'S => . M | 0', # predict from (1)
275
- "M => . M '*' T | 0", # predict from (4)
276
- 'M => . T | 0', # predict from (4)
277
- 'T => . integer | 0' # predict from (4)
278
- ]
279
- compare_state_texts(parse_result.chart[0], expected)
280
-
281
-
282
- ###################### S(1): 2 . + 3 * 4
283
- # Expectation chart[1]:
284
- expected = [
285
- 'T => integer . | 0', # scan from S(0) 6
286
- 'M => T . | 0', # complete from (1) and S(0) 5
287
- 'S => M . | 0', # complete from (2) and S(0) 3
288
- "M => M . '*' T | 0", # complete from (2) and S(0) 4
289
- 'P => S . | 0', # complete from (4) and S(0) 1
290
- "S => S . '+' M | 0" # complete from (4) and S(0) 2
291
- ]
292
- compare_state_texts(parse_result.chart[1], expected)
293
-
294
-
295
- ###################### S(2): 2 + . 3 * 4
296
- # Expectation chart[2]:
297
- expected = [
298
- "S => S '+' . M | 0", # scan from S(1) 6
299
- "M => . M '*' T | 2", # predict from (1)
300
- 'M => . T | 2', # predict from (1)
301
- 'T => . integer | 2' # predict from (3)
302
- ]
303
- compare_state_texts(parse_result.chart[2], expected)
304
-
305
-
306
- ###################### S(3): 2 + 3 . * 4
307
- # Expectation chart[3]:
308
- expected = [
309
- 'T => integer . | 2', # scan from S(2) 4
310
- 'M => T . | 2', # complete from (1) and S(2) 3
311
- "S => S '+' M . | 0", # complete from (1) and S(2) 1
312
- "M => M . '*' T | 2", # complete from (2) and S(2) 2
313
- 'P => S . | 0' # complete from (4) and S(0) 1
314
- ]
315
- compare_state_texts(parse_result.chart[3], expected)
316
-
317
- ###################### S(4): 2 + 3 * . 4
318
- # Expectation chart[4]:
319
- expected = [
320
- "M => M '*' . T | 2", # scan from S(3) 4
321
- 'T => . integer | 4' # predict from (1)
322
- ]
323
- compare_state_texts(parse_result.chart[4], expected)
324
-
325
- ###################### S(5): 2 + 3 * 4 .
326
- # Expectation chart[5]:
327
- expected = [
328
- 'T => integer . | 4', # scan from S(4) 2
329
- "M => M '*' T . | 2", # complete from (1) and S(4) 1
330
- "S => S '+' M . | 0", # complete from (2) and S(2) 1
331
- "M => M . '*' T | 2", # complete from (2) and S(2) 2
332
- 'P => S . | 0' # complete from (3) and S(2) 2
333
- ]
334
- compare_state_texts(parse_result.chart[5], expected)
335
- end
336
-
337
- it 'should parse a nullable grammar' do
338
- # Simple but problematic grammar for the original Earley parser
339
- # (based on example in D. Grune, C. Jacobs "Parsing Techniques" book)
340
- # Ss => A A 'x';
341
- # A => ;
342
- t_x = Syntax::VerbatimSymbol.new('x')
343
-
344
- builder = Syntax::GrammarBuilder.new
345
- builder.add_terminals(t_x)
346
- builder.add_production('Ss' => %w(A A x))
347
- builder.add_production('A' => [])
348
- tokens = [ Token.new('x', t_x) ]
349
-
350
- instance = EarleyParser.new(builder.grammar)
351
- expect { instance.parse(tokens) }.not_to raise_error
352
- parse_result = instance.parse(tokens)
353
- expect(parse_result.success?).to eq(true)
354
- ###################### S(0): . x
355
- # Expectation chart[0]:
356
- expected = [
357
- "Ss => . A A 'x' | 0", # Start rule
358
- 'A => . | 0', # predict from (1)
359
- "Ss => A . A 'x' | 0", # modified predict from (1)
360
- "Ss => A A . 'x' | 0" # modified predict from (1)
361
- ]
362
- compare_state_texts(parse_result.chart[0], expected)
363
-
364
- ###################### S(1): x .
365
- # Expectation chart[1]:
366
- expected = [
367
- "Ss => A A 'x' . | 0" # scan from S(0) 4
368
- ]
369
- compare_state_texts(parse_result.chart[1], expected)
370
- end
371
-
372
- it 'should parse an ambiguous grammar (I)' do
373
- # Grammar 3: An ambiguous arithmetic expression language
374
- # (based on example in article on Earley's algorithm in Wikipedia)
375
- # P => S.
376
- # S => S "+" S.
377
- # S => S "*" S.
378
- # S => L.
379
- # L => an integer number token.
380
- t_int = Syntax::Literal.new('integer', /[-+]?\d+/)
381
- t_plus = Syntax::VerbatimSymbol.new('+')
382
- t_star = Syntax::VerbatimSymbol.new('*')
383
-
384
- builder = Syntax::GrammarBuilder.new
385
- builder.add_terminals(t_int, t_plus, t_star)
386
- builder.add_production('P' => 'S')
387
- builder.add_production('S' => %w(S + S))
388
- builder.add_production('S' => %w(S * S))
389
- builder.add_production('S' => 'L')
390
- builder.add_production('L' => 'integer')
391
- tokens = [
392
- Token.new('2', t_int),
393
- Token.new('+', t_plus),
394
- Token.new('3', t_int),
395
- Token.new('*', t_star),
396
- Token.new('4', t_int)
397
- ]
398
- instance = EarleyParser.new(builder.grammar)
399
- expect { instance.parse(tokens) }.not_to raise_error
400
- parse_result = instance.parse(tokens)
401
- expect(parse_result.success?).to eq(true)
402
- expect(parse_result.ambiguous?).to eq(true)
403
-
404
- ###################### S(0): . 2 + 3 * 4
405
- # Expectation chart[0]:
406
- expected = [
407
- 'P => . S | 0', # Start rule
408
- "S => . S '+' S | 0", # predict from (1)
409
- "S => . S '*' S | 0", # predict from (1)
410
- 'S => . L | 0', # predict from (1)
411
- 'L => . integer | 0' # predict from (4)
412
- ]
413
- compare_state_texts(parse_result.chart[0], expected)
414
-
415
- ###################### S(1): 2 . + 3 * 4
416
- # Expectation chart[1]:
417
- expected = [
418
- 'L => integer . | 0', # scan from S(0) 4
419
- 'S => L . | 0', # complete from (1) and S(0) 4
420
- 'P => S . | 0', # complete from (2) and S(0) 1
421
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
422
- "S => S . '*' S | 0", # complete from (2) and S(0) 3
423
- ]
424
- compare_state_texts(parse_result.chart[1], expected)
425
-
426
- ###################### S(2): 2 + . 3 * 4
427
- # Expectation chart[2]:
428
- expected = [
429
- "S => S '+' . S | 0", # scan from S(1) 4
430
- "S => . S '+' S | 2", # predict from (1)
431
- "S => . S '*' S | 2", # predict from (1)
432
- 'S => . L | 2', # predict from (1)
433
- 'L => . integer | 2' # predict from (4)
434
- ]
435
- compare_state_texts(parse_result.chart[2], expected)
436
-
437
- ###################### S(3): 2 + 3 . * 4
438
- # Expectation chart[3]:
439
- expected = [
440
- 'L => integer . | 2', # scan from S(2) 5
441
- 'S => L . | 2', # complete from (1) and S(2) 4
442
- "S => S '+' S . | 0", # complete from (2) and S(2) 1
443
- "S => S . '+' S | 2", # complete from (2) and S(2) 2
444
- "S => S . '*' S | 2", # complete from (2) and S(2) 3
445
- 'P => S . | 0', # complete from (2) and S(0) 1
446
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
447
- "S => S . '*' S | 0", # complete from (2) and S(0) 3
448
- ]
449
- compare_state_texts(parse_result.chart[3], expected)
450
-
451
- ###################### S(4): 2 + 3 * . 4
452
- # Expectation chart[4]:
453
- expected = [
454
- "S => S '*' . S | 2", # scan from S(3) 5
455
- "S => S '*' . S | 0", # scan from S(3) 8
456
- "S => . S '+' S | 4", # predict from (1)
457
- "S => . S '*' S | 4", # predict from (1)
458
- 'S => . L | 4', # predict from (1)
459
- 'L => . integer | 4' # predict from (4)
460
- ]
461
- compare_state_texts(parse_result.chart[4], expected)
462
-
463
- ###################### S(5): 2 + 3 * 4 .
464
- # Expectation chart[5]:
465
- expected = [
466
- 'L => integer . | 4', # scan from S(4) 6
467
- 'S => L . | 4', # complete from (1) and S(4) 5
468
- "S => S '*' S . | 2", # complete from (2) and S(4) 1
469
- "S => S '*' S . | 0", # complete from (2) and S(4) 2
470
- "S => S . '+' S | 4", # complete from (2) and S(4) 3
471
- "S => S . '*' S | 4", # complete from (2) and S(4) 4
472
- "S => S '+' S . | 0", # complete from (2) and S(2) 1
473
- "S => S . '+' S | 2", # complete from (2) and S(2) 2
474
- "S => S . '*' S | 2", # complete from (2) and S(2) 3
475
- 'P => S . | 0', # complete from (2) and S(0) 1
476
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
477
- "S => S . '*' S | 0" # complete from (2) and S(0) 3
478
- ]
479
- compare_state_texts(parse_result.chart[5], expected)
480
- end
481
-
482
- it 'should parse an ambiguous grammar (II)' do
483
- extend(AmbiguousGrammarHelper)
484
- grammar = grammar_builder.grammar
485
- instance = EarleyParser.new(grammar)
486
- tokens = tokenize('abc + def + ghi', grammar)
487
- expect { instance.parse(tokens) }.not_to raise_error
488
- parse_result = instance.parse(tokens)
489
- expect(parse_result.success?).to eq(true)
490
- expect(parse_result.ambiguous?).to eq(true)
491
-
492
- ###################### S(0): . abc + def + ghi
493
- # Expectation chart[0]:
494
- expected = [
495
- 'S => . E | 0', # Start rule
496
- 'E => . E + E | 0', # predict from (1)
497
- 'E => . id | 0' # predict from (1)
498
- ]
499
- compare_state_texts(parse_result.chart[0], expected)
500
-
501
- ###################### S(1): abc . + def + ghi
502
- # Expectation chart[1]:
503
- expected = [
504
- 'E => id . | 0', # scan from S(0) 3
505
- 'S => E . | 0', # complete from (1) and S(0) 2
506
- 'E => E . + E | 0' # complete from (1) and S(0) 3
507
- ]
508
- compare_state_texts(parse_result.chart[1], expected)
509
-
510
- ###################### S(2): abc + . def + ghi
511
- # Expectation chart[2]:
512
- expected = [
513
- 'E => E + . E | 0', # Scan from S(1) 3
514
- 'E => . E + E | 2', # predict from (1)
515
- 'E => . id | 2' # predict from (1)
516
- ]
517
- compare_state_texts(parse_result.chart[2], expected)
518
-
519
- ###################### S(3): abc + def . + ghi
520
- # Expectation chart[3]:
521
- expected = [
522
- 'E => id . | 2', # Scan from S(2) 3
523
- 'E => E + E . | 0', # complete from (1) and S(2) 1
524
- 'E => E . + E | 2', # complete from (1) and S(2) 2
525
- 'S => E . | 0', # complete from (1) and S(0) 1
526
- 'E => E . + E | 0' # complete from (1) and S(0) 2
527
- ]
528
- compare_state_texts(parse_result.chart[3], expected)
529
-
530
- ###################### S(4): abc + def + . ghi
531
- # Expectation chart[4]:
532
- expected = [
533
- 'E => E + . E | 2', # Scan from S(3) 3
534
- 'E => E + . E | 0', # Scan from S(3) 5
535
- 'E => . E + E | 4', # predict from (1)
536
- 'E => . id | 4' # predict from (1)
537
- ]
538
- compare_state_texts(parse_result.chart[4], expected)
539
-
540
- ###################### S(5): abc + def + ghi .
541
- # Expectation chart[5]:
542
- expected = [
543
- 'E => id . | 4', # Scan from S(4) 4
544
- 'E => E + E . | 2', # complete from (1) and S(4) 1
545
- 'E => E + E . | 0', # complete from (1) and S(4) 2
546
- 'E => E . + E | 4', # complete from (1) and S(4) 3
547
- 'E => E . + E | 2', # complete from (1) and S(2) 2
548
- 'S => E . | 0', # complete from (1) and S(0) 1
549
- 'E => E . + E | 0', # complete from (1) and S(0) 2
550
- ]
551
- compare_state_texts(parse_result.chart[5], expected)
552
- end
553
-
554
-
555
-
556
- it 'should parse an invalid simple input' do
557
- # Parse an erroneous input (b is missing)
558
- wrong = [
559
- Token.new('a', a_),
560
- Token.new('a', a_),
561
- Token.new('c', c_),
562
- Token.new('c', c_)
563
- ]
564
- err_msg = <<-MSG
565
- Syntax error at or near token 3>>>c<<<:
566
- Expected one of: ['a', 'b'], found a 'c' instead.
567
- MSG
568
- err = StandardError
569
- expect { subject.parse(wrong) }
570
- .to raise_error(err, err_msg.chomp)
571
- =begin
572
- # This code is never reached (because of exception)
573
- ###################### S(0) == . a a c c
574
- # Expectation chart[0]:
575
- expected = [
576
- 'S => . A | 0', # start rule
577
- "A => . 'a' A 'c' | 0", # predict from 0
578
- "A => . 'b' | 0" # predict from 0
579
- ]
580
- compare_state_texts(parse_result.chart[0], expected)
581
-
582
- ###################### S(1) == a . a c c
583
- expected = [
584
- "A => 'a' . A 'c' | 0", # scan from S(0) 1
585
- "A => . 'a' A 'c' | 1", # predict from 0
586
- "A => . 'b' | 1" # predict from 0
587
- ]
588
- compare_state_texts(parse_result.chart[1], expected)
589
-
590
- ###################### S(2) == a a . c c
591
- expected = [
592
- "A => 'a' . A 'c' | 1", # scan from S(0) 1
593
- "A => . 'a' A 'c' | 2", # predict from 0
594
- "A => . 'b' | 2" # predict from 0
595
- ]
596
- compare_state_texts(parse_result.chart[2], expected)
597
-
598
- ###################### S(3) == a a c? c
599
- state_set_3 = parse_result.chart[3]
600
- expect(state_set_3.states).to be_empty # This is an error symptom
601
- =end
602
- end
603
-
604
- it 'should parse a grammar with nullable nonterminals' do
605
- # Grammar 4: A grammar with nullable nonterminal
606
- # based on example in "Parsing Techniques" book (D. Grune, C. Jacobs)
607
- # Z ::= E.
608
- # E ::= E Q F.
609
- # E ::= F.
610
- # F ::= a.
611
- # Q ::= *.
612
- # Q ::= /.
613
- # Q ::=.
614
- t_a = Syntax::VerbatimSymbol.new('a')
615
- t_star = Syntax::VerbatimSymbol.new('*')
616
- t_slash = Syntax::VerbatimSymbol.new('/')
617
-
618
- builder = Syntax::GrammarBuilder.new
619
- builder.add_terminals(t_a, t_star, t_slash)
620
- builder.add_production('Z' => 'E')
621
- builder.add_production('E' => %w(E Q F))
622
- builder.add_production('E' => 'F')
623
- builder.add_production('F' => t_a)
624
- builder.add_production('Q' => t_star)
625
- builder.add_production('Q' => t_slash)
626
- builder.add_production('Q' => []) # Empty production
627
- tokens = [
628
- Token.new('a', t_a),
629
- Token.new('a', t_a),
630
- Token.new('/', t_slash),
631
- Token.new('a', t_a)
632
- ]
633
-
634
- instance = EarleyParser.new(builder.grammar)
635
- expect { instance.parse(tokens) }.not_to raise_error
636
- parse_result = instance.parse(tokens)
637
- expect(parse_result.success?).to eq(true)
638
-
639
- ###################### S(0) == . a a / a
640
- # Expectation chart[0]:
641
- expected = [
642
- 'Z => . E | 0', # start rule
643
- 'E => . E Q F | 0', # predict from (1)
644
- 'E => . F | 0', # predict from (1)
645
- "F => . 'a' | 0" # predict from (3)
646
- ]
647
- compare_state_texts(parse_result.chart[0], expected)
648
-
649
- ###################### S(1) == a . a / a
650
- # Expectation chart[1]:
651
- expected = [
652
- "F => 'a' . | 0", # scan from S(0) 4
653
- 'E => F . | 0', # complete from (1) and S(0) 3
654
- 'Z => E . | 0', # complete from (2) and S(0) 1
655
- 'E => E . Q F | 0', # complete from (2) and S(0) 2
656
- "Q => . '*' | 1", # Predict from (4)
657
- "Q => . '/' | 1", # Predict from (4)
658
- 'Q => . | 1', # Predict from (4)
659
- 'E => E Q . F | 0', # Modified predict from (4)
660
- "F => . 'a' | 1" # Predict from (8)
661
- ]
662
- compare_state_texts(parse_result.chart[1], expected)
663
-
664
- ###################### S(2) == a a . / a
665
- # Expectation chart[2]:
666
- expected = [
667
- "F => 'a' . | 1", # scan from S(1) 9
668
- 'E => E Q F . | 0', # complete from (1) and S(1) 8
669
- 'Z => E . | 0', # complete from (1) and S(0) 1
670
- 'E => E . Q F | 0', # complete from (1) and S(0) 2
671
- "Q => . '*' | 2", # Predict from (4)
672
- "Q => . '/' | 2", # Predict from (4)
673
- 'Q => . | 2', # Predict from (4)
674
- 'E => E Q . F | 0', # Complete from (5) and S(1) 4
675
- "F => . 'a' | 2" # Predict from (8)
676
- ]
677
- compare_state_texts(parse_result.chart[2], expected)
678
-
679
-
680
- ###################### S(3) == a a / . a
681
- # Expectation chart[3]:
682
- expected = [
683
- "Q => '/' . | 2", # scan from S(2) 6
684
- 'E => E Q . F | 0', # complete from (1) and S(1) 4
685
- "F => . 'a' | 3" # Predict from (2)
686
- ]
687
- compare_state_texts(parse_result.chart[3], expected)
688
-
689
-
690
- ###################### S(4) == a a / a .
691
- # Expectation chart[4]:
692
- expected = [
693
- "F => 'a' . | 3", # scan from S(3) 3
694
- 'E => E Q F . | 0', # complete from (1) and S(3) 2
695
- 'Z => E . | 0', # complete from (2) and S(0) 1
696
- 'E => E . Q F | 0', # complete from (2) and S(0) 2
697
- "Q => . '*' | 4", # Predict from (4)
698
- "Q => . '/' | 4", # Predict from (4)
699
- 'Q => . | 4', # Predict from (4)
700
- 'E => E Q . F | 0', # Modified predict from (4)
701
- "F => . 'a' | 4" # Predict from (8)
702
- ]
703
- compare_state_texts(parse_result.chart[4], expected)
704
- end
705
- end # context
706
- end # describe
707
- end # module
708
- end # module
709
-
710
- # End of file