rley 0.4.01 → 0.4.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +2 -2
  4. data/README.md +3 -3
  5. data/examples/NLP/mini_en_demo.rb +1 -1
  6. data/examples/data_formats/JSON/JSON_demo.rb +1 -0
  7. data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
  8. data/examples/general/calc/calc_lexer.rb +2 -2
  9. data/lib/rley.rb +1 -1
  10. data/lib/rley/constants.rb +1 -1
  11. data/lib/rley/formatter/debug.rb +2 -2
  12. data/lib/rley/formatter/json.rb +4 -4
  13. data/lib/rley/parse_tree_visitor.rb +9 -9
  14. data/lib/rley/parser/base_parser.rb +1 -1
  15. data/lib/rley/parser/gfg_parsing.rb +9 -0
  16. data/lib/rley/parser/parse_tree_builder.rb +176 -126
  17. data/lib/rley/parser/parse_tree_factory.rb +57 -0
  18. data/lib/rley/ptree/non_terminal_node.rb +10 -9
  19. data/lib/rley/ptree/parse_tree_node.rb +10 -5
  20. data/lib/rley/ptree/terminal_node.rb +14 -6
  21. data/lib/rley/sppf/sppf_node.rb +2 -2
  22. data/lib/rley/{parser → tokens}/token.rb +1 -4
  23. data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
  24. data/spec/rley/formatter/debug_spec.rb +16 -16
  25. data/spec/rley/formatter/json_spec.rb +8 -8
  26. data/spec/rley/parse_forest_visitor_spec.rb +1 -1
  27. data/spec/rley/parse_tree_visitor_spec.rb +28 -28
  28. data/spec/rley/parser/error_reason_spec.rb +3 -3
  29. data/spec/rley/parser/gfg_chart_spec.rb +2 -2
  30. data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
  31. data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
  32. data/spec/rley/parser/groucho_spec.rb +1 -1
  33. data/spec/rley/parser/parse_tracer_spec.rb +2 -2
  34. data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
  35. data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
  36. data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
  37. data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
  38. data/spec/rley/ptree/terminal_node_spec.rb +7 -12
  39. data/spec/rley/sppf/alternative_node_spec.rb +2 -2
  40. data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
  41. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  42. data/spec/rley/support/expectation_helper.rb +1 -1
  43. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  44. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  45. data/spec/rley/support/grammar_helper.rb +3 -3
  46. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  47. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  48. data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
  49. data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
  50. metadata +11 -17
  51. data/lib/rley/parser/chart.rb +0 -82
  52. data/lib/rley/parser/earley_parser.rb +0 -203
  53. data/lib/rley/parser/parsing.rb +0 -265
  54. data/spec/rley/parser/chart_spec.rb +0 -120
  55. data/spec/rley/parser/earley_parser_spec.rb +0 -710
  56. data/spec/rley/parser/parsing_spec.rb +0 -408
@@ -1,408 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/non_terminal'
5
- require_relative '../../../lib/rley/syntax/verbatim_symbol'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
8
- require_relative '../../../lib/rley/parser/dotted_item'
9
- require_relative '../../../lib/rley/parser/token'
10
- require_relative '../../../lib/rley/parser/parse_tracer'
11
- require_relative '../../../lib/rley/parser/earley_parser'
12
- require_relative '../support/grammar_abc_helper'
13
- require_relative '../support/grammar_b_expr_helper'
14
-
15
-
16
- # Load the class under test
17
- require_relative '../../../lib/rley/parser/parsing'
18
-
19
- module Rley # Open this namespace to avoid module qualifier prefixes
20
- module Parser # Open this namespace to avoid module qualifier prefixes
21
- describe Parsing do
22
- include GrammarABCHelper # Mix-in module with builder for grammar abc
23
- include GrammarBExprHelper # Mix-in with builder for simple expressions
24
-
25
- # Grammar 1: A very simple language
26
- # S => A.
27
- # A => "a" A "c".
28
- # A => "b".
29
- let(:nt_S) { Syntax::NonTerminal.new('S') }
30
- let(:nt_A) { Syntax::NonTerminal.new('A') }
31
- let(:a_) { Syntax::VerbatimSymbol.new('a') }
32
- let(:b_) { Syntax::VerbatimSymbol.new('b') }
33
- let(:c_) { Syntax::VerbatimSymbol.new('c') }
34
- let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
35
- let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
36
- let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
37
-
38
-
39
- # Helper method that mimics the output of a tokenizer
40
- # for the language specified by grammar_abc
41
- let(:grm1_tokens) do
42
- [
43
- Token.new('a', a_),
44
- Token.new('a', a_),
45
- Token.new('b', b_),
46
- Token.new('c', c_),
47
- Token.new('c', c_)
48
- ]
49
- end
50
-
51
-
52
- let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
53
- let(:output) { StringIO.new('', 'w') }
54
- let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
55
-
56
- # Default instantiation rule
57
- subject { Parsing.new([ start_dotted_rule ], grm1_tokens, sample_tracer) }
58
-
59
- context 'Initialization:' do
60
- it 'should be created with list of tokens, start dotted rules, trace' do
61
- start_rules = [ start_dotted_rule ]
62
- tokens = grm1_tokens
63
- tracer = sample_tracer
64
- expect { Parsing.new(start_rules, tokens, tracer) }.not_to raise_error
65
- end
66
-
67
- it 'should know the input tokens' do
68
- expect(subject.tokens).to eq(grm1_tokens)
69
- end
70
-
71
- it 'should know its chart object' do
72
- expect(subject.chart).to be_kind_of(Chart)
73
- end
74
-
75
- it 'should emit trace level 1 info' do
76
- tracer = ParseTracer.new(1, output, grm1_tokens)
77
- Parsing.new([ start_dotted_rule ], grm1_tokens, tracer)
78
- expectations = <<-SNIPPET
79
- ['a', 'a', 'b', 'c', 'c']
80
- |. a . a . b . c . c .|
81
- |> . . . . .| [0:0] S => . A
82
- SNIPPET
83
- expect(output.string).to eq(expectations)
84
- end
85
- end # context
86
-
87
- context 'Parsing:' do
88
- it 'should push a state to a given chart entry' do
89
- expect(subject.chart[1]).to be_empty
90
- item = DottedItem.new(prod_A1, 1)
91
-
92
- subject.push_state(item, 1, 1, :scanning)
93
- expect(subject.chart[1]).not_to be_empty
94
- expect(subject.chart[1].first.dotted_rule).to eq(item)
95
-
96
- # Pushing the same state twice must be a no-op
97
- subject.push_state(item, 1, 1, :scanning)
98
- expect(subject.chart[1].size).to eq(1)
99
- end
100
-
101
- it 'should complain when trying to push a nil dotted item' do
102
- err = StandardError
103
- msg = 'Dotted item may not be nil'
104
- expect { subject.push_state(nil, 1, 1, :prediction) }
105
- .to raise_error(err, msg)
106
- end
107
-
108
-
109
- it 'should retrieve the parse states that expect a given terminal' do
110
- item1 = DottedItem.new(prod_A1, 2)
111
- item2 = DottedItem.new(prod_A1, 1)
112
- subject.push_state(item1, 2, 2, :scanning)
113
- subject.push_state(item2, 2, 2, :scanning)
114
- states = subject.states_expecting(c_, 2, false)
115
- expect(states.size).to eq(1)
116
- expect(states[0].dotted_rule).to eq(item1)
117
- end
118
-
119
- it 'should update the states upon token match' do
120
- # When an input token matches an expected terminal symbol
121
- # then new parse states must be pushed to the following chart slot
122
- expect(subject.chart[1]).to be_empty
123
-
124
- item1 = DottedItem.new(prod_A1, 0)
125
- item2 = DottedItem.new(prod_A2, 0)
126
- subject.push_state(item1, 0, 0, :completion)
127
- subject.push_state(item2, 0, 0, :completion)
128
- subject.scanning(a_, 0) { |i| i } # Code block is mock
129
-
130
- # Expected side effect: a new state at chart[1]
131
- expect(subject.chart[1].size).to eq(1)
132
- new_state = subject.chart[1].states[0]
133
- expect(new_state.dotted_rule).to eq(item1)
134
- expect(new_state.origin).to eq(0)
135
- end
136
- end # context
137
-
138
- context 'Parse tree building:' do
139
- let(:sample_grammar1) do
140
- builder = grammar_abc_builder
141
- builder.grammar
142
- end
143
-
144
- let(:token_seq1) do
145
- %w(a a b c c).map do |letter|
146
- Token.new(letter, sample_grammar1.name2symbol[letter])
147
- end
148
- end
149
-
150
- let(:b_expr_grammar) do
151
- builder = grammar_expr_builder
152
- builder.grammar
153
- end
154
-
155
- def grm_symbol(aSymbolName)
156
- b_expr_grammar.name2symbol[aSymbolName]
157
- end
158
-
159
- subject do
160
- parser = EarleyParser.new(b_expr_grammar)
161
- tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
162
- parser.parse(tokens)
163
- end
164
-
165
- # Helper. Build a state tracker and a parse tree builder.
166
- def prepare_parse_tree(aParsing)
167
- # Accessing private methods by sending message
168
- state_tracker = aParsing.send(:new_state_tracker)
169
- builder = aParsing.send(:tree_builder, state_tracker.state_set_index)
170
- return [state_tracker, builder]
171
- end
172
-
173
- it 'should create the root of a parse tree' do
174
- (state_tracker, builder) = prepare_parse_tree(subject)
175
- # The root node should correspond to the start symbol and
176
- # its direct children should correspond to rhs of start production
177
- expected_text = <<-SNIPPET
178
- P[0, 5]
179
- +- S[0, 5]
180
- SNIPPET
181
- root_text = builder.root.to_string(0)
182
- expect(root_text).to eq(expected_text.chomp)
183
-
184
- expect(state_tracker.state_set_index).to eq(subject.tokens.size)
185
- expected_state = 'P => S . | 0'
186
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
187
- expect(builder.current_node.to_string(0)).to eq('S[0, 5]')
188
- end
189
-
190
- it 'should use a reduce item for a matched non-terminal' do
191
- # Setup
192
- (state_tracker, builder) = prepare_parse_tree(subject)
193
- # Same state as in previous example
194
-
195
- # Given matched symbol is S[0, 5]
196
- # And its reduce item is S => S + M . | 0
197
- # Then add child nodes corresponding to the rhs symbols
198
- # And make M[?, 5] the current symbol
199
- subject.insert_matched_symbol(state_tracker, builder)
200
- expected_text = <<-SNIPPET
201
- P[0, 5]
202
- +- S[0, 5]
203
- +- S[0, ?]
204
- +- +[?, ?]: '(nil)'
205
- +- M[?, 5]
206
- SNIPPET
207
- root_text = builder.root.to_string(0)
208
- expect(root_text).to eq(expected_text.chomp)
209
- expected_state = 'S => S + M . | 0'
210
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
211
- expect(state_tracker.state_set_index).to eq(5)
212
- expect(builder.current_node.to_string(0)).to eq('M[?, 5]')
213
-
214
- # Second similar test
215
-
216
- # Given matched symbol is M[?, 5]
217
- # And its reduce item is M => M * T . | 2
218
- # Then add child nodes corresponding to the rhs symbols
219
- # And make T[?, 5] the current symbol
220
- subject.insert_matched_symbol(state_tracker, builder)
221
- expected_text = <<-SNIPPET
222
- P[0, 5]
223
- +- S[0, 5]
224
- +- S[0, ?]
225
- +- +[?, ?]: '(nil)'
226
- +- M[2, 5]
227
- +- M[2, ?]
228
- +- *[?, ?]: '(nil)'
229
- +- T[?, 5]
230
- SNIPPET
231
- root_text = builder.root.to_string(0)
232
- expect(root_text).to eq(expected_text.chomp)
233
- expected_state = 'M => M * T . | 2'
234
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
235
- expect(state_tracker.state_set_index).to eq(5)
236
- expect(builder.current_node.to_string(0)).to eq('T[?, 5]')
237
- end
238
-
239
-
240
-
241
- it 'should use a previous item for a terminal symbol' do
242
- # Setup
243
- (state_tracker, builder) = prepare_parse_tree(subject)
244
- 3.times do
245
- subject.insert_matched_symbol(state_tracker, builder)
246
- end
247
-
248
- # Given matched symbol is T[?, 5]
249
- # And its reduce item is T => integer . | 4
250
- # Then add child node corresponding to the rhs symbol
251
- # And make integer[4, 5]: '(nil)' the current symbol
252
- expected_text = <<-SNIPPET
253
- P[0, 5]
254
- +- S[0, 5]
255
- +- S[0, ?]
256
- +- +[?, ?]: '(nil)'
257
- +- M[2, 5]
258
- +- M[2, ?]
259
- +- *[?, ?]: '(nil)'
260
- +- T[4, 5]
261
- +- integer[4, 5]: '(nil)'
262
- SNIPPET
263
- root_text = builder.root.to_string(0)
264
- expect(root_text).to eq(expected_text.chomp)
265
- expected_state = 'T => integer . | 4'
266
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
267
- expect(state_tracker.state_set_index).to eq(5)
268
- integer_repr = "integer[4, 5]: '(nil)'"
269
- expect(builder.current_node.to_string(0)).to eq(integer_repr)
270
-
271
- # Given current tree symbol is integer[4, 5]: '(nil)'
272
- # And its previous item is T => . integer | 4
273
- # Then attach the token to the terminal node
274
- # And decrement the state index by one
275
- # Make *[?, ?]: '(nil)' the current symbol
276
- subject.insert_matched_symbol(state_tracker, builder)
277
- expected_text = <<-SNIPPET
278
- P[0, 5]
279
- +- S[0, 5]
280
- +- S[0, ?]
281
- +- +[?, ?]: '(nil)'
282
- +- M[2, 5]
283
- +- M[2, ?]
284
- +- *[?, ?]: '(nil)'
285
- +- T[4, 5]
286
- +- integer[4, 5]: '4'
287
- SNIPPET
288
- root_text = builder.root.to_string(0)
289
- expect(root_text).to eq(expected_text.chomp)
290
- expected_state = 'T => . integer | 4'
291
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
292
- expect(state_tracker.state_set_index).to eq(4)
293
- next_symbol = "*[?, ?]: '(nil)'"
294
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
295
- end
296
-
297
- it 'should handle [no symbol before dot, terminal tree node] case' do
298
- # Setup
299
- (state_tracker, builder) = prepare_parse_tree(subject)
300
- 4.times do
301
- subject.insert_matched_symbol(state_tracker, builder)
302
- end
303
-
304
- # Given current tree symbol is *[?, ?]: '(nil)'
305
- # And current dotted item is T => . integer | 4
306
- # When one retrieves the parse state expecting the T
307
- # Then new parse state is changed to: M => M * . T | 2
308
- subject.insert_matched_symbol(state_tracker, builder)
309
-
310
- expected_text = <<-SNIPPET
311
- P[0, 5]
312
- +- S[0, 5]
313
- +- S[0, ?]
314
- +- +[?, ?]: '(nil)'
315
- +- M[2, 5]
316
- +- M[2, ?]
317
- +- *[?, ?]: '(nil)'
318
- +- T[4, 5]
319
- +- integer[4, 5]: '4'
320
- SNIPPET
321
- root_text = builder.root.to_string(0)
322
- expect(root_text).to eq(expected_text.chomp)
323
- expected_state = 'M => M * . T | 2'
324
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
325
- expect(state_tracker.state_set_index).to eq(4)
326
- next_symbol = "*[?, ?]: '(nil)'"
327
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
328
-
329
- subject.insert_matched_symbol(state_tracker, builder)
330
- next_symbol = 'M[2, ?]'
331
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
332
- end
333
-
334
- it 'should handle the end of parse tree generation' do
335
- # Begin setup
336
- is_done = false
337
- (state_tracker, builder) = prepare_parse_tree(subject)
338
- 16.times do
339
- is_done = subject.insert_matched_symbol(state_tracker, builder)
340
- end
341
-
342
- expected_text = <<-SNIPPET
343
- P[0, 5]
344
- +- S[0, 5]
345
- +- S[0, 1]
346
- +- M[0, 1]
347
- +- T[0, 1]
348
- +- integer[0, 1]: '2'
349
- +- +[1, 2]: '+'
350
- +- M[2, 5]
351
- +- M[2, 3]
352
- +- T[2, 3]
353
- +- integer[2, 3]: '3'
354
- +- *[3, 4]: '*'
355
- +- T[4, 5]
356
- +- integer[4, 5]: '4'
357
- SNIPPET
358
- root_text = builder.root.to_string(0)
359
- expect(root_text).to eq(expected_text.chomp)
360
-
361
- expected_state = 'T => . integer | 0'
362
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
363
- expect(state_tracker.state_set_index).to eq(0)
364
- expect(is_done).to eq(true)
365
- end
366
-
367
-
368
-
369
- it 'should build the parse tree for a simple non-ambiguous grammar' do
370
- parser = EarleyParser.new(sample_grammar1)
371
- instance = parser.parse(token_seq1)
372
- ptree = instance.parse_tree
373
- expect(ptree).to be_kind_of(PTree::ParseTree)
374
- end
375
-
376
- it 'should build the parse tree for a simple expression grammar' do
377
- parser = EarleyParser.new(b_expr_grammar)
378
- tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
379
- instance = parser.parse(tokens)
380
- ptree = instance.parse_tree
381
- expect(ptree).to be_kind_of(PTree::ParseTree)
382
-
383
- # Expect parse tree:
384
- expected_text = <<-SNIPPET
385
- P[0, 5]
386
- +- S[0, 5]
387
- +- S[0, 1]
388
- +- M[0, 1]
389
- +- T[0, 1]
390
- +- integer[0, 1]: '2'
391
- +- +[1, 2]: '+'
392
- +- M[2, 5]
393
- +- M[2, 3]
394
- +- T[2, 3]
395
- +- integer[2, 3]: '3'
396
- +- *[3, 4]: '*'
397
- +- T[4, 5]
398
- +- integer[4, 5]: '4'
399
- SNIPPET
400
- actual = ptree.root.to_string(0)
401
- expect(actual).to eq(expected_text.chomp)
402
- end
403
- end # context
404
- end # describe
405
- end # module
406
- end # module
407
-
408
- # End of file