rley 0.4.01 → 0.4.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +2 -2
  4. data/README.md +3 -3
  5. data/examples/NLP/mini_en_demo.rb +1 -1
  6. data/examples/data_formats/JSON/JSON_demo.rb +1 -0
  7. data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
  8. data/examples/general/calc/calc_lexer.rb +2 -2
  9. data/lib/rley.rb +1 -1
  10. data/lib/rley/constants.rb +1 -1
  11. data/lib/rley/formatter/debug.rb +2 -2
  12. data/lib/rley/formatter/json.rb +4 -4
  13. data/lib/rley/parse_tree_visitor.rb +9 -9
  14. data/lib/rley/parser/base_parser.rb +1 -1
  15. data/lib/rley/parser/gfg_parsing.rb +9 -0
  16. data/lib/rley/parser/parse_tree_builder.rb +176 -126
  17. data/lib/rley/parser/parse_tree_factory.rb +57 -0
  18. data/lib/rley/ptree/non_terminal_node.rb +10 -9
  19. data/lib/rley/ptree/parse_tree_node.rb +10 -5
  20. data/lib/rley/ptree/terminal_node.rb +14 -6
  21. data/lib/rley/sppf/sppf_node.rb +2 -2
  22. data/lib/rley/{parser → tokens}/token.rb +1 -4
  23. data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
  24. data/spec/rley/formatter/debug_spec.rb +16 -16
  25. data/spec/rley/formatter/json_spec.rb +8 -8
  26. data/spec/rley/parse_forest_visitor_spec.rb +1 -1
  27. data/spec/rley/parse_tree_visitor_spec.rb +28 -28
  28. data/spec/rley/parser/error_reason_spec.rb +3 -3
  29. data/spec/rley/parser/gfg_chart_spec.rb +2 -2
  30. data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
  31. data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
  32. data/spec/rley/parser/groucho_spec.rb +1 -1
  33. data/spec/rley/parser/parse_tracer_spec.rb +2 -2
  34. data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
  35. data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
  36. data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
  37. data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
  38. data/spec/rley/ptree/terminal_node_spec.rb +7 -12
  39. data/spec/rley/sppf/alternative_node_spec.rb +2 -2
  40. data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
  41. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  42. data/spec/rley/support/expectation_helper.rb +1 -1
  43. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  44. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  45. data/spec/rley/support/grammar_helper.rb +3 -3
  46. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  47. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  48. data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
  49. data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
  50. metadata +11 -17
  51. data/lib/rley/parser/chart.rb +0 -82
  52. data/lib/rley/parser/earley_parser.rb +0 -203
  53. data/lib/rley/parser/parsing.rb +0 -265
  54. data/spec/rley/parser/chart_spec.rb +0 -120
  55. data/spec/rley/parser/earley_parser_spec.rb +0 -710
  56. data/spec/rley/parser/parsing_spec.rb +0 -408
@@ -1,408 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/non_terminal'
5
- require_relative '../../../lib/rley/syntax/verbatim_symbol'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
8
- require_relative '../../../lib/rley/parser/dotted_item'
9
- require_relative '../../../lib/rley/parser/token'
10
- require_relative '../../../lib/rley/parser/parse_tracer'
11
- require_relative '../../../lib/rley/parser/earley_parser'
12
- require_relative '../support/grammar_abc_helper'
13
- require_relative '../support/grammar_b_expr_helper'
14
-
15
-
16
- # Load the class under test
17
- require_relative '../../../lib/rley/parser/parsing'
18
-
19
- module Rley # Open this namespace to avoid module qualifier prefixes
20
- module Parser # Open this namespace to avoid module qualifier prefixes
21
- describe Parsing do
22
- include GrammarABCHelper # Mix-in module with builder for grammar abc
23
- include GrammarBExprHelper # Mix-in with builder for simple expressions
24
-
25
- # Grammar 1: A very simple language
26
- # S => A.
27
- # A => "a" A "c".
28
- # A => "b".
29
- let(:nt_S) { Syntax::NonTerminal.new('S') }
30
- let(:nt_A) { Syntax::NonTerminal.new('A') }
31
- let(:a_) { Syntax::VerbatimSymbol.new('a') }
32
- let(:b_) { Syntax::VerbatimSymbol.new('b') }
33
- let(:c_) { Syntax::VerbatimSymbol.new('c') }
34
- let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
35
- let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
36
- let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
37
-
38
-
39
- # Helper method that mimicks the output of a tokenizer
40
- # for the language specified by gramma_abc
41
- let(:grm1_tokens) do
42
- [
43
- Token.new('a', a_),
44
- Token.new('a', a_),
45
- Token.new('b', b_),
46
- Token.new('c', c_),
47
- Token.new('c', c_)
48
- ]
49
- end
50
-
51
-
52
- let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
53
- let(:output) { StringIO.new('', 'w') }
54
- let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
55
-
56
- # Default instantiation rule
57
- subject { Parsing.new([ start_dotted_rule ], grm1_tokens, sample_tracer) }
58
-
59
- context 'Initialization:' do
60
- it 'should be created with list of tokens, start dotted rules, trace' do
61
- start_rules = [ start_dotted_rule ]
62
- tokens = grm1_tokens
63
- tracer = sample_tracer
64
- expect { Parsing.new(start_rules, tokens, tracer) }.not_to raise_error
65
- end
66
-
67
- it 'should know the input tokens' do
68
- expect(subject.tokens).to eq(grm1_tokens)
69
- end
70
-
71
- it 'should know its chart object' do
72
- expect(subject.chart).to be_kind_of(Chart)
73
- end
74
-
75
- it 'should emit trace level 1 info' do
76
- tracer = ParseTracer.new(1, output, grm1_tokens)
77
- Parsing.new([ start_dotted_rule ], grm1_tokens, tracer)
78
- expectations = <<-SNIPPET
79
- ['a', 'a', 'b', 'c', 'c']
80
- |. a . a . b . c . c .|
81
- |> . . . . .| [0:0] S => . A
82
- SNIPPET
83
- expect(output.string).to eq(expectations)
84
- end
85
- end # context
86
-
87
- context 'Parsing:' do
88
- it 'should push a state to a given chart entry' do
89
- expect(subject.chart[1]).to be_empty
90
- item = DottedItem.new(prod_A1, 1)
91
-
92
- subject.push_state(item, 1, 1, :scanning)
93
- expect(subject.chart[1]).not_to be_empty
94
- expect(subject.chart[1].first.dotted_rule).to eq(item)
95
-
96
- # Pushing twice the same state must be no-op
97
- subject.push_state(item, 1, 1, :scanning)
98
- expect(subject.chart[1].size).to eq(1)
99
- end
100
-
101
- it 'should complain when trying to push a nil dotted item' do
102
- err = StandardError
103
- msg = 'Dotted item may not be nil'
104
- expect { subject.push_state(nil, 1, 1, :prediction) }
105
- .to raise_error(err, msg)
106
- end
107
-
108
-
109
- it 'should retrieve the parse states that expect a given terminal' do
110
- item1 = DottedItem.new(prod_A1, 2)
111
- item2 = DottedItem.new(prod_A1, 1)
112
- subject.push_state(item1, 2, 2, :scanning)
113
- subject.push_state(item2, 2, 2, :scanning)
114
- states = subject.states_expecting(c_, 2, false)
115
- expect(states.size).to eq(1)
116
- expect(states[0].dotted_rule).to eq(item1)
117
- end
118
-
119
- it 'should update the states upon token match' do
120
- # When a input token matches an expected terminal symbol
121
- # then new parse states must be pushed to the following chart slot
122
- expect(subject.chart[1]).to be_empty
123
-
124
- item1 = DottedItem.new(prod_A1, 0)
125
- item2 = DottedItem.new(prod_A2, 0)
126
- subject.push_state(item1, 0, 0, :completion)
127
- subject.push_state(item2, 0, 0, :completion)
128
- subject.scanning(a_, 0) { |i| i } # Code block is mock
129
-
130
- # Expected side effect: a new state at chart[1]
131
- expect(subject.chart[1].size).to eq(1)
132
- new_state = subject.chart[1].states[0]
133
- expect(new_state.dotted_rule).to eq(item1)
134
- expect(new_state.origin).to eq(0)
135
- end
136
- end # context
137
-
138
- context 'Parse tree building:' do
139
- let(:sample_grammar1) do
140
- builder = grammar_abc_builder
141
- builder.grammar
142
- end
143
-
144
- let(:token_seq1) do
145
- %w(a a b c c).map do |letter|
146
- Token.new(letter, sample_grammar1.name2symbol[letter])
147
- end
148
- end
149
-
150
- let(:b_expr_grammar) do
151
- builder = grammar_expr_builder
152
- builder.grammar
153
- end
154
-
155
- def grm_symbol(aSymbolName)
156
- b_expr_grammar.name2symbol[aSymbolName]
157
- end
158
-
159
- subject do
160
- parser = EarleyParser.new(b_expr_grammar)
161
- tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
162
- parser.parse(tokens)
163
- end
164
-
165
- # Helper. Build a state tracker and a parse tree builder.
166
- def prepare_parse_tree(aParsing)
167
- # Accessing private methods by sending message
168
- state_tracker = aParsing.send(:new_state_tracker)
169
- builder = aParsing.send(:tree_builder, state_tracker.state_set_index)
170
- return [state_tracker, builder]
171
- end
172
-
173
- it 'should create the root of a parse tree' do
174
- (state_tracker, builder) = prepare_parse_tree(subject)
175
- # The root node should correspond to the start symbol and
176
- # its direct children should correspond to rhs of start production
177
- expected_text = <<-SNIPPET
178
- P[0, 5]
179
- +- S[0, 5]
180
- SNIPPET
181
- root_text = builder.root.to_string(0)
182
- expect(root_text).to eq(expected_text.chomp)
183
-
184
- expect(state_tracker.state_set_index).to eq(subject.tokens.size)
185
- expected_state = 'P => S . | 0'
186
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
187
- expect(builder.current_node.to_string(0)).to eq('S[0, 5]')
188
- end
189
-
190
- it 'should use a reduce item for a matched non-terminal' do
191
- # Setup
192
- (state_tracker, builder) = prepare_parse_tree(subject)
193
- # Same state as in previous example
194
-
195
- # Given matched symbol is S[0, 5]
196
- # And its reduce item is S => S + M . | 0
197
- # Then add child nodes corresponding to the rhs symbols
198
- # And make M[?, 5] the current symbol
199
- subject.insert_matched_symbol(state_tracker, builder)
200
- expected_text = <<-SNIPPET
201
- P[0, 5]
202
- +- S[0, 5]
203
- +- S[0, ?]
204
- +- +[?, ?]: '(nil)'
205
- +- M[?, 5]
206
- SNIPPET
207
- root_text = builder.root.to_string(0)
208
- expect(root_text).to eq(expected_text.chomp)
209
- expected_state = 'S => S + M . | 0'
210
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
211
- expect(state_tracker.state_set_index).to eq(5)
212
- expect(builder.current_node.to_string(0)).to eq('M[?, 5]')
213
-
214
- # Second similar test
215
-
216
- # Given matched symbol is M[?, 5]
217
- # And its reduce item is M => M * T . | 2
218
- # Then add child nodes corresponding to the rhs symbols
219
- # And make T[?, 5] the current symbol
220
- subject.insert_matched_symbol(state_tracker, builder)
221
- expected_text = <<-SNIPPET
222
- P[0, 5]
223
- +- S[0, 5]
224
- +- S[0, ?]
225
- +- +[?, ?]: '(nil)'
226
- +- M[2, 5]
227
- +- M[2, ?]
228
- +- *[?, ?]: '(nil)'
229
- +- T[?, 5]
230
- SNIPPET
231
- root_text = builder.root.to_string(0)
232
- expect(root_text).to eq(expected_text.chomp)
233
- expected_state = 'M => M * T . | 2'
234
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
235
- expect(state_tracker.state_set_index).to eq(5)
236
- expect(builder.current_node.to_string(0)).to eq('T[?, 5]')
237
- end
238
-
239
-
240
-
241
- it 'should use a previous item for a terminal symbol' do
242
- # Setup
243
- (state_tracker, builder) = prepare_parse_tree(subject)
244
- 3.times do
245
- subject.insert_matched_symbol(state_tracker, builder)
246
- end
247
-
248
- # Given matched symbol is T[?, 5]
249
- # And its reduce item is T => integer . | 4
250
- # Then add child node corresponding to the rhs symbol
251
- # And make integer[4, 5]: '(nil)' the current symbol
252
- expected_text = <<-SNIPPET
253
- P[0, 5]
254
- +- S[0, 5]
255
- +- S[0, ?]
256
- +- +[?, ?]: '(nil)'
257
- +- M[2, 5]
258
- +- M[2, ?]
259
- +- *[?, ?]: '(nil)'
260
- +- T[4, 5]
261
- +- integer[4, 5]: '(nil)'
262
- SNIPPET
263
- root_text = builder.root.to_string(0)
264
- expect(root_text).to eq(expected_text.chomp)
265
- expected_state = 'T => integer . | 4'
266
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
267
- expect(state_tracker.state_set_index).to eq(5)
268
- integer_repr = "integer[4, 5]: '(nil)'"
269
- expect(builder.current_node.to_string(0)).to eq(integer_repr)
270
-
271
- # Given current tree symbol is integer[4, 5]: '(nil)'
272
- # And its previous item is T => . integer | 4
273
- # Then attach the token to the terminal node
274
- # And decrement the state index by one
275
- # Make *[?, ?]: '(nil)' the current symbol
276
- subject.insert_matched_symbol(state_tracker, builder)
277
- expected_text = <<-SNIPPET
278
- P[0, 5]
279
- +- S[0, 5]
280
- +- S[0, ?]
281
- +- +[?, ?]: '(nil)'
282
- +- M[2, 5]
283
- +- M[2, ?]
284
- +- *[?, ?]: '(nil)'
285
- +- T[4, 5]
286
- +- integer[4, 5]: '4'
287
- SNIPPET
288
- root_text = builder.root.to_string(0)
289
- expect(root_text).to eq(expected_text.chomp)
290
- expected_state = 'T => . integer | 4'
291
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
292
- expect(state_tracker.state_set_index).to eq(4)
293
- next_symbol = "*[?, ?]: '(nil)'"
294
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
295
- end
296
-
297
- it 'should handle [no symbol before dot, terminal tree node] case' do
298
- # Setup
299
- (state_tracker, builder) = prepare_parse_tree(subject)
300
- 4.times do
301
- subject.insert_matched_symbol(state_tracker, builder)
302
- end
303
-
304
- # Given current tree symbol is *[?, ?]: '(nil)'
305
- # And current dotted item is T => . integer | 4
306
- # When one retrieves the parse state expecting the T
307
- # Then new parse state is changed to: M => M * . T | 2
308
- subject.insert_matched_symbol(state_tracker, builder)
309
-
310
- expected_text = <<-SNIPPET
311
- P[0, 5]
312
- +- S[0, 5]
313
- +- S[0, ?]
314
- +- +[?, ?]: '(nil)'
315
- +- M[2, 5]
316
- +- M[2, ?]
317
- +- *[?, ?]: '(nil)'
318
- +- T[4, 5]
319
- +- integer[4, 5]: '4'
320
- SNIPPET
321
- root_text = builder.root.to_string(0)
322
- expect(root_text).to eq(expected_text.chomp)
323
- expected_state = 'M => M * . T | 2'
324
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
325
- expect(state_tracker.state_set_index).to eq(4)
326
- next_symbol = "*[?, ?]: '(nil)'"
327
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
328
-
329
- subject.insert_matched_symbol(state_tracker, builder)
330
- next_symbol = 'M[2, ?]'
331
- expect(builder.current_node.to_string(0)).to eq(next_symbol)
332
- end
333
-
334
- it 'should handle the end of parse tree generation' do
335
- # Begin setup
336
- is_done = false
337
- (state_tracker, builder) = prepare_parse_tree(subject)
338
- 16.times do
339
- is_done = subject.insert_matched_symbol(state_tracker, builder)
340
- end
341
-
342
- expected_text = <<-SNIPPET
343
- P[0, 5]
344
- +- S[0, 5]
345
- +- S[0, 1]
346
- +- M[0, 1]
347
- +- T[0, 1]
348
- +- integer[0, 1]: '2'
349
- +- +[1, 2]: '+'
350
- +- M[2, 5]
351
- +- M[2, 3]
352
- +- T[2, 3]
353
- +- integer[2, 3]: '3'
354
- +- *[3, 4]: '*'
355
- +- T[4, 5]
356
- +- integer[4, 5]: '4'
357
- SNIPPET
358
- root_text = builder.root.to_string(0)
359
- expect(root_text).to eq(expected_text.chomp)
360
-
361
- expected_state = 'T => . integer | 0'
362
- expect(state_tracker.parse_state.to_s).to eq(expected_state)
363
- expect(state_tracker.state_set_index).to eq(0)
364
- expect(is_done).to eq(true)
365
- end
366
-
367
-
368
-
369
- it 'should build the parse tree for a simple non-ambiguous grammar' do
370
- parser = EarleyParser.new(sample_grammar1)
371
- instance = parser.parse(token_seq1)
372
- ptree = instance.parse_tree
373
- expect(ptree).to be_kind_of(PTree::ParseTree)
374
- end
375
-
376
- it 'should build the parse tree for a simple expression grammar' do
377
- parser = EarleyParser.new(b_expr_grammar)
378
- tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
379
- instance = parser.parse(tokens)
380
- ptree = instance.parse_tree
381
- expect(ptree).to be_kind_of(PTree::ParseTree)
382
-
383
- # Expect parse tree:
384
- expected_text = <<-SNIPPET
385
- P[0, 5]
386
- +- S[0, 5]
387
- +- S[0, 1]
388
- +- M[0, 1]
389
- +- T[0, 1]
390
- +- integer[0, 1]: '2'
391
- +- +[1, 2]: '+'
392
- +- M[2, 5]
393
- +- M[2, 3]
394
- +- T[2, 3]
395
- +- integer[2, 3]: '3'
396
- +- *[3, 4]: '*'
397
- +- T[4, 5]
398
- +- integer[4, 5]: '4'
399
- SNIPPET
400
- actual = ptree.root.to_string(0)
401
- expect(actual).to eq(expected_text.chomp)
402
- end
403
- end # context
404
- end # describe
405
- end # module
406
- end # module
407
-
408
- # End of file