rley 0.0.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +15 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +74 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.simplecov +7 -0
  7. data/.travis.yml +21 -0
  8. data/.yardopts +6 -0
  9. data/CHANGELOG.md +10 -0
  10. data/Gemfile +8 -0
  11. data/LICENSE.txt +19 -0
  12. data/README.md +19 -0
  13. data/Rakefile +32 -0
  14. data/lib/rley/constants.rb +26 -0
  15. data/lib/rley/parser/chart.rb +39 -0
  16. data/lib/rley/parser/dotted_item.rb +80 -0
  17. data/lib/rley/parser/earley_parser.rb +177 -0
  18. data/lib/rley/parser/parse_state.rb +54 -0
  19. data/lib/rley/parser/parsing.rb +101 -0
  20. data/lib/rley/parser/state_set.rb +47 -0
  21. data/lib/rley/parser/token.rb +21 -0
  22. data/lib/rley/syntax/grammar.rb +59 -0
  23. data/lib/rley/syntax/grm_symbol.rb +18 -0
  24. data/lib/rley/syntax/literal.rb +20 -0
  25. data/lib/rley/syntax/non_terminal.rb +18 -0
  26. data/lib/rley/syntax/production.rb +42 -0
  27. data/lib/rley/syntax/symbol_seq.rb +36 -0
  28. data/lib/rley/syntax/terminal.rb +18 -0
  29. data/lib/rley/syntax/verbatim_symbol.rb +21 -0
  30. data/spec/rley/parser/chart_spec.rb +47 -0
  31. data/spec/rley/parser/dotted_item_spec.rb +108 -0
  32. data/spec/rley/parser/earley_parser_spec.rb +271 -0
  33. data/spec/rley/parser/parse_state_spec.rb +99 -0
  34. data/spec/rley/parser/parsing_spec.rb +118 -0
  35. data/spec/rley/parser/state_set_spec.rb +68 -0
  36. data/spec/rley/parser/token_spec.rb +40 -0
  37. data/spec/rley/syntax/grammar_spec.rb +149 -0
  38. data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
  39. data/spec/rley/syntax/literal_spec.rb +32 -0
  40. data/spec/rley/syntax/non_terminal_spec.rb +29 -0
  41. data/spec/rley/syntax/production_spec.rb +50 -0
  42. data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
  43. data/spec/rley/syntax/terminal_spec.rb +29 -0
  44. data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
  45. data/spec/spec_helper.rb +21 -0
  46. metadata +166 -0
data/lib/rley/syntax/verbatim_symbol.rb
@@ -0,0 +1,21 @@
+ require_relative 'terminal' # Load superclass
+
+ module Rley # This module is used as a namespace
+   module Syntax # This module is used as a namespace
+
+     # A verbatim word is a terminal symbol that represents one unique word
+     # in the language defined by the grammar.
+     class VerbatimSymbol < Terminal
+       # The exact text representation of the word.
+       attr_reader(:text)
+
+       def initialize(aText)
+         super(aText) # Do we need to separate the text from the name?
+         @text = aText.dup
+       end
+     end # class
+
+   end # module
+ end # module
+
+ # End of file
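Not part of the diff: a minimal usage sketch of the class added above. The relative require path is an assumption (a script run from the gem's data/ directory).

require_relative 'lib/rley/syntax/verbatim_symbol'

# Build a verbatim terminal for the keyword 'true'.
kw_true = Rley::Syntax::VerbatimSymbol.new('true')
kw_true.text  # => "true" (the exact text kept by the attr_reader above)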
data/spec/rley/parser/chart_spec.rb
@@ -0,0 +1,47 @@
+ require_relative '../../spec_helper'
+
+
+ # Load the class under test
+ require_relative '../../../lib/rley/parser/chart'
+
+ module Rley # Open this namespace to avoid module qualifier prefixes
+   module Parser # Open this namespace to avoid module qualifier prefixes
+
+     describe Chart do
+
+       let(:count_token) { 20 }
+       let(:dotted_rule) { double('fake-dotted-item') }
+
+       context 'Initialization:' do
+
+         # Default instantiation rule
+         subject { Chart.new(dotted_rule, count_token) }
+
+         it 'should be created with a start dotted rule and a token count' do
+           expect { Chart.new(dotted_rule, count_token) }.not_to raise_error
+         end
+
+         it 'should have a seed state in its first state_set' do
+           seed_state = ParseState.new(dotted_rule, 0)
+           expect(subject[0].states).to eq([seed_state])
+
+           # Shorthand syntax
+           expect(subject[0].first).to eq(seed_state)
+         end
+
+         it 'should have the correct state_set count' do
+           expect(subject.state_sets.size).to eq(count_token + 1)
+         end
+
+         it 'should know the start dotted rule' do
+           expect(subject.start_dotted_rule).to eq(dotted_rule)
+         end
+
+       end # context
+
+     end # describe
+
+   end # module
+ end # module
+
+ # End of file
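Not part of the diff: a condensed sketch of the invariants the spec above checks. A chart built for N tokens holds N + 1 state sets, and state set 0 is seeded with a single parse state for the start dotted rule. The require paths are assumptions (relative to the gem's data/ directory), and chart.rb is assumed to load its ParseState dependency itself.

require_relative 'lib/rley/syntax/non_terminal'
require_relative 'lib/rley/syntax/terminal'
require_relative 'lib/rley/syntax/production'
require_relative 'lib/rley/parser/dotted_item'
require_relative 'lib/rley/parser/chart'

# Start dotted rule: S -> . a (dot before the only rhs symbol)
nt_s       = Rley::Syntax::NonTerminal.new('S')
term_a     = Rley::Syntax::Terminal.new('a')
start_rule = Rley::Parser::DottedItem.new(Rley::Syntax::Production.new(nt_s, [term_a]), 0)

chart = Rley::Parser::Chart.new(start_rule, 3) # chart sized for a 3-token input
chart.state_sets.size  # => 4 (token count + 1)
chart[0].states.size   # => 1 (the seed parse state, origin 0)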
data/spec/rley/parser/dotted_item_spec.rb
@@ -0,0 +1,108 @@
+ require_relative '../../spec_helper'
+
+ require_relative '../../../lib/rley/syntax/terminal'
+ require_relative '../../../lib/rley/syntax/non_terminal'
+ require_relative '../../../lib/rley/syntax/production'
+
+ # Load the class under test
+ require_relative '../../../lib/rley/parser/dotted_item'
+
+ module Rley # Open this namespace to avoid module qualifier prefixes
+   module Parser # Open this namespace to avoid module qualifier prefixes
+
+     describe DottedItem do
+       let(:t_a) { Syntax::Terminal.new('A') }
+       let(:t_b) { Syntax::Terminal.new('B') }
+       let(:t_c) { Syntax::Terminal.new('C') }
+       let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
+
+       let(:sample_prod) do
+         Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
+       end
+
+       let(:other_prod) do
+         Syntax::Production.new(nt_sentence, [t_a])
+       end
+
+       let(:empty_prod) do
+         Syntax::Production.new(nt_sentence, [])
+       end
+
+
+       subject { DottedItem.new(sample_prod, 1) }
+
+       context 'Initialization:' do
+         it 'should be created with a production and an index' do
+           expect { DottedItem.new(sample_prod, 0) }.not_to raise_error
+           expect { DottedItem.new(sample_prod, 3) }.not_to raise_error
+         end
+
+         it 'should complain when the index is out-of-bounds' do
+           err = StandardError
+           msg = 'Out of bound index'
+           expect { DottedItem.new(sample_prod, 4) }.to raise_error(err, msg)
+         end
+
+         it 'should know its production' do
+           expect(subject.production).to eq(sample_prod)
+         end
+
+         it 'should know the lhs of the production' do
+           expect(subject.lhs).to eq(sample_prod.lhs)
+         end
+
+         it 'should know its position' do
+           # At start position
+           instance1 = DottedItem.new(sample_prod, 0)
+           expect(instance1.position).to eq(0)
+
+           # At (before) last symbol
+           instance2 = DottedItem.new(sample_prod, 2)
+           expect(instance2.position).to eq(2)
+
+           # After all symbols in rhs
+           instance3 = DottedItem.new(sample_prod, 3)
+           expect(instance3.position).to eq(-1)
+
+           # At start/end at the same time (production is empty)
+           instance4 = DottedItem.new(Syntax::Production.new(nt_sentence, []), 0)
+           expect(instance4.position).to eq(-2)
+         end
+
+       end # context
+
+       context 'Provided service:' do
+         it 'should know whether its dot is at the start position' do
+           expect(subject).not_to be_at_start
+
+           # At start position
+           instance1 = DottedItem.new(sample_prod, 0)
+           expect(instance1).to be_at_start
+
+           # At start/end at the same time (production is empty)
+           instance2 = DottedItem.new(Syntax::Production.new(nt_sentence, []), 0)
+           expect(instance2).to be_at_start
+         end
+
+         it 'should know whether it is a reduce item' do
+           expect(subject).not_to be_reduce_item
+
+           first_instance = DottedItem.new(sample_prod, 3)
+           expect(first_instance).to be_reduce_item
+
+           second_instance = DottedItem.new(empty_prod, 0)
+           expect(second_instance).to be_reduce_item
+         end
+
+         it 'should know the symbol after the dot' do
+           expect(subject.next_symbol).to eq(t_b)
+         end
+       end # context
+
+     end # describe
+
+   end # module
+ end # module
+
+ # End of file
+
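Not part of the diff: a condensed recap of the dot-position encoding the examples above exercise (0 = dot at the start, a positive index = dot inside the rhs, -1 = dot after the last symbol, -2 = empty production). The require paths are assumptions (relative to the gem's data/ directory).

require_relative 'lib/rley/syntax/terminal'
require_relative 'lib/rley/syntax/non_terminal'
require_relative 'lib/rley/syntax/production'
require_relative 'lib/rley/parser/dotted_item'

lhs  = Rley::Syntax::NonTerminal.new('sentence')
rhs  = [Rley::Syntax::Terminal.new('A'), Rley::Syntax::Terminal.new('B')]
prod = Rley::Syntax::Production.new(lhs, rhs)

Rley::Parser::DottedItem.new(prod, 0).position  # =>  0  dot before the first symbol
Rley::Parser::DottedItem.new(prod, 1).position  # =>  1  dot inside the rhs
Rley::Parser::DottedItem.new(prod, 2).position  # => -1  dot after the last symbol (a reduce item)

empty = Rley::Syntax::Production.new(lhs, [])
Rley::Parser::DottedItem.new(empty, 0).position # => -2  empty production: start and end at once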
data/spec/rley/parser/earley_parser_spec.rb
@@ -0,0 +1,271 @@
+ require_relative '../../spec_helper'
+
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
+ require_relative '../../../lib/rley/syntax/non_terminal'
+ require_relative '../../../lib/rley/syntax/production'
+ require_relative '../../../lib/rley/parser/token'
+ # Load the class under test
+ require_relative '../../../lib/rley/parser/earley_parser'
+
+ module Rley # Open this namespace to avoid module qualifier prefixes
+   module Parser # Open this namespace to avoid module qualifier prefixes
+
+     describe EarleyParser do
+ =begin
+       let(:kw_true) { Syntax::VerbatimSymbol('true') }
+       let(:kw_false) { Syntax::VerbatimSymbol('false') }
+       let(:kw_null) { Syntax::VerbatimSymbol('null') }
+       let(:number) do
+         number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
+         Syntax::Literal('number', number_pattern)
+       end
+       let(:string) do
+         string_pattern = /"([^\\"]|\\.)*"/
+         Syntax::Literal('string', string_pattern)
+       end
+       let(:lbracket) { Syntax::VerbatimSymbol('[') }
+       let(:rbracket) { Syntax::VerbatimSymbol(']') }
+       let(:comma) { Syntax::VerbatimSymbol(',') }
+       let(:array) { Syntax::NonTerminal('Array') }
+       let(:object) { Syntax::NonTerminal('Object') }
+
+       let(:array_prod) do
+         Production.new(array, )
+       end
+ =end
+
+       # Grammar 1: A very simple language
+       # S ::= A.
+       # A ::= "a" A "c".
+       # A ::= "b".
+       let(:nt_S) { Syntax::NonTerminal.new('S') }
+       let(:nt_A) { Syntax::NonTerminal.new('A') }
+       let(:a_) { Syntax::VerbatimSymbol.new('a') }
+       let(:b_) { Syntax::VerbatimSymbol.new('b') }
+       let(:c_) { Syntax::VerbatimSymbol.new('c') }
+       let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
+       let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
+       let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
+       let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
+
+       # Helper method that mimics the output of a tokenizer
+       # for the language specified by grammar_abc
+       def grm1_tokens()
+         tokens = [
+           Token.new('a', a_),
+           Token.new('a', a_),
+           Token.new('b', b_),
+           Token.new('c', c_),
+           Token.new('c', c_)
+         ]
+
+         return tokens
+       end
+
+
+       # Default instantiation rule
+       subject { EarleyParser.new(grammar_abc) }
+
+       context 'Initialization:' do
+         it 'should be created with a grammar' do
+           expect { EarleyParser.new(grammar_abc) }.not_to raise_error
+         end
+
+         it 'should know its grammar' do
+           expect(subject.grammar).to eq(grammar_abc)
+         end
+
+         it 'should know its dotted items' do
+           expect(subject.dotted_items.size).to eq(8)
+         end
+
+         it 'should have its start mapping initialized' do
+           expect(subject.start_mapping.size).to eq(2)
+
+           start_items_S = subject.start_mapping[nt_S]
+           expect(start_items_S.size).to eq(1)
+           expect(start_items_S[0].production).to eq(prod_S)
+
+           start_items_A = subject.start_mapping[nt_A]
+           expect(start_items_A.size).to eq(2)
+
+           # Assuming that dotted_items are created in the same order
+           # as the productions in the grammar.
+           expect(start_items_A[0].production).to eq(prod_A1)
+           expect(start_items_A[1].production).to eq(prod_A2)
+         end
+
+         it 'should have its next mapping initialized' do
+           expect(subject.next_mapping.size).to eq(5)
+         end
+       end # context
+
+       context 'Parsing: ' do
+         # Helper method. Compares the data from the parse state
+         # with the values from the expectation hash.
+         def compare_state(aState, expectations)
+           expect(aState.origin).to eq(expectations[:origin])
+           dotted_item = aState.dotted_rule
+           expect(dotted_item.production).to eq(expectations[:production])
+           expect(dotted_item.position).to eq(expectations[:dot])
+         end
+
+         it 'should parse a valid simple input' do
+           parse_result = subject.parse(grm1_tokens)
+           expect(parse_result.success?).to eq(true)
+
+           ######################
+           state_set_0 = parse_result.chart[0]
+           # Expectation chart[0]:
+           # S -> . A, 0          # start rule
+           # A -> . "a" A "c", 0  # predict from 0
+           # A -> . "b", 0        # predict from 0
+           expectations = { origin: 0, production: prod_S, dot: 0 }
+           compare_state(state_set_0.states[0], expectations)
+
+           expectations = { origin: 0, production: prod_A1, dot: 0 }
+           compare_state(state_set_0.states[1], expectations)
+
+           expectations = { origin: 0, production: prod_A2, dot: 0 }
+           compare_state(state_set_0.states[2], expectations)
+
+           ######################
+           state_set_1 = parse_result.chart[1]
+           expect(state_set_1.states.size).to eq(3)
+           # Expectation chart[1]:
+           # 0: A -> "a" . A "c", 0  # scan from S(0) 1
+           # 1: A -> . "a" A "c", 1  # predict from 0
+           # 2: A -> . "b", 1        # predict from 0
+           expectations = { origin: 0, production: prod_A1, dot: 1 }
+           compare_state(state_set_1.states[0], expectations)
+
+           expectations = { origin: 1, production: prod_A1, dot: 0 }
+           compare_state(state_set_1.states[1], expectations)
+
+           expectations = { origin: 1, production: prod_A2, dot: 0 }
+           compare_state(state_set_1.states[2], expectations)
+
+           ######################
+           state_set_2 = parse_result.chart[2]
+           expect(state_set_2.states.size).to eq(3)
+           # Expectation chart[2]:
+           # 0: A -> "a" . A "c", 1  # scan from S(1) 1
+           # 1: A -> . "a" A "c", 2  # predict from 0
+           # 2: A -> . "b", 2        # predict from 0
+           expectations = { origin: 1, production: prod_A1, dot: 1 }
+           compare_state(state_set_2.states[0], expectations)
+
+           expectations = { origin: 2, production: prod_A1, dot: 0 }
+           compare_state(state_set_2.states[1], expectations)
+
+           expectations = { origin: 2, production: prod_A2, dot: 0 }
+           compare_state(state_set_2.states[2], expectations)
+
+           ######################
+           state_set_3 = parse_result.chart[3]
+           expect(state_set_3.states.size).to eq(2)
+           # Expectation chart[3]:
+           # 0: A -> "b" ., 2        # scan from S(2) 2
+           # 1: A -> "a" A . "c", 1  # complete from 0 and S(2) 0
+           expectations = { origin: 2, production: prod_A2, dot: -1 }
+           compare_state(state_set_3.states[0], expectations)
+
+           expectations = { origin: 1, production: prod_A1, dot: 2 }
+           compare_state(state_set_3.states[1], expectations)
+
+           ######################
+           state_set_4 = parse_result.chart[4]
+           expect(state_set_4.states.size).to eq(2)
+           # Expectation chart[4]:
+           # 0: A -> "a" A "c" ., 1  # scan from S(3) 1
+           # 1: A -> "a" A . "c", 0  # complete from 0 and S(1) 0
+           expectations = { origin: 1, production: prod_A1, dot: -1 }
+           compare_state(state_set_4.states[0], expectations)
+
+           expectations = { origin: 0, production: prod_A1, dot: 2 }
+           compare_state(state_set_4.states[1], expectations)
+
+           ######################
+           state_set_5 = parse_result.chart[5]
+           expect(state_set_5.states.size).to eq(2)
+           # Expectation chart[5]:
+           # 0: A -> "a" A "c" ., 0  # scan from S(4) 1
+           # 1: S -> A ., 0          # complete from 0 and S(0) 0
+           expectations = { origin: 0, production: prod_A1, dot: -1 }
+           compare_state(state_set_5.states[0], expectations)
+
+           expectations = { origin: 0, production: prod_S, dot: -1 }
+           compare_state(state_set_5.states[1], expectations)
+         end
+
+         it 'should parse an invalid simple input' do
+           # Parse an erroneous input (b is missing)
+           wrong = [
+             Token.new('a', a_),
+             Token.new('a', a_),
+             Token.new('c', c_),
+             Token.new('c', c_)
+           ]
+           parse_result = subject.parse(wrong)
+           expect(parse_result.success?).to eq(false)
+
+           ###################### S(0) == . a a c c
+           state_set_0 = parse_result.chart[0]
+           # Expectation chart[0]:
+           # S -> . A, 0          # start rule
+           # A -> . "a" A "c", 0
+           # A -> . "b", 0
+           expectations = { origin: 0, production: prod_S, dot: 0 }
+           compare_state(state_set_0.states[0], expectations)
+
+           expectations = { origin: 0, production: prod_A1, dot: 0 }
+           compare_state(state_set_0.states[1], expectations)
+
+           expectations = { origin: 0, production: prod_A2, dot: 0 }
+           compare_state(state_set_0.states[2], expectations)
+
+           ###################### S(1) == a . a c c
+           state_set_1 = parse_result.chart[1]
+           expect(state_set_1.states.size).to eq(3)
+           # Expectation chart[1]:
+           # 0: A -> "a" . A "c", 0  # scan from S(0) 1
+           # 1: A -> . "a" A "c", 1  # predict from 0
+           # 2: A -> . "b", 1        # predict from 0
+           expectations = { origin: 0, production: prod_A1, dot: 1 }
+           compare_state(state_set_1.states[0], expectations)
+
+           expectations = { origin: 1, production: prod_A1, dot: 0 }
+           compare_state(state_set_1.states[1], expectations)
+
+           expectations = { origin: 1, production: prod_A2, dot: 0 }
+           compare_state(state_set_1.states[2], expectations)
+
+           ###################### S(2) == a a . c c
+           state_set_2 = parse_result.chart[2]
+           expect(state_set_2.states.size).to eq(3)
+           # Expectation chart[2]:
+           # 0: A -> "a" . A "c", 1  # scan from S(1) 1
+           # 1: A -> . "a" A "c", 2  # predict from 0
+           # 2: A -> . "b", 2        # predict from 0
+           expectations = { origin: 1, production: prod_A1, dot: 1 }
+           compare_state(state_set_2.states[0], expectations)
+
+           expectations = { origin: 2, production: prod_A1, dot: 0 }
+           compare_state(state_set_2.states[1], expectations)
+
+           expectations = { origin: 2, production: prod_A2, dot: 0 }
+           compare_state(state_set_2.states[2], expectations)
+
+           ###################### S(3) == a a c? c
+           state_set_3 = parse_result.chart[3]
+           expect(state_set_3.states).to be_empty # This is an error symptom
+         end
+       end # context
+
+     end # describe
+
+   end # module
+ end # module
+
+ # End of file
+
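Not part of the diff: an end-to-end sketch distilled from the spec above. It builds the same grammar_abc, hand-tokenizes the input "a a b c c", and runs the Earley parser. The require paths and the terminal-lookup hash are assumptions for a standalone script run from the gem's data/ directory; the constructors, parse call, and chart expectations come from the spec itself.

require_relative 'lib/rley/syntax/non_terminal'
require_relative 'lib/rley/syntax/verbatim_symbol'
require_relative 'lib/rley/syntax/production'
require_relative 'lib/rley/syntax/grammar'
require_relative 'lib/rley/parser/token'
require_relative 'lib/rley/parser/earley_parser'

# Grammar 1 from the spec:  S ::= A.  A ::= "a" A "c".  A ::= "b".
nt_s = Rley::Syntax::NonTerminal.new('S')
nt_a = Rley::Syntax::NonTerminal.new('A')
a_   = Rley::Syntax::VerbatimSymbol.new('a')
b_   = Rley::Syntax::VerbatimSymbol.new('b')
c_   = Rley::Syntax::VerbatimSymbol.new('c')

grammar_abc = Rley::Syntax::Grammar.new([
  Rley::Syntax::Production.new(nt_s, [nt_a]),
  Rley::Syntax::Production.new(nt_a, [a_, nt_a, c_]),
  Rley::Syntax::Production.new(nt_a, [b_])
])

# Hand-built token sequence for the input "a a b c c" (mirrors grm1_tokens).
terminals = { 'a' => a_, 'b' => b_, 'c' => c_ }
tokens = %w[a a b c c].map { |lexeme| Rley::Parser::Token.new(lexeme, terminals[lexeme]) }

result = Rley::Parser::EarleyParser.new(grammar_abc).parse(tokens)
result.success?              # => true
result.chart[5].states.size  # => 2 (A -> "a" A "c" . and S -> A ., per the spec)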