rley 0.5.01 → 0.5.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/examples/data_formats/JSON/cli_options.rb +25 -9
  4. data/examples/data_formats/JSON/json_ast_builder.rb +152 -0
  5. data/examples/data_formats/JSON/json_ast_nodes.rb +141 -0
  6. data/examples/data_formats/JSON/json_demo.rb +24 -8
  7. data/examples/general/calc_iter1/calc_ast_builder.rb +142 -0
  8. data/examples/general/calc_iter1/calc_ast_nodes.rb +151 -0
  9. data/examples/general/calc_iter1/calc_demo.rb +38 -0
  10. data/examples/general/calc_iter1/calc_grammar.rb +25 -0
  11. data/examples/general/calc_iter1/calc_lexer.rb +81 -0
  12. data/examples/general/{calc → calc_iter1}/calc_parser.rb +0 -0
  13. data/examples/general/calc_iter1/spec/calculator_spec.rb +73 -0
  14. data/examples/general/calc_iter2/calc_ast_builder.rb +186 -0
  15. data/examples/general/calc_iter2/calc_ast_nodes.rb +151 -0
  16. data/examples/general/{calc → calc_iter2}/calc_demo.rb +3 -2
  17. data/examples/general/{calc → calc_iter2}/calc_grammar.rb +0 -0
  18. data/examples/general/calc_iter2/calc_lexer.rb +81 -0
  19. data/examples/general/calc_iter2/calc_parser.rb +24 -0
  20. data/lib/rley.rb +1 -0
  21. data/lib/rley/constants.rb +1 -1
  22. data/lib/rley/parser/cst_builder.rb +5 -225
  23. data/lib/rley/parser/gfg_parsing.rb +2 -2
  24. data/lib/rley/parser/parse_forest_factory.rb +1 -1
  25. data/lib/rley/parser/parse_rep_creator.rb +2 -2
  26. data/lib/rley/parser/parse_tree_builder.rb +161 -104
  27. data/lib/rley/parser/parse_tree_factory.rb +6 -2
  28. data/spec/rley/parser/ast_builder_spec.rb +395 -0
  29. data/spec/rley/support/grammar_arr_int_helper.rb +21 -11
  30. metadata +20 -9
  31. data/examples/general/calc/calc_lexer.rb +0 -90
  32. data/spec/rley/parser/parse_tree_builder_spec.rb +0 -249
@@ -1,3 +1,5 @@
1
+ require 'strscan'
2
+
1
3
  # Load the builder class
2
4
  require_relative '../../../lib/rley/syntax/grammar_builder'
3
5
  require_relative '../../../lib/rley/tokens/token'
@@ -14,7 +16,7 @@ module GrammarArrIntHelper
14
16
  rule 'arr' => %w( [ sequence ] )
15
17
  rule 'sequence' => ['list']
16
18
  rule 'sequence' => []
17
- rule 'list' => %w[list , integer]
19
+ rule 'list' => %w[list , integer] # Left-recursive rule
18
20
  rule 'list' => 'integer'
19
21
  end
20
22
  builder
@@ -22,17 +24,25 @@ module GrammarArrIntHelper
22
24
 
23
25
  # Basic tokenizer for array of integers
24
26
  def arr_int_tokenizer(aText, aGrammar)
25
- tokens = aText.scan(/\S+/).map do |lexeme|
26
- case lexeme
27
- when '[', ']', ','
28
- terminal = aGrammar.name2symbol[lexeme]
29
- when /^[-+]?\d+$/
30
- terminal = aGrammar.name2symbol['integer']
31
- else
32
- msg = "Unknown input text '#{lexeme}'"
33
- raise StandardError, msg
27
+ tokens = []
28
+ scanner = StringScanner.new(aText)
29
+
30
+ until scanner.eos? do
31
+ scanner.skip(/\s+/)
32
+ lexeme = scanner.scan(/[\[,\]]/)
33
+ if lexeme
34
+ terminal = aGrammar.name2symbol[lexeme]
35
+ tokens << Rley::Tokens::Token.new(lexeme, terminal)
36
+ next
37
+ end
38
+ lexeme = scanner.scan(/^[-+]?\d+/)
39
+ if lexeme
40
+ terminal = aGrammar.name2symbol['integer']
41
+ tokens << Rley::Tokens::Token.new(lexeme, terminal)
42
+ else
43
+ msg = "Unknown input text '#{lexeme}'"
44
+ raise StandardError, msg
34
45
  end
35
- Rley::Tokens::Token.new(lexeme, terminal)
36
46
  end
37
47
 
38
48
  return tokens
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.01
4
+ version: 0.5.02
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-20 00:00:00.000000000 Z
11
+ date: 2017-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls
@@ -132,15 +132,26 @@ files:
132
132
  - appveyor.yml
133
133
  - examples/NLP/mini_en_demo.rb
134
134
  - examples/data_formats/JSON/cli_options.rb
135
+ - examples/data_formats/JSON/json_ast_builder.rb
136
+ - examples/data_formats/JSON/json_ast_nodes.rb
135
137
  - examples/data_formats/JSON/json_demo.rb
136
138
  - examples/data_formats/JSON/json_grammar.rb
137
139
  - examples/data_formats/JSON/json_lexer.rb
138
140
  - examples/data_formats/JSON/json_minifier.rb
139
141
  - examples/data_formats/JSON/json_parser.rb
140
- - examples/general/calc/calc_demo.rb
141
- - examples/general/calc/calc_grammar.rb
142
- - examples/general/calc/calc_lexer.rb
143
- - examples/general/calc/calc_parser.rb
142
+ - examples/general/calc_iter1/calc_ast_builder.rb
143
+ - examples/general/calc_iter1/calc_ast_nodes.rb
144
+ - examples/general/calc_iter1/calc_demo.rb
145
+ - examples/general/calc_iter1/calc_grammar.rb
146
+ - examples/general/calc_iter1/calc_lexer.rb
147
+ - examples/general/calc_iter1/calc_parser.rb
148
+ - examples/general/calc_iter1/spec/calculator_spec.rb
149
+ - examples/general/calc_iter2/calc_ast_builder.rb
150
+ - examples/general/calc_iter2/calc_ast_nodes.rb
151
+ - examples/general/calc_iter2/calc_demo.rb
152
+ - examples/general/calc_iter2/calc_grammar.rb
153
+ - examples/general/calc_iter2/calc_lexer.rb
154
+ - examples/general/calc_iter2/calc_parser.rb
144
155
  - lib/rley.rb
145
156
  - lib/rley/constants.rb
146
157
  - lib/rley/formatter/asciitree.rb
@@ -226,6 +237,7 @@ files:
226
237
  - spec/rley/parse_forest_visitor_spec.rb
227
238
  - spec/rley/parse_tree_visitor_spec.rb
228
239
  - spec/rley/parser/ambiguous_parse_spec.rb
240
+ - spec/rley/parser/ast_builder_spec.rb
229
241
  - spec/rley/parser/cst_builder_spec.rb
230
242
  - spec/rley/parser/dotted_item_spec.rb
231
243
  - spec/rley/parser/error_reason_spec.rb
@@ -240,7 +252,6 @@ files:
240
252
  - spec/rley/parser/parse_forest_factory_spec.rb
241
253
  - spec/rley/parser/parse_state_spec.rb
242
254
  - spec/rley/parser/parse_tracer_spec.rb
243
- - spec/rley/parser/parse_tree_builder_spec.rb
244
255
  - spec/rley/parser/parse_tree_factory_spec.rb
245
256
  - spec/rley/parser/parse_walker_factory_spec.rb
246
257
  - spec/rley/parser/state_set_spec.rb
@@ -296,7 +307,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
296
307
  version: '0'
297
308
  requirements: []
298
309
  rubyforge_project:
299
- rubygems_version: 2.6.7
310
+ rubygems_version: 2.6.13
300
311
  signing_key:
301
312
  specification_version: 4
302
313
  summary: Ruby implementation of the Earley's parsing algorithm
@@ -318,6 +329,7 @@ test_files:
318
329
  - spec/rley/gfg/start_vertex_spec.rb
319
330
  - spec/rley/gfg/vertex_spec.rb
320
331
  - spec/rley/parser/ambiguous_parse_spec.rb
332
+ - spec/rley/parser/ast_builder_spec.rb
321
333
  - spec/rley/parser/cst_builder_spec.rb
322
334
  - spec/rley/parser/dotted_item_spec.rb
323
335
  - spec/rley/parser/error_reason_spec.rb
@@ -332,7 +344,6 @@ test_files:
332
344
  - spec/rley/parser/parse_forest_factory_spec.rb
333
345
  - spec/rley/parser/parse_state_spec.rb
334
346
  - spec/rley/parser/parse_tracer_spec.rb
335
- - spec/rley/parser/parse_tree_builder_spec.rb
336
347
  - spec/rley/parser/parse_tree_factory_spec.rb
337
348
  - spec/rley/parser/parse_walker_factory_spec.rb
338
349
  - spec/rley/parser/state_set_spec.rb
@@ -1,90 +0,0 @@
1
- # File: calc_lexer.rb
2
- # Lexer for a basic arithmetical expression parser
3
- require 'strscan'
4
- require 'rley' # Load the gem
5
-
6
-
7
- class CalcLexer
8
- attr_reader(:scanner)
9
- attr_reader(:lineno)
10
- attr_reader(:line_start)
11
- attr_reader(:name2symbol)
12
-
13
- @@lexeme2name = {
14
- '(' => 'LPAREN',
15
- ')' => 'RPAREN',
16
- '+' => 'PLUS',
17
- '-' => 'MINUS',
18
- '*' => 'STAR',
19
- '/' => 'DIVIDE'
20
- }.freeze
21
-
22
- class ScanError < StandardError; end
23
-
24
- def initialize(source, aGrammar)
25
- @scanner = StringScanner.new(source)
26
- @name2symbol = aGrammar.name2symbol
27
- @lineno = 1
28
- end
29
-
30
- def tokens()
31
- tok_sequence = []
32
- until @scanner.eos?
33
- token = _next_token
34
- tok_sequence << token unless token.nil?
35
- end
36
-
37
- return tok_sequence
38
- end
39
-
40
- private
41
-
42
- def _next_token()
43
- token = nil
44
- skip_whitespaces
45
- curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
46
-
47
- loop do
48
- break if curr_ch.nil?
49
-
50
- case curr_ch
51
- when '(', ')', '+', '-', '*', '/'
52
- type_name = @@lexeme2name[curr_ch]
53
- token_type = name2symbol[type_name]
54
- token = Rley::Tokens::Token.new(curr_ch, token_type)
55
-
56
- # LITERALS
57
- when /[-0-9]/ # Start character of number literal found
58
- @scanner.pos = scanner.pos - 1 # Simulate putback
59
- value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/)
60
- token_type = name2symbol['NUMBER']
61
- token = Rley::Tokens::Token.new(value, token_type)
62
-
63
-
64
- else # Unknown token
65
- erroneous = curr_ch.nil? ? '' : curr_ch
66
- sequel = scanner.scan(/.{1,20}/)
67
- erroneous += sequel unless sequel.nil?
68
- raise ScanError.new("Unknown token #{erroneous}")
69
- end # case
70
-
71
- break unless token.nil? && (curr_ch = scanner.getch)
72
- end
73
-
74
- return token
75
- end
76
-
77
- def skip_whitespaces()
78
- matched = scanner.scan(/[ \t\f\n\r]+/)
79
- return if matched.nil?
80
-
81
- newline_count = 0
82
- matched.scan(/\n\r?|\r/) { |_| newline_count += 1 }
83
- newline_detected(newline_count)
84
- end
85
-
86
- def newline_detected(count)
87
- @lineno += count
88
- @line_start = scanner.pos
89
- end
90
- end # class
@@ -1,249 +0,0 @@
1
- require_relative '../../spec_helper'
2
-
3
- require_relative '../../../lib/rley/parser/gfg_earley_parser'
4
- require_relative '../../../lib/rley/parser/parse_walker_factory'
5
-
6
- require_relative '../support/expectation_helper'
7
- require_relative '../support/grammar_b_expr_helper'
8
-
9
- # Load the class under test
10
- require_relative '../../../lib/rley/parser/parse_tree_builder'
11
-
12
- module Rley # Open this namespace to avoid module qualifier prefixes
13
- module Parser
14
- describe ParseTreeBuilder do
15
- include ExpectationHelper # Mix-in with expectation on parse entry sets
16
- include GrammarBExprHelper # Mix-in for basic arithmetic language
17
-
18
- let(:sample_grammar) do
19
- builder = grammar_expr_builder
20
- builder.grammar
21
- end
22
-
23
- let(:sample_tokens) do
24
- expr_tokenizer('2 + 3 * 4', sample_grammar)
25
- end
26
-
27
- let(:sample_result) do
28
- parser = Parser::GFGEarleyParser.new(sample_grammar)
29
- parser.parse(sample_tokens)
30
- end
31
-
32
- subject { ParseTreeBuilder.new(sample_tokens) }
33
-
34
- # Emit a text representation of the current path.
35
- def path_to_s()
36
- text_parts = subject.curr_path.map(&:to_s)
37
- return text_parts.join('/')
38
- end
39
-
40
- def next_event(eventType, anEntryText)
41
- event = @walker.next
42
- subject.receive_event(*event)
43
- expect(event[0]).to eq(eventType)
44
- expect(event[1].to_s).to eq(anEntryText)
45
- end
46
-
47
- def expected_curr_parent(anExpectation)
48
- expect(subject.curr_parent.to_string(0)).to eq(anExpectation)
49
- end
50
-
51
- def expected_curr_path(anExpectation)
52
- expect(path_to_s).to eq(anExpectation)
53
- end
54
-
55
- def expected_first_child(anExpectation)
56
- child = subject.curr_parent.subnodes.first
57
- expect(child.to_string(0)).to eq(anExpectation)
58
- end
59
-
60
- context 'Initialization:' do
61
- it 'should be created with a sequence of tokens' do
62
- expect { ParseTreeBuilder.new(sample_tokens) }.not_to raise_error
63
- end
64
-
65
- it 'should know the input tokens' do
66
- expect(subject.tokens).to eq(sample_tokens)
67
- end
68
-
69
- it 'should have an empty path' do
70
- expect(subject.curr_path).to be_empty
71
- end
72
- end # context
73
-
74
- context 'Parse tree construction:' do
75
- before(:each) do
76
- factory = ParseWalkerFactory.new
77
- accept_entry = sample_result.accepting_entry
78
- accept_index = sample_result.chart.last_index
79
- @walker = factory.build_walker(accept_entry, accept_index)
80
- end
81
-
82
- it 'should initialize the root node' do
83
- next_event(:visit, 'P. | 0')
84
- tree = subject.result
85
-
86
- expect(tree.root.to_string(0)).to eq('P[0, 5]')
87
- expected_curr_path('P[0, 5]')
88
- end
89
-
90
- it 'should initialize the first child of the root node' do
91
- next_event(:visit, 'P. | 0') # Event 1
92
- next_event(:visit, 'P => S . | 0') # Event 2
93
- next_event(:visit, 'S. | 0') # Event 3
94
- next_event(:visit, 'S => S + M . | 0') # Event 4
95
- expected_curr_path('P[0, 5]/S[0, 5]')
96
- next_event(:visit, 'M. | 2') # Event 5
97
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]')
98
- next_event(:visit, 'M => M * T . | 2') # Event 6
99
- next_event(:visit, 'T. | 4') # Event 7
100
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/T[4, 5]')
101
- next_event(:visit, 'T => integer . | 4') # Event 8
102
- end
103
-
104
- it 'should build token node when scan edge was detected' do
105
- 8.times do
106
- event = @walker.next
107
- subject.receive_event(*event)
108
- end
109
-
110
- next_event(:visit, 'T => . integer | 4') # Event 9
111
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/T[4, 5]')
112
- expected_first_child("integer[4, 5]: '4'")
113
- expect(subject.curr_parent.subnodes.size).to eq(1)
114
- end
115
-
116
- it 'should handle the remaining events' do
117
- 9.times do
118
- event = @walker.next
119
- subject.receive_event(*event)
120
- end
121
-
122
- next_event(:visit, '.T | 4') # Event 10
123
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]')
124
-
125
- next_event(:visit, 'M => M * . T | 2') # Event 11
126
-
127
- next_event(:visit, 'M => M . * T | 2') # Event 12
128
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]')
129
- expect(subject.curr_parent.subnodes.size).to eq(2)
130
- expected_first_child("*[3, 4]: '*'")
131
-
132
- next_event(:visit, 'M. | 2') # Event 13
133
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]')
134
-
135
- next_event(:visit, 'M => T . | 2') # Event 14
136
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]')
137
-
138
- next_event(:visit, 'T. | 2') # Event 15
139
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]/T[2, 3]')
140
-
141
- next_event(:visit, 'T => integer . | 2') # Event 16
142
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]/T[2, 3]')
143
- expect(subject.curr_parent.subnodes.size).to eq(1)
144
- expected_first_child("integer[2, 3]: '3'")
145
-
146
- next_event(:visit, 'T => . integer | 2') # Event 17
147
-
148
- next_event(:visit, '.T | 2') # Event 18
149
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]')
150
-
151
- next_event(:visit, 'M => . T | 2') # Event 19
152
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]/M[2, 3]')
153
-
154
- next_event(:visit, '.M | 2') # Event 20
155
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]')
156
-
157
- next_event(:visit, 'M => . M * T | 2') # Event 21
158
- expected_curr_path('P[0, 5]/S[0, 5]/M[2, 5]')
159
-
160
- next_event(:revisit, '.M | 2') # Revisit Event 22
161
- expected_curr_path('P[0, 5]/S[0, 5]')
162
-
163
- next_event(:visit, 'S => S + . M | 0') # Event 23
164
- expected_curr_path('P[0, 5]/S[0, 5]')
165
-
166
- next_event(:visit, 'S => S . + M | 0') # Event 24
167
- expected_curr_path('P[0, 5]/S[0, 5]')
168
- expect(subject.curr_parent.subnodes.size).to eq(2)
169
- expected_first_child("+[1, 2]: '+'")
170
-
171
- next_event(:visit, 'S. | 0') # Event 25
172
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]')
173
-
174
- next_event(:visit, 'S => M . | 0') # Event 26
175
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]')
176
-
177
- next_event(:visit, 'M. | 0') # Event 27
178
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]')
179
-
180
- next_event(:visit, 'M => T . | 0') # Event 28
181
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]')
182
-
183
- next_event(:visit, 'T. | 0') # Event 29
184
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]/T[0, 1]')
185
-
186
- next_event(:visit, 'T => integer . | 0') # Event 30
187
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]/T[0, 1]')
188
-
189
- next_event(:visit, 'T => . integer | 0') # Event 31
190
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]/T[0, 1]')
191
- expect(subject.curr_parent.subnodes.size).to eq(1)
192
- expected_first_child("integer[0, 1]: '2'")
193
-
194
- next_event(:visit, '.T | 0') # Event 32
195
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]')
196
-
197
- next_event(:visit, 'M => . T | 0') # Event 33
198
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]/M[0, 1]')
199
-
200
- next_event(:visit, '.M | 0') # Event 34
201
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]')
202
-
203
- next_event(:visit, 'S => . M | 0') # Event 35
204
- expected_curr_path('P[0, 5]/S[0, 5]/S[0, 1]')
205
-
206
- next_event(:visit, '.S | 0') # Event 36
207
- expected_curr_path('P[0, 5]/S[0, 5]')
208
-
209
- next_event(:visit, 'S => . S + M | 0') # Event 37
210
- expected_curr_path('P[0, 5]/S[0, 5]')
211
-
212
- next_event(:revisit, '.S | 0') # Event 38
213
- expected_curr_path('P[0, 5]')
214
-
215
- next_event(:visit, 'P => . S | 0') # Event 39
216
- expected_curr_path('P[0, 5]')
217
-
218
- next_event(:visit, '.P | 0') # Event 40
219
- expect(path_to_s).to be_empty
220
- end
221
-
222
- it 'should build parse trees' do
223
- loop do
224
- event = @walker.next
225
- subject.receive_event(*event)
226
- break if path_to_s.empty?
227
- end
228
-
229
- # Lightweight sanity check
230
- expect(subject.result).not_to be_nil
231
- expect(subject.result).to be_kind_of(PTree::ParseTree)
232
- expect(subject.result.root.to_s).to eq('P[0, 5]')
233
- expect(subject.result.root.subnodes.size).to eq(1)
234
- child_node = subject.result.root.subnodes[0]
235
- expect(child_node.to_s).to eq('S[0, 5]')
236
-
237
- expect(child_node.subnodes.size).to eq(3)
238
- first_grandchild = child_node.subnodes[0]
239
- expect(first_grandchild.to_s).to eq('S[0, 1]')
240
- second_grandchild = child_node.subnodes[1]
241
- expect(second_grandchild.to_s).to eq("+[1, 2]: '+'")
242
- third_grandchild = child_node.subnodes[2]
243
- expect(third_grandchild.to_s).to eq('M[2, 5]')
244
- end
245
- end # context
246
- end # describe
247
- end # module
248
- end # module
249
- # End of file