rley 0.8.06 → 0.8.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +23 -2
- data/CHANGELOG.md +21 -1
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +1 -3
- data/examples/NLP/benchmark_pico_en.rb +6 -6
- data/examples/NLP/engtagger.rb +6 -6
- data/examples/general/calc_iter1/calc_lexer.rb +1 -1
- data/examples/general/calc_iter2/calc_lexer.rb +1 -1
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
- data/examples/tokenizer/run_tokenizer.rb +1 -1
- data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +3 -3
- data/lib/rley/lexical/token.rb +1 -1
- data/lib/rley/ptree/non_terminal_node.rb +1 -1
- data/lib/rley/rgn/all_notation_nodes.rb +5 -0
- data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
- data/lib/rley/{notation → rgn}/ast_node.rb +13 -12
- data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
- data/lib/rley/rgn/composite_node.rb +28 -0
- data/lib/rley/{notation → rgn}/grammar.rb +1 -1
- data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
- data/lib/rley/{notation → rgn}/parser.rb +7 -7
- data/lib/rley/rgn/repetition_node.rb +62 -0
- data/lib/rley/rgn/sequence_node.rb +30 -0
- data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
- data/lib/rley/{notation → rgn}/tokenizer.rb +71 -60
- data/lib/rley/syntax/grm_symbol.rb +0 -4
- data/lib/rley/syntax/non_terminal.rb +4 -0
- data/lib/rley/syntax/terminal.rb +10 -6
- data/spec/rley/parser/dangling_else_spec.rb +3 -3
- data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -50
- data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
- data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
- data/spec/rley/rgn/repetition_node_spec.rb +56 -0
- data/spec/rley/rgn/sequence_node_spec.rb +48 -0
- data/spec/rley/rgn/symbol_node_spec.rb +33 -0
- data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
- metadata +40 -33
- data/lib/rley/notation/all_notation_nodes.rb +0 -4
- data/lib/rley/notation/grouping_node.rb +0 -23
- data/lib/rley/notation/sequence_node.rb +0 -35
@@ -7,8 +7,9 @@ require_relative 'ast_visitor'
|
|
7
7
|
require_relative '../syntax/match_closest'
|
8
8
|
|
9
9
|
module Rley # This module is used as a namespace
|
10
|
-
|
11
|
-
|
10
|
+
# Namespace for classes that define RGN (Rley Grammar Notation)
|
11
|
+
module RGN # This module is used as a namespace
|
12
|
+
# Structure used by Rley to generate implicit production rules.
|
12
13
|
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
14
|
|
14
15
|
# Builder GoF pattern. Builder builds a complex object
|
@@ -19,7 +20,7 @@ module Rley # This module is used as a namespace
|
|
19
20
|
# to the matching grammar symbol object.
|
20
21
|
attr_reader(:symbols)
|
21
22
|
|
22
|
-
# @return [
|
23
|
+
# @return [RGN::Parser] Parser for the right-side of productions
|
23
24
|
attr_reader(:parser)
|
24
25
|
|
25
26
|
# @return [Hash{ASTVisitor, Array}]
|
@@ -32,21 +33,12 @@ module Rley # This module is used as a namespace
|
|
32
33
|
# @return [Hash{String, String}] The synthesized raw productions
|
33
34
|
attr_reader(:synthetized)
|
34
35
|
|
35
|
-
# Creates a new grammar builder.
|
36
|
+
# Creates a new RGN grammar builder.
|
36
37
|
# @param aBlock [Proc] code block used to build the grammar.
|
37
|
-
# @example Building a tiny English grammar
|
38
|
-
# builder = Rley::Notation::GrammarBuilder.new do
|
39
|
-
# add_terminals('n', 'v', 'adj', 'det')
|
40
|
-
# rule 'S' => 'NP VP'
|
41
|
-
# rule 'VP' => 'v NP'
|
42
|
-
# rule 'NP' => 'det n'
|
43
|
-
# rule 'NP' => 'adj NP'
|
44
|
-
# end
|
45
|
-
# tiny_eng = builder.grammar
|
46
38
|
def initialize(&aBlock)
|
47
39
|
@symbols = {}
|
48
40
|
@productions = []
|
49
|
-
@parser =
|
41
|
+
@parser = RGN::Parser.new
|
50
42
|
@visitor2rhs = {}
|
51
43
|
@synthetized = {}
|
52
44
|
|
@@ -73,7 +65,7 @@ module Rley # This module is used as a namespace
|
|
73
65
|
end
|
74
66
|
|
75
67
|
# Add the given marker symbol to the grammar of the language
|
76
|
-
# @param aMarkerSymbol [String] A
|
68
|
+
# @param aMarkerSymbol [String] A marker symbol
|
77
69
|
# @return [void]
|
78
70
|
def add_marker(aMarkerSymbol)
|
79
71
|
new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
|
@@ -227,105 +219,64 @@ module Rley # This module is used as a namespace
|
|
227
219
|
# ################################
|
228
220
|
def after_symbol_node(aSymbolNode, aVisitor)
|
229
221
|
symb_name = aSymbolNode.name
|
230
|
-
|
231
|
-
case aSymbolNode.repetition
|
232
|
-
when :zero_or_one
|
233
|
-
# implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
|
234
|
-
# implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
|
235
|
-
name_modified = "#{symb_name}#{suffix_qmark}"
|
236
|
-
unless symbols.include? name_modified
|
237
|
-
add_nonterminal(name_modified)
|
238
|
-
add_raw_rule(name_modified, symb_name, suffix_qmark_one)
|
239
|
-
add_raw_rule(name_modified, '', suffix_qmark_none)
|
240
|
-
end
|
241
|
-
symb_name = name_modified
|
242
|
-
|
243
|
-
when :zero_or_more
|
244
|
-
# implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
|
245
|
-
# implicitly called: rule('symb_name_star' => '').tag suffix_star_none
|
246
|
-
name_modified = "#{symb_name}#{suffix_star}"
|
247
|
-
unless symbols.include? name_modified
|
248
|
-
add_nonterminal(name_modified)
|
249
|
-
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
|
250
|
-
add_raw_rule(name_modified, [], suffix_star_none)
|
251
|
-
end
|
252
|
-
symb_name = name_modified
|
253
|
-
|
254
|
-
when :exactly_one
|
255
|
-
# Do nothing
|
256
|
-
|
257
|
-
when :one_or_more
|
258
|
-
name_modified = "#{symb_name}#{suffix_plus}"
|
259
|
-
unless symbols.include? name_modified
|
260
|
-
add_nonterminal(name_modified)
|
261
|
-
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
|
262
|
-
add_raw_rule(name_modified, symb_name, suffix_plus_one)
|
263
|
-
end
|
264
|
-
symb_name = name_modified
|
265
|
-
else
|
266
|
-
raise StandardError, 'Unhandled multiplicity'
|
267
|
-
end
|
268
|
-
|
269
222
|
symb = get_grm_symbol(symb_name)
|
270
223
|
visitor2rhs[aVisitor] << symb
|
271
224
|
end
|
272
225
|
|
273
226
|
def after_sequence_node(aSequenceNode, _visitor)
|
274
|
-
aSequenceNode
|
275
|
-
next if sn.annotation.empty?
|
276
|
-
|
277
|
-
matching = sn.annotation['match_closest']
|
278
|
-
aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
|
279
|
-
end
|
227
|
+
add_constraints(aSequenceNode)
|
280
228
|
end
|
281
229
|
|
282
|
-
def
|
283
|
-
|
284
|
-
|
230
|
+
def after_repetition_node(aRepNode, aVisitor)
|
231
|
+
add_constraints(aRepNode)
|
232
|
+
return if aRepNode.repetition == :exactly_one
|
285
233
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
234
|
+
node_name = aRepNode.name
|
235
|
+
child_name = aRepNode.subnodes[0].name
|
236
|
+
|
237
|
+
if aRepNode.child.is_a?(SequenceNode) &&
|
238
|
+
!symbols.include?(child_name) && aRepNode.repetition != :zero_or_one
|
239
|
+
add_nonterminal(child_name)
|
240
|
+
rhs = aRepNode.child.to_text
|
241
|
+
add_raw_rule(child_name, rhs, 'return_children', true)
|
290
242
|
end
|
291
|
-
name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
|
292
243
|
|
293
|
-
case
|
244
|
+
case aRepNode.repetition
|
294
245
|
when :zero_or_one
|
295
|
-
# implicitly called: rule('
|
296
|
-
# implicitly called: rule('
|
297
|
-
unless symbols.include?
|
298
|
-
add_nonterminal(
|
299
|
-
|
300
|
-
|
246
|
+
# implicitly called: rule('node_name_qmark' => 'node_name_qmark').tag suffix_qmark_one
|
247
|
+
# implicitly called: rule('node_name_qmark' => '').tag suffix_qmark_none
|
248
|
+
unless symbols.include? node_name
|
249
|
+
add_nonterminal(node_name)
|
250
|
+
if aRepNode.child.is_a?(SequenceNode) && !aRepNode.child.constraints.empty?
|
251
|
+
aRepNode.constraints.merge(aRepNode.child.constraints)
|
252
|
+
end
|
253
|
+
rhs = aRepNode.child.to_text
|
254
|
+
add_raw_rule(node_name, rhs, 'return_children', false, aRepNode.constraints)
|
255
|
+
add_raw_rule(node_name, [], suffix_qmark_none, true)
|
301
256
|
end
|
302
257
|
|
303
258
|
when :zero_or_more
|
304
|
-
# implicitly called: rule('
|
305
|
-
# implicitly called: rule('
|
306
|
-
unless symbols.include?
|
307
|
-
add_nonterminal(
|
308
|
-
|
309
|
-
add_raw_rule(
|
259
|
+
# implicitly called: rule('node_name_star' => 'node_name_star node_name').tag suffix_star_more
|
260
|
+
# implicitly called: rule('node_name_star' => '').tag suffix_star_none
|
261
|
+
unless symbols.include? node_name
|
262
|
+
add_nonterminal(node_name)
|
263
|
+
rhs = "#{node_name} #{child_name}"
|
264
|
+
add_raw_rule(node_name, rhs, suffix_star_more)
|
265
|
+
add_raw_rule(node_name, '', suffix_star_none)
|
310
266
|
end
|
311
267
|
|
312
|
-
when :exactly_one
|
313
|
-
# Do nothing
|
314
|
-
|
315
268
|
when :one_or_more
|
316
|
-
unless symbols.include?
|
317
|
-
add_nonterminal(
|
318
|
-
add_raw_rule(
|
319
|
-
add_raw_rule(
|
269
|
+
unless symbols.include? node_name
|
270
|
+
add_nonterminal(node_name)
|
271
|
+
add_raw_rule(node_name, "#{node_name} #{child_name}", suffix_plus_more)
|
272
|
+
add_raw_rule(node_name, child_name, suffix_plus_one)
|
320
273
|
end
|
321
274
|
else
|
322
275
|
raise StandardError, 'Unhandled multiplicity'
|
323
276
|
end
|
324
277
|
|
325
|
-
|
326
|
-
|
327
|
-
visitor2rhs[aVisitor] << symb
|
328
|
-
end
|
278
|
+
symb = get_grm_symbol(node_name)
|
279
|
+
visitor2rhs[aVisitor] << symb
|
329
280
|
end
|
330
281
|
|
331
282
|
# A notification to the builder object that the programmer
|
@@ -425,22 +376,33 @@ module Rley # This module is used as a namespace
|
|
425
376
|
symbols[name]
|
426
377
|
end
|
427
378
|
|
428
|
-
def
|
429
|
-
|
430
|
-
|
431
|
-
case subn
|
432
|
-
when SymbolNode
|
433
|
-
subnode_names << "_#{subn.name}"
|
434
|
-
when SequenceNode
|
435
|
-
subnode_names << "_#{sequence_name(subn)}"
|
436
|
-
end
|
437
|
-
suffix = repetition2suffix(subn.repetition)
|
438
|
-
subnode_names << suffix
|
439
|
-
end
|
379
|
+
def add_constraints(aCompositeNode)
|
380
|
+
aCompositeNode.subnodes.each_with_index do |sn, i|
|
381
|
+
next if sn.annotation.empty?
|
440
382
|
|
441
|
-
|
383
|
+
matching = sn.annotation['match_closest']
|
384
|
+
constraint = Syntax::MatchClosest.new(aCompositeNode, i, matching)
|
385
|
+
aCompositeNode.constraints << constraint
|
386
|
+
end
|
442
387
|
end
|
443
388
|
|
389
|
+
# def sequence_name(aSequenceNode)
|
390
|
+
# subnode_names = +''
|
391
|
+
# aSequenceNode.subnodes.each do |subn|
|
392
|
+
# case subn
|
393
|
+
# when SymbolNode
|
394
|
+
# subnode_names << "_#{subn.name}"
|
395
|
+
# when SequenceNode
|
396
|
+
# subnode_names << "_#{sequence_name(subn)}"
|
397
|
+
# when RepetitionNode
|
398
|
+
# suffix = repetition2suffix(subn.repetition)
|
399
|
+
# subnode_names << suffix
|
400
|
+
# end
|
401
|
+
# end
|
402
|
+
#
|
403
|
+
# "seq#{subnode_names}"
|
404
|
+
# end
|
405
|
+
|
444
406
|
def node_base_name(aNode)
|
445
407
|
if aNode.kind_of?(SymbolNode)
|
446
408
|
aNode.name
|
@@ -456,23 +418,23 @@ module Rley # This module is used as a namespace
|
|
456
418
|
"#{base_name}#{suffix}"
|
457
419
|
end
|
458
420
|
|
459
|
-
def serialize_sequence(aSequenceNode)
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
end
|
421
|
+
# def serialize_sequence(aSequenceNode)
|
422
|
+
# text = +''
|
423
|
+
# aSequenceNode.subnodes.each do |sn|
|
424
|
+
# text << ' '
|
425
|
+
# case sn
|
426
|
+
# when SymbolNode
|
427
|
+
# text << sn.name
|
428
|
+
# when SequenceNode
|
429
|
+
# text << sequence_name(sn)
|
430
|
+
# when RepetitionNode
|
431
|
+
# suffix = repetition2suffix(sn.repetition)
|
432
|
+
# text << suffix
|
433
|
+
# end
|
434
|
+
# end
|
435
|
+
#
|
436
|
+
# text.strip
|
437
|
+
# end
|
476
438
|
|
477
439
|
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
|
478
440
|
raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
|
@@ -484,7 +446,7 @@ module Rley # This module is used as a namespace
|
|
484
446
|
end
|
485
447
|
|
486
448
|
def process_raw_rules
|
487
|
-
until synthetized.empty?
|
449
|
+
until synthetized.empty?
|
488
450
|
raw_rules = synthetized.delete(synthetized.keys.first)
|
489
451
|
raw_rules.each do |raw|
|
490
452
|
new_prod = nil
|
@@ -494,7 +456,7 @@ module Rley # This module is used as a namespace
|
|
494
456
|
new_prod = rule(raw.lhs => raw.rhs)
|
495
457
|
end
|
496
458
|
new_prod.tag(raw.tag)
|
497
|
-
new_prod.constraints
|
459
|
+
new_prod.constraints.concat(raw.constraints)
|
498
460
|
end
|
499
461
|
end
|
500
462
|
end
|
@@ -5,8 +5,8 @@ require_relative 'grammar'
|
|
5
5
|
require_relative 'ast_builder'
|
6
6
|
|
7
7
|
module Rley
|
8
|
-
module
|
9
|
-
# A
|
8
|
+
module RGN
|
9
|
+
# An RGN (Rley Grammar Notation) parser that produces concrete parse trees.
|
10
10
|
# Concrete parse trees are the default kind of parse tree
|
11
11
|
# generated by the Rley library.
|
12
12
|
# They consist of two node types only:
|
@@ -28,16 +28,16 @@ module Rley
|
|
28
28
|
# Create a Rley facade object
|
29
29
|
@engine = Rley::Engine.new do |cfg|
|
30
30
|
cfg.diagnose = true
|
31
|
-
cfg.repr_builder =
|
31
|
+
cfg.repr_builder = RGN::ASTBuilder
|
32
32
|
end
|
33
33
|
|
34
34
|
# Step 1. Load RGN grammar
|
35
|
-
@engine.use_grammar(Rley::
|
35
|
+
@engine.use_grammar(Rley::RGN::RGNGrammar)
|
36
36
|
end
|
37
37
|
|
38
|
-
# Parse the given
|
39
|
-
# @param source [String]
|
40
|
-
# @return [Rley::ParseTree] A parse tree equivalent to the
|
38
|
+
# Parse the given RGN snippet into a parse tree.
|
39
|
+
# @param source [String] Snippet to parse
|
40
|
+
# @return [Rley::ParseTree] A parse tree equivalent to the RGN input.
|
41
41
|
def parse(source)
|
42
42
|
lexer = Tokenizer.new(source)
|
43
43
|
result = engine.parse(lexer.tokens)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'composite_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# A RGN syntax node representing an expression quantified by a ?, * or +.
|
8
|
+
class RepetitionNode < CompositeNode
|
9
|
+
# @return [Symbol] one of: :zero_or_one, :zero_or_more, :one_or_more
|
10
|
+
attr_accessor :repetition
|
11
|
+
|
12
|
+
Repetition2suffix = {
|
13
|
+
zero_or_one: '_qmark',
|
14
|
+
zero_or_more: '_star',
|
15
|
+
exactly_one: '',
|
16
|
+
one_or_more: '_plus'
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
# @param child [ASTNode] the AST node to which the repetition applies
|
20
|
+
# @param theRepetition [Symbol] how many times the child node can be repeated
|
21
|
+
def initialize(child, theRepetition)
|
22
|
+
super([child])
|
23
|
+
@repetition = theRepetition
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [RGN::ASTNode]
|
27
|
+
def child
|
28
|
+
subnodes[0]
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [String]
|
32
|
+
def name
|
33
|
+
child_name = subnodes[0].name
|
34
|
+
"rep_#{child_name}#{Repetition2suffix[repetition]}"
|
35
|
+
end
|
36
|
+
|
37
|
+
# @return [String]
|
38
|
+
def to_text
|
39
|
+
child_text = subnodes[0].to_text
|
40
|
+
"rep_#{child_text}#{Repetition2suffix[repetition]}"
|
41
|
+
end
|
42
|
+
|
43
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
44
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
45
|
+
def accept(visitor)
|
46
|
+
visitor.visit_repetition_node(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def suffix_qmark
|
50
|
+
Repetition2suffix[:zero_or_one]
|
51
|
+
end
|
52
|
+
|
53
|
+
def suffix_star
|
54
|
+
Repetition2suffix[:zero_or_more]
|
55
|
+
end
|
56
|
+
|
57
|
+
def suffix_plus
|
58
|
+
Repetition2suffix[:one_or_more]
|
59
|
+
end
|
60
|
+
end # class
|
61
|
+
end # module
|
62
|
+
end # module
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'composite_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# A syntax node for a sequence of AST nodes
|
8
|
+
class SequenceNode < CompositeNode
|
9
|
+
def name
|
10
|
+
result = +''
|
11
|
+
subnodes.each do |sn|
|
12
|
+
result << "_#{sn.name}"
|
13
|
+
end
|
14
|
+
|
15
|
+
"seq#{result}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_text
|
19
|
+
arr = subnodes.map(&:to_text)
|
20
|
+
arr.join(' ')
|
21
|
+
end
|
22
|
+
|
23
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
24
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
25
|
+
def accept(visitor)
|
26
|
+
visitor.visit_sequence_node(self)
|
27
|
+
end
|
28
|
+
end # class
|
29
|
+
end # module
|
30
|
+
end # module
|
@@ -3,24 +3,32 @@
|
|
3
3
|
require_relative 'ast_node'
|
4
4
|
|
5
5
|
module Rley
|
6
|
-
module
|
7
|
-
# A syntax node for a grammar symbol occurring in rhs of a rule
|
6
|
+
module RGN
|
7
|
+
# A syntax node for a grammar symbol occurring in rhs of a rule.
|
8
|
+
# Symbol nodes are leaf nodes of RGN parse trees.
|
8
9
|
class SymbolNode < ASTNode
|
10
|
+
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
11
|
+
attr_reader :position
|
12
|
+
|
9
13
|
# @return [String] name of grammar symbol
|
10
14
|
attr_reader :name
|
11
15
|
|
12
16
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
13
17
|
# @param aName [String] name of grammar symbol
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
def initialize(aPosition, aName)
|
19
|
+
super()
|
20
|
+
@position = aPosition
|
17
21
|
@name = aName
|
18
|
-
|
22
|
+
end
|
23
|
+
|
24
|
+
# @return [String] name of grammar symbol
|
25
|
+
def to_text
|
26
|
+
annotation.empty? ? name : "#{name} #{annotation_to_text}"
|
19
27
|
end
|
20
28
|
|
21
29
|
# Abstract method (must be overridden in subclasses).
|
22
30
|
# Part of the 'visitee' role in Visitor design pattern.
|
23
|
-
# @param
|
31
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
24
32
|
def accept(visitor)
|
25
33
|
visitor.visit_symbol_node(self)
|
26
34
|
end
|
@@ -4,7 +4,7 @@ require 'strscan'
|
|
4
4
|
require_relative '../lexical/token'
|
5
5
|
|
6
6
|
module Rley
|
7
|
-
module
|
7
|
+
module RGN
|
8
8
|
# A tokenizer for the Rley notation language.
|
9
9
|
# Responsibility: break input into a sequence of token objects.
|
10
10
|
# The tokenizer should recognize:
|
@@ -14,6 +14,13 @@ module Rley
|
|
14
14
|
# Delimiters: e.g. parentheses '(', ')'
|
15
15
|
# Separators: e.g. comma
|
16
16
|
class Tokenizer
|
17
|
+
PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
|
18
|
+
PATT_INTEGER = /\d+/.freeze
|
19
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
20
|
+
PATT_STRING_START = /"|'/.freeze
|
21
|
+
PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
|
22
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
23
|
+
|
17
24
|
# @return [StringScanner] Low-level input scanner
|
18
25
|
attr_reader(:scanner)
|
19
26
|
|
@@ -24,7 +31,7 @@ module Rley
|
|
24
31
|
attr_reader(:line_start)
|
25
32
|
|
26
33
|
# One or two special character tokens.
|
27
|
-
|
34
|
+
Lexeme2name = {
|
28
35
|
'(' => 'LEFT_PAREN',
|
29
36
|
')' => 'RIGHT_PAREN',
|
30
37
|
'{' => 'LEFT_BRACE',
|
@@ -41,19 +48,19 @@ module Rley
|
|
41
48
|
match_closest repeat
|
42
49
|
].map { |x| [x, x] }.to_h
|
43
50
|
|
44
|
-
# Constructor. Initialize a tokenizer for
|
45
|
-
# @param source [String]
|
51
|
+
# Constructor. Initialize a tokenizer for RGN input.
|
52
|
+
# @param source [String] RGN text to tokenize.
|
46
53
|
def initialize(source = nil)
|
47
|
-
|
48
|
-
|
54
|
+
reset
|
55
|
+
input = source || ''
|
56
|
+
@scanner = StringScanner.new(input)
|
49
57
|
end
|
50
58
|
|
51
59
|
# Reset the tokenizer and make the given text, the current input.
|
52
|
-
# @param source [String]
|
60
|
+
# @param source [String] RGN text to tokenize.
|
53
61
|
def start_with(source)
|
62
|
+
reset
|
54
63
|
@scanner.string = source
|
55
|
-
@lineno = 1
|
56
|
-
@line_start = 0
|
57
64
|
end
|
58
65
|
|
59
66
|
# Scan the source and return an array of tokens.
|
@@ -65,47 +72,67 @@ module Rley
|
|
65
72
|
tok_sequence << token unless token.nil?
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
tok_sequence
|
69
76
|
end
|
70
77
|
|
71
78
|
private
|
72
79
|
|
73
|
-
def
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
curr_ch = scanner.peek(1)
|
78
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
80
|
+
def reset
|
81
|
+
@lineno = 1
|
82
|
+
@line_start = 0
|
83
|
+
end
|
79
84
|
|
85
|
+
def _next_token
|
80
86
|
token = nil
|
87
|
+
ws_found = false
|
81
88
|
|
82
|
-
|
83
|
-
|
84
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
|
-
elsif '?*+,'.include? curr_ch # modifier character
|
86
|
-
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
|
-
token = build_token(symb, scanner.getch)
|
89
|
-
elsif (lexeme = scanner.scan(/\.\./))
|
90
|
-
# One or two special character tokens
|
91
|
-
token = build_token(@@lexeme2name[lexeme], lexeme)
|
92
|
-
elsif scanner.check(/"|'/) # Start of string detected...
|
93
|
-
token = build_string_token
|
94
|
-
elsif (lexeme = scanner.scan(/\d+/))
|
95
|
-
token = build_token('INT_LIT', lexeme)
|
96
|
-
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
|
97
|
-
keyw = @@keywords[lexeme.chop!]
|
98
|
-
token = build_token('KEY', lexeme) if keyw
|
99
|
-
# ... error case
|
100
|
-
elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
|
101
|
-
token = build_token('SYMBOL', lexeme)
|
102
|
-
else # Unknown token
|
103
|
-
col = scanner.pos - @line_start + 1
|
104
|
-
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
105
|
-
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
106
|
-
end
|
89
|
+
# Loop until end of input reached or token found
|
90
|
+
until token || scanner.eos?
|
107
91
|
|
108
|
-
|
92
|
+
nl_found = scanner.skip(PATT_NEWLINE)
|
93
|
+
if nl_found
|
94
|
+
next_line_scanned
|
95
|
+
next
|
96
|
+
end
|
97
|
+
if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
98
|
+
ws_found = true
|
99
|
+
next
|
100
|
+
end
|
101
|
+
|
102
|
+
curr_ch = scanner.peek(1)
|
103
|
+
|
104
|
+
if '(){},'.include? curr_ch
|
105
|
+
# Single delimiter, separator or character
|
106
|
+
token = build_token(Lexeme2name[curr_ch], scanner.getch)
|
107
|
+
elsif '?*+,'.include? curr_ch # modifier character
|
108
|
+
# modifiers without prefix text are symbols
|
109
|
+
symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
|
110
|
+
token = build_token(symb, scanner.getch)
|
111
|
+
elsif (lexeme = scanner.scan(/\.\./))
|
112
|
+
# One or two special character tokens
|
113
|
+
token = build_token(Lexeme2name[lexeme], lexeme)
|
114
|
+
elsif scanner.check(PATT_STRING_START) # Start of string detected...
|
115
|
+
token = build_string_token
|
116
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
117
|
+
token = build_token('INT_LIT', lexeme)
|
118
|
+
elsif (lexeme = scanner.scan(PATT_KEY))
|
119
|
+
keyw = @@keywords[lexeme.chop!]
|
120
|
+
token = build_token('KEY', lexeme) if keyw
|
121
|
+
# ... error case
|
122
|
+
elsif (lexeme = scanner.scan(PATT_SYMBOL))
|
123
|
+
token = build_token('SYMBOL', lexeme)
|
124
|
+
else # Unknown token
|
125
|
+
col = scanner.pos - @line_start + 1
|
126
|
+
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
127
|
+
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
128
|
+
end
|
129
|
+
ws_found = false
|
130
|
+
end # until
|
131
|
+
|
132
|
+
# unterminated(@string_start.line, @string_start.column) if state == :multiline
|
133
|
+
token
|
134
|
+
|
135
|
+
# return token
|
109
136
|
end
|
110
137
|
|
111
138
|
def build_token(aSymbolName, aLexeme)
|
@@ -154,24 +181,8 @@ module Rley
|
|
154
181
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
155
182
|
end
|
156
183
|
|
157
|
-
#
|
158
|
-
|
159
|
-
def skip_intertoken_spaces
|
160
|
-
loop do
|
161
|
-
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
162
|
-
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
163
|
-
if nl_found
|
164
|
-
ws_found = true
|
165
|
-
next_line
|
166
|
-
end
|
167
|
-
|
168
|
-
break unless ws_found
|
169
|
-
end
|
170
|
-
|
171
|
-
scanner.pos
|
172
|
-
end
|
173
|
-
|
174
|
-
def next_line
|
184
|
+
# Event: next line detected.
|
185
|
+
def next_line_scanned
|
175
186
|
@lineno += 1
|
176
187
|
@line_start = scanner.pos
|
177
188
|
end
|
@@ -8,10 +8,6 @@ module Rley # This module is used as a namespace
|
|
8
8
|
# @return [String] The name of the grammar symbol
|
9
9
|
attr_reader(:name)
|
10
10
|
|
11
|
-
# An indicator that tells whether the grammar symbol can generate a
|
12
|
-
# non-empty string of terminals.
|
13
|
-
attr_writer(:generative)
|
14
|
-
|
15
11
|
# Constructor.
|
16
12
|
# aName [String] The name of the grammar symbol.
|
17
13
|
def initialize(aName)
|
@@ -7,6 +7,10 @@ module Rley # This module is used as a namespace
|
|
7
7
|
# A non-terminal symbol (sometimes called a syntactic variable) represents
|
8
8
|
# a composition of terminal or non-terminal symbols
|
9
9
|
class NonTerminal < GrmSymbol
|
10
|
+
# An indicator that tells whether the grammar symbol can generate a
|
11
|
+
# non-empty string of terminals.
|
12
|
+
attr_writer(:generative)
|
13
|
+
|
10
14
|
# A non-terminal symbol is nullable if it can match an empty string.
|
11
15
|
attr_writer(:nullable)
|
12
16
|
|