rley 0.8.06 → 0.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +23 -2
- data/CHANGELOG.md +21 -1
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/appveyor.yml +1 -3
- data/examples/NLP/benchmark_pico_en.rb +6 -6
- data/examples/NLP/engtagger.rb +6 -6
- data/examples/general/calc_iter1/calc_lexer.rb +1 -1
- data/examples/general/calc_iter2/calc_lexer.rb +1 -1
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
- data/examples/tokenizer/run_tokenizer.rb +1 -1
- data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +3 -3
- data/lib/rley/lexical/token.rb +1 -1
- data/lib/rley/ptree/non_terminal_node.rb +1 -1
- data/lib/rley/rgn/all_notation_nodes.rb +5 -0
- data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
- data/lib/rley/{notation → rgn}/ast_node.rb +13 -12
- data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
- data/lib/rley/rgn/composite_node.rb +28 -0
- data/lib/rley/{notation → rgn}/grammar.rb +1 -1
- data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
- data/lib/rley/{notation → rgn}/parser.rb +7 -7
- data/lib/rley/rgn/repetition_node.rb +62 -0
- data/lib/rley/rgn/sequence_node.rb +30 -0
- data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
- data/lib/rley/{notation → rgn}/tokenizer.rb +71 -60
- data/lib/rley/syntax/grm_symbol.rb +0 -4
- data/lib/rley/syntax/non_terminal.rb +4 -0
- data/lib/rley/syntax/terminal.rb +10 -6
- data/spec/rley/parser/dangling_else_spec.rb +3 -3
- data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -50
- data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
- data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
- data/spec/rley/rgn/repetition_node_spec.rb +56 -0
- data/spec/rley/rgn/sequence_node_spec.rb +48 -0
- data/spec/rley/rgn/symbol_node_spec.rb +33 -0
- data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
- metadata +40 -33
- data/lib/rley/notation/all_notation_nodes.rb +0 -4
- data/lib/rley/notation/grouping_node.rb +0 -23
- data/lib/rley/notation/sequence_node.rb +0 -35
@@ -7,8 +7,9 @@ require_relative 'ast_visitor'
|
|
7
7
|
require_relative '../syntax/match_closest'
|
8
8
|
|
9
9
|
module Rley # This module is used as a namespace
|
10
|
-
|
11
|
-
|
10
|
+
# Namespace for classes that define RGN (Rley Grammar Notation)
|
11
|
+
module RGN # This module is used as a namespace
|
12
|
+
# Structure used by Rley to generate implicit production rules.
|
12
13
|
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
14
|
|
14
15
|
# Builder GoF pattern. Builder builds a complex object
|
@@ -19,7 +20,7 @@ module Rley # This module is used as a namespace
|
|
19
20
|
# to the matching grammar symbol object.
|
20
21
|
attr_reader(:symbols)
|
21
22
|
|
22
|
-
# @return [
|
23
|
+
# @return [RGN::Parser] Parser for the right-side of productions
|
23
24
|
attr_reader(:parser)
|
24
25
|
|
25
26
|
# @return [Hash{ASTVisitor, Array}]
|
@@ -32,21 +33,12 @@ module Rley # This module is used as a namespace
|
|
32
33
|
# @return [Hash{String, String}] The synthesized raw productions
|
33
34
|
attr_reader(:synthetized)
|
34
35
|
|
35
|
-
# Creates a new grammar builder.
|
36
|
+
# Creates a new RGN grammar builder.
|
36
37
|
# @param aBlock [Proc] code block used to build the grammar.
|
37
|
-
# @example Building a tiny English grammar
|
38
|
-
# builder = Rley::Notation::GrammarBuilder.new do
|
39
|
-
# add_terminals('n', 'v', 'adj', 'det')
|
40
|
-
# rule 'S' => 'NP VP'
|
41
|
-
# rule 'VP' => 'v NP'
|
42
|
-
# rule 'NP' => 'det n'
|
43
|
-
# rule 'NP' => 'adj NP'
|
44
|
-
# end
|
45
|
-
# tiny_eng = builder.grammar
|
46
38
|
def initialize(&aBlock)
|
47
39
|
@symbols = {}
|
48
40
|
@productions = []
|
49
|
-
@parser =
|
41
|
+
@parser = RGN::Parser.new
|
50
42
|
@visitor2rhs = {}
|
51
43
|
@synthetized = {}
|
52
44
|
|
@@ -73,7 +65,7 @@ module Rley # This module is used as a namespace
|
|
73
65
|
end
|
74
66
|
|
75
67
|
# Add the given marker symbol to the grammar of the language
|
76
|
-
# @param aMarkerSymbol [String] A
|
68
|
+
# @param aMarkerSymbol [String] A marker symbol
|
77
69
|
# @return [void]
|
78
70
|
def add_marker(aMarkerSymbol)
|
79
71
|
new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
|
@@ -227,105 +219,64 @@ module Rley # This module is used as a namespace
|
|
227
219
|
# ################################
|
228
220
|
def after_symbol_node(aSymbolNode, aVisitor)
|
229
221
|
symb_name = aSymbolNode.name
|
230
|
-
|
231
|
-
case aSymbolNode.repetition
|
232
|
-
when :zero_or_one
|
233
|
-
# implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
|
234
|
-
# implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
|
235
|
-
name_modified = "#{symb_name}#{suffix_qmark}"
|
236
|
-
unless symbols.include? name_modified
|
237
|
-
add_nonterminal(name_modified)
|
238
|
-
add_raw_rule(name_modified, symb_name, suffix_qmark_one)
|
239
|
-
add_raw_rule(name_modified, '', suffix_qmark_none)
|
240
|
-
end
|
241
|
-
symb_name = name_modified
|
242
|
-
|
243
|
-
when :zero_or_more
|
244
|
-
# implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
|
245
|
-
# implicitly called: rule('symb_name_star' => '').tag suffix_star_none
|
246
|
-
name_modified = "#{symb_name}#{suffix_star}"
|
247
|
-
unless symbols.include? name_modified
|
248
|
-
add_nonterminal(name_modified)
|
249
|
-
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
|
250
|
-
add_raw_rule(name_modified, [], suffix_star_none)
|
251
|
-
end
|
252
|
-
symb_name = name_modified
|
253
|
-
|
254
|
-
when :exactly_one
|
255
|
-
# Do nothing
|
256
|
-
|
257
|
-
when :one_or_more
|
258
|
-
name_modified = "#{symb_name}#{suffix_plus}"
|
259
|
-
unless symbols.include? name_modified
|
260
|
-
add_nonterminal(name_modified)
|
261
|
-
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
|
262
|
-
add_raw_rule(name_modified, symb_name, suffix_plus_one)
|
263
|
-
end
|
264
|
-
symb_name = name_modified
|
265
|
-
else
|
266
|
-
raise StandardError, 'Unhandled multiplicity'
|
267
|
-
end
|
268
|
-
|
269
222
|
symb = get_grm_symbol(symb_name)
|
270
223
|
visitor2rhs[aVisitor] << symb
|
271
224
|
end
|
272
225
|
|
273
226
|
def after_sequence_node(aSequenceNode, _visitor)
|
274
|
-
aSequenceNode
|
275
|
-
next if sn.annotation.empty?
|
276
|
-
|
277
|
-
matching = sn.annotation['match_closest']
|
278
|
-
aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
|
279
|
-
end
|
227
|
+
add_constraints(aSequenceNode)
|
280
228
|
end
|
281
229
|
|
282
|
-
def
|
283
|
-
|
284
|
-
|
230
|
+
def after_repetition_node(aRepNode, aVisitor)
|
231
|
+
add_constraints(aRepNode)
|
232
|
+
return if aRepNode.repetition == :exactly_one
|
285
233
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
234
|
+
node_name = aRepNode.name
|
235
|
+
child_name = aRepNode.subnodes[0].name
|
236
|
+
|
237
|
+
if aRepNode.child.is_a?(SequenceNode) &&
|
238
|
+
!symbols.include?(child_name) && aRepNode.repetition != :zero_or_one
|
239
|
+
add_nonterminal(child_name)
|
240
|
+
rhs = aRepNode.child.to_text
|
241
|
+
add_raw_rule(child_name, rhs, 'return_children', true)
|
290
242
|
end
|
291
|
-
name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
|
292
243
|
|
293
|
-
case
|
244
|
+
case aRepNode.repetition
|
294
245
|
when :zero_or_one
|
295
|
-
# implicitly called: rule('
|
296
|
-
# implicitly called: rule('
|
297
|
-
unless symbols.include?
|
298
|
-
add_nonterminal(
|
299
|
-
|
300
|
-
|
246
|
+
# implicitly called: rule('node_name_qmark' => 'node_name_qmark').tag suffix_qmark_one
|
247
|
+
# implicitly called: rule('node_name_qmark' => '').tag suffix_qmark_none
|
248
|
+
unless symbols.include? node_name
|
249
|
+
add_nonterminal(node_name)
|
250
|
+
if aRepNode.child.is_a?(SequenceNode) && !aRepNode.child.constraints.empty?
|
251
|
+
aRepNode.constraints.merge(aRepNode.child.constraints)
|
252
|
+
end
|
253
|
+
rhs = aRepNode.child.to_text
|
254
|
+
add_raw_rule(node_name, rhs, 'return_children', false, aRepNode.constraints)
|
255
|
+
add_raw_rule(node_name, [], suffix_qmark_none, true)
|
301
256
|
end
|
302
257
|
|
303
258
|
when :zero_or_more
|
304
|
-
# implicitly called: rule('
|
305
|
-
# implicitly called: rule('
|
306
|
-
unless symbols.include?
|
307
|
-
add_nonterminal(
|
308
|
-
|
309
|
-
add_raw_rule(
|
259
|
+
# implicitly called: rule('node_name_star' => 'node_name_star node_name').tag suffix_star_more
|
260
|
+
# implicitly called: rule('node_name_star' => '').tag suffix_star_none
|
261
|
+
unless symbols.include? node_name
|
262
|
+
add_nonterminal(node_name)
|
263
|
+
rhs = "#{node_name} #{child_name}"
|
264
|
+
add_raw_rule(node_name, rhs, suffix_star_more)
|
265
|
+
add_raw_rule(node_name, '', suffix_star_none)
|
310
266
|
end
|
311
267
|
|
312
|
-
when :exactly_one
|
313
|
-
# Do nothing
|
314
|
-
|
315
268
|
when :one_or_more
|
316
|
-
unless symbols.include?
|
317
|
-
add_nonterminal(
|
318
|
-
add_raw_rule(
|
319
|
-
add_raw_rule(
|
269
|
+
unless symbols.include? node_name
|
270
|
+
add_nonterminal(node_name)
|
271
|
+
add_raw_rule(node_name, "#{node_name} #{child_name}", suffix_plus_more)
|
272
|
+
add_raw_rule(node_name, child_name, suffix_plus_one)
|
320
273
|
end
|
321
274
|
else
|
322
275
|
raise StandardError, 'Unhandled multiplicity'
|
323
276
|
end
|
324
277
|
|
325
|
-
|
326
|
-
|
327
|
-
visitor2rhs[aVisitor] << symb
|
328
|
-
end
|
278
|
+
symb = get_grm_symbol(node_name)
|
279
|
+
visitor2rhs[aVisitor] << symb
|
329
280
|
end
|
330
281
|
|
331
282
|
# A notification to the builder object that the programmer
|
@@ -425,22 +376,33 @@ module Rley # This module is used as a namespace
|
|
425
376
|
symbols[name]
|
426
377
|
end
|
427
378
|
|
428
|
-
def
|
429
|
-
|
430
|
-
|
431
|
-
case subn
|
432
|
-
when SymbolNode
|
433
|
-
subnode_names << "_#{subn.name}"
|
434
|
-
when SequenceNode
|
435
|
-
subnode_names << "_#{sequence_name(subn)}"
|
436
|
-
end
|
437
|
-
suffix = repetition2suffix(subn.repetition)
|
438
|
-
subnode_names << suffix
|
439
|
-
end
|
379
|
+
def add_constraints(aCompositeNode)
|
380
|
+
aCompositeNode.subnodes.each_with_index do |sn, i|
|
381
|
+
next if sn.annotation.empty?
|
440
382
|
|
441
|
-
|
383
|
+
matching = sn.annotation['match_closest']
|
384
|
+
constraint = Syntax::MatchClosest.new(aCompositeNode, i, matching)
|
385
|
+
aCompositeNode.constraints << constraint
|
386
|
+
end
|
442
387
|
end
|
443
388
|
|
389
|
+
# def sequence_name(aSequenceNode)
|
390
|
+
# subnode_names = +''
|
391
|
+
# aSequenceNode.subnodes.each do |subn|
|
392
|
+
# case subn
|
393
|
+
# when SymbolNode
|
394
|
+
# subnode_names << "_#{subn.name}"
|
395
|
+
# when SequenceNode
|
396
|
+
# subnode_names << "_#{sequence_name(subn)}"
|
397
|
+
# when RepetitionNode
|
398
|
+
# suffix = repetition2suffix(subn.repetition)
|
399
|
+
# subnode_names << suffix
|
400
|
+
# end
|
401
|
+
# end
|
402
|
+
#
|
403
|
+
# "seq#{subnode_names}"
|
404
|
+
# end
|
405
|
+
|
444
406
|
def node_base_name(aNode)
|
445
407
|
if aNode.kind_of?(SymbolNode)
|
446
408
|
aNode.name
|
@@ -456,23 +418,23 @@ module Rley # This module is used as a namespace
|
|
456
418
|
"#{base_name}#{suffix}"
|
457
419
|
end
|
458
420
|
|
459
|
-
def serialize_sequence(aSequenceNode)
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
end
|
421
|
+
# def serialize_sequence(aSequenceNode)
|
422
|
+
# text = +''
|
423
|
+
# aSequenceNode.subnodes.each do |sn|
|
424
|
+
# text << ' '
|
425
|
+
# case sn
|
426
|
+
# when SymbolNode
|
427
|
+
# text << sn.name
|
428
|
+
# when SequenceNode
|
429
|
+
# text << sequence_name(sn)
|
430
|
+
# when RepetitionNode
|
431
|
+
# suffix = repetition2suffix(sn.repetition)
|
432
|
+
# text << suffix
|
433
|
+
# end
|
434
|
+
# end
|
435
|
+
#
|
436
|
+
# text.strip
|
437
|
+
# end
|
476
438
|
|
477
439
|
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
|
478
440
|
raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
|
@@ -484,7 +446,7 @@ module Rley # This module is used as a namespace
|
|
484
446
|
end
|
485
447
|
|
486
448
|
def process_raw_rules
|
487
|
-
until synthetized.empty?
|
449
|
+
until synthetized.empty?
|
488
450
|
raw_rules = synthetized.delete(synthetized.keys.first)
|
489
451
|
raw_rules.each do |raw|
|
490
452
|
new_prod = nil
|
@@ -494,7 +456,7 @@ module Rley # This module is used as a namespace
|
|
494
456
|
new_prod = rule(raw.lhs => raw.rhs)
|
495
457
|
end
|
496
458
|
new_prod.tag(raw.tag)
|
497
|
-
new_prod.constraints
|
459
|
+
new_prod.constraints.concat(raw.constraints)
|
498
460
|
end
|
499
461
|
end
|
500
462
|
end
|
@@ -5,8 +5,8 @@ require_relative 'grammar'
|
|
5
5
|
require_relative 'ast_builder'
|
6
6
|
|
7
7
|
module Rley
|
8
|
-
module
|
9
|
-
# A
|
8
|
+
module RGN
|
9
|
+
# An RGN (Rley Grammar Notation) parser that produces concrete parse trees.
|
10
10
|
# Concrete parse trees are the default kind of parse tree
|
11
11
|
# generated by the Rley library.
|
12
12
|
# They consist of two node types only:
|
@@ -28,16 +28,16 @@ module Rley
|
|
28
28
|
# Create a Rley facade object
|
29
29
|
@engine = Rley::Engine.new do |cfg|
|
30
30
|
cfg.diagnose = true
|
31
|
-
cfg.repr_builder =
|
31
|
+
cfg.repr_builder = RGN::ASTBuilder
|
32
32
|
end
|
33
33
|
|
34
34
|
# Step 1. Load RGN grammar
|
35
|
-
@engine.use_grammar(Rley::
|
35
|
+
@engine.use_grammar(Rley::RGN::RGNGrammar)
|
36
36
|
end
|
37
37
|
|
38
|
-
# Parse the given
|
39
|
-
# @param source [String]
|
40
|
-
# @return [Rley::ParseTree] A parse tree equivalent to the
|
38
|
+
# Parse the given RGN snippet into a parse tree.
|
39
|
+
# @param source [String] Snippet to parse
|
40
|
+
# @return [Rley::ParseTree] A parse tree equivalent to the RGN input.
|
41
41
|
def parse(source)
|
42
42
|
lexer = Tokenizer.new(source)
|
43
43
|
result = engine.parse(lexer.tokens)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'composite_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# An RGN syntax node representing an expression quantified by a ?, * or +.
|
8
|
+
class RepetitionNode < CompositeNode
|
9
|
+
# @return [Symbol] one of: :zero_or_one, :zero_or_more, :exactly_one, :one_or_more
|
10
|
+
attr_accessor :repetition
|
11
|
+
|
12
|
+
Repetition2suffix = {
|
13
|
+
zero_or_one: '_qmark',
|
14
|
+
zero_or_more: '_star',
|
15
|
+
exactly_one: '',
|
16
|
+
one_or_more: '_plus'
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
# @param child [ASTNode] the AST node to which the repetition applies
|
20
|
+
# @param theRepetition [Symbol] how many times the child node can be repeated
|
21
|
+
def initialize(child, theRepetition)
|
22
|
+
super([child])
|
23
|
+
@repetition = theRepetition
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [RGN::ASTNode]
|
27
|
+
def child
|
28
|
+
subnodes[0]
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [String]
|
32
|
+
def name
|
33
|
+
child_name = subnodes[0].name
|
34
|
+
"rep_#{child_name}#{Repetition2suffix[repetition]}"
|
35
|
+
end
|
36
|
+
|
37
|
+
# @return [String]
|
38
|
+
def to_text
|
39
|
+
child_text = subnodes[0].to_text
|
40
|
+
"rep_#{child_text}#{Repetition2suffix[repetition]}"
|
41
|
+
end
|
42
|
+
|
43
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
44
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
45
|
+
def accept(visitor)
|
46
|
+
visitor.visit_repetition_node(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def suffix_qmark
|
50
|
+
Repetition2suffix[:zero_or_one]
|
51
|
+
end
|
52
|
+
|
53
|
+
def suffix_star
|
54
|
+
Repetition2suffix[:zero_or_more]
|
55
|
+
end
|
56
|
+
|
57
|
+
def suffix_plus
|
58
|
+
Repetition2suffix[:one_or_more]
|
59
|
+
end
|
60
|
+
end # class
|
61
|
+
end # module
|
62
|
+
end # module
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'composite_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module RGN
|
7
|
+
# A syntax node for a sequence of AST nodes
|
8
|
+
class SequenceNode < CompositeNode
|
9
|
+
def name
|
10
|
+
result = +''
|
11
|
+
subnodes.each do |sn|
|
12
|
+
result << "_#{sn.name}"
|
13
|
+
end
|
14
|
+
|
15
|
+
"seq#{result}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_text
|
19
|
+
arr = subnodes.map(&:to_text)
|
20
|
+
arr.join(' ')
|
21
|
+
end
|
22
|
+
|
23
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
24
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
25
|
+
def accept(visitor)
|
26
|
+
visitor.visit_sequence_node(self)
|
27
|
+
end
|
28
|
+
end # class
|
29
|
+
end # module
|
30
|
+
end # module
|
@@ -3,24 +3,32 @@
|
|
3
3
|
require_relative 'ast_node'
|
4
4
|
|
5
5
|
module Rley
|
6
|
-
module
|
7
|
-
# A syntax node for a grammar symbol occurring in rhs of a rule
|
6
|
+
module RGN
|
7
|
+
# A syntax node for a grammar symbol occurring in rhs of a rule.
|
8
|
+
# Symbol nodes are leaf nodes of RGN parse trees.
|
8
9
|
class SymbolNode < ASTNode
|
10
|
+
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
11
|
+
attr_reader :position
|
12
|
+
|
9
13
|
# @return [String] name of grammar symbol
|
10
14
|
attr_reader :name
|
11
15
|
|
12
16
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
13
17
|
# @param aName [String] name of grammar symbol
|
14
|
-
|
15
|
-
|
16
|
-
|
18
|
+
def initialize(aPosition, aName)
|
19
|
+
super()
|
20
|
+
@position = aPosition
|
17
21
|
@name = aName
|
18
|
-
|
22
|
+
end
|
23
|
+
|
24
|
+
# @return [String] name of grammar symbol
|
25
|
+
def to_text
|
26
|
+
annotation.empty? ? name : "#{name} #{annotation_to_text}"
|
19
27
|
end
|
20
28
|
|
21
29
|
# Abstract method (must be overridden in subclasses).
|
22
30
|
# Part of the 'visitee' role in Visitor design pattern.
|
23
|
-
# @param
|
31
|
+
# @param visitor [RGN::ASTVisitor] the visitor
|
24
32
|
def accept(visitor)
|
25
33
|
visitor.visit_symbol_node(self)
|
26
34
|
end
|
@@ -4,7 +4,7 @@ require 'strscan'
|
|
4
4
|
require_relative '../lexical/token'
|
5
5
|
|
6
6
|
module Rley
|
7
|
-
module
|
7
|
+
module RGN
|
8
8
|
# A tokenizer for the Rley notation language.
|
9
9
|
# Responsibility: break input into a sequence of token objects.
|
10
10
|
# The tokenizer should recognize:
|
@@ -14,6 +14,13 @@ module Rley
|
|
14
14
|
# Delimiters: e.g. parentheses '(', ')'
|
15
15
|
# Separators: e.g. comma
|
16
16
|
class Tokenizer
|
17
|
+
PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
|
18
|
+
PATT_INTEGER = /\d+/.freeze
|
19
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
20
|
+
PATT_STRING_START = /"|'/.freeze
|
21
|
+
PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
|
22
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
23
|
+
|
17
24
|
# @return [StringScanner] Low-level input scanner
|
18
25
|
attr_reader(:scanner)
|
19
26
|
|
@@ -24,7 +31,7 @@ module Rley
|
|
24
31
|
attr_reader(:line_start)
|
25
32
|
|
26
33
|
# One or two special character tokens.
|
27
|
-
|
34
|
+
Lexeme2name = {
|
28
35
|
'(' => 'LEFT_PAREN',
|
29
36
|
')' => 'RIGHT_PAREN',
|
30
37
|
'{' => 'LEFT_BRACE',
|
@@ -41,19 +48,19 @@ module Rley
|
|
41
48
|
match_closest repeat
|
42
49
|
].map { |x| [x, x] }.to_h
|
43
50
|
|
44
|
-
# Constructor. Initialize a tokenizer for
|
45
|
-
# @param source [String]
|
51
|
+
# Constructor. Initialize a tokenizer for RGN input.
|
52
|
+
# @param source [String] RGN text to tokenize.
|
46
53
|
def initialize(source = nil)
|
47
|
-
|
48
|
-
|
54
|
+
reset
|
55
|
+
input = source || ''
|
56
|
+
@scanner = StringScanner.new(input)
|
49
57
|
end
|
50
58
|
|
51
59
|
# Reset the tokenizer and make the given text, the current input.
|
52
|
-
# @param source [String]
|
60
|
+
# @param source [String] RGN text to tokenize.
|
53
61
|
def start_with(source)
|
62
|
+
reset
|
54
63
|
@scanner.string = source
|
55
|
-
@lineno = 1
|
56
|
-
@line_start = 0
|
57
64
|
end
|
58
65
|
|
59
66
|
# Scan the source and return an array of tokens.
|
@@ -65,47 +72,67 @@ module Rley
|
|
65
72
|
tok_sequence << token unless token.nil?
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
tok_sequence
|
69
76
|
end
|
70
77
|
|
71
78
|
private
|
72
79
|
|
73
|
-
def
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
curr_ch = scanner.peek(1)
|
78
|
-
return nil if curr_ch.nil? || curr_ch.empty?
|
80
|
+
def reset
|
81
|
+
@lineno = 1
|
82
|
+
@line_start = 0
|
83
|
+
end
|
79
84
|
|
85
|
+
def _next_token
|
80
86
|
token = nil
|
87
|
+
ws_found = false
|
81
88
|
|
82
|
-
|
83
|
-
|
84
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
|
-
elsif '?*+,'.include? curr_ch # modifier character
|
86
|
-
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
|
-
token = build_token(symb, scanner.getch)
|
89
|
-
elsif (lexeme = scanner.scan(/\.\./))
|
90
|
-
# One or two special character tokens
|
91
|
-
token = build_token(@@lexeme2name[lexeme], lexeme)
|
92
|
-
elsif scanner.check(/"|'/) # Start of string detected...
|
93
|
-
token = build_string_token
|
94
|
-
elsif (lexeme = scanner.scan(/\d+/))
|
95
|
-
token = build_token('INT_LIT', lexeme)
|
96
|
-
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
|
97
|
-
keyw = @@keywords[lexeme.chop!]
|
98
|
-
token = build_token('KEY', lexeme) if keyw
|
99
|
-
# ... error case
|
100
|
-
elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
|
101
|
-
token = build_token('SYMBOL', lexeme)
|
102
|
-
else # Unknown token
|
103
|
-
col = scanner.pos - @line_start + 1
|
104
|
-
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
105
|
-
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
106
|
-
end
|
89
|
+
# Loop until end of input reached or token found
|
90
|
+
until token || scanner.eos?
|
107
91
|
|
108
|
-
|
92
|
+
nl_found = scanner.skip(PATT_NEWLINE)
|
93
|
+
if nl_found
|
94
|
+
next_line_scanned
|
95
|
+
next
|
96
|
+
end
|
97
|
+
if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
98
|
+
ws_found = true
|
99
|
+
next
|
100
|
+
end
|
101
|
+
|
102
|
+
curr_ch = scanner.peek(1)
|
103
|
+
|
104
|
+
if '(){},'.include? curr_ch
|
105
|
+
# Single delimiter, separator or character
|
106
|
+
token = build_token(Lexeme2name[curr_ch], scanner.getch)
|
107
|
+
elsif '?*+,'.include? curr_ch # modifier character
|
108
|
+
# modifiers without prefix text are symbols
|
109
|
+
symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
|
110
|
+
token = build_token(symb, scanner.getch)
|
111
|
+
elsif (lexeme = scanner.scan(/\.\./))
|
112
|
+
# One or two special character tokens
|
113
|
+
token = build_token(Lexeme2name[lexeme], lexeme)
|
114
|
+
elsif scanner.check(PATT_STRING_START) # Start of string detected...
|
115
|
+
token = build_string_token
|
116
|
+
elsif (lexeme = scanner.scan(PATT_INTEGER))
|
117
|
+
token = build_token('INT_LIT', lexeme)
|
118
|
+
elsif (lexeme = scanner.scan(PATT_KEY))
|
119
|
+
keyw = @@keywords[lexeme.chop!]
|
120
|
+
token = build_token('KEY', lexeme) if keyw
|
121
|
+
# ... error case
|
122
|
+
elsif (lexeme = scanner.scan(PATT_SYMBOL))
|
123
|
+
token = build_token('SYMBOL', lexeme)
|
124
|
+
else # Unknown token
|
125
|
+
col = scanner.pos - @line_start + 1
|
126
|
+
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
127
|
+
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
128
|
+
end
|
129
|
+
ws_found = false
|
130
|
+
end # until
|
131
|
+
|
132
|
+
# unterminated(@string_start.line, @string_start.column) if state == :multiline
|
133
|
+
token
|
134
|
+
|
135
|
+
# return token
|
109
136
|
end
|
110
137
|
|
111
138
|
def build_token(aSymbolName, aLexeme)
|
@@ -154,24 +181,8 @@ module Rley
|
|
154
181
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
155
182
|
end
|
156
183
|
|
157
|
-
#
|
158
|
-
|
159
|
-
def skip_intertoken_spaces
|
160
|
-
loop do
|
161
|
-
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
162
|
-
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
163
|
-
if nl_found
|
164
|
-
ws_found = true
|
165
|
-
next_line
|
166
|
-
end
|
167
|
-
|
168
|
-
break unless ws_found
|
169
|
-
end
|
170
|
-
|
171
|
-
scanner.pos
|
172
|
-
end
|
173
|
-
|
174
|
-
def next_line
|
184
|
+
# Event: next line detected.
|
185
|
+
def next_line_scanned
|
175
186
|
@lineno += 1
|
176
187
|
@line_start = scanner.pos
|
177
188
|
end
|
@@ -8,10 +8,6 @@ module Rley # This module is used as a namespace
|
|
8
8
|
# @return [String] The name of the grammar symbol
|
9
9
|
attr_reader(:name)
|
10
10
|
|
11
|
-
# An indicator that tells whether the grammar symbol can generate a
|
12
|
-
# non-empty string of terminals.
|
13
|
-
attr_writer(:generative)
|
14
|
-
|
15
11
|
# Constructor.
|
16
12
|
# aName [String] The name of the grammar symbol.
|
17
13
|
def initialize(aName)
|
@@ -7,6 +7,10 @@ module Rley # This module is used as a namespace
|
|
7
7
|
# A non-terminal symbol (sometimes called a syntactic variable) represents
|
8
8
|
# a composition of terminal or non-terminal symbols
|
9
9
|
class NonTerminal < GrmSymbol
|
10
|
+
# An indicator that tells whether the grammar symbol can generate a
|
11
|
+
# non-empty string of terminals.
|
12
|
+
attr_writer(:generative)
|
13
|
+
|
10
14
|
# A non-terminal symbol is nullable if it can match an empty string.
|
11
15
|
attr_writer(:nullable)
|
12
16
|
|