rley 0.8.06 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +23 -2
  3. data/CHANGELOG.md +21 -1
  4. data/LICENSE.txt +1 -1
  5. data/README.md +1 -1
  6. data/appveyor.yml +1 -3
  7. data/examples/NLP/benchmark_pico_en.rb +6 -6
  8. data/examples/NLP/engtagger.rb +6 -6
  9. data/examples/general/calc_iter1/calc_lexer.rb +1 -1
  10. data/examples/general/calc_iter2/calc_lexer.rb +1 -1
  11. data/examples/general/left.rb +1 -1
  12. data/examples/general/right.rb +1 -1
  13. data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
  14. data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
  15. data/examples/tokenizer/run_tokenizer.rb +1 -1
  16. data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
  17. data/lib/rley/constants.rb +1 -1
  18. data/lib/rley/engine.rb +2 -2
  19. data/lib/rley/interface.rb +3 -3
  20. data/lib/rley/lexical/token.rb +1 -1
  21. data/lib/rley/ptree/non_terminal_node.rb +1 -1
  22. data/lib/rley/rgn/all_notation_nodes.rb +5 -0
  23. data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
  24. data/lib/rley/{notation → rgn}/ast_node.rb +13 -12
  25. data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
  26. data/lib/rley/rgn/composite_node.rb +28 -0
  27. data/lib/rley/{notation → rgn}/grammar.rb +1 -1
  28. data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
  29. data/lib/rley/{notation → rgn}/parser.rb +7 -7
  30. data/lib/rley/rgn/repetition_node.rb +62 -0
  31. data/lib/rley/rgn/sequence_node.rb +30 -0
  32. data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
  33. data/lib/rley/{notation → rgn}/tokenizer.rb +71 -60
  34. data/lib/rley/syntax/grm_symbol.rb +0 -4
  35. data/lib/rley/syntax/non_terminal.rb +4 -0
  36. data/lib/rley/syntax/terminal.rb +10 -6
  37. data/spec/rley/parser/dangling_else_spec.rb +3 -3
  38. data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -50
  39. data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
  40. data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
  41. data/spec/rley/rgn/repetition_node_spec.rb +56 -0
  42. data/spec/rley/rgn/sequence_node_spec.rb +48 -0
  43. data/spec/rley/rgn/symbol_node_spec.rb +33 -0
  44. data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
  45. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  46. data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
  47. metadata +40 -33
  48. data/lib/rley/notation/all_notation_nodes.rb +0 -4
  49. data/lib/rley/notation/grouping_node.rb +0 -23
  50. data/lib/rley/notation/sequence_node.rb +0 -35
@@ -7,8 +7,9 @@ require_relative 'ast_visitor'
7
7
  require_relative '../syntax/match_closest'
8
8
 
9
9
  module Rley # This module is used as a namespace
10
- module Notation # This module is used as a namespace
11
- # Structure used for production rules that are implicitly generated by Rley
10
+ # Namespace for classes that define RGN (Rley Grammar Notation)
11
+ module RGN # This module is used as a namespace
12
+ # Structure used by Rley to generate implicit production rules.
12
13
  RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
13
14
 
14
15
  # Builder GoF pattern. Builder builds a complex object
@@ -19,7 +20,7 @@ module Rley # This module is used as a namespace
19
20
  # to the matching grammar symbol object.
20
21
  attr_reader(:symbols)
21
22
 
22
- # @return [Notation::Parser] Parser for the right-side of productions
23
+ # @return [RGN::Parser] Parser for the right-side of productions
23
24
  attr_reader(:parser)
24
25
 
25
26
  # @return [Hash{ASTVisitor, Array}]
@@ -32,21 +33,12 @@ module Rley # This module is used as a namespace
32
33
  # @return [Hash{String, String}] The synthesized raw productions
33
34
  attr_reader(:synthetized)
34
35
 
35
- # Creates a new grammar builder.
36
+ # Creates a new RGN grammar builder.
36
37
  # @param aBlock [Proc] code block used to build the grammar.
37
- # @example Building a tiny English grammar
38
- # builder = Rley::Notation::GrammarBuilder.new do
39
- # add_terminals('n', 'v', 'adj', 'det')
40
- # rule 'S' => 'NP VP'
41
- # rule 'VP' => 'v NP'
42
- # rule 'NP' => 'det n'
43
- # rule 'NP' => 'adj NP'
44
- # end
45
- # tiny_eng = builder.grammar
46
38
  def initialize(&aBlock)
47
39
  @symbols = {}
48
40
  @productions = []
49
- @parser = Notation::Parser.new
41
+ @parser = RGN::Parser.new
50
42
  @visitor2rhs = {}
51
43
  @synthetized = {}
52
44
 
@@ -73,7 +65,7 @@ module Rley # This module is used as a namespace
73
65
  end
74
66
 
75
67
  # Add the given marker symbol to the grammar of the language
76
- # @param aMarkerSymbol [String] A mazker symbol
68
+ # @param aMarkerSymbol [String] A marker symbol
77
69
  # @return [void]
78
70
  def add_marker(aMarkerSymbol)
79
71
  new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
@@ -227,105 +219,64 @@ module Rley # This module is used as a namespace
227
219
  # ################################
228
220
  def after_symbol_node(aSymbolNode, aVisitor)
229
221
  symb_name = aSymbolNode.name
230
-
231
- case aSymbolNode.repetition
232
- when :zero_or_one
233
- # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
234
- # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
235
- name_modified = "#{symb_name}#{suffix_qmark}"
236
- unless symbols.include? name_modified
237
- add_nonterminal(name_modified)
238
- add_raw_rule(name_modified, symb_name, suffix_qmark_one)
239
- add_raw_rule(name_modified, '', suffix_qmark_none)
240
- end
241
- symb_name = name_modified
242
-
243
- when :zero_or_more
244
- # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
245
- # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
246
- name_modified = "#{symb_name}#{suffix_star}"
247
- unless symbols.include? name_modified
248
- add_nonterminal(name_modified)
249
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
250
- add_raw_rule(name_modified, [], suffix_star_none)
251
- end
252
- symb_name = name_modified
253
-
254
- when :exactly_one
255
- # Do nothing
256
-
257
- when :one_or_more
258
- name_modified = "#{symb_name}#{suffix_plus}"
259
- unless symbols.include? name_modified
260
- add_nonterminal(name_modified)
261
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
262
- add_raw_rule(name_modified, symb_name, suffix_plus_one)
263
- end
264
- symb_name = name_modified
265
- else
266
- raise StandardError, 'Unhandled multiplicity'
267
- end
268
-
269
222
  symb = get_grm_symbol(symb_name)
270
223
  visitor2rhs[aVisitor] << symb
271
224
  end
272
225
 
273
226
  def after_sequence_node(aSequenceNode, _visitor)
274
- aSequenceNode.subnodes.each_with_index do |sn, i|
275
- next if sn.annotation.empty?
276
-
277
- matching = sn.annotation['match_closest']
278
- aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
279
- end
227
+ add_constraints(aSequenceNode)
280
228
  end
281
229
 
282
- def after_grouping_node(aGroupingNode, aVisitor)
283
- after_sequence_node(aGroupingNode, aVisitor)
284
- symb_name = sequence_name(aGroupingNode)
230
+ def after_repetition_node(aRepNode, aVisitor)
231
+ add_constraints(aRepNode)
232
+ return if aRepNode.repetition == :exactly_one
285
233
 
286
- unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
287
- add_nonterminal(symb_name)
288
- rhs = serialize_sequence(aGroupingNode)
289
- add_raw_rule(symb_name, rhs, 'return_children', true, aGroupingNode.constraints)
234
+ node_name = aRepNode.name
235
+ child_name = aRepNode.subnodes[0].name
236
+
237
+ if aRepNode.child.is_a?(SequenceNode) &&
238
+ !symbols.include?(child_name) && aRepNode.repetition != :zero_or_one
239
+ add_nonterminal(child_name)
240
+ rhs = aRepNode.child.to_text
241
+ add_raw_rule(child_name, rhs, 'return_children', true)
290
242
  end
291
- name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
292
243
 
293
- case aGroupingNode.repetition
244
+ case aRepNode.repetition
294
245
  when :zero_or_one
295
- # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
296
- # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
297
- unless symbols.include? name_modified
298
- add_nonterminal(name_modified)
299
- add_raw_rule(name_modified, symb_name, suffix_qmark_one, true)
300
- add_raw_rule(name_modified, [], suffix_qmark_none, true)
246
+ # implicitly called: rule('node_name_qmark' => 'node_name_qmark').tag suffix_qmark_one
247
+ # implicitly called: rule('node_name_qmark' => '').tag suffix_qmark_none
248
+ unless symbols.include? node_name
249
+ add_nonterminal(node_name)
250
+ if aRepNode.child.is_a?(SequenceNode) && !aRepNode.child.constraints.empty?
251
+ aRepNode.constraints.merge(aRepNode.child.constraints)
252
+ end
253
+ rhs = aRepNode.child.to_text
254
+ add_raw_rule(node_name, rhs, 'return_children', false, aRepNode.constraints)
255
+ add_raw_rule(node_name, [], suffix_qmark_none, true)
301
256
  end
302
257
 
303
258
  when :zero_or_more
304
- # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
305
- # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
306
- unless symbols.include? name_modified
307
- add_nonterminal(name_modified)
308
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
309
- add_raw_rule(name_modified, '', suffix_star_none)
259
+ # implicitly called: rule('node_name_star' => 'node_name_star node_name').tag suffix_star_more
260
+ # implicitly called: rule('node_name_star' => '').tag suffix_star_none
261
+ unless symbols.include? node_name
262
+ add_nonterminal(node_name)
263
+ rhs = "#{node_name} #{child_name}"
264
+ add_raw_rule(node_name, rhs, suffix_star_more)
265
+ add_raw_rule(node_name, '', suffix_star_none)
310
266
  end
311
267
 
312
- when :exactly_one
313
- # Do nothing
314
-
315
268
  when :one_or_more
316
- unless symbols.include? name_modified
317
- add_nonterminal(name_modified)
318
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
319
- add_raw_rule(name_modified, symb_name, suffix_plus_one)
269
+ unless symbols.include? node_name
270
+ add_nonterminal(node_name)
271
+ add_raw_rule(node_name, "#{node_name} #{child_name}", suffix_plus_more)
272
+ add_raw_rule(node_name, child_name, suffix_plus_one)
320
273
  end
321
274
  else
322
275
  raise StandardError, 'Unhandled multiplicity'
323
276
  end
324
277
 
325
- unless aGroupingNode.repetition == :exactly_one
326
- symb = get_grm_symbol(name_modified)
327
- visitor2rhs[aVisitor] << symb
328
- end
278
+ symb = get_grm_symbol(node_name)
279
+ visitor2rhs[aVisitor] << symb
329
280
  end
330
281
 
331
282
  # A notification to the builder object that the programmer
@@ -425,22 +376,33 @@ module Rley # This module is used as a namespace
425
376
  symbols[name]
426
377
  end
427
378
 
428
- def sequence_name(aSequenceNode)
429
- subnode_names = +''
430
- aSequenceNode.subnodes.each do |subn|
431
- case subn
432
- when SymbolNode
433
- subnode_names << "_#{subn.name}"
434
- when SequenceNode
435
- subnode_names << "_#{sequence_name(subn)}"
436
- end
437
- suffix = repetition2suffix(subn.repetition)
438
- subnode_names << suffix
439
- end
379
+ def add_constraints(aCompositeNode)
380
+ aCompositeNode.subnodes.each_with_index do |sn, i|
381
+ next if sn.annotation.empty?
440
382
 
441
- "seq#{subnode_names}"
383
+ matching = sn.annotation['match_closest']
384
+ constraint = Syntax::MatchClosest.new(aCompositeNode, i, matching)
385
+ aCompositeNode.constraints << constraint
386
+ end
442
387
  end
443
388
 
389
+ # def sequence_name(aSequenceNode)
390
+ # subnode_names = +''
391
+ # aSequenceNode.subnodes.each do |subn|
392
+ # case subn
393
+ # when SymbolNode
394
+ # subnode_names << "_#{subn.name}"
395
+ # when SequenceNode
396
+ # subnode_names << "_#{sequence_name(subn)}"
397
+ # when RepetitionNode
398
+ # suffix = repetition2suffix(subn.repetition)
399
+ # subnode_names << suffix
400
+ # end
401
+ # end
402
+ #
403
+ # "seq#{subnode_names}"
404
+ # end
405
+
444
406
  def node_base_name(aNode)
445
407
  if aNode.kind_of?(SymbolNode)
446
408
  aNode.name
@@ -456,23 +418,23 @@ module Rley # This module is used as a namespace
456
418
  "#{base_name}#{suffix}"
457
419
  end
458
420
 
459
- def serialize_sequence(aSequenceNode)
460
- text = +''
461
- aSequenceNode.subnodes.each do |sn|
462
- text << ' '
463
- case sn
464
- when SymbolNode
465
- text << sn.name
466
- when SequenceNode
467
- text << sequence_name(sn)
468
- end
469
-
470
- suffix = repetition2suffix(sn.repetition)
471
- text << suffix
472
- end
473
-
474
- text.strip
475
- end
421
+ # def serialize_sequence(aSequenceNode)
422
+ # text = +''
423
+ # aSequenceNode.subnodes.each do |sn|
424
+ # text << ' '
425
+ # case sn
426
+ # when SymbolNode
427
+ # text << sn.name
428
+ # when SequenceNode
429
+ # text << sequence_name(sn)
430
+ # when RepetitionNode
431
+ # suffix = repetition2suffix(sn.repetition)
432
+ # text << suffix
433
+ # end
434
+ # end
435
+ #
436
+ # text.strip
437
+ # end
476
438
 
477
439
  def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
478
440
  raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
@@ -484,7 +446,7 @@ module Rley # This module is used as a namespace
484
446
  end
485
447
 
486
448
  def process_raw_rules
487
- until synthetized.empty? do
449
+ until synthetized.empty?
488
450
  raw_rules = synthetized.delete(synthetized.keys.first)
489
451
  raw_rules.each do |raw|
490
452
  new_prod = nil
@@ -494,7 +456,7 @@ module Rley # This module is used as a namespace
494
456
  new_prod = rule(raw.lhs => raw.rhs)
495
457
  end
496
458
  new_prod.tag(raw.tag)
497
- new_prod.constraints = raw.constraints
459
+ new_prod.constraints.concat(raw.constraints)
498
460
  end
499
461
  end
500
462
  end
@@ -5,8 +5,8 @@ require_relative 'grammar'
5
5
  require_relative 'ast_builder'
6
6
 
7
7
  module Rley
8
- module Notation
9
- # A Lox parser that produce concrete parse trees.
8
+ module RGN
9
+ # An RGN (Rley Grammar Notation) parser that produces concrete parse trees.
10
10
  # Concrete parse trees are the default kind of parse tree
11
11
  # generated by the Rley library.
12
12
  # They consist of two node types only:
@@ -28,16 +28,16 @@ module Rley
28
28
  # Create a Rley facade object
29
29
  @engine = Rley::Engine.new do |cfg|
30
30
  cfg.diagnose = true
31
- cfg.repr_builder = Notation::ASTBuilder
31
+ cfg.repr_builder = RGN::ASTBuilder
32
32
  end
33
33
 
34
34
  # Step 1. Load RGN grammar
35
- @engine.use_grammar(Rley::Notation::RGNGrammar)
35
+ @engine.use_grammar(Rley::RGN::RGNGrammar)
36
36
  end
37
37
 
38
- # Parse the given Lox program into a parse tree.
39
- # @param source [String] Lox program to parse
40
- # @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
38
+ # Parse the given RGN snippet into a parse tree.
39
+ # @param source [String] Snippet to parse
40
+ # @return [Rley::ParseTree] A parse tree equivalent to the RGN input.
41
41
  def parse(source)
42
42
  lexer = Tokenizer.new(source)
43
43
  result = engine.parse(lexer.tokens)
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'composite_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # An RGN syntax node representing an expression quantified by a ?, * or +.
8
+ class RepetitionNode < CompositeNode
9
+ # @return [Symbol] one of: :zero_or_one, :zero_or_more, :exactly_one, :one_or_more
10
+ attr_accessor :repetition
11
+
12
+ Repetition2suffix = {
13
+ zero_or_one: '_qmark',
14
+ zero_or_more: '_star',
15
+ exactly_one: '',
16
+ one_or_more: '_plus'
17
+ }.freeze
18
+
19
+ # @param child [ASTNode] the AST node to which the repetition applies
20
+ # @param theRepetition [Symbol] how many times the child node can be repeated
21
+ def initialize(child, theRepetition)
22
+ super([child])
23
+ @repetition = theRepetition
24
+ end
25
+
26
+ # @return [RGN::ASTNode]
27
+ def child
28
+ subnodes[0]
29
+ end
30
+
31
+ # @return [String]
32
+ def name
33
+ child_name = subnodes[0].name
34
+ "rep_#{child_name}#{Repetition2suffix[repetition]}"
35
+ end
36
+
37
+ # @return [String]
38
+ def to_text
39
+ child_text = subnodes[0].to_text
40
+ "rep_#{child_text}#{Repetition2suffix[repetition]}"
41
+ end
42
+
43
+ # Part of the 'visitee' role in Visitor design pattern.
44
+ # @param visitor [RGN::ASTVisitor] the visitor
45
+ def accept(visitor)
46
+ visitor.visit_repetition_node(self)
47
+ end
48
+
49
+ def suffix_qmark
50
+ Repetition2suffix[:zero_or_one]
51
+ end
52
+
53
+ def suffix_star
54
+ Repetition2suffix[:zero_or_more]
55
+ end
56
+
57
+ def suffix_plus
58
+ Repetition2suffix[:one_or_more]
59
+ end
60
+ end # class
61
+ end # module
62
+ end # module
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'composite_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # A syntax node for a sequence of AST nodes
8
+ class SequenceNode < CompositeNode
9
+ def name
10
+ result = +''
11
+ subnodes.each do |sn|
12
+ result << "_#{sn.name}"
13
+ end
14
+
15
+ "seq#{result}"
16
+ end
17
+
18
+ def to_text
19
+ arr = subnodes.map(&:to_text)
20
+ arr.join(' ')
21
+ end
22
+
23
+ # Part of the 'visitee' role in Visitor design pattern.
24
+ # @param visitor [RGN::ASTVisitor] the visitor
25
+ def accept(visitor)
26
+ visitor.visit_sequence_node(self)
27
+ end
28
+ end # class
29
+ end # module
30
+ end # module
@@ -3,24 +3,32 @@
3
3
  require_relative 'ast_node'
4
4
 
5
5
  module Rley
6
- module Notation
7
- # A syntax node for a grammar symbol occurring in rhs of a rule
6
+ module RGN
7
+ # A syntax node for a grammar symbol occurring in rhs of a rule.
8
+ # Symbol nodes are leaf nodes of RGN parse trees.
8
9
  class SymbolNode < ASTNode
10
+ # @return [Rley::Lexical::Position] Position of the entry in the input stream.
11
+ attr_reader :position
12
+
9
13
  # @return [String] name of grammar symbol
10
14
  attr_reader :name
11
15
 
12
16
  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
13
17
  # @param aName [String] name of grammar symbol
14
- # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
15
- def initialize(aPosition, aName, theRepetition = nil)
16
- super(aPosition)
18
+ def initialize(aPosition, aName)
19
+ super()
20
+ @position = aPosition
17
21
  @name = aName
18
- self.repetition = theRepetition if theRepetition
22
+ end
23
+
24
+ # @return [String] the symbol name, followed by its annotation text if any
25
+ def to_text
26
+ annotation.empty? ? name : "#{name} #{annotation_to_text}"
19
27
  end
20
28
 
21
29
  # Abstract method (must be overridden in subclasses).
22
30
  # Part of the 'visitee' role in Visitor design pattern.
23
- # @param _visitor [LoxxyTreeVisitor] the visitor
31
+ # @param visitor [RGN::ASTVisitor] the visitor
24
32
  def accept(visitor)
25
33
  visitor.visit_symbol_node(self)
26
34
  end
@@ -4,7 +4,7 @@ require 'strscan'
4
4
  require_relative '../lexical/token'
5
5
 
6
6
  module Rley
7
- module Notation
7
+ module RGN
8
8
  # A tokenizer for the Rley notation language.
9
9
  # Responsibility: break input into a sequence of token objects.
10
10
  # The tokenizer should recognize:
@@ -14,6 +14,13 @@ module Rley
14
14
  # Delimiters: e.g. parentheses '(', ')'
15
15
  # Separators: e.g. comma
16
16
  class Tokenizer
17
+ PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
18
+ PATT_INTEGER = /\d+/.freeze
19
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
20
+ PATT_STRING_START = /"|'/.freeze
21
+ PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
22
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
23
+
17
24
  # @return [StringScanner] Low-level input scanner
18
25
  attr_reader(:scanner)
19
26
 
@@ -24,7 +31,7 @@ module Rley
24
31
  attr_reader(:line_start)
25
32
 
26
33
  # One or two special character tokens.
27
- @@lexeme2name = {
34
+ Lexeme2name = {
28
35
  '(' => 'LEFT_PAREN',
29
36
  ')' => 'RIGHT_PAREN',
30
37
  '{' => 'LEFT_BRACE',
@@ -41,19 +48,19 @@ module Rley
41
48
  match_closest repeat
42
49
  ].map { |x| [x, x] }.to_h
43
50
 
44
- # Constructor. Initialize a tokenizer for Lox input.
45
- # @param source [String] Lox text to tokenize.
51
+ # Constructor. Initialize a tokenizer for RGN input.
52
+ # @param source [String] RGN text to tokenize.
46
53
  def initialize(source = nil)
47
- @scanner = StringScanner.new('')
48
- start_with(source) if source
54
+ reset
55
+ input = source || ''
56
+ @scanner = StringScanner.new(input)
49
57
  end
50
58
 
51
59
  # Reset the tokenizer and make the given text, the current input.
52
- # @param source [String] Lox text to tokenize.
60
+ # @param source [String] RGN text to tokenize.
53
61
  def start_with(source)
62
+ reset
54
63
  @scanner.string = source
55
- @lineno = 1
56
- @line_start = 0
57
64
  end
58
65
 
59
66
  # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
65
72
  tok_sequence << token unless token.nil?
66
73
  end
67
74
 
68
- return tok_sequence
75
+ tok_sequence
69
76
  end
70
77
 
71
78
  private
72
79
 
73
- def _next_token
74
- pos_before = scanner.pos
75
- skip_intertoken_spaces
76
- ws_found = true if scanner.pos > pos_before
77
- curr_ch = scanner.peek(1)
78
- return nil if curr_ch.nil? || curr_ch.empty?
80
+ def reset
81
+ @lineno = 1
82
+ @line_start = 0
83
+ end
79
84
 
85
+ def _next_token
80
86
  token = nil
87
+ ws_found = false
81
88
 
82
- if '(){},'.include? curr_ch
83
- # Single delimiter, separator or character
84
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
85
- elsif '?*+,'.include? curr_ch # modifier character
86
- # modifiers without prefix text are symbols
87
- symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
88
- token = build_token(symb, scanner.getch)
89
- elsif (lexeme = scanner.scan(/\.\./))
90
- # One or two special character tokens
91
- token = build_token(@@lexeme2name[lexeme], lexeme)
92
- elsif scanner.check(/"|'/) # Start of string detected...
93
- token = build_string_token
94
- elsif (lexeme = scanner.scan(/\d+/))
95
- token = build_token('INT_LIT', lexeme)
96
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
97
- keyw = @@keywords[lexeme.chop!]
98
- token = build_token('KEY', lexeme) if keyw
99
- # ... error case
100
- elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
101
- token = build_token('SYMBOL', lexeme)
102
- else # Unknown token
103
- col = scanner.pos - @line_start + 1
104
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
105
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
106
- end
89
+ # Loop until end of input reached or token found
90
+ until token || scanner.eos?
107
91
 
108
- return token
92
+ nl_found = scanner.skip(PATT_NEWLINE)
93
+ if nl_found
94
+ next_line_scanned
95
+ next
96
+ end
97
+ if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
98
+ ws_found = true
99
+ next
100
+ end
101
+
102
+ curr_ch = scanner.peek(1)
103
+
104
+ if '(){},'.include? curr_ch
105
+ # Single delimiter, separator or character
106
+ token = build_token(Lexeme2name[curr_ch], scanner.getch)
107
+ elsif '?*+,'.include? curr_ch # modifier character
108
+ # modifiers without prefix text are symbols
109
+ symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
110
+ token = build_token(symb, scanner.getch)
111
+ elsif (lexeme = scanner.scan(/\.\./))
112
+ # One or two special character tokens
113
+ token = build_token(Lexeme2name[lexeme], lexeme)
114
+ elsif scanner.check(PATT_STRING_START) # Start of string detected...
115
+ token = build_string_token
116
+ elsif (lexeme = scanner.scan(PATT_INTEGER))
117
+ token = build_token('INT_LIT', lexeme)
118
+ elsif (lexeme = scanner.scan(PATT_KEY))
119
+ keyw = @@keywords[lexeme.chop!]
120
+ token = build_token('KEY', lexeme) if keyw
121
+ # ... error case
122
+ elsif (lexeme = scanner.scan(PATT_SYMBOL))
123
+ token = build_token('SYMBOL', lexeme)
124
+ else # Unknown token
125
+ col = scanner.pos - @line_start + 1
126
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
127
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
128
+ end
129
+ ws_found = false
130
+ end # until
131
+
132
+ # unterminated(@string_start.line, @string_start.column) if state == :multiline
133
+ token
134
+
135
+ # return token
109
136
  end
110
137
 
111
138
  def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
154
181
  Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
155
182
  end
156
183
 
157
- # Skip non-significant whitespaces and comments.
158
- # Advance the scanner until something significant is found.
159
- def skip_intertoken_spaces
160
- loop do
161
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
162
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
163
- if nl_found
164
- ws_found = true
165
- next_line
166
- end
167
-
168
- break unless ws_found
169
- end
170
-
171
- scanner.pos
172
- end
173
-
174
- def next_line
184
+ # Event: next line detected.
185
+ def next_line_scanned
175
186
  @lineno += 1
176
187
  @line_start = scanner.pos
177
188
  end
@@ -8,10 +8,6 @@ module Rley # This module is used as a namespace
8
8
  # @return [String] The name of the grammar symbol
9
9
  attr_reader(:name)
10
10
 
11
- # An indicator that tells whether the grammar symbol can generate a
12
- # non-empty string of terminals.
13
- attr_writer(:generative)
14
-
15
11
  # Constructor.
16
12
  # aName [String] The name of the grammar symbol.
17
13
  def initialize(aName)
@@ -7,6 +7,10 @@ module Rley # This module is used as a namespace
7
7
  # A non-terminal symbol (sometimes called a syntactic variable) represents
8
8
  # a composition of terminal or non-terminal symbols
9
9
  class NonTerminal < GrmSymbol
10
+ # An indicator that tells whether the grammar symbol can generate a
11
+ # non-empty string of terminals.
12
+ attr_writer(:generative)
13
+
10
14
  # A non-terminal symbol is nullable if it can match an empty string.
11
15
  attr_writer(:nullable)
12
16