rley 0.8.06 → 0.8.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +23 -2
  3. data/CHANGELOG.md +21 -1
  4. data/LICENSE.txt +1 -1
  5. data/README.md +1 -1
  6. data/appveyor.yml +1 -3
  7. data/examples/NLP/benchmark_pico_en.rb +6 -6
  8. data/examples/NLP/engtagger.rb +6 -6
  9. data/examples/general/calc_iter1/calc_lexer.rb +1 -1
  10. data/examples/general/calc_iter2/calc_lexer.rb +1 -1
  11. data/examples/general/left.rb +1 -1
  12. data/examples/general/right.rb +1 -1
  13. data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
  14. data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
  15. data/examples/tokenizer/run_tokenizer.rb +1 -1
  16. data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
  17. data/lib/rley/constants.rb +1 -1
  18. data/lib/rley/engine.rb +2 -2
  19. data/lib/rley/interface.rb +3 -3
  20. data/lib/rley/lexical/token.rb +1 -1
  21. data/lib/rley/ptree/non_terminal_node.rb +1 -1
  22. data/lib/rley/rgn/all_notation_nodes.rb +5 -0
  23. data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
  24. data/lib/rley/{notation → rgn}/ast_node.rb +13 -12
  25. data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
  26. data/lib/rley/rgn/composite_node.rb +28 -0
  27. data/lib/rley/{notation → rgn}/grammar.rb +1 -1
  28. data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
  29. data/lib/rley/{notation → rgn}/parser.rb +7 -7
  30. data/lib/rley/rgn/repetition_node.rb +62 -0
  31. data/lib/rley/rgn/sequence_node.rb +30 -0
  32. data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
  33. data/lib/rley/{notation → rgn}/tokenizer.rb +71 -60
  34. data/lib/rley/syntax/grm_symbol.rb +0 -4
  35. data/lib/rley/syntax/non_terminal.rb +4 -0
  36. data/lib/rley/syntax/terminal.rb +10 -6
  37. data/spec/rley/parser/dangling_else_spec.rb +3 -3
  38. data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -50
  39. data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
  40. data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
  41. data/spec/rley/rgn/repetition_node_spec.rb +56 -0
  42. data/spec/rley/rgn/sequence_node_spec.rb +48 -0
  43. data/spec/rley/rgn/symbol_node_spec.rb +33 -0
  44. data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
  45. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  46. data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
  47. metadata +40 -33
  48. data/lib/rley/notation/all_notation_nodes.rb +0 -4
  49. data/lib/rley/notation/grouping_node.rb +0 -23
  50. data/lib/rley/notation/sequence_node.rb +0 -35
@@ -7,8 +7,9 @@ require_relative 'ast_visitor'
7
7
  require_relative '../syntax/match_closest'
8
8
 
9
9
  module Rley # This module is used as a namespace
10
- module Notation # This module is used as a namespace
11
- # Structure used for production rules that are implicitly generated by Rley
10
+ # Namespace for classes that define RGN (Rley Grammar Notation)
11
+ module RGN # This module is used as a namespace
12
+ # Structure used by Rley to generate implicit production rules.
12
13
  RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
13
14
 
14
15
  # Builder GoF pattern. Builder builds a complex object
@@ -19,7 +20,7 @@ module Rley # This module is used as a namespace
19
20
  # to the matching grammar symbol object.
20
21
  attr_reader(:symbols)
21
22
 
22
- # @return [Notation::Parser] Parser for the right-side of productions
23
+ # @return [RGN::Parser] Parser for the right-side of productions
23
24
  attr_reader(:parser)
24
25
 
25
26
  # @return [Hash{ASTVisitor, Array}]
@@ -32,21 +33,12 @@ module Rley # This module is used as a namespace
32
33
  # @return [Hash{String, String}] The synthesized raw productions
33
34
  attr_reader(:synthetized)
34
35
 
35
- # Creates a new grammar builder.
36
+ # Creates a new RGN grammar builder.
36
37
  # @param aBlock [Proc] code block used to build the grammar.
37
- # @example Building a tiny English grammar
38
- # builder = Rley::Notation::GrammarBuilder.new do
39
- # add_terminals('n', 'v', 'adj', 'det')
40
- # rule 'S' => 'NP VP'
41
- # rule 'VP' => 'v NP'
42
- # rule 'NP' => 'det n'
43
- # rule 'NP' => 'adj NP'
44
- # end
45
- # tiny_eng = builder.grammar
46
38
  def initialize(&aBlock)
47
39
  @symbols = {}
48
40
  @productions = []
49
- @parser = Notation::Parser.new
41
+ @parser = RGN::Parser.new
50
42
  @visitor2rhs = {}
51
43
  @synthetized = {}
52
44
 
@@ -73,7 +65,7 @@ module Rley # This module is used as a namespace
73
65
  end
74
66
 
75
67
  # Add the given marker symbol to the grammar of the language
76
- # @param aMarkerSymbol [String] A mazker symbol
68
+ # @param aMarkerSymbol [String] A marker symbol
77
69
  # @return [void]
78
70
  def add_marker(aMarkerSymbol)
79
71
  new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
@@ -227,105 +219,64 @@ module Rley # This module is used as a namespace
227
219
  # ################################
228
220
  def after_symbol_node(aSymbolNode, aVisitor)
229
221
  symb_name = aSymbolNode.name
230
-
231
- case aSymbolNode.repetition
232
- when :zero_or_one
233
- # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
234
- # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
235
- name_modified = "#{symb_name}#{suffix_qmark}"
236
- unless symbols.include? name_modified
237
- add_nonterminal(name_modified)
238
- add_raw_rule(name_modified, symb_name, suffix_qmark_one)
239
- add_raw_rule(name_modified, '', suffix_qmark_none)
240
- end
241
- symb_name = name_modified
242
-
243
- when :zero_or_more
244
- # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
245
- # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
246
- name_modified = "#{symb_name}#{suffix_star}"
247
- unless symbols.include? name_modified
248
- add_nonterminal(name_modified)
249
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
250
- add_raw_rule(name_modified, [], suffix_star_none)
251
- end
252
- symb_name = name_modified
253
-
254
- when :exactly_one
255
- # Do nothing
256
-
257
- when :one_or_more
258
- name_modified = "#{symb_name}#{suffix_plus}"
259
- unless symbols.include? name_modified
260
- add_nonterminal(name_modified)
261
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
262
- add_raw_rule(name_modified, symb_name, suffix_plus_one)
263
- end
264
- symb_name = name_modified
265
- else
266
- raise StandardError, 'Unhandled multiplicity'
267
- end
268
-
269
222
  symb = get_grm_symbol(symb_name)
270
223
  visitor2rhs[aVisitor] << symb
271
224
  end
272
225
 
273
226
  def after_sequence_node(aSequenceNode, _visitor)
274
- aSequenceNode.subnodes.each_with_index do |sn, i|
275
- next if sn.annotation.empty?
276
-
277
- matching = sn.annotation['match_closest']
278
- aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
279
- end
227
+ add_constraints(aSequenceNode)
280
228
  end
281
229
 
282
- def after_grouping_node(aGroupingNode, aVisitor)
283
- after_sequence_node(aGroupingNode, aVisitor)
284
- symb_name = sequence_name(aGroupingNode)
230
+ def after_repetition_node(aRepNode, aVisitor)
231
+ add_constraints(aRepNode)
232
+ return if aRepNode.repetition == :exactly_one
285
233
 
286
- unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
287
- add_nonterminal(symb_name)
288
- rhs = serialize_sequence(aGroupingNode)
289
- add_raw_rule(symb_name, rhs, 'return_children', true, aGroupingNode.constraints)
234
+ node_name = aRepNode.name
235
+ child_name = aRepNode.subnodes[0].name
236
+
237
+ if aRepNode.child.is_a?(SequenceNode) &&
238
+ !symbols.include?(child_name) && aRepNode.repetition != :zero_or_one
239
+ add_nonterminal(child_name)
240
+ rhs = aRepNode.child.to_text
241
+ add_raw_rule(child_name, rhs, 'return_children', true)
290
242
  end
291
- name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
292
243
 
293
- case aGroupingNode.repetition
244
+ case aRepNode.repetition
294
245
  when :zero_or_one
295
- # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
296
- # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
297
- unless symbols.include? name_modified
298
- add_nonterminal(name_modified)
299
- add_raw_rule(name_modified, symb_name, suffix_qmark_one, true)
300
- add_raw_rule(name_modified, [], suffix_qmark_none, true)
246
+ # implicitly called: rule('node_name_qmark' => 'node_name_qmark').tag suffix_qmark_one
247
+ # implicitly called: rule('node_name_qmark' => '').tag suffix_qmark_none
248
+ unless symbols.include? node_name
249
+ add_nonterminal(node_name)
250
+ if aRepNode.child.is_a?(SequenceNode) && !aRepNode.child.constraints.empty?
251
+ aRepNode.constraints.merge(aRepNode.child.constraints)
252
+ end
253
+ rhs = aRepNode.child.to_text
254
+ add_raw_rule(node_name, rhs, 'return_children', false, aRepNode.constraints)
255
+ add_raw_rule(node_name, [], suffix_qmark_none, true)
301
256
  end
302
257
 
303
258
  when :zero_or_more
304
- # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
305
- # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
306
- unless symbols.include? name_modified
307
- add_nonterminal(name_modified)
308
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
309
- add_raw_rule(name_modified, '', suffix_star_none)
259
+ # implicitly called: rule('node_name_star' => 'node_name_star node_name').tag suffix_star_more
260
+ # implicitly called: rule('node_name_star' => '').tag suffix_star_none
261
+ unless symbols.include? node_name
262
+ add_nonterminal(node_name)
263
+ rhs = "#{node_name} #{child_name}"
264
+ add_raw_rule(node_name, rhs, suffix_star_more)
265
+ add_raw_rule(node_name, '', suffix_star_none)
310
266
  end
311
267
 
312
- when :exactly_one
313
- # Do nothing
314
-
315
268
  when :one_or_more
316
- unless symbols.include? name_modified
317
- add_nonterminal(name_modified)
318
- add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
319
- add_raw_rule(name_modified, symb_name, suffix_plus_one)
269
+ unless symbols.include? node_name
270
+ add_nonterminal(node_name)
271
+ add_raw_rule(node_name, "#{node_name} #{child_name}", suffix_plus_more)
272
+ add_raw_rule(node_name, child_name, suffix_plus_one)
320
273
  end
321
274
  else
322
275
  raise StandardError, 'Unhandled multiplicity'
323
276
  end
324
277
 
325
- unless aGroupingNode.repetition == :exactly_one
326
- symb = get_grm_symbol(name_modified)
327
- visitor2rhs[aVisitor] << symb
328
- end
278
+ symb = get_grm_symbol(node_name)
279
+ visitor2rhs[aVisitor] << symb
329
280
  end
330
281
 
331
282
  # A notification to the builder object that the programmer
@@ -425,22 +376,33 @@ module Rley # This module is used as a namespace
425
376
  symbols[name]
426
377
  end
427
378
 
428
- def sequence_name(aSequenceNode)
429
- subnode_names = +''
430
- aSequenceNode.subnodes.each do |subn|
431
- case subn
432
- when SymbolNode
433
- subnode_names << "_#{subn.name}"
434
- when SequenceNode
435
- subnode_names << "_#{sequence_name(subn)}"
436
- end
437
- suffix = repetition2suffix(subn.repetition)
438
- subnode_names << suffix
439
- end
379
+ def add_constraints(aCompositeNode)
380
+ aCompositeNode.subnodes.each_with_index do |sn, i|
381
+ next if sn.annotation.empty?
440
382
 
441
- "seq#{subnode_names}"
383
+ matching = sn.annotation['match_closest']
384
+ constraint = Syntax::MatchClosest.new(aCompositeNode, i, matching)
385
+ aCompositeNode.constraints << constraint
386
+ end
442
387
  end
443
388
 
389
+ # def sequence_name(aSequenceNode)
390
+ # subnode_names = +''
391
+ # aSequenceNode.subnodes.each do |subn|
392
+ # case subn
393
+ # when SymbolNode
394
+ # subnode_names << "_#{subn.name}"
395
+ # when SequenceNode
396
+ # subnode_names << "_#{sequence_name(subn)}"
397
+ # when RepetitionNode
398
+ # suffix = repetition2suffix(subn.repetition)
399
+ # subnode_names << suffix
400
+ # end
401
+ # end
402
+ #
403
+ # "seq#{subnode_names}"
404
+ # end
405
+
444
406
  def node_base_name(aNode)
445
407
  if aNode.kind_of?(SymbolNode)
446
408
  aNode.name
@@ -456,23 +418,23 @@ module Rley # This module is used as a namespace
456
418
  "#{base_name}#{suffix}"
457
419
  end
458
420
 
459
- def serialize_sequence(aSequenceNode)
460
- text = +''
461
- aSequenceNode.subnodes.each do |sn|
462
- text << ' '
463
- case sn
464
- when SymbolNode
465
- text << sn.name
466
- when SequenceNode
467
- text << sequence_name(sn)
468
- end
469
-
470
- suffix = repetition2suffix(sn.repetition)
471
- text << suffix
472
- end
473
-
474
- text.strip
475
- end
421
+ # def serialize_sequence(aSequenceNode)
422
+ # text = +''
423
+ # aSequenceNode.subnodes.each do |sn|
424
+ # text << ' '
425
+ # case sn
426
+ # when SymbolNode
427
+ # text << sn.name
428
+ # when SequenceNode
429
+ # text << sequence_name(sn)
430
+ # when RepetitionNode
431
+ # suffix = repetition2suffix(sn.repetition)
432
+ # text << suffix
433
+ # end
434
+ # end
435
+ #
436
+ # text.strip
437
+ # end
476
438
 
477
439
  def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
478
440
  raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
@@ -484,7 +446,7 @@ module Rley # This module is used as a namespace
484
446
  end
485
447
 
486
448
  def process_raw_rules
487
- until synthetized.empty? do
449
+ until synthetized.empty?
488
450
  raw_rules = synthetized.delete(synthetized.keys.first)
489
451
  raw_rules.each do |raw|
490
452
  new_prod = nil
@@ -494,7 +456,7 @@ module Rley # This module is used as a namespace
494
456
  new_prod = rule(raw.lhs => raw.rhs)
495
457
  end
496
458
  new_prod.tag(raw.tag)
497
- new_prod.constraints = raw.constraints
459
+ new_prod.constraints.concat(raw.constraints)
498
460
  end
499
461
  end
500
462
  end
@@ -5,8 +5,8 @@ require_relative 'grammar'
5
5
  require_relative 'ast_builder'
6
6
 
7
7
  module Rley
8
- module Notation
9
- # A Lox parser that produce concrete parse trees.
8
+ module RGN
9
+ # An RGN (Rley Grammar Notation) parser that produces concrete parse trees.
10
10
  # Concrete parse trees are the default kind of parse tree
11
11
  # generated by the Rley library.
12
12
  # They consist of two node types only:
@@ -28,16 +28,16 @@ module Rley
28
28
  # Create a Rley facade object
29
29
  @engine = Rley::Engine.new do |cfg|
30
30
  cfg.diagnose = true
31
- cfg.repr_builder = Notation::ASTBuilder
31
+ cfg.repr_builder = RGN::ASTBuilder
32
32
  end
33
33
 
34
34
  # Step 1. Load RGN grammar
35
- @engine.use_grammar(Rley::Notation::RGNGrammar)
35
+ @engine.use_grammar(Rley::RGN::RGNGrammar)
36
36
  end
37
37
 
38
- # Parse the given Lox program into a parse tree.
39
- # @param source [String] Lox program to parse
40
- # @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
38
+ # Parse the given RGN snippet into a parse tree.
39
+ # @param source [String] Snippet to parse
40
+ # @return [Rley::ParseTree] A parse tree equivalent to the RGN input.
41
41
  def parse(source)
42
42
  lexer = Tokenizer.new(source)
43
43
  result = engine.parse(lexer.tokens)
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'composite_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # A RGN syntax node representing an expression quantified by a ?, * or +.
8
+ class RepetitionNode < CompositeNode
9
+ # @return [Symbol] one of: :zero_or_one, :zero_or_more, :one_or_more
10
+ attr_accessor :repetition
11
+
12
+ Repetition2suffix = {
13
+ zero_or_one: '_qmark',
14
+ zero_or_more: '_star',
15
+ exactly_one: '',
16
+ one_or_more: '_plus'
17
+ }.freeze
18
+
19
+ # @param child [ASTNode] the AST node to which the repetition applies
20
+ # @param theRepetition [Symbol] how many times the child node can be repeated
21
+ def initialize(child, theRepetition)
22
+ super([child])
23
+ @repetition = theRepetition
24
+ end
25
+
26
+ # @return [RGN::ASTNode]
27
+ def child
28
+ subnodes[0]
29
+ end
30
+
31
+ # @return [String]
32
+ def name
33
+ child_name = subnodes[0].name
34
+ "rep_#{child_name}#{Repetition2suffix[repetition]}"
35
+ end
36
+
37
+ # @return [String]
38
+ def to_text
39
+ child_text = subnodes[0].to_text
40
+ "rep_#{child_text}#{Repetition2suffix[repetition]}"
41
+ end
42
+
43
+ # Part of the 'visitee' role in Visitor design pattern.
44
+ # @param visitor [RGN::ASTVisitor] the visitor
45
+ def accept(visitor)
46
+ visitor.visit_repetition_node(self)
47
+ end
48
+
49
+ def suffix_qmark
50
+ Repetition2suffix[:zero_or_one]
51
+ end
52
+
53
+ def suffix_star
54
+ Repetition2suffix[:zero_or_more]
55
+ end
56
+
57
+ def suffix_plus
58
+ Repetition2suffix[:one_or_more]
59
+ end
60
+ end # class
61
+ end # module
62
+ end # module
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'composite_node'
4
+
5
+ module Rley
6
+ module RGN
7
+ # A syntax node for a sequence of AST nodes
8
+ class SequenceNode < CompositeNode
9
+ def name
10
+ result = +''
11
+ subnodes.each do |sn|
12
+ result << "_#{sn.name}"
13
+ end
14
+
15
+ "seq#{result}"
16
+ end
17
+
18
+ def to_text
19
+ arr = subnodes.map(&:to_text)
20
+ arr.join(' ')
21
+ end
22
+
23
+ # Part of the 'visitee' role in Visitor design pattern.
24
+ # @param visitor [RGN::ASTVisitor] the visitor
25
+ def accept(visitor)
26
+ visitor.visit_sequence_node(self)
27
+ end
28
+ end # class
29
+ end # module
30
+ end # module
@@ -3,24 +3,32 @@
3
3
  require_relative 'ast_node'
4
4
 
5
5
  module Rley
6
- module Notation
7
- # A syntax node for a grammar symbol occurring in rhs of a rule
6
+ module RGN
7
+ # A syntax node for a grammar symbol occurring in rhs of a rule.
8
+ # Symbol nodes are leaf nodes of RGN parse trees.
8
9
  class SymbolNode < ASTNode
10
+ # @return [Rley::Lexical::Position] Position of the entry in the input stream.
11
+ attr_reader :position
12
+
9
13
  # @return [String] name of grammar symbol
10
14
  attr_reader :name
11
15
 
12
16
  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
13
17
  # @param aName [String] name of grammar symbol
14
- # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
15
- def initialize(aPosition, aName, theRepetition = nil)
16
- super(aPosition)
18
+ def initialize(aPosition, aName)
19
+ super()
20
+ @position = aPosition
17
21
  @name = aName
18
- self.repetition = theRepetition if theRepetition
22
+ end
23
+
24
+ # @return [String] name of grammar symbol, followed by its annotation text when annotated
25
+ def to_text
26
+ annotation.empty? ? name : "#{name} #{annotation_to_text}"
19
27
  end
20
28
 
21
29
  # Abstract method (must be overridden in subclasses).
22
30
  # Part of the 'visitee' role in Visitor design pattern.
23
- # @param _visitor [LoxxyTreeVisitor] the visitor
31
+ # @param visitor [RGN::ASTVisitor] the visitor
24
32
  def accept(visitor)
25
33
  visitor.visit_symbol_node(self)
26
34
  end
@@ -4,7 +4,7 @@ require 'strscan'
4
4
  require_relative '../lexical/token'
5
5
 
6
6
  module Rley
7
- module Notation
7
+ module RGN
8
8
  # A tokenizer for the Rley notation language.
9
9
  # Responsibility: break input into a sequence of token objects.
10
10
  # The tokenizer should recognize:
@@ -14,6 +14,13 @@ module Rley
14
14
  # Delimiters: e.g. parentheses '(', ')'
15
15
  # Separators: e.g. comma
16
16
  class Tokenizer
17
+ PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
18
+ PATT_INTEGER = /\d+/.freeze
19
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
20
+ PATT_STRING_START = /"|'/.freeze
21
+ PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
22
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
23
+
17
24
  # @return [StringScanner] Low-level input scanner
18
25
  attr_reader(:scanner)
19
26
 
@@ -24,7 +31,7 @@ module Rley
24
31
  attr_reader(:line_start)
25
32
 
26
33
  # One or two special character tokens.
27
- @@lexeme2name = {
34
+ Lexeme2name = {
28
35
  '(' => 'LEFT_PAREN',
29
36
  ')' => 'RIGHT_PAREN',
30
37
  '{' => 'LEFT_BRACE',
@@ -41,19 +48,19 @@ module Rley
41
48
  match_closest repeat
42
49
  ].map { |x| [x, x] }.to_h
43
50
 
44
- # Constructor. Initialize a tokenizer for Lox input.
45
- # @param source [String] Lox text to tokenize.
51
+ # Constructor. Initialize a tokenizer for RGN input.
52
+ # @param source [String] RGN text to tokenize.
46
53
  def initialize(source = nil)
47
- @scanner = StringScanner.new('')
48
- start_with(source) if source
54
+ reset
55
+ input = source || ''
56
+ @scanner = StringScanner.new(input)
49
57
  end
50
58
 
51
59
  # Reset the tokenizer and make the given text, the current input.
52
- # @param source [String] Lox text to tokenize.
60
+ # @param source [String] RGN text to tokenize.
53
61
  def start_with(source)
62
+ reset
54
63
  @scanner.string = source
55
- @lineno = 1
56
- @line_start = 0
57
64
  end
58
65
 
59
66
  # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
65
72
  tok_sequence << token unless token.nil?
66
73
  end
67
74
 
68
- return tok_sequence
75
+ tok_sequence
69
76
  end
70
77
 
71
78
  private
72
79
 
73
- def _next_token
74
- pos_before = scanner.pos
75
- skip_intertoken_spaces
76
- ws_found = true if scanner.pos > pos_before
77
- curr_ch = scanner.peek(1)
78
- return nil if curr_ch.nil? || curr_ch.empty?
80
+ def reset
81
+ @lineno = 1
82
+ @line_start = 0
83
+ end
79
84
 
85
+ def _next_token
80
86
  token = nil
87
+ ws_found = false
81
88
 
82
- if '(){},'.include? curr_ch
83
- # Single delimiter, separator or character
84
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
85
- elsif '?*+,'.include? curr_ch # modifier character
86
- # modifiers without prefix text are symbols
87
- symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
88
- token = build_token(symb, scanner.getch)
89
- elsif (lexeme = scanner.scan(/\.\./))
90
- # One or two special character tokens
91
- token = build_token(@@lexeme2name[lexeme], lexeme)
92
- elsif scanner.check(/"|'/) # Start of string detected...
93
- token = build_string_token
94
- elsif (lexeme = scanner.scan(/\d+/))
95
- token = build_token('INT_LIT', lexeme)
96
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
97
- keyw = @@keywords[lexeme.chop!]
98
- token = build_token('KEY', lexeme) if keyw
99
- # ... error case
100
- elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
101
- token = build_token('SYMBOL', lexeme)
102
- else # Unknown token
103
- col = scanner.pos - @line_start + 1
104
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
105
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
106
- end
89
+ # Loop until end of input reached or token found
90
+ until token || scanner.eos?
107
91
 
108
- return token
92
+ nl_found = scanner.skip(PATT_NEWLINE)
93
+ if nl_found
94
+ next_line_scanned
95
+ next
96
+ end
97
+ if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
98
+ ws_found = true
99
+ next
100
+ end
101
+
102
+ curr_ch = scanner.peek(1)
103
+
104
+ if '(){},'.include? curr_ch
105
+ # Single delimiter, separator or character
106
+ token = build_token(Lexeme2name[curr_ch], scanner.getch)
107
+ elsif '?*+,'.include? curr_ch # modifier character
108
+ # modifiers without prefix text are symbols
109
+ symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
110
+ token = build_token(symb, scanner.getch)
111
+ elsif (lexeme = scanner.scan(/\.\./))
112
+ # One or two special character tokens
113
+ token = build_token(Lexeme2name[lexeme], lexeme)
114
+ elsif scanner.check(PATT_STRING_START) # Start of string detected...
115
+ token = build_string_token
116
+ elsif (lexeme = scanner.scan(PATT_INTEGER))
117
+ token = build_token('INT_LIT', lexeme)
118
+ elsif (lexeme = scanner.scan(PATT_KEY))
119
+ keyw = @@keywords[lexeme.chop!]
120
+ token = build_token('KEY', lexeme) if keyw
121
+ # ... error case
122
+ elsif (lexeme = scanner.scan(PATT_SYMBOL))
123
+ token = build_token('SYMBOL', lexeme)
124
+ else # Unknown token
125
+ col = scanner.pos - @line_start + 1
126
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
127
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
128
+ end
129
+ ws_found = false
130
+ end # until
131
+
132
+ # unterminated(@string_start.line, @string_start.column) if state == :multiline
133
+ token
134
+
135
+ # return token
109
136
  end
110
137
 
111
138
  def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
154
181
  Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
155
182
  end
156
183
 
157
- # Skip non-significant whitespaces and comments.
158
- # Advance the scanner until something significant is found.
159
- def skip_intertoken_spaces
160
- loop do
161
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
162
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
163
- if nl_found
164
- ws_found = true
165
- next_line
166
- end
167
-
168
- break unless ws_found
169
- end
170
-
171
- scanner.pos
172
- end
173
-
174
- def next_line
184
+ # Event: next line detected.
185
+ def next_line_scanned
175
186
  @lineno += 1
176
187
  @line_start = scanner.pos
177
188
  end
@@ -8,10 +8,6 @@ module Rley # This module is used as a namespace
8
8
  # @return [String] The name of the grammar symbol
9
9
  attr_reader(:name)
10
10
 
11
- # An indicator that tells whether the grammar symbol can generate a
12
- # non-empty string of terminals.
13
- attr_writer(:generative)
14
-
15
11
  # Constructor.
16
12
  # aName [String] The name of the grammar symbol.
17
13
  def initialize(aName)
@@ -7,6 +7,10 @@ module Rley # This module is used as a namespace
7
7
  # A non-terminal symbol (sometimes called a syntactic variable) represents
8
8
  # a composition of terminal or non-terminal symbols
9
9
  class NonTerminal < GrmSymbol
10
+ # An indicator that tells whether the grammar symbol can generate a
11
+ # non-empty string of terminals.
12
+ attr_writer(:generative)
13
+
10
14
  # A non-terminal symbol is nullable if it can match an empty string.
11
15
  attr_writer(:nullable)
12
16