ebnf 1.2.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer'
3
3
  module EBNF::LL1
4
4
  ##
5
5
  # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
6
+ #
7
+ # # Creating terminal definitions and parser rules to parse generated grammars
8
+ #
9
+ # The parser is initialized with callbacks invoked on entry and exit
10
+ # to each `terminal` and `production`. A trivial parser loop can be described as follows:
11
+ #
12
+ # require 'ebnf/ll1/parser'
13
+ # require 'meta'
14
+ #
15
+ # class Parser
16
+ # include Meta
17
+ # include EBNF::LL1::Parser
18
+ #
19
+ # terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
20
+ # # Add data based on scanned token to input
21
+ # input[:symbol] = token.value
22
+ # end
23
+ #
24
+ # start_production(:rule) do |input, current, callback|
25
+ # # Process on start of production
26
+ # # Set state for entry into recursed rules through current
27
+ #
28
+ # # Callback to parser loop with callback
29
+ # end
30
+ #
31
+ # production(:rule) do |input, current, callback|
32
+ # # Process on end of production
33
+ # # return results in input, retrieve results from recursed rules in current
34
+ #
35
+ # # Callback to parser loop with callback
36
+ # end
37
+ #
38
+ # def initialize(input)
39
+ # parse(input, start_symbol,
40
+ # branch: BRANCH,
41
+ # first: FIRST,
42
+ # follow: FOLLOW,
43
+ # cleanup: CLEANUP
44
+ # ) do |context, *data|
45
+ # # Process calls from callback from productions
46
+ #
47
+ # rescue ArgumentError, RDF::LL1::Parser::Error => e
48
+ # progress("Parsing completed with errors:\n\t#{e.message}")
49
+ # raise RDF::ReaderError, e.message if validate?
50
+ # end
6
51
  module Parser
7
- ##
8
- # @private
9
- # level above which debug messages are supressed
10
- DEBUG_LEVEL = 10
11
-
12
52
  ##
13
53
  # @return [Integer] line number of current token
14
54
  attr_reader :lineno
@@ -186,7 +226,7 @@ module EBNF::LL1
186
226
  # def each_statement(&block)
187
227
  # @callback = block
188
228
  #
189
- # parse(START.to_sym) do |context, *data|
229
+ # parse(input, START.to_sym) do |context, *data|
190
230
  # case context
191
231
  # when :statement
192
232
  # yield *data
@@ -205,16 +245,13 @@ module EBNF::LL1
205
245
  # Lists valid terminals that can precede each production (for error recovery).
206
246
  # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
207
247
  # Lists valid terminals that can follow each production (for error recovery).
208
- # @option options [Boolean] :validate (false)
209
- # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
210
- # @option options [Boolean] :progress
211
- # Show progress of parser productions
212
- # @option options [Boolean] :debug
213
- # Detailed debug output
214
- # @option options [Boolean] :reset_on_start
215
- # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
216
248
  # @option options [Integer] :high_water passed to lexer
249
+ # @option options [Logger] :logger for errors/progress/debug.
217
250
  # @option options [Integer] :low_water passed to lexer
251
+ # @option options [Boolean] :reset_on_start
252
+ # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
253
+ # @option options [Boolean] :validate (false)
254
+ # whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
218
255
  # @yield [context, *data]
219
256
  # Yields to return data to the parser
220
257
  # @yieldparam [:statement, :trace] context
@@ -225,13 +262,9 @@ module EBNF::LL1
225
262
  # @raise [Exception] Raises exceptions for parsing errors
226
263
  # or errors raised during processing callbacks. Internal
227
264
  # errors are raised using {Error}.
228
- # @see http://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
265
+ # @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
229
266
  def parse(input = nil, start = nil, **options, &block)
230
267
  @options = options.dup
231
- @options[:debug] ||= case
232
- when @options[:progress] then 2
233
- when @options[:validate] then 1
234
- end
235
268
  @branch = options[:branch]
236
269
  @first = options[:first] ||= {}
237
270
  @follow = options[:follow] ||= {}
@@ -356,9 +389,9 @@ module EBNF::LL1
356
389
  end
357
390
 
358
391
  # Get the list of follows for this sequence, this production and the stacked productions.
359
- debug("recovery", "stack follows:", level: 4)
392
+ debug("recovery", "stack follows:")
360
393
  todo_stack.reverse.each do |todo|
361
- debug("recovery", level: 4) {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
394
+ debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
362
395
  end
363
396
 
364
397
  # Find all follows to the top of the stack
@@ -466,14 +499,15 @@ module EBNF::LL1
466
499
  protected
467
500
 
468
501
  ##
469
- # Error information, used as level `0` debug messages.
502
+ # Error information, used as level `3` logger messages.
503
+ # Messages may be logged and are saved for reporting at end of parsing.
470
504
  #
471
505
  # @param [String] node Relevant location associated with message
472
506
  # @param [String] message Error string
473
- # @param [Hash] options
507
+ # @param [Hash{Symbol => Object}] options
474
508
  # @option options [URI, #to_s] :production
475
509
  # @option options [Token] :token
476
- # @see {#debug}
510
+ # @see #debug
477
511
  def error(node, message, **options)
478
512
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
479
513
  m = "ERROR "
@@ -483,83 +517,74 @@ module EBNF::LL1
483
517
  m += ", production = #{options[:production].inspect}" if options[:production]
484
518
  @error_log << m unless @recovering
485
519
  @recovering = true
486
- debug(node, m, level: 0, **options)
520
+ debug(node, m, level: options.fetch(:level, 3), **options)
487
521
  if options[:raise] || @options[:validate]
488
522
  raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
489
523
  end
490
524
  end
491
525
 
492
526
  ##
493
- # Warning information, used as level `1` debug messages.
527
+ # Warning information, used as level `2` logger messages.
528
+ # Messages may be logged and are saved for reporting at end of parsing.
494
529
  #
495
530
  # @param [String] node Relevant location associated with message
496
531
  # @param [String] message Error string
497
532
  # @param [Hash] options
498
533
  # @option options [URI, #to_s] :production
499
534
  # @option options [Token] :token
500
- # @see {#debug}
535
+ # @see #debug
501
536
  def warn(node, message, **options)
537
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
502
538
  m = "WARNING "
503
- m += "[line: #{@lineno}] " if @lineno
539
+ m += "[line: #{lineno}] " if lineno
504
540
  m += message
505
541
  m += " (found #{options[:token].inspect})" if options[:token]
506
542
  m += ", production = #{options[:production].inspect}" if options[:production]
507
543
  @error_log << m unless @recovering
508
- debug(node, m, level: 1, **options)
544
+ debug(node, m, level: 2, lineno: lineno, **options)
509
545
  end
510
546
 
511
547
  ##
512
- # Progress output when parsing. Passed as level `2` debug messages.
548
+ # Progress logged when parsing. Passed as level `1` logger messages.
513
549
  #
514
- # @overload progress(node, message, **options)
550
+ # The call is ignored, unless `@options[:logger]` is set.
551
+ #
552
+ # @overload progress(node, message, **options, &block)
515
553
  # @param [String] node Relevant location associated with message
516
554
  # @param [String] message ("")
517
555
  # @param [Hash] options
518
556
  # @option options [Integer] :depth
519
557
  # Recursion depth for indenting output
520
- # @see {#debug}
558
+ # @see #debug
521
559
  def progress(node, *args, &block)
522
- return unless @options[:progress] || @options[:debug]
560
+ return unless @options[:logger]
561
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
523
562
  args << {} unless args.last.is_a?(Hash)
524
- args.last[:level] ||= 2
563
+ args.last[:level] ||= 1
564
+ args.last[:lineno] ||= lineno
525
565
  debug(node, *args, &block)
526
566
  end
527
567
 
528
568
  ##
529
- # Progress output when debugging.
569
+ # Debug logging.
530
570
  #
531
- # The call is ignored, unless `@options[:debug]` is set, in which
532
- # case it yields tracing information as indicated. Additionally,
533
- # if `@options[:debug]` is an Integer, the call is aborted if the
534
- # `:level` option is less than than `:level`.
571
+ # The call is ignored, unless `@options[:logger]` is set.
535
572
  #
536
573
  # @overload debug(node, message, **options)
537
574
  # @param [Array<String>] args Relevant location associated with message
538
575
  # @param [Hash] options
539
576
  # @option options [Integer] :depth
540
577
  # Recursion depth for indenting output
541
- # @option options [Integer] :level
542
- # Level assigned to message, by convention, level `0` is for
543
- # errors, level `1` is for warnings, level `2` is for parser
544
- # progress information, and anything higher is for various levels
545
- # of debug information.
546
- #
547
- # @yield trace, level, lineno, depth, args
548
- # @yieldparam [:trace] trace
549
- # @yieldparam [Integer] level
550
- # @yieldparam [Integer] lineno
551
- # @yieldparam [Integer] depth Recursive depth of productions
552
- # @yieldparam [Array<String>] args
553
- # @yieldreturn [String] added to message
578
+ # @yieldreturn [String] additional string appended to `message`.
554
579
  def debug(*args)
555
- return unless @options[:debug] && @parse_callback
580
+ return unless @options[:logger]
556
581
  options = args.last.is_a?(Hash) ? args.pop : {}
557
- debug_level = options.fetch(:level, 3)
558
- return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
582
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
583
+ level = options.fetch(:level, 0)
559
584
 
560
585
  depth = options[:depth] || self.depth
561
586
  args << yield if block_given?
562
- @parse_callback.call(:trace, debug_level, @lineno, depth, *args)
587
+ @options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" "))
563
588
  end
564
589
 
565
590
  private
@@ -570,7 +595,7 @@ module EBNF::LL1
570
595
  if handler
571
596
  # Create a new production data element, potentially allowing handler
572
597
  # to customize before pushing on the @prod_data stack
573
- progress("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
598
+ debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
574
599
  data = {}
575
600
  begin
576
601
  self.class.eval_with_binding(self) {
@@ -584,12 +609,12 @@ module EBNF::LL1
584
609
  elsif [:merge, :star].include?(@cleanup[prod])
585
610
  # Save current data to merge later
586
611
  @prod_data << {}
587
- progress("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
612
+ debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
588
613
  else
589
614
  # Make sure we push as many as we pop, even if there is no
590
615
  # explicit start handler
591
616
  @prod_data << {} if self.class.production_handlers[prod]
592
- progress("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
617
+ debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
593
618
  end
594
619
  #puts "prod_data(s): " + @prod_data.inspect
595
620
  end
@@ -623,7 +648,7 @@ module EBNF::LL1
623
648
  else Array(input[k]) + Array(v)
624
649
  end
625
650
  end
626
- progress("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
651
+ debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
627
652
  else
628
653
  progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
629
654
  end
@@ -730,7 +755,7 @@ module EBNF::LL1
730
755
  # "invalid token '%' on line 10",
731
756
  # token: '%', lineno: 9, production: :turtleDoc)
732
757
  #
733
- # @see http://ruby-doc.org/core/classes/StandardError.html
758
+ # @see https://ruby-doc.org/core/classes/StandardError.html
734
759
  class Error < StandardError
735
760
  ##
736
761
  # The current production.
@@ -3,7 +3,7 @@ require 'strscan' unless defined?(StringScanner)
3
3
 
4
4
  module EBNF::LL1
5
5
  ##
6
- # Overload StringScanner with file operations
6
+ # Overload StringScanner with file operations and line counting
7
7
  #
8
8
  # * Reloads scanner as required until EOF.
9
9
  # * Loads to a high-water and reloads when remaining size reaches a low-water.
@@ -14,25 +14,14 @@ module EBNF::LL1
14
14
  LOW_WATER = 4 * 1024
15
15
 
16
16
  ##
17
- # @return [IO, StringIO]
17
+ # @return [String, IO, StringIO]
18
18
  attr_reader :input
19
19
 
20
20
  ##
21
- # If we don't have an IO input, simply use StringScanner directly
22
- # @private
23
- def self.new(input, **options)
24
- input ||= ""
25
- if input.respond_to?(:read)
26
- scanner = self.allocate
27
- scanner.send(:initialize, input, **options)
28
- else
29
- if input.encoding != Encoding::UTF_8
30
- input = input.dup if input.frozen?
31
- input.force_encoding(Encoding::UTF_8)
32
- end
33
- StringScanner.new(input)
34
- end
35
- end
21
+ # The current line number (one-based).
22
+ #
23
+ # @return [Integer]
24
+ attr_accessor :lineno
36
25
 
37
26
  ##
38
27
  # Create a scanner, from an IO
@@ -45,32 +34,23 @@ module EBNF::LL1
45
34
  def initialize(input, **options)
46
35
  @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
47
36
 
48
- @input = input
49
- super("")
37
+ @previous_lineno = @lineno = 1
38
+ @input = input.is_a?(String) ? encode_utf8(input) : input
39
+ super(input.is_a?(String) ? @input : "")
50
40
  feed_me
51
41
  self
52
42
  end
53
43
 
54
44
  ##
55
- # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
56
- # If there is no more data (eos? = true), it returns "".
57
- #
58
- # @return [String]
59
- def rest
60
- feed_me
61
- encode_utf8 super
62
- end
63
-
64
- ##
65
- # Attempts to skip over the given `pattern` beginning with the scan pointer.
66
- # If it matches, the scan pointer is advanced to the end of the match,
67
- # and the length of the match is returned. Otherwise, `nil` is returned.
68
- #
69
- # similar to `scan`, but without returning the matched string.
70
- # @param [Regexp] pattern
71
- def skip(pattern)
72
- feed_me
73
- super
45
+ # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
46
+ def ensure_buffer_full
47
+ # Read up to high-water mark ensuring we're at an end of line
48
+ if @input.respond_to?(:eof?) && !@input.eof?
49
+ diff = @options[:high_water] - rest_size
50
+ string = encode_utf8(@input.read(diff))
51
+ string << encode_utf8(@input.gets) unless @input.eof?
52
+ self << string if string
53
+ end
74
54
  end
75
55
 
76
56
  ##
@@ -83,10 +63,14 @@ module EBNF::LL1
83
63
  end
84
64
 
85
65
  ##
86
- # Set the scan pointer to the end of the string and clear matching data
87
- def terminate
66
+ # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
67
+ # If there is no more data (eos? = true), it returns "".
68
+ #
69
+ # @return [String]
70
+ def rest
88
71
  feed_me
89
- super
72
+ @lineno += 1 if eos?
73
+ encode_utf8 super
90
74
  end
91
75
 
92
76
  ##
@@ -108,19 +92,68 @@ module EBNF::LL1
108
92
  # @return [String]
109
93
  def scan(pattern)
110
94
  feed_me
111
- encode_utf8 super
95
+ @previous_lineno = @lineno
96
+ if matched = encode_utf8(super)
97
+ @lineno += matched.count("\n")
98
+ end
99
+ matched
112
100
  end
113
101
 
114
102
  ##
115
- # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
116
- def ensure_buffer_full
117
- # Read up to high-water mark ensuring we're at an end of line
118
- if @input && !@input.eof?
119
- diff = @options[:high_water] - rest_size
120
- string = encode_utf8(@input.read(diff))
121
- string << encode_utf8(@input.gets) unless @input.eof?
122
- self << string if string
103
+ # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
104
+ #
105
+ # @example
106
+ # s = StringScanner.new("Fri Dec 12 1975 14:39")
107
+ # s.scan_until(/1/) # -> "Fri Dec 1"
108
+ # s.pre_match # -> "Fri Dec "
109
+ # s.scan_until(/XYZ/) # -> nil
110
+ #
111
+ # @param [Regexp] pattern
112
+ # @return [String]
113
+ def scan_until(pattern)
114
+ feed_me
115
+ @previous_lineno = @lineno
116
+ if matched = encode_utf8(super)
117
+ @lineno += matched.count("\n")
123
118
  end
119
+ matched
120
+ end
121
+
122
+ ##
123
+ # Attempts to skip over the given `pattern` beginning with the scan pointer.
124
+ # If it matches, the scan pointer is advanced to the end of the match,
125
+ # and the length of the match is returned. Otherwise, `nil` is returned.
126
+ #
127
+ # similar to `scan`, but without returning the matched string.
128
+ # @param [Regexp] pattern
129
+ def skip(pattern)
130
+ scan(pattern)
131
+ nil
132
+ end
133
+
134
+ ##
135
+ # Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced, or nil if no match was found.
136
+ #
137
+ # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
138
+ #
139
+ # It’s similar to scan_until, but without returning the intervening string.
140
+ # @param [Regexp] pattern
141
+ def skip_until(pattern)
142
+ (matched = scan_until(pattern)) && matched.length
143
+ end
144
+
145
+ ##
146
+ # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
147
+ def unscan
148
+ @lineno = @previous_lineno
149
+ super
150
+ end
151
+
152
+ ##
153
+ # Set the scan pointer to the end of the string and clear matching data
154
+ def terminate
155
+ feed_me
156
+ super
124
157
  end
125
158
 
126
159
  private