ebnf 1.1.3 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +221 -198
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +113 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +138 -6
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +443 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +565 -83
- metadata +107 -29
- data/etc/sparql.rb +0 -45773
    
        data/lib/ebnf/ll1/parser.rb
    CHANGED
    
    | @@ -3,12 +3,52 @@ require 'ebnf/ll1/lexer' | |
| 3 3 | 
             
            module EBNF::LL1
         | 
| 4 4 | 
             
              ##
         | 
| 5 5 | 
             
              # A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
         | 
| 6 | 
            +
              #
         | 
| 7 | 
            +
              #  # Creating terminal definitions and parser rules to parse generated grammars
         | 
| 8 | 
            +
              #
         | 
| 9 | 
            +
              #  The parser is initialized with callbacks invoked on entry and exit
         | 
| 10 | 
            +
              #  to each `terminal` and `production`. A trivial parser loop can be described as follows:
         | 
| 11 | 
            +
              #
         | 
| 12 | 
            +
              #      require 'ebnf/ll1/parser'
         | 
| 13 | 
            +
              #      require 'meta'
         | 
| 14 | 
            +
              #
         | 
| 15 | 
            +
              #      class Parser
         | 
| 16 | 
            +
              #        include Meta
         | 
| 17 | 
            +
              #        include EBNF::LL1::Parser
         | 
| 18 | 
            +
              #
         | 
| 19 | 
            +
              #        terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
         | 
| 20 | 
            +
              #          # Add data based on scanned token to input
         | 
| 21 | 
            +
              #          input[:symbol] = token.value
         | 
| 22 | 
            +
              #        end
         | 
| 23 | 
            +
              #
         | 
| 24 | 
            +
              #        start_production(:rule) do |input, current, callback|
         | 
| 25 | 
            +
              #          # Process on start of production
         | 
| 26 | 
            +
              #          # Set state for entry into recursed rules through current
         | 
| 27 | 
            +
              #
         | 
| 28 | 
            +
              #          # Callback to parser loop with callback
         | 
| 29 | 
            +
              #        end
         | 
| 30 | 
            +
              #
         | 
| 31 | 
            +
              #        production(:rule) do |input, current, callback|
         | 
| 32 | 
            +
              #          # Process on end of production
         | 
| 33 | 
            +
              #          # return results in input, retrieve results from recursed rules in current
         | 
| 34 | 
            +
              #
         | 
| 35 | 
            +
              #          # Callback to parser loop with callback
         | 
| 36 | 
            +
              #        end
         | 
| 37 | 
            +
              #
         | 
| 38 | 
            +
              #        def initialize(input)
         | 
| 39 | 
            +
              #          parse(input, start_symbol,
         | 
| 40 | 
            +
              #            branch: BRANCH,
         | 
| 41 | 
            +
              #            first: FIRST,
         | 
| 42 | 
            +
              #            follow: FOLLOW,
         | 
| 43 | 
            +
              #            cleanup: CLEANUP
         | 
| 44 | 
            +
              #          ) do |context, *data|
         | 
| 45 | 
            +
              #            # Process calls from callback from productions
         | 
| 46 | 
            +
              #
         | 
| 47 | 
            +
              #          rescue ArgumentError, EBNF::LL1::Parser::Error => e
         | 
| 48 | 
            +
              #            progress("Parsing completed with errors:\n\t#{e.message}")
         | 
| 49 | 
            +
              #            raise RDF::ReaderError, e.message if validate?
         | 
| 50 | 
            +
              #          end
         | 
| 6 51 | 
             
              module Parser
         | 
| 7 | 
            -
                ##
         | 
| 8 | 
            -
                # @private
         | 
| 9 | 
            -
                # level above which debug messages are supressed
         | 
| 10 | 
            -
                DEBUG_LEVEL = 10
         | 
| 11 | 
            -
             | 
| 12 52 | 
             
                ##
         | 
| 13 53 | 
             
                # @return [Integer] line number of current token
         | 
| 14 54 | 
             
                attr_reader :lineno
         | 
| @@ -51,10 +91,10 @@ module EBNF::LL1 | |
| 51 91 | 
             
                  # @yieldparam [Proc] block
         | 
| 52 92 | 
             
                  #   Block passed to initialization for yielding to calling parser.
         | 
| 53 93 | 
             
                  #   Should conform to the yield specs for #initialize
         | 
| 54 | 
            -
                  def terminal(term, regexp, options | 
| 94 | 
            +
                  def terminal(term, regexp, **options, &block)
         | 
| 55 95 | 
             
                    @patterns ||= []
         | 
| 56 96 | 
             
                    # Passed in order to define evaluation sequence
         | 
| 57 | 
            -
                    @patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, options)
         | 
| 97 | 
            +
                    @patterns << EBNF::LL1::Lexer::Terminal.new(term, regexp, **options)
         | 
| 58 98 | 
             
                    @terminal_handlers ||= {}
         | 
| 59 99 | 
             
                    @terminal_handlers[term] = block if block_given?
         | 
| 60 100 | 
             
                  end
         | 
| @@ -122,7 +162,14 @@ module EBNF::LL1 | |
| 122 162 |  | 
| 123 163 | 
             
                  def method_missing(method, *args, &block)
         | 
| 124 164 | 
             
                    if @delegate ||= nil
         | 
| 125 | 
            -
                       | 
| 165 | 
            +
                      # special handling when last arg is **options
         | 
| 166 | 
            +
                      params = @delegate.method(method).parameters
         | 
| 167 | 
            +
                      if params.any? {|t, _| t == :keyrest} && args.last.is_a?(Hash)
         | 
| 168 | 
            +
                        opts = args.pop
         | 
| 169 | 
            +
                        @delegate.send(method, *args, **opts, &block)
         | 
| 170 | 
            +
                      else
         | 
| 171 | 
            +
                        @delegate.send(method, *args, &block)
         | 
| 172 | 
            +
                      end
         | 
| 126 173 | 
             
                    else
         | 
| 127 174 | 
             
                      super
         | 
| 128 175 | 
             
                    end
         | 
| @@ -179,7 +226,7 @@ module EBNF::LL1 | |
| 179 226 | 
             
                #     def each_statement(&block)
         | 
| 180 227 | 
             
                #       @callback = block
         | 
| 181 228 | 
             
                #
         | 
| 182 | 
            -
                #       parse(START.to_sym) do |context, *data|
         | 
| 229 | 
            +
                #       parse(input, START.to_sym) do |context, *data|
         | 
| 183 230 | 
             
                #         case context
         | 
| 184 231 | 
             
                #         when :statement
         | 
| 185 232 | 
             
                #           yield *data
         | 
| @@ -198,16 +245,13 @@ module EBNF::LL1 | |
| 198 245 | 
             
                #   Lists valid terminals that can precede each production (for error recovery).
         | 
| 199 246 | 
             
                # @option options [Hash{Symbol,String => Array<Symbol,String>}] :follow ({})
         | 
| 200 247 | 
             
                #   Lists valid terminals that can follow each production (for error recovery).
         | 
| 201 | 
            -
                # @option options [Boolean]  :validate     (false)
         | 
| 202 | 
            -
                #   whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
         | 
| 203 | 
            -
                # @option options [Boolean] :progress
         | 
| 204 | 
            -
                #   Show progress of parser productions
         | 
| 205 | 
            -
                # @option options [Boolean] :debug
         | 
| 206 | 
            -
                #   Detailed debug output
         | 
| 207 | 
            -
                # @option options [Boolean] :reset_on_start
         | 
| 208 | 
            -
                #   Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
         | 
| 209 248 | 
             
                # @option options [Integer] :high_water passed to lexer
         | 
| 249 | 
            +
                # @option options [Logger] :logger for errors/progress/debug.
         | 
| 210 250 | 
             
                # @option options [Integer] :low_water passed to lexer
         | 
| 251 | 
            +
                # @option options [Boolean] :reset_on_start
         | 
| 252 | 
            +
                #   Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
         | 
| 253 | 
            +
                # @option options [Boolean]  :validate     (false)
         | 
| 254 | 
            +
                #   whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.
         | 
| 211 255 | 
             
                # @yield [context, *data]
         | 
| 212 256 | 
             
                #   Yields to return data to the parser
         | 
| 213 257 | 
             
                # @yieldparam [:statement, :trace] context
         | 
| @@ -218,18 +262,14 @@ module EBNF::LL1 | |
| 218 262 | 
             
                # @raise [Exception] Raises exceptions for parsing errors
         | 
| 219 263 | 
             
                #   or errors raised during processing callbacks. Internal
         | 
| 220 264 | 
             
                #   errors are raised using {Error}.
         | 
| 221 | 
            -
                # @see  | 
| 222 | 
            -
                def parse(input = nil, start = nil, options | 
| 265 | 
            +
                # @see https://cs.adelaide.edu.au/~charles/lt/Lectures/07-ErrorRecovery.pdf
         | 
| 266 | 
            +
                def parse(input = nil, start = nil, **options, &block)
         | 
| 223 267 | 
             
                  @options = options.dup
         | 
| 224 | 
            -
                  @options[:debug] ||= case
         | 
| 225 | 
            -
                  when @options[:progress] then 2
         | 
| 226 | 
            -
                  when @options[:validate] then 1
         | 
| 227 | 
            -
                  end
         | 
| 228 268 | 
             
                  @branch  = options[:branch]
         | 
| 229 269 | 
             
                  @first   = options[:first] ||= {}
         | 
| 230 270 | 
             
                  @follow  = options[:follow] ||= {}
         | 
| 231 271 | 
             
                  @cleanup = options[:cleanup] ||= {}
         | 
| 232 | 
            -
                  @lexer   = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns,  | 
| 272 | 
            +
                  @lexer   = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, **@options)
         | 
| 233 273 | 
             
                  @productions = []
         | 
| 234 274 | 
             
                  @parse_callback = block
         | 
| 235 275 | 
             
                  @recovering = false
         | 
| @@ -349,9 +389,9 @@ module EBNF::LL1 | |
| 349 389 | 
             
                      end
         | 
| 350 390 |  | 
| 351 391 | 
             
                      # Get the list of follows for this sequence, this production and the stacked productions.
         | 
| 352 | 
            -
                      debug("recovery", "stack follows:" | 
| 392 | 
            +
                      debug("recovery", "stack follows:")
         | 
| 353 393 | 
             
                      todo_stack.reverse.each do |todo|
         | 
| 354 | 
            -
                        debug("recovery" | 
| 394 | 
            +
                        debug("recovery") {"  #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
         | 
| 355 395 | 
             
                      end
         | 
| 356 396 |  | 
| 357 397 | 
             
                      # Find all follows to the top of the stack
         | 
| @@ -459,15 +499,16 @@ module EBNF::LL1 | |
| 459 499 | 
             
              protected
         | 
| 460 500 |  | 
| 461 501 | 
             
                ##
         | 
| 462 | 
            -
                # Error information, used as level ` | 
| 502 | 
            +
                # Error information, used as level `3` logger messages.
         | 
| 503 | 
            +
                # Messages may be logged and are saved for reporting at end of parsing.
         | 
| 463 504 | 
             
                #
         | 
| 464 505 | 
             
                # @param [String] node Relevant location associated with message
         | 
| 465 506 | 
             
                # @param [String] message Error string
         | 
| 466 | 
            -
                # @param [Hash] options
         | 
| 507 | 
            +
                # @param [Hash{Symbol => Object}] options
         | 
| 467 508 | 
             
                # @option options [URI, #to_s] :production
         | 
| 468 509 | 
             
                # @option options [Token] :token
         | 
| 469 | 
            -
                # @see  | 
| 470 | 
            -
                def error(node, message, options | 
| 510 | 
            +
                # @see #debug
         | 
| 511 | 
            +
                def error(node, message, **options)
         | 
| 471 512 | 
             
                  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
         | 
| 472 513 | 
             
                  m = "ERROR "
         | 
| 473 514 | 
             
                  m += "[line: #{lineno}] " if lineno
         | 
| @@ -476,83 +517,82 @@ module EBNF::LL1 | |
| 476 517 | 
             
                  m += ", production = #{options[:production].inspect}" if options[:production]
         | 
| 477 518 | 
             
                  @error_log << m unless @recovering
         | 
| 478 519 | 
             
                  @recovering = true
         | 
| 479 | 
            -
                  debug(node, m, options. | 
| 520 | 
            +
                  debug(node, m, level: options.fetch(:level, 3), **options)
         | 
| 480 521 | 
             
                  if options[:raise] || @options[:validate]
         | 
| 481 522 | 
             
                    raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
         | 
| 482 523 | 
             
                  end
         | 
| 483 524 | 
             
                end
         | 
| 484 525 |  | 
| 485 526 | 
             
                ##
         | 
| 486 | 
            -
                # Warning information, used as level ` | 
| 527 | 
            +
                # Warning information, used as level `2` logger messages.
         | 
| 528 | 
            +
                # Messages may be logged and are saved for reporting at end of parsing.
         | 
| 487 529 | 
             
                #
         | 
| 488 530 | 
             
                # @param [String] node Relevant location associated with message
         | 
| 489 531 | 
             
                # @param [String] message Error string
         | 
| 490 532 | 
             
                # @param [Hash] options
         | 
| 491 533 | 
             
                # @option options [URI, #to_s] :production
         | 
| 492 534 | 
             
                # @option options [Token] :token
         | 
| 493 | 
            -
                # @see  | 
| 494 | 
            -
                def warn(node, message, options | 
| 535 | 
            +
                # @see #debug
         | 
| 536 | 
            +
                def warn(node, message, **options)
         | 
| 537 | 
            +
                  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
         | 
| 495 538 | 
             
                  m = "WARNING "
         | 
| 496 | 
            -
                  m += "[line: #{ | 
| 539 | 
            +
                  m += "[line: #{lineno}] " if lineno
         | 
| 497 540 | 
             
                  m += message
         | 
| 498 541 | 
             
                  m += " (found #{options[:token].inspect})" if options[:token]
         | 
| 499 542 | 
             
                  m += ", production = #{options[:production].inspect}" if options[:production]
         | 
| 500 543 | 
             
                  @error_log << m unless @recovering
         | 
| 501 | 
            -
                  debug(node, m,  | 
| 544 | 
            +
                  debug(node, m, level: 2, lineno: lineno, **options)
         | 
| 502 545 | 
             
                end
         | 
| 503 546 |  | 
| 504 547 | 
             
                ##
         | 
| 505 | 
            -
                # Progress  | 
| 548 | 
            +
                # Progress logged when parsing. Passed as level `1` logger messages.
         | 
| 549 | 
            +
                #
         | 
| 550 | 
            +
                # The call is ignored, unless `@options[:logger]` is set.
         | 
| 506 551 | 
             
                #
         | 
| 507 | 
            -
                # @overload progress(node, message, options)
         | 
| 552 | 
            +
                # @overload progress(node, message, **options, &block)
         | 
| 508 553 | 
             
                #   @param [String] node Relevant location associated with message
         | 
| 509 554 | 
             
                #   @param [String] message ("")
         | 
| 510 555 | 
             
                #   @param [Hash] options
         | 
| 511 556 | 
             
                #   @option options [Integer] :depth
         | 
| 512 557 | 
             
                #       Recursion depth for indenting output
         | 
| 513 | 
            -
                # @see  | 
| 558 | 
            +
                # @see #debug
         | 
| 514 559 | 
             
                def progress(node, *args, &block)
         | 
| 515 | 
            -
                  return unless @options[: | 
| 560 | 
            +
                  return unless @options[:logger]
         | 
| 561 | 
            +
                  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
         | 
| 516 562 | 
             
                  args << {} unless args.last.is_a?(Hash)
         | 
| 517 | 
            -
                  args.last[:level] ||=  | 
| 563 | 
            +
                  args.last[:level] ||= 1
         | 
| 564 | 
            +
                  args.last[:lineno] ||= lineno
         | 
| 518 565 | 
             
                  debug(node, *args, &block)
         | 
| 519 566 | 
             
                end
         | 
| 520 567 |  | 
| 521 568 | 
             
                ##
         | 
| 522 | 
            -
                #  | 
| 569 | 
            +
                # Debug logging.
         | 
| 523 570 | 
             
                #
         | 
| 524 | 
            -
                # The call is ignored, unless `@options[: | 
| 525 | 
            -
                # case it yields tracing information as indicated. Additionally,
         | 
| 526 | 
            -
                # if `@options[:debug]` is an Integer, the call is aborted if the
         | 
| 527 | 
            -
                # `:level` option is less than than `:level`.
         | 
| 571 | 
            +
                # The call is ignored, unless `@options[:logger]` is set.
         | 
| 528 572 | 
             
                #
         | 
| 529 | 
            -
                # @overload debug(node, message, options)
         | 
| 573 | 
            +
                # @overload debug(node, message, **options)
         | 
| 530 574 | 
             
                #   @param [Array<String>] args Relevant location associated with message
         | 
| 531 575 | 
             
                #   @param [Hash] options
         | 
| 532 576 | 
             
                #   @option options [Integer] :depth
         | 
| 533 577 | 
             
                #     Recursion depth for indenting output
         | 
| 534 | 
            -
                #   @ | 
| 535 | 
            -
                 | 
| 536 | 
            -
             | 
| 537 | 
            -
                #     progress information, and anything higher is for various levels
         | 
| 538 | 
            -
                #     of debug information.
         | 
| 539 | 
            -
                #
         | 
| 540 | 
            -
                # @yield trace, level, lineno, depth, args
         | 
| 541 | 
            -
                # @yieldparam [:trace] trace
         | 
| 542 | 
            -
                # @yieldparam [Integer] level
         | 
| 543 | 
            -
                # @yieldparam [Integer] lineno
         | 
| 544 | 
            -
                # @yieldparam [Integer] depth Recursive depth of productions
         | 
| 545 | 
            -
                # @yieldparam [Array<String>] args
         | 
| 546 | 
            -
                # @yieldreturn [String] added to message
         | 
| 547 | 
            -
                def debug(*args)
         | 
| 548 | 
            -
                  return unless @options[:debug] && @parse_callback
         | 
| 578 | 
            +
                #   @yieldreturn [String] additional string appended to `message`.
         | 
| 579 | 
            +
                def debug(*args, &block)
         | 
| 580 | 
            +
                  return unless @options[:logger]
         | 
| 549 581 | 
             
                  options = args.last.is_a?(Hash) ? args.pop : {}
         | 
| 550 | 
            -
                   | 
| 551 | 
            -
                   | 
| 552 | 
            -
             | 
| 582 | 
            +
                  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
         | 
| 583 | 
            +
                  level = options.fetch(:level, 0)
         | 
| 553 584 | 
             
                  depth = options[:depth] || self.depth
         | 
| 554 | 
            -
             | 
| 555 | 
            -
                   | 
| 585 | 
            +
             | 
| 586 | 
            +
                  if self.respond_to?(:log_debug)
         | 
| 587 | 
            +
                    level = [:debug, :info, :warn, :error, :fatal][level]
         | 
| 588 | 
            +
                    log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
         | 
| 589 | 
            +
                  elsif @options[:logger].respond_to?(:add)
         | 
| 590 | 
            +
                    args << yield if block_given?
         | 
| 591 | 
            +
                    @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
         | 
| 592 | 
            +
                  elsif @options[:logger].respond_to?(:<<)
         | 
| 593 | 
            +
                    args << yield if block_given?
         | 
| 594 | 
            +
                    @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
         | 
| 595 | 
            +
                  end
         | 
| 556 596 | 
             
                end
         | 
| 557 597 |  | 
| 558 598 | 
             
              private
         | 
| @@ -563,7 +603,7 @@ module EBNF::LL1 | |
| 563 603 | 
             
                  if handler
         | 
| 564 604 | 
             
                    # Create a new production data element, potentially allowing handler
         | 
| 565 605 | 
             
                    # to customize before pushing on the @prod_data stack
         | 
| 566 | 
            -
                     | 
| 606 | 
            +
                    debug("#{prod}(:start):#{@prod_data.length}") {@prod_data.last}
         | 
| 567 607 | 
             
                    data = {}
         | 
| 568 608 | 
             
                    begin
         | 
| 569 609 | 
             
                      self.class.eval_with_binding(self) {
         | 
| @@ -577,12 +617,12 @@ module EBNF::LL1 | |
| 577 617 | 
             
                  elsif [:merge, :star].include?(@cleanup[prod])
         | 
| 578 618 | 
             
                    # Save current data to merge later
         | 
| 579 619 | 
             
                    @prod_data << {}
         | 
| 580 | 
            -
                     | 
| 620 | 
            +
                    debug("#{prod}(:start}:#{@prod_data.length}:cleanup:#{@cleanup[prod]}") { get_token.inspect + (@recovering ? ' recovering' : '')}
         | 
| 581 621 | 
             
                  else
         | 
| 582 622 | 
             
                    # Make sure we push as many as we pop, even if there is no
         | 
| 583 623 | 
             
                    # explicit start handler
         | 
| 584 624 | 
             
                    @prod_data << {} if self.class.production_handlers[prod]
         | 
| 585 | 
            -
                     | 
| 625 | 
            +
                    debug("#{prod}(:start:#{@prod_data.length})") { get_token.inspect + (@recovering ? ' recovering' : '')}
         | 
| 586 626 | 
             
                  end
         | 
| 587 627 | 
             
                  #puts "prod_data(s): " + @prod_data.inspect
         | 
| 588 628 | 
             
                end
         | 
| @@ -616,7 +656,7 @@ module EBNF::LL1 | |
| 616 656 | 
             
                      else Array(input[k]) + Array(v)
         | 
| 617 657 | 
             
                      end
         | 
| 618 658 | 
             
                    end
         | 
| 619 | 
            -
                     | 
| 659 | 
            +
                    debug("#{prod}(:finish):#{@prod_data.length} cleanup:#{@cleanup[prod]}") {@prod_data.last}
         | 
| 620 660 | 
             
                  else
         | 
| 621 661 | 
             
                    progress("#{prod}(:finish):#{@prod_data.length}") { "recovering" if @recovering }
         | 
| 622 662 | 
             
                  end
         | 
| @@ -723,7 +763,7 @@ module EBNF::LL1 | |
| 723 763 | 
             
                #     "invalid token '%' on line 10",
         | 
| 724 764 | 
             
                #     token: '%', lineno: 9, production: :turtleDoc)
         | 
| 725 765 | 
             
                #
         | 
| 726 | 
            -
                # @see  | 
| 766 | 
            +
                # @see https://ruby-doc.org/core/classes/StandardError.html
         | 
| 727 767 | 
             
                class Error < StandardError
         | 
| 728 768 | 
             
                  ##
         | 
| 729 769 | 
             
                  # The current production.
         | 
| @@ -751,7 +791,7 @@ module EBNF::LL1 | |
| 751 791 | 
             
                  # @option options [Symbol]         :production  (nil)
         | 
| 752 792 | 
             
                  # @option options [String]         :token  (nil)
         | 
| 753 793 | 
             
                  # @option options [Integer]        :lineno (nil)
         | 
| 754 | 
            -
                  def initialize(message, options | 
| 794 | 
            +
                  def initialize(message, **options)
         | 
| 755 795 | 
             
                    @production = options[:production]
         | 
| 756 796 | 
             
                    @token      = options[:token]
         | 
| 757 797 | 
             
                    @lineno     = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno))
         | 
    
        data/lib/ebnf/ll1/scanner.rb
    CHANGED
    
    | @@ -3,7 +3,7 @@ require 'strscan'    unless defined?(StringScanner) | |
| 3 3 |  | 
| 4 4 | 
             
            module EBNF::LL1
         | 
| 5 5 | 
             
              ##
         | 
| 6 | 
            -
              # Overload StringScanner with file operations
         | 
| 6 | 
            +
              # Overload StringScanner with file operations and line counting
         | 
| 7 7 | 
             
              #
         | 
| 8 8 | 
             
              # * Reloads scanner as required until EOF.
         | 
| 9 9 | 
             
              # * Loads to a high-water and reloads when remaining size reaches a low-water.
         | 
| @@ -14,25 +14,14 @@ module EBNF::LL1 | |
| 14 14 | 
             
                LOW_WATER  = 4 * 1024
         | 
| 15 15 |  | 
| 16 16 | 
             
                ##
         | 
| 17 | 
            -
                # @return [IO, StringIO]
         | 
| 17 | 
            +
                # @return [String, IO, StringIO]
         | 
| 18 18 | 
             
                attr_reader :input
         | 
| 19 19 |  | 
| 20 20 | 
             
                ##
         | 
| 21 | 
            -
                #  | 
| 22 | 
            -
                # | 
| 23 | 
            -
                 | 
| 24 | 
            -
             | 
| 25 | 
            -
                  if input.respond_to?(:read)
         | 
| 26 | 
            -
                    scanner = self.allocate
         | 
| 27 | 
            -
                    scanner.send(:initialize, input, options)
         | 
| 28 | 
            -
                  else
         | 
| 29 | 
            -
                    if input.encoding != Encoding::UTF_8
         | 
| 30 | 
            -
                      input = input.dup if input.frozen?
         | 
| 31 | 
            -
                      input.force_encoding(Encoding::UTF_8)
         | 
| 32 | 
            -
                    end
         | 
| 33 | 
            -
                    StringScanner.new(input)
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
                end
         | 
| 21 | 
            +
                # The current line number (one-based).
         | 
| 22 | 
            +
                #
         | 
| 23 | 
            +
                # @return [Integer]
         | 
| 24 | 
            +
                attr_accessor :lineno
         | 
| 36 25 |  | 
| 37 26 | 
             
                ##
         | 
| 38 27 | 
             
                # Create a scanner, from a String or an IO
         | 
| @@ -42,35 +31,26 @@ module EBNF::LL1 | |
| 42 31 | 
             
                # @option options [Integer] :high_water (HIGH_WATER)
         | 
| 43 32 | 
             
                # @option options [Integer] :low_water (LOW_WATER)
         | 
| 44 33 | 
             
                # @return [Scanner]
         | 
| 45 | 
            -
                def initialize(input, options | 
| 34 | 
            +
                def initialize(input, **options)
         | 
| 46 35 | 
             
                  @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
         | 
| 47 36 |  | 
| 48 | 
            -
                  @ | 
| 49 | 
            -
                   | 
| 37 | 
            +
                  @previous_lineno = @lineno = 1
         | 
| 38 | 
            +
                  @input = input.is_a?(String) ? encode_utf8(input) : input
         | 
| 39 | 
            +
                  super(input.is_a?(String) ? @input : "")
         | 
| 50 40 | 
             
                  feed_me
         | 
| 51 41 | 
             
                  self
         | 
| 52 42 | 
             
                end
         | 
| 53 43 |  | 
| 54 44 | 
             
                ##
         | 
| 55 | 
            -
                #  | 
| 56 | 
            -
                 | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
                ##
         | 
| 65 | 
            -
                # Attempts to skip over the given `pattern` beginning with the scan pointer.
         | 
| 66 | 
            -
                # If it matches, the scan pointer is advanced to the end of the match,
         | 
| 67 | 
            -
                # and the length of the match is returned. Otherwise, `nil` is returned.
         | 
| 68 | 
            -
                #
         | 
| 69 | 
            -
                # similar to `scan`, but without returning the matched string.
         | 
| 70 | 
            -
                # @param [Regexp] pattern
         | 
| 71 | 
            -
                def skip(pattern)
         | 
| 72 | 
            -
                  feed_me
         | 
| 73 | 
            -
                  super
         | 
| 45 | 
            +
                # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark.
         | 
| 46 | 
            +
                def ensure_buffer_full
         | 
| 47 | 
            +
                  # Read up to high-water mark ensuring we're at an end of line
         | 
| 48 | 
            +
                  if @input.respond_to?(:eof?) && !@input.eof?
         | 
| 49 | 
            +
                    diff = @options[:high_water] - rest_size
         | 
| 50 | 
            +
                    string = encode_utf8(@input.read(diff))
         | 
| 51 | 
            +
                    string << encode_utf8(@input.gets) unless @input.eof?
         | 
| 52 | 
            +
                    self << string if string
         | 
| 53 | 
            +
                  end
         | 
| 74 54 | 
             
                end
         | 
| 75 55 |  | 
| 76 56 | 
             
                ##
         | 
| @@ -83,10 +63,13 @@ module EBNF::LL1 | |
| 83 63 | 
             
                end
         | 
| 84 64 |  | 
| 85 65 | 
             
                ##
         | 
| 86 | 
            -
                #  | 
| 87 | 
            -
                 | 
| 66 | 
            +
                # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
         | 
| 67 | 
            +
                # If there is no more data (eos? = true), it returns "".
         | 
| 68 | 
            +
                #
         | 
| 69 | 
            +
                # @return [String]
         | 
| 70 | 
            +
                def rest
         | 
| 88 71 | 
             
                  feed_me
         | 
| 89 | 
            -
                  super
         | 
| 72 | 
            +
                  encode_utf8 super
         | 
| 90 73 | 
             
                end
         | 
| 91 74 |  | 
| 92 75 | 
             
                ##
         | 
| @@ -108,19 +91,68 @@ module EBNF::LL1 | |
| 108 91 | 
             
                # @return [String]
         | 
| 109 92 | 
             
                def scan(pattern)
         | 
| 110 93 | 
             
                  feed_me
         | 
| 111 | 
            -
                   | 
| 94 | 
            +
                  @previous_lineno = @lineno
         | 
| 95 | 
            +
                  if matched = encode_utf8(super)
         | 
| 96 | 
            +
                    @lineno += matched.count("\n")
         | 
| 97 | 
            +
                  end
         | 
| 98 | 
            +
                  matched
         | 
| 112 99 | 
             
                end
         | 
| 113 100 |  | 
| 114 101 | 
             
                ##
         | 
| 115 | 
            -
                #  | 
| 116 | 
            -
                 | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
                     | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 102 | 
            +
                # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
         | 
| 103 | 
            +
                #
         | 
| 104 | 
            +
                # @example
         | 
| 105 | 
            +
                #     s = StringScanner.new("Fri Dec 12 1975 14:39")
         | 
| 106 | 
            +
                #     s.scan_until(/1/)        # -> "Fri Dec 1"
         | 
| 107 | 
            +
                #     s.pre_match              # -> "Fri Dec "
         | 
| 108 | 
            +
                #     s.scan_until(/XYZ/)      # -> nil
         | 
| 109 | 
            +
                #
         | 
| 110 | 
            +
                # @param [Regexp] pattern
         | 
| 111 | 
            +
                # @return [String]
         | 
| 112 | 
            +
                def scan_until(pattern)
         | 
| 113 | 
            +
                  feed_me
         | 
| 114 | 
            +
                  @previous_lineno = @lineno
         | 
| 115 | 
            +
                  if matched = encode_utf8(super)
         | 
| 116 | 
            +
                    @lineno += matched.count("\n")
         | 
| 123 117 | 
             
                  end
         | 
| 118 | 
            +
                  matched
         | 
| 119 | 
            +
                end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                ##
         | 
| 122 | 
            +
                # Attempts to skip over the given `pattern` beginning with the scan pointer.
         | 
| 123 | 
            +
                # If it matches, the scan pointer is advanced to the end of the match,
         | 
| 124 | 
            +
                # but this override always returns `nil` (unlike `StringScanner#skip`, which returns the length of the match).
         | 
| 125 | 
            +
                #
         | 
| 126 | 
            +
                # similar to `scan`, but without returning the matched string.
         | 
| 127 | 
            +
                # @param [Regexp] pattern
         | 
| 128 | 
            +
                def skip(pattern)
         | 
| 129 | 
            +
                  scan(pattern)
         | 
| 130 | 
            +
                  nil
         | 
| 131 | 
            +
                end
         | 
| 132 | 
            +
             | 
| 133 | 
            +
                ##
         | 
| 134 | 
            +
                # Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced, or nil if no match was found.
         | 
| 135 | 
            +
                #
         | 
| 136 | 
            +
                # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
         | 
| 137 | 
            +
                #
         | 
| 138 | 
            +
                # It’s similar to scan_until, but without returning the intervening string.
         | 
| 139 | 
            +
                # @param [Regexp] pattern
         | 
| 140 | 
            +
                def skip_until(pattern)
         | 
| 141 | 
            +
                  (matched = scan_until(pattern)) && matched.length
         | 
| 142 | 
            +
                end
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                ##
         | 
| 145 | 
            +
                # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
         | 
| 146 | 
            +
                def unscan
         | 
| 147 | 
            +
                  @lineno = @previous_lineno
         | 
| 148 | 
            +
                  super
         | 
| 149 | 
            +
                end
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                ##
         | 
| 152 | 
            +
                # Set the scan pointer to the end of the string and clear matching data
         | 
| 153 | 
            +
                def terminate
         | 
| 154 | 
            +
                  feed_me
         | 
| 155 | 
            +
                  super
         | 
| 124 156 | 
             
                end
         | 
| 125 157 |  | 
| 126 158 | 
             
              private
         |