ebnf 1.2.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +223 -199
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +38 -19
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -18
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +76 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +6 -1
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +114 -69
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +131 -3
- data/lib/ebnf/ll1/lexer.rb +20 -22
- data/lib/ebnf/ll1/parser.rb +97 -64
- data/lib/ebnf/ll1/scanner.rb +82 -50
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +250 -0
- data/lib/ebnf/rule.rb +442 -148
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +587 -82
- metadata +125 -18
- data/etc/sparql.rb +0 -45773
    
        data/lib/ebnf/ll1/scanner.rb
    CHANGED
    
    | @@ -3,7 +3,7 @@ require 'strscan'    unless defined?(StringScanner) | |
| 3 3 |  | 
| 4 4 | 
             
            module EBNF::LL1
         | 
| 5 5 | 
             
              ##
         | 
| 6 | 
            -
              # Overload StringScanner with file operations
         | 
| 6 | 
            +
              # Overload StringScanner with file operations and line counting
         | 
| 7 7 | 
             
              #
         | 
| 8 8 | 
             
              # * Reloads scanner as required until EOF.
         | 
| 9 9 | 
             
              # * Loads to a high-water and reloads when remaining size reaches a low-water.
         | 
| @@ -14,25 +14,14 @@ module EBNF::LL1 | |
| 14 14 | 
             
                LOW_WATER  = 4 * 1024
         | 
| 15 15 |  | 
| 16 16 | 
             
                ##
         | 
| 17 | 
            -
                # @return [IO, StringIO]
         | 
| 17 | 
            +
                # @return [String, IO, StringIO]
         | 
| 18 18 | 
             
                attr_reader :input
         | 
| 19 19 |  | 
| 20 20 | 
             
                ##
         | 
| 21 | 
            -
                #  | 
| 22 | 
            -
                # | 
| 23 | 
            -
                 | 
| 24 | 
            -
             | 
| 25 | 
            -
                  if input.respond_to?(:read)
         | 
| 26 | 
            -
                    scanner = self.allocate
         | 
| 27 | 
            -
                    scanner.send(:initialize, input, **options)
         | 
| 28 | 
            -
                  else
         | 
| 29 | 
            -
                    if input.encoding != Encoding::UTF_8
         | 
| 30 | 
            -
                      input = input.dup if input.frozen?
         | 
| 31 | 
            -
                      input.force_encoding(Encoding::UTF_8)
         | 
| 32 | 
            -
                    end
         | 
| 33 | 
            -
                    StringScanner.new(input)
         | 
| 34 | 
            -
                  end
         | 
| 35 | 
            -
                end
         | 
| 21 | 
            +
                # The current line number (one-based).
         | 
| 22 | 
            +
                #
         | 
| 23 | 
            +
                # @return [Integer]
         | 
| 24 | 
            +
                attr_accessor :lineno
         | 
| 36 25 |  | 
| 37 26 | 
             
                ##
         | 
| 38 27 | 
             
                # Create a scanner, from an IO
         | 
| @@ -45,32 +34,23 @@ module EBNF::LL1 | |
| 45 34 | 
             
                def initialize(input, **options)
         | 
| 46 35 | 
             
                  @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
         | 
| 47 36 |  | 
| 48 | 
            -
                  @ | 
| 49 | 
            -
                   | 
| 37 | 
            +
                  @previous_lineno = @lineno = 1
         | 
| 38 | 
            +
                  @input = input.is_a?(String) ? encode_utf8(input) : input
         | 
| 39 | 
            +
                  super(input.is_a?(String) ? @input : "")
         | 
| 50 40 | 
             
                  feed_me
         | 
| 51 41 | 
             
                  self
         | 
| 52 42 | 
             
                end
         | 
| 53 43 |  | 
| 54 44 | 
             
                ##
         | 
| 55 | 
            -
                #  | 
| 56 | 
            -
                 | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
                ##
         | 
| 65 | 
            -
                # Attempts to skip over the given `pattern` beginning with the scan pointer.
         | 
| 66 | 
            -
                # If it matches, the scan pointer is advanced to the end of the match,
         | 
| 67 | 
            -
                # and the length of the match is returned. Otherwise, `nil` is returned.
         | 
| 68 | 
            -
                #
         | 
| 69 | 
            -
                # similar to `scan`, but without returning the matched string.
         | 
| 70 | 
            -
                # @param [Regexp] pattern
         | 
| 71 | 
            -
                def skip(pattern)
         | 
| 72 | 
            -
                  feed_me
         | 
| 73 | 
            -
                  super
         | 
| 45 | 
            +
                # Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
         | 
| 46 | 
            +
                def ensure_buffer_full
         | 
| 47 | 
            +
                  # Read up to high-water mark ensuring we're at an end of line
         | 
| 48 | 
            +
                  if @input.respond_to?(:eof?) && !@input.eof?
         | 
| 49 | 
            +
                    diff = @options[:high_water] - rest_size
         | 
| 50 | 
            +
                    string = encode_utf8(@input.read(diff))
         | 
| 51 | 
            +
                    string << encode_utf8(@input.gets) unless @input.eof?
         | 
| 52 | 
            +
                    self << string if string
         | 
| 53 | 
            +
                  end
         | 
| 74 54 | 
             
                end
         | 
| 75 55 |  | 
| 76 56 | 
             
                ##
         | 
| @@ -83,10 +63,13 @@ module EBNF::LL1 | |
| 83 63 | 
             
                end
         | 
| 84 64 |  | 
| 85 65 | 
             
                ##
         | 
| 86 | 
            -
                #  | 
| 87 | 
            -
                 | 
| 66 | 
            +
                # Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
         | 
| 67 | 
            +
                # If there is no more data (eos? = true), it returns "".
         | 
| 68 | 
            +
                #
         | 
| 69 | 
            +
                # @return [String]
         | 
| 70 | 
            +
                def rest
         | 
| 88 71 | 
             
                  feed_me
         | 
| 89 | 
            -
                  super
         | 
| 72 | 
            +
                  encode_utf8 super
         | 
| 90 73 | 
             
                end
         | 
| 91 74 |  | 
| 92 75 | 
             
                ##
         | 
| @@ -108,19 +91,68 @@ module EBNF::LL1 | |
| 108 91 | 
             
                # @return [String]
         | 
| 109 92 | 
             
                def scan(pattern)
         | 
| 110 93 | 
             
                  feed_me
         | 
| 111 | 
            -
                   | 
| 94 | 
            +
                  @previous_lineno = @lineno
         | 
| 95 | 
            +
                  if matched = encode_utf8(super)
         | 
| 96 | 
            +
                    @lineno += matched.count("\n")
         | 
| 97 | 
            +
                  end
         | 
| 98 | 
            +
                  matched
         | 
| 112 99 | 
             
                end
         | 
| 113 100 |  | 
| 114 101 | 
             
                ##
         | 
| 115 | 
            -
                #  | 
| 116 | 
            -
                 | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
                     | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 102 | 
            +
                # Scans the string until the pattern is matched. Returns the substring up to and including the end of the match, advancing the scan pointer to that location. If there is no match, nil is returned.
         | 
| 103 | 
            +
                #
         | 
| 104 | 
            +
                # @example
         | 
| 105 | 
            +
                #     s = StringScanner.new("Fri Dec 12 1975 14:39")
         | 
| 106 | 
            +
                #     s.scan_until(/1/)        # -> "Fri Dec 1"
         | 
| 107 | 
            +
                #     s.pre_match              # -> "Fri Dec "
         | 
| 108 | 
            +
                #     s.scan_until(/XYZ/)      # -> nil
         | 
| 109 | 
            +
                #
         | 
| 110 | 
            +
                # @param [Regexp] pattern
         | 
| 111 | 
            +
                # @return [String]
         | 
| 112 | 
            +
                def scan_until(pattern)
         | 
| 113 | 
            +
                  feed_me
         | 
| 114 | 
            +
                  @previous_lineno = @lineno
         | 
| 115 | 
            +
                  if matched = encode_utf8(super)
         | 
| 116 | 
            +
                    @lineno += matched.count("\n")
         | 
| 123 117 | 
             
                  end
         | 
| 118 | 
            +
                  matched
         | 
| 119 | 
            +
                end
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                ##
         | 
| 122 | 
            +
                # Attempts to skip over the given `pattern` beginning with the scan pointer.
         | 
| 123 | 
            +
                # If it matches, the scan pointer is advanced to the end of the match,
         | 
| 124 | 
            +
                # however, this implementation always returns `nil` rather than the match length.
         | 
| 125 | 
            +
                #
         | 
| 126 | 
            +
                # similar to `scan`, but without returning the matched string.
         | 
| 127 | 
            +
                # @param [Regexp] pattern
         | 
| 128 | 
            +
                def skip(pattern)
         | 
| 129 | 
            +
                  scan(pattern)
         | 
| 130 | 
            +
                  nil
         | 
| 131 | 
            +
                end
         | 
| 132 | 
            +
             | 
| 133 | 
            +
                ##
         | 
| 134 | 
            +
                # Advances the scan pointer until pattern is matched and consumed. Returns the number of characters advanced, or nil if no match was found.
         | 
| 135 | 
            +
                #
         | 
| 136 | 
            +
                # Look ahead to match pattern, and advance the scan pointer to the end of the match. Return the number of characters advanced, or nil if the match was unsuccessful.
         | 
| 137 | 
            +
                #
         | 
| 138 | 
            +
                # It’s similar to scan_until, but without returning the intervening string.
         | 
| 139 | 
            +
                # @param [Regexp] pattern
         | 
| 140 | 
            +
                def skip_until(pattern)
         | 
| 141 | 
            +
                  (matched = scan_until(pattern)) && matched.length
         | 
| 142 | 
            +
                end
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                ##
         | 
| 145 | 
            +
                # Sets the scan pointer to the previous position. Only one previous position is remembered, and it changes with each scanning operation.
         | 
| 146 | 
            +
                def unscan
         | 
| 147 | 
            +
                  @lineno = @previous_lineno
         | 
| 148 | 
            +
                  super
         | 
| 149 | 
            +
                end
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                ##
         | 
| 152 | 
            +
                # Set the scan pointer to the end of the string and clear matching data
         | 
| 153 | 
            +
                def terminate
         | 
| 154 | 
            +
                  feed_me
         | 
| 155 | 
            +
                  super
         | 
| 124 156 | 
             
                end
         | 
| 125 157 |  | 
| 126 158 | 
             
              private
         | 
    
        data/lib/ebnf/native.rb
    ADDED
    
    | @@ -0,0 +1,320 @@ | |
| 1 | 
            +
            module EBNF
         | 
| 2 | 
            +
              module Native
         | 
| 3 | 
            +
                ##
         | 
| 4 | 
            +
                # Native parser for EBNF; less accurate, but appropriate when changing the EBNF grammar itself.
         | 
| 5 | 
            +
                #
         | 
| 6 | 
            +
                # Iterate over rule strings.
         | 
| 7 | 
            +
                # a line that starts with '\[' or '@' starts a new rule
         | 
| 8 | 
            +
                #
         | 
| 9 | 
            +
                # @param [StringScanner] scanner
         | 
| 10 | 
            +
                # @yield rule_string
         | 
| 11 | 
            +
                # @yieldparam [String] rule_string
         | 
| 12 | 
            +
                def eachRule(scanner)
         | 
| 13 | 
            +
                  cur_lineno = 1
         | 
| 14 | 
            +
                  r = ''
         | 
| 15 | 
            +
                  until scanner.eos?
         | 
| 16 | 
            +
                    case
         | 
| 17 | 
            +
                    when s = scanner.scan(%r(\s+)m)
         | 
| 18 | 
            +
                      # Eat whitespace
         | 
| 19 | 
            +
                      cur_lineno += s.count("\n")
         | 
| 20 | 
            +
                      #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 21 | 
            +
                    when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
         | 
| 22 | 
            +
                      # Eat comments /* .. */
         | 
| 23 | 
            +
                      cur_lineno += s.count("\n")
         | 
| 24 | 
            +
                      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 25 | 
            +
                    when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
         | 
| 26 | 
            +
                      # Eat comments (* .. *)
         | 
| 27 | 
            +
                      cur_lineno += s.count("\n")
         | 
| 28 | 
            +
                      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 29 | 
            +
                    when s = scanner.scan(%r((#(?!x)|//).*$))
         | 
| 30 | 
            +
                      # Eat comments // & #
         | 
| 31 | 
            +
                      cur_lineno += s.count("\n")
         | 
| 32 | 
            +
                      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 33 | 
            +
                    when s = scanner.scan(/\A["']/)
         | 
| 34 | 
            +
                      # Found a quote, scan until end of matching quote
         | 
| 35 | 
            +
                      s += scanner.scan_until(/#{scanner.matched}|$/)
         | 
| 36 | 
            +
                      r += s
         | 
| 37 | 
            +
                    when s = scanner.scan(%r(^@terminals))
         | 
| 38 | 
            +
                      #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 39 | 
            +
                      yield(r) unless r.empty?
         | 
| 40 | 
            +
                      @lineno = cur_lineno
         | 
| 41 | 
            +
                      yield(s)
         | 
| 42 | 
            +
                      r = ''
         | 
| 43 | 
            +
                    when s = scanner.scan(/@pass/)
         | 
| 44 | 
            +
                      # Found rule start, if we've already collected a rule, yield it
         | 
| 45 | 
            +
                      #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 46 | 
            +
                      yield r unless r.empty?
         | 
| 47 | 
            +
                      @lineno = cur_lineno
         | 
| 48 | 
            +
                      r = s
         | 
| 49 | 
            +
                    when s = scanner.scan(EBNF::Terminals::LHS)
         | 
| 50 | 
            +
                      # Found rule start, if we've already collected a rule, yield it
         | 
| 51 | 
            +
                      yield r unless r.empty?
         | 
| 52 | 
            +
                      #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 53 | 
            +
                      @lineno = cur_lineno
         | 
| 54 | 
            +
                      r = s
         | 
| 55 | 
            +
                    else
         | 
| 56 | 
            +
                      # Collect until end of line, or start of comment or quote
         | 
| 57 | 
            +
                      s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
         | 
| 58 | 
            +
                      if scanner.matched.length > 0
         | 
| 59 | 
            +
                        # Back up scan head before ending match
         | 
| 60 | 
            +
                        scanner.pos = scanner.pos - scanner.matched.length
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                        # Remove matched from end of string
         | 
| 63 | 
            +
                        s = s[0..-(scanner.matched.length+1)]
         | 
| 64 | 
            +
                      end
         | 
| 65 | 
            +
                      cur_lineno += s.count("\n")
         | 
| 66 | 
            +
                      #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
         | 
| 67 | 
            +
                      r += s
         | 
| 68 | 
            +
                    end
         | 
| 69 | 
            +
                  end
         | 
| 70 | 
            +
                  yield r unless r.empty?
         | 
| 71 | 
            +
                end
         | 
| 72 | 
            +
              
         | 
| 73 | 
            +
                ##
         | 
| 74 | 
            +
                # Parse a rule into an optional rule number, a symbol and an expression
         | 
| 75 | 
            +
                #
         | 
| 76 | 
            +
                # @param [String] rule
         | 
| 77 | 
            +
                # @return [Rule]
         | 
| 78 | 
            +
                def ruleParts(rule)
         | 
| 79 | 
            +
                  num_sym, expr = rule.split('::=', 2).map(&:strip)
         | 
| 80 | 
            +
                  num, sym = num_sym.split(']', 2).map(&:strip)
         | 
| 81 | 
            +
                  num, sym = "", num if sym.nil?
         | 
| 82 | 
            +
                  num = num[1..-1]
         | 
| 83 | 
            +
                  r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
         | 
| 84 | 
            +
                  debug("ruleParts") { r.inspect }
         | 
| 85 | 
            +
                  r
         | 
| 86 | 
            +
                end
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                ##
         | 
| 89 | 
            +
                # Parse a string into an expression tree and a remaining string
         | 
| 90 | 
            +
                #
         | 
| 91 | 
            +
                # @example
         | 
| 92 | 
            +
                #     >>> expression("a b c")
         | 
| 93 | 
            +
                #     ((seq a b c) '')
         | 
| 94 | 
            +
                #     
         | 
| 95 | 
            +
                #     >>> expression("a? b+ c*")
         | 
| 96 | 
            +
                #     ((seq (opt a) (plus b) (star c)) '')
         | 
| 97 | 
            +
                #     
         | 
| 98 | 
            +
                #     >>> expression(" | x xlist")
         | 
| 99 | 
            +
                #     ((alt (seq) (seq x xlist)) '')
         | 
| 100 | 
            +
                #     
         | 
| 101 | 
            +
                #     >>> expression("a | (b - c)")
         | 
| 102 | 
            +
                #     ((alt a (diff b c)) '')
         | 
| 103 | 
            +
                #     
         | 
| 104 | 
            +
                #     >>> expression("a b | c d")
         | 
| 105 | 
            +
                #     ((alt (seq a b) (seq c d)) '')
         | 
| 106 | 
            +
                #     
         | 
| 107 | 
            +
                #     >>> expression("a | b | c")
         | 
| 108 | 
            +
                #     ((alt a b c) '')
         | 
| 109 | 
            +
                #     
         | 
| 110 | 
            +
                #     >>> expression("a) b c")
         | 
| 111 | 
            +
                #     (a ' b c')
         | 
| 112 | 
            +
                #     
         | 
| 113 | 
            +
                #     >>> expression("BaseDecl? PrefixDecl*")
         | 
| 114 | 
            +
                #     ((seq (opt BaseDecl) (star PrefixDecl)) '')
         | 
| 115 | 
            +
                #     
         | 
| 116 | 
            +
                #     >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
         | 
| 117 | 
            +
                #     ((alt NCCHAR1 diff
         | 
| 118 | 
            +
                #           (range '0-9')
         | 
| 119 | 
            +
                #           (hex '#x00B7')
         | 
| 120 | 
            +
                #           (range '#x0300-#x036F')
         | 
| 121 | 
            +
                #           (range '#x203F-#x2040')) '')
         | 
| 122 | 
            +
                #     
         | 
| 123 | 
            +
                # @param [String] s
         | 
| 124 | 
            +
                # @return [Array]
         | 
| 125 | 
            +
                def expression(s)
         | 
| 126 | 
            +
                  debug("expression") {"(#{s.inspect})"}
         | 
| 127 | 
            +
                  e, s = depth {alt(s)}
         | 
| 128 | 
            +
                  debug {"=> alt returned #{[e, s].inspect}"}
         | 
| 129 | 
            +
                  unless s.to_s.empty?
         | 
| 130 | 
            +
                    t, ss = depth {terminal(s)}
         | 
| 131 | 
            +
                    debug {"=> terminal returned #{[t, ss].inspect}"}
         | 
| 132 | 
            +
                    return [e, ss] if t.is_a?(Array) && t.first == :")"
         | 
| 133 | 
            +
                  end
         | 
| 134 | 
            +
                  [e, s]
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
              
         | 
| 137 | 
            +
                ##
         | 
| 138 | 
            +
                # Parse alt
         | 
| 139 | 
            +
                #     >>> alt("a | b | c")
         | 
| 140 | 
            +
                #     ((alt a b c) '')
         | 
| 141 | 
            +
                # @param [String] s
         | 
| 142 | 
            +
                # @return [Array]
         | 
| 143 | 
            +
                def alt(s)
         | 
| 144 | 
            +
                  debug("alt") {"(#{s.inspect})"}
         | 
| 145 | 
            +
                  args = []
         | 
| 146 | 
            +
                  while !s.to_s.empty?
         | 
| 147 | 
            +
                    e, s = depth {seq(s)}
         | 
| 148 | 
            +
                    debug {"=> seq returned #{[e, s].inspect}"}
         | 
| 149 | 
            +
                    if e.to_s.empty?
         | 
| 150 | 
            +
                      break unless args.empty?
         | 
| 151 | 
            +
                      e = [:seq, []] # empty sequence
         | 
| 152 | 
            +
                    end
         | 
| 153 | 
            +
                    args << e
         | 
| 154 | 
            +
                    unless s.to_s.empty?
         | 
| 155 | 
            +
                      t, ss = depth {terminal(s)}
         | 
| 156 | 
            +
                      break unless t[0] == :alt
         | 
| 157 | 
            +
                      s = ss
         | 
| 158 | 
            +
                    end
         | 
| 159 | 
            +
                  end
         | 
| 160 | 
            +
                  args.length > 1 ? [args.unshift(:alt), s] : [e, s]
         | 
| 161 | 
            +
                end
         | 
| 162 | 
            +
              
         | 
| 163 | 
            +
                ##
         | 
| 164 | 
            +
                # parse seq
         | 
| 165 | 
            +
                #
         | 
| 166 | 
            +
                #     >>> seq("a b c")
         | 
| 167 | 
            +
                #     ((seq a b c) '')
         | 
| 168 | 
            +
                #     
         | 
| 169 | 
            +
                #     >>> seq("a b? c")
         | 
| 170 | 
            +
                #     ((seq a (opt b) c) '')
         | 
| 171 | 
            +
                def seq(s)
         | 
| 172 | 
            +
                  debug("seq") {"(#{s.inspect})"}
         | 
| 173 | 
            +
                  args = []
         | 
| 174 | 
            +
                  while !s.to_s.empty?
         | 
| 175 | 
            +
                    e, ss = depth {diff(s)}
         | 
| 176 | 
            +
                    debug {"=> diff returned #{[e, ss].inspect}"}
         | 
| 177 | 
            +
                    unless e.to_s.empty?
         | 
| 178 | 
            +
                      args << e
         | 
| 179 | 
            +
                      s = ss
         | 
| 180 | 
            +
                    else
         | 
| 181 | 
            +
                      break;
         | 
| 182 | 
            +
                    end
         | 
| 183 | 
            +
                  end
         | 
| 184 | 
            +
                  if args.length > 1
         | 
| 185 | 
            +
                    [args.unshift(:seq), s]
         | 
| 186 | 
            +
                  elsif args.length == 1
         | 
| 187 | 
            +
                    args + [s]
         | 
| 188 | 
            +
                  else
         | 
| 189 | 
            +
                    ["", s]
         | 
| 190 | 
            +
                  end
         | 
| 191 | 
            +
                end
         | 
| 192 | 
            +
              
         | 
| 193 | 
            +
                ##
         | 
| 194 | 
            +
                # parse diff
         | 
| 195 | 
            +
                # 
         | 
| 196 | 
            +
                #     >>> diff("a - b")
         | 
| 197 | 
            +
                #     ((diff a b) '')
         | 
| 198 | 
            +
                def diff(s)
         | 
| 199 | 
            +
                  debug("diff") {"(#{s.inspect})"}
         | 
| 200 | 
            +
                  e1, s = depth {postfix(s)}
         | 
| 201 | 
            +
                  debug {"=> postfix returned #{[e1, s].inspect}"}
         | 
| 202 | 
            +
                  unless e1.to_s.empty?
         | 
| 203 | 
            +
                    unless s.to_s.empty?
         | 
| 204 | 
            +
                      t, ss = depth {terminal(s)}
         | 
| 205 | 
            +
                      debug {"diff #{[t, ss].inspect}"}
         | 
| 206 | 
            +
                      if t.is_a?(Array) && t.first == :diff
         | 
| 207 | 
            +
                        s = ss
         | 
| 208 | 
            +
                        e2, s = primary(s)
         | 
| 209 | 
            +
                        unless e2.to_s.empty?
         | 
| 210 | 
            +
                          return [[:diff, e1, e2], s]
         | 
| 211 | 
            +
                        else
         | 
| 212 | 
            +
                          error("diff", "Syntax Error")
         | 
| 213 | 
            +
                          raise SyntaxError, "diff missing second operand"
         | 
| 214 | 
            +
                        end
         | 
| 215 | 
            +
                      end
         | 
| 216 | 
            +
                    end
         | 
| 217 | 
            +
                  end
         | 
| 218 | 
            +
                  [e1, s]
         | 
| 219 | 
            +
                end
         | 
| 220 | 
            +
              
         | 
| 221 | 
            +
                ##
                # Parse a postfix expression: a primary optionally followed by one of
                # the operators `?`, `*` or `+`.
                #
                # @example
                #     >>> postfix("a b c")
                #     (a ' b c')
                #
                #     >>> postfix("a? b c")
                #     ((opt, a) ' b c')
                #
                # @param [String] s text to parse
                # @return [Array] two-element array: the parsed expression (`""` when
                #   `primary` produced nothing) and the text remaining after it
                def postfix(s)
                  debug("postfix") { "(#{s.inspect})" }
                  operand, rest = depth { primary(s) }
                  debug { "=> primary returned #{[operand, rest].inspect}" }

                  # No primary was found: report an empty expression.
                  return ["", rest] if operand.to_s.empty?
                  # Nothing follows the primary, so there cannot be an operator.
                  return [operand, rest] if rest.to_s.empty?

                  # Look ahead one terminal; it is only consumed if it is an operator.
                  token, after = depth { terminal(rest) }
                  debug { "=> #{[token, after].inspect}" }

                  quantified = token.is_a?(Array) && %i[opt star plus].include?(token.first)
                  quantified ? [[token.first, operand], after] : [operand, rest]
                end
         | 
| 243 | 
            +
             | 
| 244 | 
            +
                ##
                # Parse a primary: a symbol, a string, a range, a hex character, or
                # an expression introduced by `(`.
                #
                # @example
                #     >>> primary("a b c")
                #     (a ' b c')
                #
                # @param [String] s text to parse
                # @return [Array] two-element array: the parsed primary (`""` when
                #   the next terminal cannot start a primary) and the remaining text
                def primary(s)
                  debug("primary") { "(#{s.inspect})" }
                  token, rest = depth { terminal(s) }
                  debug { "=> terminal returned #{[token, rest].inspect}" }

                  # Symbols and strings come back from `terminal` as bare values.
                  return [token, rest] if token.is_a?(Symbol) || token.is_a?(String)

                  # Everything else is a tagged array; dispatch on its tag.
                  case token.first
                  when :range, :hex
                    [token, rest]
                  when :"("
                    # The text after the opening parenthesis is handed to `expression`.
                    inner, rest = depth { expression(rest) }
                    debug { "=> expression returned #{[inner, rest].inspect}" }
                    [inner, rest]
                  else
                    # Not a primary; note the remainder is the text *after* the
                    # terminal that was just read.
                    ["", rest]
                  end
                end
         | 
| 265 | 
            +
              
         | 
| 266 | 
            +
                ##
                # Parse one terminal; return the terminal and the remaining string.
                #
                # A terminal is either a bare value (a String for quoted strings, a
                # Symbol for rule names) or a tuple whose 1st item gives the type;
                # some types (`:range`, `:hex`) carry additional info in the tuple.
                # Leading and trailing whitespace of the input is stripped first.
                #
                # @example
                #     >>> terminal("'abc' def")
                #     ('abc' ' def')
                #
                #     >>> terminal("[0-9]")
                #     ((range '0-9') '')
                #     >>> terminal("#x00B7")
                #     ((hex '#x00B7') '')
                #     >>> terminal("[#x0300-#x036F]")
                #     ((range '#x0300-#x036F') '')
                #     >>> terminal("[^<>'{}|^`]-[#x00-#x20]")
                #     ((range "^<>'{}|^`") '-[#x00-#x20]')
                #
                # @param [String] s text to parse
                # @return [Array] two-element array: the terminal and the text after it
                # @raise [SyntaxError] if the text does not start with a recognized
                #   terminal, including a `#` that is not followed by `x` and hex digits
                def terminal(s)
                  s = s.strip
                  case m = s[0, 1]
                  when '"', "'" # STRING1 or STRING2
                    # Split on the matching closing quote; when it is missing the
                    # remainder is nil, which callers treat as "nothing left".
                    l, s = s[1..-1].split(m, 2)
                    [LL1::Lexer.unescape_string(l), s]
                  when '[' # RANGE, O_RANGE
                    # The closing bracket is the first `]` not preceded by a backslash.
                    l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
                    [[:range, LL1::Lexer.unescape_string(l)], s]
                  when '#' # HEX
                    # Anchor at the start so a malformed token is rejected instead of
                    # matching a later `#x…`; /m keeps text after a newline.
                    md = s.match(/\A(#x\h+)(.*)\z/m)
                    unless md
                      error("terminal", "unrecognized terminal: #{s.inspect}")
                      raise SyntaxError, "unrecognized terminal: #{s.inspect}"
                    end
                    [[:hex, md[1]], md[2]]
                  when /[\w\.]/ # SYMBOL
                    # Always matches: the first character is already known to fit.
                    md = s.match(/\A([\w\.]+)(.*)\z/m)
                    [md[1].to_sym, md[2]]
                  when '-'
                    [[:diff], s[1..-1]]
                  when '?'
                    [[:opt], s[1..-1]]
                  when '|'
                    [[:alt], s[1..-1]]
                  when '+'
                    [[:plus], s[1..-1]]
                  when '*'
                    [[:star], s[1..-1]]
                  when /[\(\)]/ # '(' or ')'
                    [[m.to_sym], s[1..-1]]
                  else
                    error("terminal", "unrecognized terminal: #{s.inspect}")
                    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
                  end
                end
         | 
| 319 | 
            +
              end
         | 
| 320 | 
            +
            end
         |