RubyGems - tokn - Versions diffs - 0.0.5 → 0.0.6 - Mend

tokn 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +4 -4
data/README.txt +4 -5
data/bin/tokncompile +1 -1
data/bin/toknprocess +10 -4
data/lib/tokn/code_set.rb +332 -337
data/lib/tokn/dfa.rb +187 -162
data/lib/tokn/dfa_builder.rb +218 -220
data/lib/tokn/range_partition.rb +205 -203
data/lib/tokn/reg_parse.rb +336 -331
data/lib/tokn/state.rb +267 -270
data/lib/tokn/token_defn_parser.rb +144 -139
data/lib/tokn/tokenizer.rb +243 -175
data/lib/tokn/tokn_const.rb +11 -6
data/lib/tokn/tools.rb +42 -20
data/test/Example1.rb +50 -0
data/test/data/compileddfa.txt +1 -0
data/test/data/sampletext.txt +6 -1
data/test/test.rb +17 -12
metadata +7 -6
data/test/simple.rb +0 -33

data/lib/tokn/range_partition.rb CHANGED Viewed

@@ -1,233 +1,235 @@
 require_relative 'tools'
 req('tokn_const code_set')
-# A data structure that transforms a set of CodeSets to a
-# disjoint set of them, such that no two range sets overlap.
-#
-# This is improve the efficiency of the NFA => DFA algorithm,
-# which involves gathering information about what states are
-# reachable on certain characters.  We can't afford to treat each
-# character as a singleton, since the ranges can be quite large.
-# Hence, we want to treat ranges of characters as single entities;
-# this will only work if no two such ranges overlap.
-#
-# It works by starting with a tree whose node is labelled with
-# the maximal superset of character values.  Then, for each edge
-# in the NFA, performs a DFS on this tree, splitting any node that
-# only partially intersects any one set that appears in the edge label.
-# The running time is O(n log k), where n is the size of the NFA, and
-# k is the height of the resulting tree.
-#
-# We encourage k to be small by sorting the NFA edges by their
-# label complexity.
-#
-class RangePartition
-  include Tokn
+module ToknInternal
-  def initialize()
-    # We will build a tree, where each node has a CodeSet
-    # associated with it, and the child nodes (if present)
-    # partition this CodeSet into smaller, nonempty sets.
-    # A tree is represented by a node, where each node is a pair [x,y],
-    # with x the node's CodeSet, and y a list of the node's children.
-    @nextNodeId = 0
-    # Make the root node hold the largest possible CodeSet.
-    # We want to be able to include all the token ids as well.
-    @rootNode = buildNode(CodeSet.new(CODEMIN,CODEMAX))
-    @setsToAdd = Set.new
-    # Add epsilon immediately, so it's always in its own subset
-    addSet(CodeSet.new(EPSILON))
-    @prepared = false
-  end
-  def addSet(s)
-    if @prepared
-      raise IllegalStateException
-    end
-    @setsToAdd.add(s)
-  end
+  # A data structure that transforms a set of CodeSets to a
+  # disjoint set of them, such that no two range sets overlap.
+  #
+  # This is to improve the efficiency of the NFA => DFA algorithm,
+  # which involves gathering information about what states are
+  # reachable on certain characters.  We can't afford to treat each
+  # character as a singleton, since the ranges can be quite large.
+  # Hence, we want to treat ranges of characters as single entities;
+  # this will only work if no two such ranges overlap.
+  #
+  # It works by starting with a tree whose root node is labelled with
+  # the maximal superset of character values.  Then, for each edge
+  # in the NFA, performs a DFS on this tree, splitting any node that
+  # only partially intersects any one set that appears in the edge label.
+  # The running time is O(n log k), where n is the size of the NFA, and
+  # k is the height of the resulting tree.
+  #
+  # We encourage k to be small by sorting the NFA edges by their
+  # label complexity.
+  #
+  class RangePartition
+    # include Tokn
+    def initialize()
+      # We will build a tree, where each node has a CodeSet
+      # associated with it, and the child nodes (if present)
+      # partition this CodeSet into smaller, nonempty sets.
+      # A tree is represented by a node, where each node is a pair [x,y],
+      # with x the node's CodeSet, and y a list of the node's children.
-  def prepare()
-    if @prepared
-      raise IllegalStateException
+      @nextNodeId = 0
+      # Make the root node hold the largest possible CodeSet.
+      # We want to be able to include all the token ids as well.
+      @rootNode = buildNode(CodeSet.new(CODEMIN,CODEMAX))
+      @setsToAdd = Set.new
+      # Add epsilon immediately, so it's always in its own subset
+      addSet(CodeSet.new(EPSILON))
+      @prepared = false
     end
-    # Construct partition from previously added sets
-    list = @setsToAdd.to_a
-    # Sort set by cardinality: probably get a more balanced tree
-    # if larger sets are processed first
-    list.sort!{ |x,y| y.cardinality <=> x.cardinality }
-    list.each do |s|
-      addSetAux(s)
-    end
-    @prepared = true
-  end
-  # Generate a .dot file, and from that, a PDF, for debug purposes
-  #
-  def generatePDF(name = "partition")
-    if !@prepared
-      raise IllegalStateException
-    end
-    g = ""
-    g += "digraph "+name+" {\n\n"
-    nodes = []
-    buildNodeList(nodes)
-    nodes.each do |node|
-      g += " '" + d(node) + "' [shape=rect] [label='" + node.set.to_s_alt + "']\n"
-    end
-    g += "\n"
-    nodes.each do |node|
-      node.children.each do |ch|
-        g += " '" + d(node) + "' -> '" + d(ch) + "'\n"
+    def addSet(s)
+      if @prepared
+        raise IllegalStateException
       end
+      @setsToAdd.add(s)
     end
-    g += "\n}\n"
-    g.gsub!( /'/, '"' )
-    dotToPDF(g,name)
-  end
-  # Apply the partition to a CodeSet
-  #
-  # > s CodeSet
-  # < array of subsets from the partition whose union equals s
-  #   (this array will be the single element s if no partitioning was necessary)
-  #
-  def apply(s)
-    if !@prepared
-      raise IllegalStateException
+    def prepare()
+      if @prepared
+        raise IllegalStateException
+      end
+      # Construct partition from previously added sets
+      list = @setsToAdd.to_a
+      # Sort set by cardinality: probably get a more balanced tree
+      # if larger sets are processed first
+      list.sort!{ |x,y| y.cardinality <=> x.cardinality }
+      list.each do |s|
+        addSetAux(s)
+      end
+      @prepared = true
     end
-    list = []
-    s2 = s.makeCopy
-    applyAux(@rootNode, s2, list)
-    # Sort the list of subsets by their first elements
-    list.sort! { |x,y| x.array[0] <=> y.array[0] }
-    list
-  end
-  private
-  def applyAux(n, s, list)
-    db = false
-    !db||pr("applyAux to set[%s], node=[%s]\n",d(s),d(n.set))
-    if n.children.empty?
-      # # Verify that this set equals the input set
-      # myAssert(s.eql? n.set)
-      list.push(s)
-    else
-      n.children.each do |m|
-        s1 = s.intersect(m.set)
-        !db||pr(" child set=[%s], intersection=[%s]\n",d(m.set),d(s1))
-        if s1.empty?
-          next
-        end
-        applyAux(m, s1, list)
-        !db||pr("  subtracting child set [%s] from s=[%s]\n",d(m.set),d(s))
-        s = s.difference(m.set)
-        !db||pr("  subtracted child set, now [%s]\n",d(s))
-        if s.empty?
-          break
+    # Generate a .dot file, and from that, a PDF, for debug purposes
+    #
+    def generatePDF(name = "partition")
+      if !@prepared
+        raise IllegalStateException
+      end
+      g = ""
+      g += "digraph "+name+" {\n\n"
+      nodes = []
+      buildNodeList(nodes)
+      nodes.each do |node|
+        g += " '" + d(node) + "' [shape=rect] [label='" + node.set.to_s_alt + "']\n"
+      end
+      g += "\n"
+      nodes.each do |node|
+        node.children.each do |ch|
+          g += " '" + d(node) + "' -> '" + d(ch) + "'\n"
         end
       end
-    end
-  end
+      g += "\n}\n"
+      g.gsub!( /'/, '"' )
+      dotToPDF(g,name)
-  def buildNode(rangeSet)
-    id = @nextNodeId
-    @nextNodeId += 1
-    n = RPNode.new(id, rangeSet, [])
-    n
-  end
-  def buildNodeList(list, root = nil)
-    if not root
-      root = @rootNode
-    end
-    list.push(root)
-    root.children.each do |x|
-      buildNodeList(list, x)
     end
-  end
-  # Add a set to the tree, extending the tree as necessary to
-  # maintain a (disjoint) partition
-  #
-  def addSetAux(s, n = @rootNode)
-    #
-    # The algorithm is this:
+    # Apply the partition to a CodeSet
     #
-    # add (s, n)    # add set s to node n; s must be subset of n.set
-    #   if n.set = s, return
-    #   if n is leaf:
-    #     x = n.set - s
-    #     add x,y as child sets of n
-    #   else
-    #     for each child m of n:
-    #       t = intersect of m.set and s
-    #       if t is nonempty, add(t, m)
+    # > s CodeSet
+    # < array of subsets from the partition whose union equals s
+    #   (this array will be the single element s if no partitioning was necessary)
     #
-    if n.set.eql? s
-      return
-    end
-    if n.children.empty?
-      x = n.set.difference(s)
-      n.children.push buildNode(x)
-      n.children.push buildNode(s)
-    else
-      n.children.each do |m|
-        t = m.set.intersect(s)
-        addSetAux(t,m) unless t.empty?
+    def apply(s)
+      if !@prepared
+        raise IllegalStateException
       end
+      list = []
+      s2 = s.makeCopy
+      applyAux(@rootNode, s2, list)
+      # Sort the list of subsets by their first elements
+      list.sort! { |x,y| x.array[0] <=> y.array[0] }
+      list
     end
-  end
-end
-# A node within a RangePartition tree
-#
-class RPNode
-  attr_accessor :id, :set, :children
+    private
-  def initialize(id, set, children)
-    @id = id
-    @set = set
-    @children = children
-  end
+    def applyAux(n, s, list)
+      db = false
+      !db||pr("applyAux to set[%s], node=[%s]\n",d(s),d(n.set))
+      if n.children.empty?
+        # # Verify that this set equals the input set
+        # myAssert(s.eql? n.set)
+        list.push(s)
+      else
+        n.children.each do |m|
+          s1 = s.intersect(m.set)
+          !db||pr(" child set=[%s], intersection=[%s]\n",d(m.set),d(s1))
+          if s1.empty?
+            next
+          end
+          applyAux(m, s1, list)
+          !db||pr("  subtracting child set [%s] from s=[%s]\n",d(m.set),d(s))
+          s = s.difference(m.set)
+          !db||pr("  subtracted child set, now [%s]\n",d(s))
+          if s.empty?
+            break
+          end
+        end
+      end
+    end
+    def buildNode(rangeSet)
+      id = @nextNodeId
+      @nextNodeId += 1
+      n = RPNode.new(id, rangeSet, [])
+      n
+    end
-  def inspect
-    return 'N' + id.to_s
+    def buildNodeList(list, root = nil)
+      if not root
+        root = @rootNode
+      end
+      list.push(root)
+      root.children.each do |x|
+        buildNodeList(list, x)
+      end
+    end
+    # Add a set to the tree, extending the tree as necessary to
+    # maintain a (disjoint) partition
+    #
+    def addSetAux(s, n = @rootNode)
+      #
+      # The algorithm is this:
+      #
+      # add (s, n)    # add set s to node n; s must be subset of n.set
+      #   if n.set = s, return
+      #   if n is leaf:
+      #     x = n.set - s
+      #     add x,s as child sets of n
+      #   else
+      #     for each child m of n:
+      #       t = intersect of m.set and s
+      #       if t is nonempty, add(t, m)
+      #
+      if n.set.eql? s
+        return
+      end
+      if n.children.empty?
+        x = n.set.difference(s)
+        n.children.push buildNode(x)
+        n.children.push buildNode(s)
+      else
+        n.children.each do |m|
+          t = m.set.intersect(s)
+          addSetAux(t,m) unless t.empty?
+        end
+      end
+    end
   end
-end
+  # A node within a RangePartition tree
+  #
+  class RPNode
+    attr_accessor :id, :set, :children
+    def initialize(id, set, children)
+      @id = id
+      @set = set
+      @children = children
+    end
+    def inspect
+      return 'N' + id.to_s
+    end
+  end
+end  # module ToknInternal

data/lib/tokn/reg_parse.rb CHANGED Viewed

@@ -1,379 +1,384 @@
 require_relative 'tools'
 req('code_set state')
-class ParseException < Exception
-end
-# Parses a single regular expression from a string.
-# Produces an NFA with distinguished start and end states
-# (none of these states are marked as final states)
-#
-# Here is the grammar for regular expressions.  Spaces are ignored,
-# and can be liberally sprinkled within the regular expressions to
-# aid readability.  To represent a space, the \s escape sequence must be used.
-# See the file 'sampletokens.txt' for some examples.
-#
-#   Expressions have one of these types:
-#
-#   E : base class
-#   J : a Join expression, formed by concatenating one or more together
-#   Q : a Quantified expression; followed optionally by '*', '+', or '?'
-#   P : a Parenthesized expression, which is optionally surrounded with (), {}, []
-#
-#   E -> J '|' E
-#      | J
-#
-#   J -> Q J
-#      | Q
-#
-#   Q -> P '*'
-#      | P '+'
-#      | P '?'
-#      | P
-#
-#   P -> '(' E ')'
-#      | '{' TOKENNAME '}'
-#      | '[^' SETSEQ ']'     A code not appearing in the set
-#      | '[' SETSEQ ']'
-#      | CHARCODE
-#
-#   SETSEQ -> SET SETSEQ
-#           | SET
-#
-#   SET -> CHARCODE
-#           | CHARCODE '-' CHARCODE
-#
-#   CHARCODE ->
-#            a |  b |  c  ...   any printable except {,},[, etc.
-#        |  \xhh                  hex value from 00...ff
-#        |  \uhhhh                hex value from 0000...ffff (e.g., unicode)
-#        |  \f | \n | \r | \t     formfeed, linefeed, return, tab
-#        |  \s                    a space (' ')
-#        |  \*                    where * is some other non-alphabetic
-#                                  character that needs to be escaped
-#
-# The parser performs recursive descent parsing;
-# each method returns an NFA represented by
-# a pair of states: the start and end states.
-#
-class RegParse
-  attr_reader :startState, :endState
+module ToknInternal
-  # Construct a parser and perform the parsing
-  # @param script script to parse
-  # @param tokenDefMap if not nil, a map of previously parsed regular expressions
-  #     (mapping names to ids) to be consulted if a curly brace expression appears
-  #     in the script
-  #
-  def initialize(script, tokenDefMap = nil)
-    @script = script.strip
-    @nextStateId = 0
-    @tokenDefMap = tokenDefMap
-    parseScript
-  end
-  def inspect
-    s = "RegParse: #{@script}"
-    s += " start:"+d(@startState)+" end:"+d(@endState)
-    return s
-  end
-  private
-  # Raise a ParseException, with a helpful message indicating
-  # the parser's current location within the string
+  # Exception thrown if problem parsing regular expression
   #
-  def abort(msg)
-    # Assume we've already read the problem character
-    i = @cursor - 1
-    s = ''
-    if i > 4
-      s += '...'
-    end
-    s +=  @script[i-3...i] || ""
-    s += ' !!! '
-    s += @script[i...i+3] || ""
-    if i +3 < @script.size
-      s += '...'
-    end
-    raise ParseException, msg + ": "+s
+  class ParseException < Exception
   end
-  # Read next character as a hex digit
+  # Parses a single regular expression from a string.
+  # Produces an NFA with distinguished start and end states
+  # (none of these states are marked as final states)
   #
-  def readHex
-    v = read.upcase.ord
-    if v >= 48 and v < 58
-      return v - 48
-    elsif v >= 65 and v < 71
-      return v - 65 + 10
-    else
-      abort "Missing hex digit"
-    end
-  end
-  NO_ESCAPE_CHARS = Regexp.new("[A-Za-z0-9]")
-  # Parse character definition (CHARCODE) from input
+  # Here is the grammar for regular expressions.  Spaces are ignored,
+  # and can be liberally sprinkled within the regular expressions to
+  # aid readability.  To represent a space, the \s escape sequence must be used.
+  # See the file 'sampletokens.txt' for some examples.
+  #
+  #   Expressions have one of these types:
+  #
+  #   E : base class
+  #   J : a Join expression, formed by concatenating one or more together
+  #   Q : a Quantified expression; followed optionally by '*', '+', or '?'
+  #   P : a Parenthesized expression, which is optionally surrounded with (), {}, []
+  #
+  #   E -> J '|' E
+  #      | J
+  #
+  #   J -> Q J
+  #      | Q
+  #
+  #   Q -> P '*'
+  #      | P '+'
+  #      | P '?'
+  #      | P
+  #
+  #   P -> '(' E ')'
+  #      | '{' TOKENNAME '}'
+  #      | '[^' SETSEQ ']'     A code not appearing in the set
+  #      | '[' SETSEQ ']'
+  #      | CHARCODE
+  #
+  #   SETSEQ -> SET SETSEQ
+  #           | SET
+  #
+  #   SET -> CHARCODE
+  #           | CHARCODE '-' CHARCODE
   #
-  def parseChar
+  #   CHARCODE ->
+  #            a |  b |  c  ...   any printable except {,},[, etc.
+  #        |  \xhh                  hex value from 00...ff
+  #        |  \uhhhh                hex value from 0000...ffff (e.g., unicode)
+  #        |  \f | \n | \r | \t     formfeed, linefeed, return, tab
+  #        |  \s                    a space (' ')
+  #        |  \*                    where * is some other non-alphabetic
+  #                                  character that needs to be escaped
+  #
+  # The parser performs recursive descent parsing;
+  # each method returns an NFA represented by
+  # a pair of states: the start and end states.
+  #
+  class RegParse
-    c = read
+    attr_reader :startState, :endState
-    val = c.ord
+    # Construct a parser and perform the parsing
+    # @param script script to parse
+    # @param tokenDefMap if not nil, a map of previously parsed regular expressions
+    #     (mapping names to ids) to be consulted if a curly brace expression appears
+    #     in the script
+    #
+    def initialize(script, tokenDefMap = nil)
+      @script = script.strip
+      @nextStateId = 0
+      @tokenDefMap = tokenDefMap
+      parseScript
+    end
+    def inspect
+      s = "RegParse: #{@script}"
+      s += " start:"+d(@startState)+" end:"+d(@endState)
+      return s
+    end
+    private
+    # Raise a ParseException, with a helpful message indicating
+    # the parser's current location within the string
+    #
+    def abort(msg)
+      # Assume we've already read the problem character
+      i = @cursor - 1
+      s = ''
+      if i > 4
+        s += '...'
+      end
+      s +=  @script[i-3...i] || ""
+      s += ' !!! '
+      s += @script[i...i+3] || ""
+      if i +3 < @script.size
+        s += '...'
+      end
+      raise ParseException, msg + ": "+s
+    end
-    if "{}[]*?+|-^()".include?(c) or val <= 0x20
-      abort "Unexpected or unescaped character"
+    # Read next character as a hex digit
+    #
+    def readHex
+      v = read.upcase.ord
+      if v >= 48 and v < 58
+        return v - 48
+      elsif v >= 65 and v < 71
+        return v - 65 + 10
+      else
+        abort "Missing hex digit"
+      end
     end
-    if c == '\\'
+    NO_ESCAPE_CHARS = Regexp.new("[A-Za-z0-9]")
+    # Parse character definition (CHARCODE) from input
+    #
+    def parseChar
       c = read
-      if "xX".include? c
-        val = (readHex() << 4) | readHex()
-      elsif "uU".include? c
-        val = (readHex() << 12) | (readHex() << 8) | (readHex() << 4) | readHex()
-      else
-        if c == 'f'
-          val = "\f".ord
-        elsif c == 'r'
-          val == "\r".ord
-        elsif c == 'n'
-          val = "\n".ord
-        elsif c == 't'
-          val = "\t".ord
-        elsif c == 's'
-          val = " ".ord
+      val = c.ord
+      if "{}[]*?+|-^()".include?(c) or val <= 0x20
+        abort "Unexpected or unescaped character"
+      end
+      if c == '\\'
+        c = read
+        if "xX".include? c
+          val = (readHex() << 4) | readHex()
+        elsif "uU".include? c
+          val = (readHex() << 12) | (readHex() << 8) | (readHex() << 4) | readHex()
         else
-          if c =~ NO_ESCAPE_CHARS
-            abort "Unsupported escape sequence ("+c+")"
-          end
-          val = c.ord
-        end
+          if c == 'f'
+            val = "\f".ord
+          elsif c == 'r'
+            val == "\r".ord
+          elsif c == 'n'
+            val = "\n".ord
+          elsif c == 't'
+            val = "\t".ord
+          elsif c == 's'
+            val = " ".ord
+          else
+            if c =~ NO_ESCAPE_CHARS
+              abort "Unsupported escape sequence ("+c+")"
+            end
+            val = c.ord
+          end
+        end
       end
+      return val
     end
-    return val
-  end
-  def parseCharNFA
-    val = parseChar
-    # Construct a pair of states with an edge between them
-    # labelled with this character code
-    sA = newState
-    sB = newState
-    cset = CodeSet.new
-    cset.add(val)
-    sA.addEdge(cset, sB)
-    return [sA,sB]
-  end
+    def parseCharNFA
+      val = parseChar
-  def dbInfo
-    j = @cursor
-    k = j + 5
-    if k >= @script.size
-      return @script[j..k]+"<<<== end"
-    else
-      return @script[j..k]+"..."
+      # Construct a pair of states with an edge between them
+      # labelled with this character code
+      sA = newState
+      sB = newState
+      cset = CodeSet.new
+      cset.add(val)
+      sA.addEdge(cset, sB)
+      return [sA,sB]
     end
-  end
-  def parseScript
-    # Set up the input scanner
-    @cursor = 0
-    exp = parseE
-    @startState = exp[0]
-    @endState = exp[1]
-  end
-  def newState
-    s = State.new(@nextStateId)
-    @nextStateId += 1
-    return s
-  end
-  def parseSET
-    u = parseChar
-    v = u+1
-    if readIf('-')
-      v = parseChar() + 1
-      if v <= u
-        abort "Illegal range"
-      end
-    end
-    return u,v
-  end
-  def parseSETSEQ
-    db = false
-    !db || pr("parseSETSEQ\n")
-    read('[')
-    negated = readIf('^')
-    !db || pr(" negated=%s\n",negated)
-    rs = CodeSet.new
+    def dbInfo
+      j = @cursor
+      k = j + 5
+      if k >= @script.size
+        return @script[j..k]+"<<<== end"
+      else
+        return @script[j..k]+"..."
+      end
+    end
-    u,v = parseSET
-    rs.add(u,v)
-    !db || pr(" initial set=%s\n",d(rs))
-    while not readIf(']')
-      u,v = parseSET
-      rs.add(u,v)
-      !db || pr("  added another; %s\n",d(rs))
-    end
-    if negated
-      rs.negate
-      !db || pr(" negated=%s\n",d(rs))
+    def parseScript
+      # Set up the input scanner
+      @cursor = 0
+      exp = parseE
+      @startState = exp[0]
+      @endState = exp[1]
     end
-    if rs.empty?
-      abort "Empty character range"
+    def newState
+      s = State.new(@nextStateId)
+      @nextStateId += 1
+      return s
     end
-    sA = newState
-    sB = newState
-    sA.addEdge(rs, sB)
-    return [sA,sB]
-  end
+    def parseSET
+      u = parseChar
+      v = u+1
+      if readIf('-')
+        v = parseChar() + 1
+        if v <= u
+          abort "Illegal range"
+        end
+      end
+      return u,v
+    end
-  TOKENREF_EXPR = Regexp.new('^[_A-Za-z][_A-Za-z0-9]*$')
+    def parseSETSEQ
+      db = false
+      !db || pr("parseSETSEQ\n")
+      read('[')
+      negated = readIf('^')
+      !db || pr(" negated=%s\n",negated)
+      rs = CodeSet.new
+      u,v = parseSET
+      rs.add(u,v)
+      !db || pr(" initial set=%s\n",d(rs))
-  def parseTokenDef
-    read('{')
-    name = ''
-    while !readIf('}')
-      name += read
-    end
-    # pr("name=[%s], TR=[%s], match=[%s]\n",d(name),d(TOKENREF_EXPR),d(name =~ TOKENREF_EXPR))
-    if name  !~ TOKENREF_EXPR
-      abort "Problem with token name"
-    end
-    tokInfo = nil
-    if @tokenDefMap
-      tokInfo = @tokenDefMap[name]
-    end
-    if !tokInfo
-      abort "Undefined token"
+      while not readIf(']')
+        u,v = parseSET
+        rs.add(u,v)
+        !db || pr("  added another; %s\n",d(rs))
+      end
+      if negated
+        rs.negate
+        !db || pr(" negated=%s\n",d(rs))
+      end
+      if rs.empty?
+        abort "Empty character range"
+      end
+      sA = newState
+      sB = newState
+      sA.addEdge(rs, sB)
+      return [sA,sB]
     end
-    rg = tokInfo[1]
-    oldToNewMap, @nextStateId = rg.startState.duplicateNFA(@nextStateId)
+    TOKENREF_EXPR = Regexp.new('^[_A-Za-z][_A-Za-z0-9]*$')
-    newStart = oldToNewMap[rg.startState]
-    newEnd = oldToNewMap[rg.endState]
+    def parseTokenDef
+      read('{')
+      name = ''
+      while !readIf('}')
+        name += read
+      end
+      # pr("name=[%s], TR=[%s], match=[%s]\n",d(name),d(TOKENREF_EXPR),d(name =~ TOKENREF_EXPR))
+      if name  !~ TOKENREF_EXPR
+        abort "Problem with token name"
+      end
+      tokInfo = nil
+      if @tokenDefMap
+        tokInfo = @tokenDefMap[name]
+      end
+      if !tokInfo
+        abort "Undefined token"
+      end
+      rg = tokInfo[1]
+      oldToNewMap, @nextStateId = rg.startState.duplicateNFA(@nextStateId)
+      newStart = oldToNewMap[rg.startState]
+      newEnd = oldToNewMap[rg.endState]
+      [newStart, newEnd]
+    end
-    [newStart, newEnd]
-  end
-  def parseP
-    ch = peek
-    if ch == '('
-      read
-      e1 = parseE
-      read ')'
-    elsif ch == '{'
-      e1 = parseTokenDef
-    elsif ch == '['
-      e1 = parseSETSEQ
-    else
-      e1 = parseCharNFA
+    def parseP
+      ch = peek
+      if ch == '('
+        read
+        e1 = parseE
+        read ')'
+      elsif ch == '{'
+        e1 = parseTokenDef
+      elsif ch == '['
+        e1 = parseSETSEQ
+      else
+        e1 = parseCharNFA
+      end
+      return e1
+     end
+    def parseE
+      e1 = parseJ
+      if readIf('|')
+        e2 = parseE
+        u = newState
+        v = newState
+        u.addEps(e1[0])
+        u.addEps(e2[0])
+        e1[1].addEps(v)
+        e2[1].addEps(v)
+        e1 = [u,v]
+      end
+      return e1
     end
-    return e1
-   end
-  def parseE
-    e1 = parseJ
-    if readIf('|')
-      e2 = parseE
+    def parseJ
+      e1 = parseQ
+      p = peek
+      if p and not "|)".include? p
+        e2 = parseJ
+        e1[1].addEps(e2[0])
+        e1 = [e1[0],e2[1]]
+      end
-      u = newState
-      v = newState
-      u.addEps(e1[0])
-      u.addEps(e2[0])
-      e1[1].addEps(v)
-      e2[1].addEps(v)
-      e1 = [u,v]
-    end
-    return e1
-  end
-  def parseJ
-    e1 = parseQ
-    p = peek
-    if p and not "|)".include? p
-      e2 = parseJ
-      e1[1].addEps(e2[0])
-      e1 = [e1[0],e2[1]]
+      return e1
     end
-    return e1
-  end
-  def parseQ
-    e1 = parseP
-    p = peek
+    def parseQ
+      e1 = parseP
+      p = peek
+      if p == '*'
+        read
+        e1[0].addEps(e1[1])
+        e1[1].addEps(e1[0])
+      elsif p == '+'
+        read
+        e1[1].addEps(e1[0])
+      elsif p == '?'
+        read
+        e1[0].addEps(e1[1])
+        # e1[0].generatePDF("optional")
+      end
+      return e1
+    end
-    if p == '*'
-      read
-      e1[0].addEps(e1[1])
-      e1[1].addEps(e1[0])
-    elsif p == '+'
-      read
-      e1[1].addEps(e1[0])
-    elsif p == '?'
-      read
-      e1[0].addEps(e1[1])
-      # e1[0].generatePDF("optional")
+    def peek(mustExist = false)
+      # skip over any non-linefeed whitespace
+      while @cursor < @script.size && " \t".index(@script[@cursor])
+        @cursor += 1
+      end
+      if mustExist or @cursor < @script.size
+        @script[@cursor]
+      else
+        nil
+      end
     end
-    return e1
-  end
-  def peek(mustExist = false)
-    # skip over any non-linefeed whitespace
-    while @cursor < @script.size && " \t".index(@script[@cursor])
-      @cursor += 1
+    def readIf(expChar)
+      r = (peek == expChar)
+      if r
+        read
+      end
+      return r
     end
-    if mustExist or @cursor < @script.size
-      @script[@cursor]
-    else
-      nil
+    def read(expChar = nil)
+      ch = peek
+      if ch and ((not expChar) or ch == expChar)
+        @cursor += 1
+        ch
+      else
+        abort 'Unexpected end of input'
+      end
     end
   end
-  def readIf(expChar)
-    r = (peek == expChar)
-    if r
-      read
-    end
-    return r
-  end
-  def read(expChar = nil)
-    ch = peek
-    if ch and ((not expChar) or ch == expChar)
-      @cursor += 1
-      ch
-    else
-      abort 'Unexpected end of input'
-    end
-  end
-end
+end  # module ToknInternal