RubyGems - tokn - Versions diffs - 0.0.9 → 0.1.0 - Mend

tokn 0.0.9 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/CHANGELOG.txt +6 -0
data/{README.txt → README.md} +3 -4
data/lib/tokn/range_partition.rb +2 -2
data/lib/tokn/state.rb +3 -3
data/lib/tokn/tools.rb +177 -23
data/test/_OLD_/Example1.rb +81 -0
data/test/{data → _OLD_/data}/compileddfa.txt +0 -0
data/test/_OLD_/data/sampletokens_dfa.txt +1 -0
data/test/{test.rb → _OLD_/test.rb} +0 -0
data/test/{testcmds → _OLD_/testcmds} +0 -0
data/test/{data/sampletext.txt → sampletext.txt} +0 -0
data/test/{data/sampletokens.txt → sampletokens.txt} +0 -0
data/test/test_tokn.rb +584 -0
metadata +12 -11
data/test/Example1.rb +0 -50

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 804ed12fc717a528758a7f1bc2ec03e92e829310
-  data.tar.gz: 2d3df30d3525d0c0ef3d5b7aa1f16839db6fd786
+  metadata.gz: 84933fe2966d19908c447c84cbccb9179c4e351d
+  data.tar.gz: d1a6fae299cdd6c9b57961bfde7c879e1a786a48
 SHA512:
-  metadata.gz: 80fb1504a1f42d95ebe2ac6b97d16f31d9b14e2f1b3d2e08ab45780584f8b0763a2f1a4a2da09614be68c0b67e1041b7a3d1a5db57a66705d9dbf57454265987
-  data.tar.gz: 9f6a5c6304471df7a2b8f2cbb6b584c6f1bdcf425d94e0904bec630e45163f60a42eca8b4865ed1b0b87f7681edaf83eeac6181ce752863f673e4185f1666d48
+  metadata.gz: 022689701816eb3fb37690579b275194cbafc53d31591b7fb87d93a6cacafc50318dd964d28697ed1181ceb288d700214191cc41d6362cf4221e110e7c885531
+  data.tar.gz: 96ee32c79d3e12ba8b01cd27fdfb9ff0bfc6ff518972358dc2fbac04f4c8842fccd6f8722c994daba299327718915074f853cc3c9125c0e22dffe3ef19fd189d

data/CHANGELOG.txt CHANGED

@@ -11,3 +11,9 @@
   * Version 0.0.9
   * Fixed problem with README file
+2013-04-08
+	* Version 0.1.0
+	* Cleaned up test directory

data/{README.txt → README.md} RENAMED

@@ -1,11 +1,10 @@
-# @markup markdown
 tokn
 =======
-A ruby gem for constructing DFAs and using them to tokenize text files.
+Tokn is a ruby gem that generates automatons from regular expressions to extract tokens from text files.
-Written and (c) by Jeff Sember, March 2013.
+Written by Jeff Sember, March 2013.
+[Source code documentation can be found here.](http://rubydoc.info/gems/tokn/frames)
 Description of the problem

data/lib/tokn/range_partition.rb CHANGED

@@ -79,7 +79,7 @@ module ToknInternal
     # Generate a .dot file, and from that, a PDF, for debug purposes
     #
-    def generatePDF(name = "partition")
+    def generatePDF(test_dir = nil, name = "partition")
       if !@prepared
         raise IllegalStateException
       end
@@ -103,7 +103,7 @@ module ToknInternal
       g += "\n}\n"
       g.gsub!( /'/, '"' )
-      dotToPDF(g,name)
+      dotToPDF(g,name, test_dir)
     end

data/lib/tokn/state.rb CHANGED

@@ -102,11 +102,11 @@ module ToknInternal
     # Generate a PDF of the state machine;
     # Makes a system call to the dot utility to convert a .dot file to a .pdf
     #
-    def generatePDF(title = "nfa")
+    def generatePDF(dir = nil, title = "nfa")
       stateList = {}
       startState = self
-      genAux(stateList, startState)
+      genAux( stateList, startState)
       g = ""
       g += "digraph "+title+" {\n"
@@ -135,7 +135,7 @@ module ToknInternal
       g += "\n}\n"
       g.gsub!( /'/, '"' )
-      dotToPDF(g,title)
+      dotToPDF(g,title,dir)
     end

data/lib/tokn/tools.rb CHANGED

@@ -1,4 +1,5 @@
 require 'set'
+require 'fileutils'
 # Various utility and debug convenience functions.
 #
@@ -45,38 +46,41 @@ def myAssert(cond, *msg)
 end
-# Set test directory.  If nil, sets to home directory + "__test__"
-#
-def setTestDir(d = nil)
-  $testDir = d || File.join(Dir.home,"__test__")
-end
+## Set test directory.  If nil, sets to home directory + "__test__"
+##
+#def setTestDir(d = nil)
+#  $testDir = d || File.join(Dir.home,"__test__")
+#end
-# Get a path within the test directory;
-# create test directory if it doesn't exist.
-#
-# relPath : if nil, returns the test directory; else
-#   returns the test directory joined to this one
-#
-def withinTestDir(relPath = nil)
-  if !$testDir
-    raise IllegalStateException, "No test directory has been defined"
-  end
-  if !File.directory?($testDir)
-    Dir::mkdir($testDir)
-  end
-  relPath ? File.join($testDir,relPath) : $testDir
-end
+## Get a path within the test directory;
+## create test directory if it doesn't exist.
+##
+## relPath : if nil, returns the test directory; else
+##   returns the test directory joined to this one
+##
+#def withinTestDir(relPath = nil)
+#  if !$testDir
+#    raise IllegalStateException, "No test directory has been defined"
+#  end
+#  if !File.directory?($testDir)
+#    Dir::mkdir($testDir)
+#  end
+#  relPath ? File.join($testDir,relPath) : $testDir
+#end
 # Convert a .dot file (string) to a PDF file "__mygraph__<name>.pdf"
 # in the directory given by test_dir (required; ArgumentError if nil).
 #
 # It does this by making a system call to the 'dot' utility.
 #
-def dotToPDF(dotFile, name = "")
+def dotToPDF(dotFile, name = "", test_dir = nil)
   gr = dotFile
-  dotPath = withinTestDir(".__mygraph__.dot")
+  raise ArgumentError if !test_dir
+  dotPath = File.join(test_dir,".__mygraph__.dot")
   writeTextFile(dotPath,gr)
-  destName = withinTestDir( "__mygraph__"+name+".pdf")
+  destName = File.join(test_dir,"__mygraph__"+name+".pdf")
   system("dot -Tpdf "+dotPath+" -o "+destName)
 end
@@ -190,6 +194,16 @@ def readTextFile(path)
   contents
 end
+# Delete a file or directory, if it exists.
+# Caution!  If directory, deletes all files and subdirectories.
+def remove_file_or_dir(pth)
+  if File.directory?(pth)
+    FileUtils.remove_dir(pth)
+  elsif File.file?(pth)
+    FileUtils.remove_file(pth)
+  end
+end
 # Method that takes a code block as an argument to
 # achieve the same functionality as Java/C++'s
 #  do {
@@ -206,3 +220,143 @@ end
 class IllegalStateException < Exception
 end
+# Convenience method to detect whether a script is being run directly,
+# i.e. as the 'main' program (for debug purposes only).
+# If so, it changes the current directory to the
+# directory containing the script (if such a directory exists).
+#
+# @param file pass __FILE__ in here
+# @return true if the script is being run directly
+#
+def main?(file)
+  scr = $0
+  # The test/unit framework seems to be adding a suffix ": xxx#xxx.."
+  # to the .rb filename, so adjust in this case
+  i = scr.index(".rb: ")
+  if i
+    scr = scr[0...i+3]
+  end
+  if (ret = (file == scr))
+    dr = File.dirname(file)
+    if File.directory?(dr)
+      Dir.chdir(dr)
+    end
+  end
+  ret
+end
+if defined? Test::Unit
+  # A simple extension to Ruby's Test::Unit class that provides
+  # suite-level setup/teardown methods.
+  #
+  # If test suite functionality is desired within a script,
+  # then require 'test/unit' before requiring 'tools.rb'.
+  # This will cause the following class, MyTestSuite, to be defined.
+  #
+  # The user's test script can define subclasses of this,
+  # and declare test methods with the name 'test_xxxx', where
+  # xxxx is lexicographically between 01 and zz.
+  #
+  # There are two levels of setup/teardown called : suite level, and
+  # method level.  For example, if the user's test class performs two tests:
+  #
+  #  def test_b   ... end
+  #  def test_c   ... end
+  #
+  # Then the test framework will make these calls:
+  #
+  #     suite_setup
+  #
+  #     method_setup
+  #     test_b
+  #     method_teardown
+  #
+  #     method_setup
+  #     test_c
+  #     method_teardown
+  #
+  #     suite_teardown
+  #
+  # Notes
+  # -----
+  # 1) The usual setup / teardown methods should NOT be overridden; instead,
+  # use the method_xxx alternatives.
+  #
+  # 2) The base class implementations of method_/suite_xxx do nothing.
+  #
+  # 3) The number of test cases reported may be higher than you expect, since
+  # there are additional test methods defined by the MyTestSuite class to
+  # implement the suite setup / teardown functionality.
+  #
+  # 4) Avoid naming test methods that fall outside of test_01 ... test_zz.
+  #
+  class MyTestSuite < Test::Unit::TestCase
+    # This is named to be the FIRST test called.  It
+    # will do suite-level setup, and nothing else.
+    def test_00_setup
+      @@suiteSetup = true
+      suite_setup()
+    end
+    # This is named to be the LAST test called.  It
+    # will do suite-level teardown, and nothing else.
+    def test_zzzzzz_teardown
+      suite_teardown()
+      @@suiteSetup = false
+    end
+    # True if the current test is an ordinary test method, i.e. it is not one
+    # of the suite-level pseudo-tests (test_00_setup / test_zzzzzz_teardown)
+    def _suite_active?
+      !(@__name__ == "test_00_setup" || @__name__ == "test_zzzzzz_teardown")
+    end
+    def setup
+      if _suite_active?
+        # If only a specific test was requested, the
+        # suite setup may not have run... if not, do it now.
+        if !defined? @@suiteSetup
+          suite_setup
+        end
+        return
+      end
+      method_setup
+    end
+    def out_dir
+      "_output_"
+    end
+    def out_path(f)
+      File.join(out_dir,f)
+    end
+    def teardown
+      if _suite_active?
+        if !defined? @@suiteSetup
+          suite_teardown
+        end
+        return
+      end
+      method_teardown
+    end
+    def suite_setup
+    end
+    def suite_teardown
+    end
+    def method_setup
+    end
+    def method_teardown
+    end
+  end
+end

data/test/_OLD_/Example1.rb ADDED

@@ -0,0 +1,81 @@
+require_relative '../lib/tokn/tokenizer'
+class Example1
+  include Tokn
+  def dataPath(f)
+    File.dirname(__FILE__)+"/data/"+f
+  end
+  setTestDir()
+  def initialize
+    @sampleText = readTextFile(dataPath("sampletext.txt"))
+  end
+  def makeTok
+    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+    Tokenizer.new(@dfa,  @sampleText, "WS")
+  end
+  def go
+    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+    t = makeTok
+    s = ''
+    while t.hasNext do
+      tk = t.peek
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          s << " ...read BROP DO ID sequence...\n"
+          lst.each{ |x| s << "   #{d(x)}\n"}
+          next
+        else
+          s << " ...couldn't find sequence...\n"
+        end
+      end
+      tk = t.read
+      s << d(tk) << "\n"
+    end
+    exp =<<"EXP"
+(line 1, col 1)   : speed
+(line 1, col 6)   : =
+(line 1, col 7)   : 42
+(line 1, col 9)   : gravity
+(line 1, col 16)  : =
+(line 1, col 17)  : -9.80
+ ...couldn't find sequence...
+(line 1, col 22)  : {
+(line 1, col 23)  : color
+(line 1, col 29)  : =
+(line 1, col 30)  : green
+(line 1, col 35)  : }
+(line 1, col 36)  : title
+(line 1, col 41)  : =
+(line 1, col 42)  : 'This is a string with \' an escaped delimiter'
+(line 1, col 89)  : if
+(line 1, col 91)  : gravity
+(line 1, col 98)  : ==
+(line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+(line 1, col 102) : {
+(line 1, col 103) : do
+(line 1, col 105) : something
+(line 1, col 114) : }
+(line 1, col 115) : do
+(line 1, col 117) : something_else
+EXP
+   assert(s == exp)
+  end
+end
+Example1.new.go

data/test/{data → _OLD_/data}/compileddfa.txt RENAMED

File without changes

data/test/_OLD_/data/sampletokens_dfa.txt ADDED

@@ -0,0 +1 @@

+ {"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}

data/test/{test.rb → _OLD_/test.rb} RENAMED

File without changes

data/test/{testcmds → _OLD_/testcmds} RENAMED

File without changes

data/test/{data/sampletext.txt → sampletext.txt} RENAMED

File without changes

data/test/{data/sampletokens.txt → sampletokens.txt} RENAMED

File without changes

data/test/test_tokn.rb ADDED

@@ -0,0 +1,584 @@
+require 'test/unit'
+require_relative '../lib/tokn/tools.rb'
+req('range_partition dfa dfa_builder tokenizer token_defn_parser')
+# Get access to Tokn namespace
+#setTestDir()
+#SINGLETEST = "test_ps_output_multi"
+if defined? SINGLETEST
+  if main?(__FILE__)
+    ARGV.concat("-n  #{SINGLETEST}".split)
+  end
+end
+class TestTokn <  MyTestSuite
+  include Tokn, ToknInternal
+#  def data_file(f)
+#    File.join("data",f)
+#    #    File.dirname(__FILE__)+"/data/"+f
+#  end
+  def suite_setup
+    # Make current directory = the one containing this script
+    main?(__FILE__)
+    if !File.directory?(out_dir)
+      Dir.mkdir(out_dir)
+    end
+    @@sampleText = readTextFile("sampletext.txt")
+    @@sampleTokens = readTextFile("sampletokens.txt")
+  end
+  #  def withinTestDir(f)
+  #    File.join(@@testDir,"__source__")
+  def suite_teardown
+    remove_file_or_dir(out_dir)
+  end
+  def method_setup
+  end
+  def method_teardown
+  end
+  def add(lower, upper = nil)
+    @cs.add(lower,upper)
+  end
+  def remove(lower, upper = nil)
+    @cs.remove(lower,upper)
+  end
+  def swap
+    @ct = @cs
+    prep
+  end
+  def isect
+    @cs.intersect!(@ct)
+  end
+  def diff
+    @cs.difference!(@ct)
+  end
+  def equ(s, arr = nil)
+    arr ||= @cs.array
+    ia = s.split.map{|n| n.to_i}
+    assert_equal(ia,arr)
+  end
+  def test_100_add
+    prep
+    add(72,81)
+    equ '72 81'
+    add(50)
+    equ '50 51 72 81'
+    add(75,77)
+    equ '50 51 72 81'
+    add(72,78)
+    equ '50 51 72 81'
+    add(70,78)
+    equ '50 51 70 81'
+    add 60
+    equ '50 51 60 61 70 81'
+    add 40
+    equ '40 41 50 51 60 61 70 81'
+    add 41
+    equ '40 42 50 51 60 61 70 81'
+    add 81
+    equ '40 42 50 51 60 61 70 82'
+    add 83
+    equ '40 42 50 51 60 61 70 82 83 84'
+    add 49,84
+    equ '40 42 49 84'
+    add 39,86
+    equ '39 86'
+  end
+  def test_110_intersect
+    prep
+    add 39,86
+    swap
+    add 50,70
+    isect
+    equ '50 70'
+    swap
+    add 20,25
+    add 35,51
+    add 62,68
+    add 72,80
+    isect
+    equ '50 51 62 68'
+    prep
+    swap
+    add 50,70
+    isect
+    equ ''
+    add 50,70
+    swap
+    add 50,70
+    isect
+    equ '50 70'
+    prep
+    add 20,25
+    swap
+    add 25,30
+    isect
+    equ ''
+  end
+  def test_120_difference
+    prep
+    add 20,30
+    add 40,50
+    swap
+    add 20,80
+    diff
+    equ '30 40 50 80'
+    prep
+    add 19,32
+    diff
+    equ '19 20 30 32'
+    prep
+    add 30,40
+    diff
+    equ '30 40'
+    prep
+    add 20,30
+    add 40,50
+    diff
+    equ ''
+    prep
+    add 19,30
+    add 40,50
+    diff
+    equ '19 20'
+    prep
+    add 20,30
+    add 40,51
+    diff
+    equ '50 51'
+  end
+  def prep
+    @cs =  CodeSet.new
+  end
+  def test_130_illegalRange
+    prep
+    assert_raise(RangeError) { add 60,50 }
+    assert_raise(RangeError) { add 60,60 }
+  end
+  def neg(lower, upper)
+    @cs.negate lower, upper
+  end
+  def test_140_negate
+    prep
+    add 10,15
+    add 20,25
+    add 30
+    add 40,45
+    equ '10 15 20 25 30 31 40 45'
+    neg 22,37
+    equ '10 15 20 22 25 30 31 37 40 45'
+    neg 25,27
+    equ '10 15 20 22 27 30 31 37 40 45'
+    neg 15,20
+    equ '10 22 27 30 31 37 40 45'
+    prep
+    add 10,22
+    @cs.negate
+    equ '0 10 22 1114112'
+    prep
+    add 10,20
+    neg 10,20
+    equ ''
+    prep
+    add 10,20
+    add 30,40
+    neg 5,10
+    equ '5 20 30 40'
+    prep
+    add 10,20
+    add 30,40
+    neg 25,30
+    equ '10 20 25 40'
+    prep
+    add 10,20
+    add 30,40
+    neg 40,50
+    equ '10 20 30 50'
+    prep
+    add 10,20
+    add 30,40
+    neg 41,50
+    equ '10 20 30 40 41 50'
+    prep
+    add 10,20
+    add 30,40
+    neg 15,35
+    equ '10 15 20 30 35 40'
+  end
+  def test_150_remove
+    prep
+    add 10,20
+    add 30,40
+    remove 29,41
+    equ '10 20'
+    add 30,40
+    equ '10 20 30 40'
+    remove 20,30
+    equ '10 20 30 40'
+    remove 15,35
+    equ '10 15 35 40'
+    remove 10,15
+    equ '35 40'
+    remove 35
+    equ '36 40'
+    remove 40
+    equ '36 40'
+    remove 38
+    equ '36 38 39 40'
+    remove 37,39
+    equ '36 37 39 40'
+  end
+  def dset(st)
+    s = ''
+    st.each{|x|
+      if s.length > 0
+        s+= ' '
+      end
+      s += d(x)
+    }
+    return s
+  end
+  def newpar
+    @par =  RangePartition.new
+  end
+  def addset(lower, upper = nil)
+    upper ||= lower + 1
+    r =  CodeSet.new(lower,upper)
+    @par.addSet(r)
+  end
+  def apply
+    list = @par.apply(@cs)
+    res = []
+    list.each do |x|
+      res.concat x.array
+    end
+    @parResult = res
+  end
+  def test_160_partition
+    newpar
+    addset(20,30)
+    addset(25,33)
+    addset(37)
+    addset(40,50)
+    @par.prepare
+    @par.generatePDF(out_dir)
+    prep
+    add 25,33
+    apply
+    equ('25 30 30 33', @parResult)
+    prep
+    add 37
+    apply
+    equ('37 38', @parResult)
+    prep
+    add 40,50
+    apply
+    equ('40 50', @parResult)
+  end
+  REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
+  TOKEN_SCRIPT2 = <<'END'
+        sep:  \s
+        tku:  a(a|b)*
+        tkv:  b(aa|b*)
+        tkw:  bbb
+END
+  def test_170_build_DFA
+    x =  RegParse.new(REGEX_SCRIPT)
+    s = x.startState
+    x.endState.finalState = true
+    s.generatePDF(out_dir,"nfa")
+    r = s.reverseNFA()
+    r.generatePDF(out_dir,"reversed")
+    dfa = DFABuilder.nfa_to_dfa(s)
+    dfa.generatePDF(out_dir,"buildDFA")
+  end
+  def test_180_cvt_NFA_to_DFA
+    x = RegParse.new(REGEX_SCRIPT)
+    s = x.startState
+    x.endState.finalState = true
+    s.generatePDF(out_dir,"nfa")
+    dfa = DFABuilder.nfa_to_dfa(s)
+    dfa.generatePDF(out_dir,"dfa")
+    oldToNewMap, maxId2 = dfa.duplicateNFA(42)
+    dfa2 = oldToNewMap[dfa]
+    dfa2.generatePDF(out_dir,"dfa_duplicated")
+  end
+  def test_190_TokenDefParser
+    s = TOKEN_SCRIPT2
+    td = TokenDefParser.new(s)
+    tokDFA = td.dfa
+    tokDFA.startState.generatePDF(out_dir,"TokenDFA")
+  end
+  def makeTok
+    dfa = DFA.from_script(@@sampleTokens)
+    Tokenizer.new(dfa, @@sampleText)
+  end
+  def test_200_Tokenizer
+    tok = makeTok
+    tokList = []
+    while tok.hasNext
+      t = tok.read
+      tokList.push(t)
+    end
+    tok.unread(tokList.size)
+    tokList.each do |t1|
+      tName = tok.nameOf(t1)
+      t2 = tok.read(tName)
+    end
+  end
+  def test_210_Tokenizer_Missing_Expected
+    assert_raise TokenizerException do
+      tok = makeTok
+      tok.read
+      tok.read
+      tok.read
+      tok.read
+      tok.read("signedint")
+    end
+  end
+  def test_220_CompileDFAToDisk
+    tokScript = @@sampleTokens
+    testText = @@sampleText
+    destPath = out_path("sampletokens_dfa.txt")
+    if File.exist?(destPath)
+      File.delete(destPath)
+    end
+    assert(!File.exist?(destPath))
+    dfa = DFA.from_script(tokScript, destPath)
+    assert(File.exist?(destPath))
+    tok = Tokenizer.new(dfa,  testText)
+  end
+  def prep2
+    testText = @@sampleText
+    dfa = DFA.from_file(out_path("sampletokens_dfa.txt"))
+    tok = Tokenizer.new(dfa, testText)
+  end
+  def test_230_readAndUnread
+    tok = prep2
+    unread = false
+    while tok.hasNext
+      t = tok.read
+#      pr("Read  %-8s %s\n",tok.nameOf(t),d(t))
+      if !unread && tok.nameOf(t) == "DO"
+#        pr("  ...pushing back four tokens...\n")
+        tok.unread(4)
+        unread = true
+#        pr("  ...and resuming...\n")
+      end
+    end
+  end
+  def test_240_UnrecognizedToken
+    assert_raise TokenizerException do
+      tok = prep2
+      while tok.hasNext
+        t = tok.read
+        if tok.nameOf(t) == "DO"
+          tok.read("BRCL") # <== this should raise problem
+        end
+      end
+    end
+  end
+  def test_250_ReadPastEnd
+    assert_raise TokenizerException do
+      tok = prep2
+      while tok.hasNext
+        t = tok.read
+      end
+      tok.read
+    end
+  end
+  def test_260_UnreadBeforeStart
+    assert_raise TokenizerException do
+      tok = prep2
+      k = 0
+      while tok.hasNext
+        t = tok.read
+        k += 1
+        if k == 15
+          tok.unread(5)
+          tok.unread(7)
+          tok.read()
+          tok.unread(4)
+          tok.unread(3)
+        end
+      end
+      tok.read
+    end
+  end
+  def test_270_filter_ws
+    dfa = DFA.from_script_file("sampletokens.txt")
+    t = Tokenizer.new(dfa,  readTextFile("sampletext.txt"), "WS")
+    s = ''
+    while t.hasNext do
+      tk = t.peek
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          s << " ...read BROP DO ID sequence...\n"
+          lst.each{ |x| s << "   #{d(x)}\n"}
+          next
+        else
+          s << " ...couldn't find sequence...\n"
+        end
+      end
+      tk = t.read
+      s << d(tk) << "\n"
+    end
+    exp =<<"EXP"
+(line 1, col 1)   : speed
+(line 1, col 6)   : =
+(line 1, col 7)   : 42
+(line 1, col 9)   : gravity
+(line 1, col 16)  : =
+(line 1, col 17)  : -9.80
+ ...couldn't find sequence...
+(line 1, col 22)  : {
+(line 1, col 23)  : color
+(line 1, col 29)  : =
+(line 1, col 30)  : green
+(line 1, col 35)  : }
+(line 1, col 36)  : title
+(line 1, col 41)  : =
+(line 1, col 42)  : 'This is a string with \\' an escaped delimiter'
+(line 1, col 89)  : if
+(line 1, col 91)  : gravity
+(line 1, col 98)  : ==
+(line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+   (line 1, col 102) : {
+   (line 1, col 103) : do
+   (line 1, col 105) : something
+   (line 1, col 114) : }
+(line 1, col 115) : do
+(line 1, col 117) : something_else
+EXP
+   assert(s.strip == exp.strip)
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tokn
 version: !ruby/object:Gem::Version
-  version: 0.0.9
+  version: 0.1.0
 platform: ruby
 authors:
 - Jeff Sember
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-07 00:00:00.000000000 Z
+date: 2013-04-09 00:00:00.000000000 Z
 dependencies: []
 description: "Given a script containing token descriptions (each a regular expression),
   \ntokn compiles an automaton which it can then use to efficiently convert a \ntext
@@ -34,13 +34,15 @@ files:
 - bin/tokncompile
 - bin/toknprocess
 - CHANGELOG.txt
-- README.txt
-- test/Example1.rb
-- test/data/compileddfa.txt
-- test/data/sampletext.txt
-- test/data/sampletokens.txt
-- test/test.rb
-- test/testcmds
+- README.md
+- test/_OLD_/Example1.rb
+- test/_OLD_/data/compileddfa.txt
+- test/_OLD_/data/sampletokens_dfa.txt
+- test/_OLD_/test.rb
+- test/_OLD_/testcmds
+- test/sampletext.txt
+- test/sampletokens.txt
+- test/test_tokn.rb
 homepage: http://www.cs.ubc.ca/~jpsember/
 licenses:
 - mit
@@ -66,6 +68,5 @@ signing_key:
 specification_version: 4
 summary: Extracts tokens from source files
 test_files:
-- test/Example1.rb
-- test/test.rb
+- test/test_tokn.rb
 has_rdoc:

data/test/Example1.rb DELETED

@@ -1,50 +0,0 @@
-require_relative '../lib/tokn/tokenizer'
-class Example1
-  include Tokn
-  def dataPath(f)
-    File.dirname(__FILE__)+"/data/"+f
-  end
-  setTestDir()
-  def initialize
-    @sampleText = readTextFile(dataPath("sampletext.txt"))
-  end
-  def makeTok
-    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
-    Tokenizer.new(@dfa,  @sampleText, "WS")
-  end
-  def go
-    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
-    t = makeTok
-    while t.hasNext do
-      tk = t.peek
-      if t.nameOf(tk) == 'BROP'
-        lst = t.readSequenceIf('BROP DO ID BRCL')
-        if lst
-          pr(" ...read BROP DO ID sequence...\n")
-          lst.each{ |x| pr("   %s\n",d(x))}
-          next
-        else
-          pr(" ...couldn't find sequence...\n")
-        end
-      end
-      tk = t.read
-      pr("%s\n",d(tk))
-    end
-  end
-end
-Example1.new.go