tokn 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
data/lib/tokn/tokn_const.rb
CHANGED
@@ -1,7 +1,13 @@
-#
+# Namespace to encompass the portions of the Tokn gem
+# accessible to end users
 #
 module Tokn
-
+end
+
+# Namespace to encompass the portions of the Tokn gem
+# used only internally
+#
+module ToknInternal
   # Token id if text didn't match any tokens in the DFA
   UNKNOWN_TOKEN = -1
 
@@ -16,14 +22,13 @@ module Tokn
 
   # Convert a token id (>=0) to an edge label value ( < 0)
   #
-  def tokenIdToEdgeLabel(tokenId)
+  def self.tokenIdToEdgeLabel(tokenId)
     EPSILON-1-tokenId
   end
 
   # Convert an edge label value ( < 0) to a token id (>=0)
   #
-  def edgeLabelToTokenId(edgeLabel)
+  def self.edgeLabelToTokenId(edgeLabel)
     EPSILON-1-edgeLabel
   end
-
-end
+end
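Both helpers apply the same formula, so the token-id/edge-label mapping is its own inverse. A minimal sketch of the round trip; EPSILON is defined elsewhere in tokn_const.rb, and its value of -1 below is an assumption, not something shown in this diff:

    EPSILON = -1  # assumed; see tokn_const.rb for the real definition

    def tokenIdToEdgeLabel(tokenId)
      EPSILON - 1 - tokenId
    end

    def edgeLabelToTokenId(edgeLabel)
      EPSILON - 1 - edgeLabel
    end

    p tokenIdToEdgeLabel(10)                      # => -12
    p edgeLabelToTokenId(tokenIdToEdgeLabel(10))  # => 10

Under that assumption, token id 10 (BRCL, the last name in the compiled DFA's token list below) appears as edge label -12, consistent with the [[-12,-11],14] edges in compileddfa.txt.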
data/lib/tokn/tools.rb
CHANGED
@@ -29,11 +29,7 @@ end
 # should be considered a debug-only feature
 #
 def d(arg)
-  if arg.nil?
-    "<nil>"
-  else
-    arg.inspect
-  end
+  arg.nil? ? "<nil>" : arg.inspect
 end
 
 # Assert that a value is true. Should be considered a
@@ -43,11 +39,7 @@ end
 def myAssert(cond, *msg)
   oneTimeAlert("warning",0,"Checking assertion")
   if not cond
-    if msg.size == 0
-      str = "assertion error"
-    else
-      str = sprintf(*msg)
-    end
+    str = (msg.size == 0) ? "assertion error" : sprintf(*msg)
     raise Exception, str
   end
 end
@@ -56,10 +48,7 @@ end
 # Set test directory. If nil, sets to home directory + "__test__"
 #
 def setTestDir(d = nil)
-  if !d
-    d = File.join(Dir.home,"__test__")
-  end
-  $testDir = d
+  $testDir = d || File.join(Dir.home,"__test__")
 end
 
 # Get a path within the test directory;
@@ -75,11 +64,7 @@ def withinTestDir(relPath = nil)
   if !File.directory?($testDir)
     Dir::mkdir($testDir)
   end
-  if relPath
-    File.join($testDir,relPath)
-  else
-    $testDir
-  end
+  relPath ? File.join($testDir,relPath) : $testDir
 end
 
 # Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
@@ -95,6 +80,28 @@ def dotToPDF(dotFile, name = "")
   system("dot -Tpdf "+dotPath+" -o "+destName)
 end
 
+# Extensions to the Enumerable module
+#
+module Enumerable
+  # Calculate a value for each item, and return the item with the
+  # highest value, its index, and the value.
+  # @yieldparam function to calculate value of an object, given that object as a parameter
+  # @return the triple [object, index, value] reflecting the maximum value, or
+  #    nil if there were no items
+  def max_with_index
+
+    best = nil
+
+    each_with_index do |obj,ind|
+      sc = yield(obj)
+      if !best || best[2] < sc
+        best = [obj,ind,sc]
+      end
+    end
+    best
+  end
+end
+
 
 # Get a nice, concise description of the file and line
 # of some caller within the stack.
@@ -109,7 +116,6 @@ def getCallerLocation(nSkip = 2)
   if nSkip >= 0 && nSkip < caller.size
     fi = caller[nSkip]
 
-    # ' path : line number : other '
     i = fi.index(':')
     j = nil
     if i
@@ -184,3 +190,19 @@ def readTextFile(path)
   contents
 end
 
+# Method that takes a code block as an argument to
+# achieve the same functionality as Java/C++'s
+#    do {
+#      ...
+#      ... possibly with 'break' to jump to the end ...
+#    } while (false);
+#
+def block
+  yield
+end
+
+# Exception class for objects in illegal states
+#
+class IllegalStateException < Exception
+end
+
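The new max_with_index extension and the do-while(false)-style block helper are general purpose; a short usage sketch (the arrays and values below are invented for illustration):

    # Returns the [object, index, value] triple with the highest value
    ["ab", "abcd", "a"].max_with_index { |s| s.length }   # => ["abcd", 1, 4]
    [].max_with_index { |s| s.length }                    # => nil

    # 'break' terminates the call to 'block', emulating a C-style
    # do { ... } while (false) with an early exit
    block do
      input = nil            # hypothetical value
      break if input.nil?    # jumps past the end of the block
      puts "only reached when input is present"
    end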
data/test/Example1.rb
ADDED
@@ -0,0 +1,50 @@
+require_relative '../lib/tokn/tokenizer'
+
+class Example1
+
+  include Tokn
+
+  def dataPath(f)
+    File.dirname(__FILE__)+"/data/"+f
+  end
+
+  setTestDir()
+
+  def initialize
+    @sampleText = readTextFile(dataPath("sampletext.txt"))
+  end
+
+  def makeTok
+    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+    Tokenizer.new(@dfa, @sampleText, "WS")
+  end
+
+  def go
+    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+
+    t = makeTok
+
+    while t.hasNext do
+
+      tk = t.peek
+
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          pr("  ...read BROP DO ID sequence...\n")
+          lst.each{ |x| pr("  %s\n",d(x))}
+          next
+        else
+          pr("  ...couldn't find sequence...\n")
+        end
+      end
+
+      tk = t.read
+      pr("%s\n",d(tk))
+
+    end
+  end
+
+end
+
+Example1.new.go
data/test/data/compileddfa.txt
ADDED
@@ -0,0 +1 @@
+{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
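The persisted DFA is a single JSON object. Reading the line above, "tokens" holds the 11 token names in definition order, and "states" holds 23 entries, each apparently a [final_state?, edge_list] pair whose edges are [flattened codepoint range pairs, target state], with negative values encoding token-id edge labels (see tokn_const.rb above). A stdlib-only inspection sketch; the field interpretation is inferred from this diff rather than from tokn's documentation:

    require 'json'

    dfa = JSON.parse(File.read('test/data/compileddfa.txt'))
    dfa['version']          # => 1.0
    dfa['tokens'].first(3)  # => ["WS", "DBL", "INT"]
    dfa['states'].size      # => 23

    final, edges = dfa['states'][14]
    final                   # => true: the lone accepting state
    edges                   # => []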
data/test/data/sampletext.txt
CHANGED
@@ -1,11 +1,16 @@
 // Example source file that can be tokenized
 
 speed = 42 // speed of object
-
 gravity = -9.80
 
+{ color = green }
+
 title = 'This is a string with \' an escaped delimiter'
 
 if gravity == 12 {
 do something
 }
+
+do something_else
+
+// End of 'sampletext.txt'
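These additions appear designed to exercise both branches of Example1.rb: tokenized, the line { color = green } opens with BROP but continues ID ASSIGN ID BRCL, so readSequenceIf('BROP DO ID BRCL') should fail there, while the "if gravity == 12 { do something }" block does contain the BROP DO ID BRCL sequence and should succeed (token names inferred from the sampletokens.txt script referenced throughout this diff).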
data/test/test.rb
CHANGED
@@ -3,16 +3,22 @@ require_relative '../lib/tokn/tools.rb'
 req('range_partition dfa dfa_builder tokenizer')
 
 
+
+# Get access to Tokn namespace
+
 def dataPath(f)
   File.dirname(__FILE__)+"/data/"+f
 end
 
+
 setTestDir()
 
 # Various unit tests for state machines, character range sets, etc.
 #
 class TestComponent < Test::Unit::TestCase
 
+  include Tokn, ToknInternal
+
   SKIPMOST = false # skip most of the tests?
 
   def add(lower, upper = nil)
@@ -163,7 +169,7 @@ class TestComponent < Test::Unit::TestCase
   end
 
   def prep
-    @cs =
+    @cs = CodeSet.new
   end
 
   def test_illegalRange
@@ -277,12 +283,12 @@ class TestComponent < Test::Unit::TestCase
 
 
   def newpar
-    @par =
+    @par = RangePartition.new
   end
 
   def addset(lower, upper = nil)
     upper ||= lower + 1
-    r =
+    r = CodeSet.new(lower,upper)
     @par.addSet(r)
   end
 
@@ -330,17 +336,17 @@ class TestComponent < Test::Unit::TestCase
   REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
 
   TOKEN_SCRIPT2 = <<'END'
-
-
-
-
+sep: \s
+tku: a(a|b)*
+tkv: b(aa|b*)
+tkw: bbb
 END
 
 
  def test_buildDFA
    return if SKIPMOST
 
-   x =
+   x = RegParse.new(REGEX_SCRIPT)
    s = x.startState
    x.endState.finalState = true
 
@@ -389,7 +395,7 @@ END
   @@sampleTokens = readTextFile(dataPath("sampletokens.txt"))
 
   def makeTok
-    dfa = DFA.
+    dfa = DFA.from_script(@@sampleTokens)
     Tokenizer.new(dfa, @@sampleText)
   end
 
@@ -443,7 +449,7 @@ END
     end
     assert(!File.exist?(destPath))
 
-    dfa = DFA.
+    dfa = DFA.from_script(tokScript, destPath)
     assert(File.exist?(destPath))
 
     tok = Tokenizer.new(dfa, testText)
@@ -453,7 +459,7 @@ END
 
   def prep2
     testText = @@sampleText
-    dfa = DFA.
+    dfa = DFA.from_file(withinTestDir("sampletokens_dfa.txt"))
     tok = Tokenizer.new(dfa, testText)
   end
 
@@ -516,4 +522,3 @@ END
   end
 end
 
-
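The one-line changes above update the DFA factory calls (the old sides are truncated in this rendering); together with Example1.rb they spell out the 0.0.6 construction API. A sketch of the entry points as exercised in this diff (token_script, dest_path, and compiled_path are placeholder names):

    dfa = DFA.from_script(token_script)             # compile from a script string
    dfa = DFA.from_script(token_script, dest_path)  # ...and persist it as a side effect
    dfa = DFA.from_script_file("tokens.txt")        # compile from a script file
    dfa = DFA.from_file(compiled_path)              # reload a persisted DFA

    tok = Tokenizer.new(dfa, text)        # tokenize some text
    tok = Tokenizer.new(dfa, text, "WS")  # per Example1.rb: skip the named token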
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tokn
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Jeff Sember
@@ -10,9 +10,9 @@ bindir: bin
 cert_chain: []
 date: 2013-03-07 00:00:00.000000000 Z
 dependencies: []
-description:
-  tokn compiles
-
+description: Given a script containing token descriptions (each a regular expression),
+  tokn compiles an automaton which it can then use to efficiently convert a text file
+  to a sequence of those tokens.
 email: jpsember@gmail.com
 executables:
 - tokncompile
@@ -34,9 +34,10 @@ files:
 - bin/tokncompile
 - bin/toknprocess
 - README.txt
+- test/Example1.rb
+- test/data/compileddfa.txt
 - test/data/sampletext.txt
 - test/data/sampletokens.txt
-- test/simple.rb
 - test/test.rb
 - test/testcmds
 - figures/sample_dfa.pdf
@@ -64,6 +65,6 @@ signing_key:
 specification_version: 4
 summary: Extracts tokens from source files
 test_files:
-- test/
+- test/Example1.rb
 - test/test.rb
 has_rdoc:
data/test/simple.rb
DELETED
@@ -1,33 +0,0 @@
-require 'test/unit'
-require_relative '../lib/tokn/tools.rb'
-req('tokenizer dfa')
-
-
-class Simple
-
-  def dataPath(f)
-    File.dirname(__FILE__)+"/data/"+f
-  end
-
-  setTestDir()
-
-  # Various unit tests for state machines, character range sets, etc.
-
-  def initialize
-    @sampleText = readTextFile(self.dataPath("sampletext.txt"))
-    # @sampleTokens = readTextFile(self.dataPath("sampletokens.txt"))
-  end
-
-  def makeTok
-    dfa = DFA.dfa_from_script_file(self.dataPath("sampletokens.txt"))
-    Tokenizer.new(dfa, @sampleText)
-  end
-
-  def go
-    makeTok
-  end
-end
-
-
-s = Simple.new
-s.go