tokn 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,13 @@
- # Module containing tokn-related constants and functions
+ # Namespace to encompass the portions of the Tokn gem
+ # accessible to end users
  #
  module Tokn
-
+ end
+
+ # Namespace to encompass the portions of the Tokn gem
+ # used only internally
+ #
+ module ToknInternal
  # Token id if text didn't match any tokens in the DFA
  UNKNOWN_TOKEN = -1

@@ -16,14 +22,13 @@ module Tokn

  # Convert a token id (>=0) to an edge label value ( < 0)
  #
- def tokenIdToEdgeLabel(tokenId)
+ def self.tokenIdToEdgeLabel(tokenId)
  EPSILON-1-tokenId
  end

  # Convert an edge label value ( < 0) to a token id (>=0)
  #
- def edgeLabelToTokenId(edgeLabel)
+ def self.edgeLabelToTokenId(edgeLabel)
  EPSILON-1-edgeLabel
  end
-
- end
+ end
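
Note: the constants file now splits the public Tokn namespace from the internal ToknInternal namespace, and the id/label converters become module functions. A minimal illustrative sketch of the round trip (not code from the package), assuming the EPSILON edge-label constant defined earlier in this file:

    # Illustrative only; EPSILON is assumed from ToknInternal.
    id    = 3
    label = ToknInternal.tokenIdToEdgeLabel(id)     # EPSILON - 1 - id, always < 0
    back  = ToknInternal.edgeLabelToTokenId(label)  # recovers the original id
    raise "mismatch" unless back == id

Both conversions compute EPSILON - 1 - x, so applying one and then the other returns the original value.
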
data/lib/tokn/tools.rb CHANGED
@@ -29,11 +29,7 @@ end
  # should be considered a debug-only feature
  #
  def d(arg)
- if arg.nil?
- "<nil>"
- else
- arg.inspect
- end
+ arg.nil? ? "<nil>" : arg.inspect
  end

  # Assert that a value is true. Should be considered a
@@ -43,11 +39,7 @@ end
  def myAssert(cond, *msg)
  oneTimeAlert("warning",0,"Checking assertion")
  if not cond
- if msg.size == 0
- str = "assertion error"
- else
- str = sprintf(*msg)
- end
+ str = (msg.size == 0) ? "assertion error" : sprintf(*msg)
  raise Exception, str
  end
  end
@@ -56,10 +48,7 @@ end
  # Set test directory. If nil, sets to home directory + "__test__"
  #
  def setTestDir(d = nil)
- if !d
- d = File.join(Dir.home,"__test__")
- end
- $testDir = d
+ $testDir = d || File.join(Dir.home,"__test__")
  end

  # Get a path within the test directory;
@@ -75,11 +64,7 @@ def withinTestDir(relPath = nil)
  if !File.directory?($testDir)
  Dir::mkdir($testDir)
  end
- if relPath
- File.join($testDir,relPath)
- else
- $testDir
- end
+ relPath ? File.join($testDir,relPath) : $testDir
  end

  # Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
@@ -95,6 +80,28 @@ def dotToPDF(dotFile, name = "")
  system("dot -Tpdf "+dotPath+" -o "+destName)
  end

+ # Extensions to the Enumerable module
+ #
+ module Enumerable
+ # Calculate a value for each item, and return the item with the
+ # highest value, its index, and the value.
+ # @yieldparam function to calculate value of an object, given that object as a parameter
+ # @return the triple [object, index, value] reflecting the maximum value, or
+ # nil if there were no items
+ def max_with_index
+
+ best = nil
+
+ each_with_index do |obj,ind|
+ sc = yield(obj)
+ if !best || best[2] < sc
+ best = [obj,ind,sc]
+ end
+ end
+ best
+ end
+ end
+

  # Get a nice, concise description of the file and line
  # of some caller within the stack.
@@ -109,7 +116,6 @@ def getCallerLocation(nSkip = 2)
  if nSkip >= 0 && nSkip < caller.size
  fi = caller[nSkip]

- # ' path : line number : other '
  i = fi.index(':')
  j = nil
  if i
@@ -184,3 +190,19 @@ def readTextFile(path)
  contents
  end

+ # Method that takes a code block as an argument to
+ # achieve the same functionality as Java/C++'s
+ # do {
+ # ...
+ # ... possibly with 'break' to jump to the end ...
+ # } while (false);
+ #
+ def block
+ yield
+ end
+
+ # Exception class for objects in illegal states
+ #
+ class IllegalStateException < Exception
+ end
+
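
Note: the new tools.rb helpers can be exercised roughly as follows. A minimal illustrative sketch (not code from the package); the values are made up:

    # max_with_index reopens Enumerable, so arrays pick it up.
    best = ["fig", "banana", "kiwi"].max_with_index { |w| w.length }
    # => ["banana", 1, 6]   (item, index, value)

    # block { } gives a do-while(false) style scope; 'break' jumps past the end.
    status = block do
      break "skipped" if best.nil?
      "done"
    end

IllegalStateException is a plain Exception subclass for signalling that an object is in an illegal state.
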
data/test/Example1.rb ADDED
@@ -0,0 +1,50 @@
+ require_relative '../lib/tokn/tokenizer'
+
+ class Example1
+
+ include Tokn
+
+ def dataPath(f)
+ File.dirname(__FILE__)+"/data/"+f
+ end
+
+ setTestDir()
+
+ def initialize
+ @sampleText = readTextFile(dataPath("sampletext.txt"))
+ end
+
+ def makeTok
+ @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+ Tokenizer.new(@dfa, @sampleText, "WS")
+ end
+
+ def go
+ puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+
+ t = makeTok
+
+ while t.hasNext do
+
+ tk = t.peek
+
+ if t.nameOf(tk) == 'BROP'
+ lst = t.readSequenceIf('BROP DO ID BRCL')
+ if lst
+ pr(" ...read BROP DO ID sequence...\n")
+ lst.each{ |x| pr(" %s\n",d(x))}
+ next
+ else
+ pr(" ...couldn't find sequence...\n")
+ end
+ end
+
+ tk = t.read
+ pr("%s\n",d(tk))
+
+ end
+ end
+
+ end
+
+ Example1.new.go
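
Note: Example1.rb is a standalone script; it requires the tokenizer via a relative path and invokes Example1.new.go at load time, so it can presumably be run directly (e.g. ruby test/Example1.rb from the unpacked gem).
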
data/test/data/compileddfa.txt ADDED
@@ -0,0 +1 @@
+ {"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
data/test/data/sampletext.txt CHANGED
@@ -1,11 +1,16 @@
  // Example source file that can be tokenized

  speed = 42 // speed of object
-
  gravity = -9.80

+ { color = green }
+
  title = 'This is a string with \' an escaped delimiter'

  if gravity == 12 {
  do something
  }
+
+ do something_else
+
+ // End of 'sampletext.txt'
data/test/test.rb CHANGED
@@ -3,16 +3,22 @@ require_relative '../lib/tokn/tools.rb'
  req('range_partition dfa dfa_builder tokenizer')


+
+ # Get access to Tokn namespace
+
  def dataPath(f)
  File.dirname(__FILE__)+"/data/"+f
  end

+
  setTestDir()

  # Various unit tests for state machines, character range sets, etc.
  #
  class TestComponent < Test::Unit::TestCase

+ include Tokn, ToknInternal
+
  SKIPMOST = false # skip most of the tests?

  def add(lower, upper = nil)
@@ -163,7 +169,7 @@ class TestComponent < Test::Unit::TestCase
  end

  def prep
- @cs = CodeSet.new
+ @cs = CodeSet.new
  end

  def test_illegalRange
@@ -277,12 +283,12 @@ class TestComponent < Test::Unit::TestCase


  def newpar
- @par = RangePartition.new
+ @par = RangePartition.new
  end

  def addset(lower, upper = nil)
  upper ||= lower + 1
- r = CodeSet.new(lower,upper)
+ r = CodeSet.new(lower,upper)
  @par.addSet(r)
  end

@@ -330,17 +336,17 @@ class TestComponent < Test::Unit::TestCase
  REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"

  TOKEN_SCRIPT2 = <<'END'
- sep: \s
- tku: a(a|b)*
- tkv: b(aa|b*)
- tkw: bbb
+ sep: \s
+ tku: a(a|b)*
+ tkv: b(aa|b*)
+ tkw: bbb
  END


  def test_buildDFA
  return if SKIPMOST

- x = RegParse.new(REGEX_SCRIPT)
+ x = RegParse.new(REGEX_SCRIPT)
  s = x.startState
  x.endState.finalState = true

@@ -389,7 +395,7 @@ END
  @@sampleTokens = readTextFile(dataPath("sampletokens.txt"))

  def makeTok
- dfa = DFA.dfa_from_script(@@sampleTokens)
+ dfa = DFA.from_script(@@sampleTokens)
  Tokenizer.new(dfa, @@sampleText)
  end

@@ -443,7 +449,7 @@ END
  end
  assert(!File.exist?(destPath))

- dfa = DFA.dfa_from_script(tokScript, destPath)
+ dfa = DFA.from_script(tokScript, destPath)
  assert(File.exist?(destPath))

  tok = Tokenizer.new(dfa, testText)
@@ -453,7 +459,7 @@ END

  def prep2
  testText = @@sampleText
- dfa = DFA.dfa_from_file(withinTestDir("sampletokens_dfa.txt"))
+ dfa = DFA.from_file(withinTestDir("sampletokens_dfa.txt"))
  tok = Tokenizer.new(dfa, testText)
  end

@@ -516,4 +522,3 @@ END
  end
  end

-
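
Note: the test changes above track renamed DFA entry points. A minimal illustrative sketch of the calls as they appear in the updated tests and example (variable names are made up):

    dfa = DFA.from_script(token_script_text)         # compile from a token-script string
    dfa = DFA.from_script_file("sampletokens.txt")   # compile from a token-script file
    dfa = DFA.from_file("sampletokens_dfa.txt")      # load a previously compiled DFA
    tok = Tokenizer.new(dfa, source_text, "WS")      # optionally skip 'WS' tokens
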
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: tokn
  version: !ruby/object:Gem::Version
- version: 0.0.5
+ version: 0.0.6
  platform: ruby
  authors:
  - Jeff Sember
@@ -10,9 +10,9 @@ bindir: bin
  cert_chain: []
  date: 2013-03-07 00:00:00.000000000 Z
  dependencies: []
- description: 'Given a script containing token descriptions (each a regular expression),
- tokn compiles a DFA which it then uses to efficiently extract a sequence of tokens
- from source files. '
+ description: Given a script containing token descriptions (each a regular expression),
+ tokn compiles an automaton which it can then use to efficiently convert a text file
+ to a sequence of those tokens.
  email: jpsember@gmail.com
  executables:
  - tokncompile
@@ -34,9 +34,10 @@ files:
  - bin/tokncompile
  - bin/toknprocess
  - README.txt
+ - test/Example1.rb
+ - test/data/compileddfa.txt
  - test/data/sampletext.txt
  - test/data/sampletokens.txt
- - test/simple.rb
  - test/test.rb
  - test/testcmds
  - figures/sample_dfa.pdf
@@ -64,6 +65,6 @@ signing_key:
  specification_version: 4
  summary: Extracts tokens from source files
  test_files:
- - test/simple.rb
+ - test/Example1.rb
  - test/test.rb
  has_rdoc:
data/test/simple.rb DELETED
@@ -1,33 +0,0 @@
- require 'test/unit'
- require_relative '../lib/tokn/tools.rb'
- req('tokenizer dfa')
-
-
- class Simple
-
- def dataPath(f)
- File.dirname(__FILE__)+"/data/"+f
- end
-
- setTestDir()
-
- # Various unit tests for state machines, character range sets, etc.
-
- def initialize
- @sampleText = readTextFile(self.dataPath("sampletext.txt"))
- # @sampleTokens = readTextFile(self.dataPath("sampletokens.txt"))
- end
-
- def makeTok
- dfa = DFA.dfa_from_script_file(self.dataPath("sampletokens.txt"))
- Tokenizer.new(dfa, @sampleText)
- end
-
- def go
- makeTok
- end
- end
-
-
- s = Simple.new
- s.go