tokn 0.0.5 → 0.0.6

@@ -1,7 +1,13 @@
-# Module containing tokn-related constants and functions
+# Namespace to encompass the portions of the Tokn gem
+# accessible to end users
 #
 module Tokn
-
+end
+
+# Namespace to encompass the portions of the Tokn gem
+# used only internally
+#
+module ToknInternal
   # Token id if text didn't match any tokens in the DFA
   UNKNOWN_TOKEN = -1
 
@@ -16,14 +22,13 @@ module Tokn
 
   # Convert a token id (>=0) to an edge label value ( < 0)
   #
-  def tokenIdToEdgeLabel(tokenId)
+  def self.tokenIdToEdgeLabel(tokenId)
     EPSILON-1-tokenId
   end
 
   # Convert an edge label value ( < 0) to a token id (>=0)
  #
-  def edgeLabelToTokenId(edgeLabel)
+  def self.edgeLabelToTokenId(edgeLabel)
     EPSILON-1-edgeLabel
   end
-
-end
+end
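This release splits the old `Tokn` module into a public `Tokn` namespace and an internal `ToknInternal` one, and makes the two conversion helpers module methods (`def self. ...`) so callers can invoke them without mixing the module in. Since both helpers compute `EPSILON - 1 - x`, the mapping is its own inverse; a minimal round-trip sketch (the value of `EPSILON` is defined earlier in this file, outside the hunks shown):

    label = ToknInternal.tokenIdToEdgeLabel(3)    # EPSILON - 1 - 3, some value < 0
    id    = ToknInternal.edgeLabelToTokenId(label)
    id == 3                                       # => true; the same formula inverts itself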
data/lib/tokn/tools.rb CHANGED
@@ -29,11 +29,7 @@ end
 # should be considered a debug-only feature
 #
 def d(arg)
-  if arg.nil?
-    "<nil>"
-  else
-    arg.inspect
-  end
+  arg.nil? ? "<nil>" : arg.inspect
 end
 
 # Assert that a value is true. Should be considered a
@@ -43,11 +39,7 @@ end
 def myAssert(cond, *msg)
   oneTimeAlert("warning",0,"Checking assertion")
   if not cond
-    if msg.size == 0
-      str = "assertion error"
-    else
-      str = sprintf(*msg)
-    end
+    str = (msg.size == 0) ? "assertion error" : sprintf(*msg)
     raise Exception, str
   end
 end
@@ -56,10 +48,7 @@ end
 # Set test directory. If nil, sets to home directory + "__test__"
 #
 def setTestDir(d = nil)
-  if !d
-    d = File.join(Dir.home,"__test__")
-  end
-  $testDir = d
+  $testDir = d || File.join(Dir.home,"__test__")
 end
 
 # Get a path within the test directory;
@@ -75,11 +64,7 @@ def withinTestDir(relPath = nil)
   if !File.directory?($testDir)
     Dir::mkdir($testDir)
   end
-  if relPath
-    File.join($testDir,relPath)
-  else
-    $testDir
-  end
+  relPath ? File.join($testDir,relPath) : $testDir
 end
 
 # Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
@@ -95,6 +80,28 @@ def dotToPDF(dotFile, name = "")
   system("dot -Tpdf "+dotPath+" -o "+destName)
 end
 
+# Extensions to the Enumerable module
+#
+module Enumerable
+  # Calculate a value for each item, and return the item with the
+  # highest value, its index, and the value.
+  # @yieldparam function to calculate value of an object, given that object as a parameter
+  # @return the triple [object, index, value] reflecting the maximum value, or
+  #   nil if there were no items
+  def max_with_index
+
+    best = nil
+
+    each_with_index do |obj,ind|
+      sc = yield(obj)
+      if !best || best[2] < sc
+        best = [obj,ind,sc]
+      end
+    end
+    best
+  end
+end
+
 
 # Get a nice, concise description of the file and line
 # of some caller within the stack.
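The new `Enumerable#max_with_index` returns the maximizing item together with its index and its computed value, which the built-in `max_by` does not expose. A small usage sketch with made-up data:

    obj, ind, val = ["fig", "banana", "kiwi"].max_with_index { |w| w.length }
    # obj == "banana", ind == 1, val == 6
    [].max_with_index { |w| w.length }   # => nil when there are no items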
@@ -109,7 +116,6 @@ def getCallerLocation(nSkip = 2)
   if nSkip >= 0 && nSkip < caller.size
     fi = caller[nSkip]
 
-    # ' path : line number : other '
     i = fi.index(':')
     j = nil
     if i
@@ -184,3 +190,19 @@ def readTextFile(path)
   contents
 end
 
+# Method that takes a code block as an argument to
+# achieve the same functionality as Java/C++'s
+#   do {
+#     ...
+#     ... possibly with 'break' to jump to the end ...
+#   } while (false);
+#
+def block
+  yield
+end
+
+# Exception class for objects in illegal states
+#
+class IllegalStateException < Exception
+end
+
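The `block` helper relies on standard Ruby semantics: a `break` inside a block terminates the method the block was passed to, here `block` itself, which reproduces the early exit of C-style `do { ... } while (false)`. A sketch with hypothetical steps:

    block do
      value = lookup_value      # hypothetical helper
      break if value.nil?      # jumps past the end of the block
      process(value)           # hypothetical helper, skipped when value is nil
    end

Note also that `IllegalStateException` derives from `Exception` rather than `StandardError`, so a bare `rescue` will not catch it.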
data/test/Example1.rb ADDED
@@ -0,0 +1,50 @@
+require_relative '../lib/tokn/tokenizer'
+
+class Example1
+
+  include Tokn
+
+  def dataPath(f)
+    File.dirname(__FILE__)+"/data/"+f
+  end
+
+  setTestDir()
+
+  def initialize
+    @sampleText = readTextFile(dataPath("sampletext.txt"))
+  end
+
+  def makeTok
+    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+    Tokenizer.new(@dfa, @sampleText, "WS")
+  end
+
+  def go
+    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+
+    t = makeTok
+
+    while t.hasNext do
+
+      tk = t.peek
+
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          pr(" ...read BROP DO ID sequence...\n")
+          lst.each{ |x| pr("  %s\n",d(x))}
+          next
+        else
+          pr(" ...couldn't find sequence...\n")
+        end
+      end
+
+      tk = t.read
+      pr("%s\n",d(tk))
+
+    end
+  end
+
+end
+
+Example1.new.go
data/test/data/compileddfa.txt ADDED
@@ -0,0 +1 @@
+{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
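The added compileddfa.txt is the JSON form of a compiled DFA: `tokens` appears to list token names by id, and each entry of `states` pairs an accepting flag with that state's outgoing edges (a code-range array plus a destination state index). Based on the `DFA.from_script` and `DFA.from_file` calls in test.rb below, a sketch of persisting and reloading a DFA (paths hypothetical):

    script = readTextFile("test/data/sampletokens.txt")
    dfa  = DFA.from_script(script, "compileddfa.txt")   # second argument persists the compiled JSON
    dfa2 = DFA.from_file("compileddfa.txt")             # reload without recompiling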
data/test/data/sampletext.txt CHANGED
@@ -1,11 +1,16 @@
 // Example source file that can be tokenized
 
 speed = 42 // speed of object
-
 gravity = -9.80
 
+{ color = green }
+
 title = 'This is a string with \' an escaped delimiter'
 
 if gravity == 12 {
   do something
 }
+
+do something_else
+
+// End of 'sampletext.txt'
data/test/test.rb CHANGED
@@ -3,16 +3,22 @@ require_relative '../lib/tokn/tools.rb'
 req('range_partition dfa dfa_builder tokenizer')
 
 
+
+# Get access to Tokn namespace
+
 def dataPath(f)
   File.dirname(__FILE__)+"/data/"+f
 end
 
+
 setTestDir()
 
 # Various unit tests for state machines, character range sets, etc.
 #
 class TestComponent < Test::Unit::TestCase
 
+  include Tokn, ToknInternal
+
   SKIPMOST = false # skip most of the tests?
 
   def add(lower, upper = nil)
@@ -163,7 +169,7 @@ class TestComponent < Test::Unit::TestCase
   end
 
   def prep
-    @cs = CodeSet.new
+    @cs = CodeSet.new
   end
 
   def test_illegalRange
@@ -277,12 +283,12 @@ class TestComponent < Test::Unit::TestCase
 
 
   def newpar
-    @par = RangePartition.new
+    @par = RangePartition.new
   end
 
   def addset(lower, upper = nil)
     upper ||= lower + 1
-    r = CodeSet.new(lower,upper)
+    r = CodeSet.new(lower,upper)
     @par.addSet(r)
   end
 
@@ -330,17 +336,17 @@ class TestComponent < Test::Unit::TestCase
   REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
 
   TOKEN_SCRIPT2 = <<'END'
-  sep: \s
-  tku: a(a|b)*
-  tkv: b(aa|b*)
-  tkw: bbb
+  sep: \s
+  tku: a(a|b)*
+  tkv: b(aa|b*)
+  tkw: bbb
 END
 
 
   def test_buildDFA
     return if SKIPMOST
 
-    x = RegParse.new(REGEX_SCRIPT)
+    x = RegParse.new(REGEX_SCRIPT)
     s = x.startState
     x.endState.finalState = true
 
@@ -389,7 +395,7 @@ END
   @@sampleTokens = readTextFile(dataPath("sampletokens.txt"))
 
   def makeTok
-    dfa = DFA.dfa_from_script(@@sampleTokens)
+    dfa = DFA.from_script(@@sampleTokens)
     Tokenizer.new(dfa, @@sampleText)
   end
 
@@ -443,7 +449,7 @@ END
     end
     assert(!File.exist?(destPath))
 
-    dfa = DFA.dfa_from_script(tokScript, destPath)
+    dfa = DFA.from_script(tokScript, destPath)
     assert(File.exist?(destPath))
 
     tok = Tokenizer.new(dfa, testText)
@@ -453,7 +459,7 @@ END
 
   def prep2
     testText = @@sampleText
-    dfa = DFA.dfa_from_file(withinTestDir("sampletokens_dfa.txt"))
+    dfa = DFA.from_file(withinTestDir("sampletokens_dfa.txt"))
     tok = Tokenizer.new(dfa, testText)
   end
 
@@ -516,4 +522,3 @@ END
   end
 end
 
-
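The test changes above also track the renaming of the DFA factory methods in this release. Collected from the diffs (old names in comments, file arguments hypothetical):

    dfa = DFA.from_script(script_text)         # was DFA.dfa_from_script
    dfa = DFA.from_script_file("tokens.txt")   # was DFA.dfa_from_script_file (see Example1.rb)
    dfa = DFA.from_file("compiled_dfa.txt")    # was DFA.dfa_from_file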
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tokn
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Jeff Sember
@@ -10,9 +10,9 @@ bindir: bin
 cert_chain: []
 date: 2013-03-07 00:00:00.000000000 Z
 dependencies: []
-description: 'Given a script containing token descriptions (each a regular expression),
-  tokn compiles a DFA which it then uses to efficiently extract a sequence of tokens
-  from source files. '
+description: Given a script containing token descriptions (each a regular expression),
+  tokn compiles an automaton which it can then use to efficiently convert a text file
+  to a sequence of those tokens.
 email: jpsember@gmail.com
 executables:
 - tokncompile
@@ -34,9 +34,10 @@ files:
 - bin/tokncompile
 - bin/toknprocess
 - README.txt
+- test/Example1.rb
+- test/data/compileddfa.txt
 - test/data/sampletext.txt
 - test/data/sampletokens.txt
-- test/simple.rb
 - test/test.rb
 - test/testcmds
 - figures/sample_dfa.pdf
@@ -64,6 +65,6 @@ signing_key:
 specification_version: 4
 summary: Extracts tokens from source files
 test_files:
-- test/simple.rb
+- test/Example1.rb
 - test/test.rb
 has_rdoc:
data/test/simple.rb DELETED
@@ -1,33 +0,0 @@
-require 'test/unit'
-require_relative '../lib/tokn/tools.rb'
-req('tokenizer dfa')
-
-
-class Simple
-
-  def dataPath(f)
-    File.dirname(__FILE__)+"/data/"+f
-  end
-
-  setTestDir()
-
-  # Various unit tests for state machines, character range sets, etc.
-
-  def initialize
-    @sampleText = readTextFile(self.dataPath("sampletext.txt"))
-    # @sampleTokens = readTextFile(self.dataPath("sampletokens.txt"))
-  end
-
-  def makeTok
-    dfa = DFA.dfa_from_script_file(self.dataPath("sampletokens.txt"))
-    Tokenizer.new(dfa, @sampleText)
-  end
-
-  def go
-    makeTok
-  end
-end
-
-
-s = Simple.new
-s.go