tokn 0.0.9 → 0.1.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 804ed12fc717a528758a7f1bc2ec03e92e829310
-  data.tar.gz: 2d3df30d3525d0c0ef3d5b7aa1f16839db6fd786
+  metadata.gz: 84933fe2966d19908c447c84cbccb9179c4e351d
+  data.tar.gz: d1a6fae299cdd6c9b57961bfde7c879e1a786a48
 SHA512:
-  metadata.gz: 80fb1504a1f42d95ebe2ac6b97d16f31d9b14e2f1b3d2e08ab45780584f8b0763a2f1a4a2da09614be68c0b67e1041b7a3d1a5db57a66705d9dbf57454265987
-  data.tar.gz: 9f6a5c6304471df7a2b8f2cbb6b584c6f1bdcf425d94e0904bec630e45163f60a42eca8b4865ed1b0b87f7681edaf83eeac6181ce752863f673e4185f1666d48
+  metadata.gz: 022689701816eb3fb37690579b275194cbafc53d31591b7fb87d93a6cacafc50318dd964d28697ed1181ceb288d700214191cc41d6362cf4221e110e7c885531
+  data.tar.gz: 96ee32c79d3e12ba8b01cd27fdfb9ff0bfc6ff518972358dc2fbac04f4c8842fccd6f8722c994daba299327718915074f853cc3c9125c0e22dffe3ef19fd189d
CHANGELOG.txt CHANGED
@@ -11,3 +11,9 @@
 
 * Version 0.0.9
 * Fixed problem with README file
+
+2013-04-08
+* Version 0.1.0
+* Cleaned up test directory
+
+
@@ -1,11 +1,10 @@
-# @markup markdown
-
 tokn
 =======
-A ruby gem for constructing DFAs and using them to tokenize text files.
+Tokn is a ruby gem that generates automatons from regular expressions to extract tokens from text files.
 
-Written and (c) by Jeff Sember, March 2013.
+Written by Jeff Sember, March 2013.
 
+[Source code documentation can be found here.](http://rubydoc.info/gems/tokn/frames)
 
 
 Description of the problem
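
A minimal usage sketch of the gem this README describes, pieced together from the APIs exercised by the new test file later in this changeset (DFA.from_script, Tokenizer#hasNext, Tokenizer#read, Tokenizer#nameOf); the require path and the token script are illustrative assumptions, not taken from the gem itself:

# Hedged sketch: assumes a gem-style require path; the tests in this
# changeset use require_relative to reach the sources instead.
require 'tokn/tokenizer'
include Tokn

# Illustrative token script (same "name: regex" format as TOKEN_SCRIPT2
# in the new test file below); these token names are hypothetical.
script = <<'END'
sep: \s
int: \-?[0-9]+
id: [_a-zA-Z][_a-zA-Z0-9]*
END

dfa = DFA.from_script(script)
tok = Tokenizer.new(dfa, "speed 42", "sep")  # third arg: token name to filter out

while tok.hasNext
  t = tok.read
  puts tok.nameOf(t)   # expected: "id", then "int", with "sep" filtered
end
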
@@ -79,7 +79,7 @@ module ToknInternal
   # Generate a .dot file, and from that, a PDF, for debug purposes
   #
-  def generatePDF(name = "partition")
+  def generatePDF(test_dir = nil, name = "partition")
     if !@prepared
       raise IllegalStateException
     end
@@ -103,7 +103,7 @@ module ToknInternal
     g += "\n}\n"
     g.gsub!( /'/, '"' )
 
-    dotToPDF(g,name)
+    dotToPDF(g,name, test_dir)
 
   end
 
@@ -102,11 +102,11 @@ module ToknInternal
   # Generate a PDF of the state machine;
   # Makes a system call to the dot utility to convert a .dot file to a .pdf
   #
-  def generatePDF(title = "nfa")
+  def generatePDF(dir = nil, title = "nfa")
     stateList = {}
 
     startState = self
-    genAux(stateList, startState)
+    genAux( stateList, startState)
 
     g = ""
     g += "digraph "+title+" {\n"
@@ -135,7 +135,7 @@ module ToknInternal
     g += "\n}\n"
     g.gsub!( /'/, '"' )
 
-    dotToPDF(g,title)
+    dotToPDF(g,title,dir)
   end
 
 
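
Both generatePDF changes follow the same pattern: the caller now supplies the output directory explicitly instead of relying on the global test directory that setTestDir used to configure (setTestDir is commented out in the tools hunk below). A sketch of the new call pattern, using names that appear verbatim in the new test file (RegParse, startState, endState.finalState):

# Hedged sketch: assumes the tokn sources are already loaded, e.g. via
# req('range_partition dfa dfa_builder ...') as in the test file below.
out_dir = "_output_"
Dir.mkdir(out_dir) unless File.directory?(out_dir)

x = RegParse.new("a(a|b)*")
x.endState.finalState = true

# Writes _output_/__mygraph__nfa.pdf via dotToPDF and the dot utility
x.startState.generatePDF(out_dir, "nfa")
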
@@ -1,4 +1,5 @@
 require 'set'
+require 'fileutils'
 
 # Various utility and debug convenience functions.
 #
@@ -45,38 +46,41 @@ def myAssert(cond, *msg)
 end
 
 
-# Set test directory. If nil, sets to home directory + "__test__"
-#
-def setTestDir(d = nil)
-  $testDir = d || File.join(Dir.home,"__test__")
-end
+## Set test directory. If nil, sets to home directory + "__test__"
+##
+#def setTestDir(d = nil)
+#  $testDir = d || File.join(Dir.home,"__test__")
+#end
 
-# Get a path within the test directory;
-# create test directory if it doesn't exist.
-#
-# relPath : if nil, returns the test directory; else
-#   returns the test directory joined to this one
-#
-def withinTestDir(relPath = nil)
-  if !$testDir
-    raise IllegalStateException, "No test directory has been defined"
-  end
-  if !File.directory?($testDir)
-    Dir::mkdir($testDir)
-  end
-  relPath ? File.join($testDir,relPath) : $testDir
-end
+## Get a path within the test directory;
+## create test directory if it doesn't exist.
+##
+## relPath : if nil, returns the test directory; else
+##   returns the test directory joined to this one
+##
+#def withinTestDir(relPath = nil)
+#  if !$testDir
+#    raise IllegalStateException, "No test directory has been defined"
+#  end
+#  if !File.directory?($testDir)
+#    Dir::mkdir($testDir)
+#  end
+#  relPath ? File.join($testDir,relPath) : $testDir
+#end
 
 # Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
 # in the test directory.
 #
 # It does this by making a system call to the 'dot' utility.
 #
-def dotToPDF(dotFile, name = "")
+def dotToPDF(dotFile, name = "", test_dir = nil)
   gr = dotFile
-  dotPath = withinTestDir(".__mygraph__.dot")
+
+  raise ArgumentError if !test_dir
+
+  dotPath = File.join(test_dir,".__mygraph__.dot")
   writeTextFile(dotPath,gr)
-  destName = withinTestDir( "__mygraph__"+name+".pdf")
+  destName = File.join(test_dir,"__mygraph__"+name+".pdf")
   system("dot -Tpdf "+dotPath+" -o "+destName)
 end
 
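
One easy-to-miss consequence of this hunk (which appears to be lib/tokn/tools.rb, judging by the require in the new test file): test_dir keeps a nil default, but the new guard makes it effectively mandatory. A minimal sketch, assuming tools.rb is loaded and the output directory already exists:

graph = "digraph g { a -> b; }"

dotToPDF(graph, "example", "_output_")  # writes _output_/__mygraph__example.pdf
# dotToPDF(graph, "example")            # now raises ArgumentError
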
@@ -190,6 +194,16 @@ def readTextFile(path)
   contents
 end
 
+# Delete a file or directory, if it exists.
+# Caution! If directory, deletes all files and subdirectories.
+def remove_file_or_dir(pth)
+  if File.directory?(pth)
+    FileUtils.remove_dir(pth)
+  elsif File.file?(pth)
+    FileUtils.remove_file(pth)
+  end
+end
+
 # Method that takes a code block as an argument to
 # achieve the same functionality as Java/C++'s
 # do {
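
This helper is what the new test suite's suite_teardown uses to discard its output directory. A usage sketch (the directory and file names are hypothetical):

require 'fileutils'

scratch = "_scratch_"
FileUtils.mkdir_p(scratch)
File.write(File.join(scratch, "tmp.txt"), "scratch data")

remove_file_or_dir(scratch)          # removes the directory and its contents
remove_file_or_dir("no_such_path")   # no-op: neither branch matches
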
@@ -206,3 +220,143 @@ end
 class IllegalStateException < Exception
 end
 
+
+# Convenience method to detect if a script is being run
+# e.g. as a 'main' method (for debug purposes only).
+# If so, it changes the current directory to the
+# directory containing the script (if such a directory exists).
+#
+# @param file pass __FILE__ in here
+# @return true if so
+#
+def main?(file)
+
+  scr = $0
+
+  # The test/unit framework seems to be adding a suffix ": xxx#xxx.."
+  # to the .rb filename, so adjust in this case
+  i = scr.index(".rb: ")
+  if i
+    scr = scr[0...i+3]
+  end
+
+  if (ret = (file == scr))
+    dr = File.dirname(file)
+    if File.directory?(dr)
+      Dir.chdir(dr)
+    end
+  end
+  ret
+end
+
+if defined? Test::Unit
+
+  # A simple extension to Ruby's Test::Unit class that provides
+  # suite-level setup/teardown methods.
+  #
+  # If test suite functionality is desired within a script,
+  # then require 'test/unit' before requiring 'tools.rb'.
+  # This will cause the following class, MyTestSuite, to be defined.
+  #
+  # The user's test script can define subclasses of this,
+  # and declare test methods with the name 'test_xxxx', where
+  # xxxx is lexicographically between 01 and zz.
+  #
+  # There are two levels of setup/teardown called : suite level, and
+  # method level. For example, if the user's test class performs two tests:
+  #
+  #  def test_b ... end
+  #  def test_c ... end
+  #
+  # Then the test framework will make these calls:
+  #
+  #   suite_setup
+  #
+  #   method_setup
+  #   test_b
+  #   method_teardown
+  #
+  #   method_setup
+  #   test_c
+  #   method_teardown
+  #
+  #   suite_teardown
+  #
+  # Notes
+  # -----
+  # 1) The usual setup / teardown methods should NOT be overridden; instead,
+  #    use the method_xxx alternatives.
+  #
+  # 2) The base class implementations of method_/suite_xxx do nothing.
+  #
+  # 3) The number of test cases reported may be higher than you expect, since
+  #    there are additional test methods defined by the TestSuite class to
+  #    implement the suite setup / teardown functionality.
+  #
+  # 4) Avoid naming test methods that fall outside of test_01 ... test_zz.
+  #
+  class MyTestSuite < Test::Unit::TestCase
+
+    # This is named to be the FIRST test called. It
+    # will do suite-level setup, and nothing else.
+    def test_00_setup
+      @@suiteSetup = true
+      suite_setup()
+    end
+
+    # This is named to be the LAST test called. It
+    # will do suite-level teardown, and nothing else.
+    def test_zzzzzz_teardown
+      suite_teardown()
+      @@suiteSetup = false
+    end
+
+    # True if called within suite-level setup/teardown window
+    def _suite_active?
+      !(@__name__ == "test_00_setup" || @__name__ == "test_zzzzzz_teardown")
+    end
+
+    def setup
+      if _suite_active?
+        # If only a specific test was requested, the
+        # suite setup may not have run... if not, do it now.
+        if !defined? @@suiteSetup
+          suite_setup
+        end
+        return
+      end
+      method_setup
+    end
+
+    def out_dir
+      "_output_"
+    end
+
+    def out_path(f)
+      File.join(out_dir,f)
+    end
+
+    def teardown
+      if _suite_active?
+        if !defined? @@suiteSetup
+          suite_teardown
+        end
+        return
+      end
+      method_teardown
+    end
+
+    def suite_setup
+    end
+
+    def suite_teardown
+    end
+
+    def method_setup
+    end
+
+    def method_teardown
+    end
+  end
+end
+
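
A sketch of how main? and MyTestSuite combine in practice, modeled on the new test_tokn.rb below; the subclass and test names here are hypothetical:

require 'test/unit'
require_relative 'tools.rb'   # assumed relative path to the file patched above

class MySuite < MyTestSuite

  def suite_setup
    # Runs once, before the first test; main? also switches the working
    # directory to this script's directory when run via 'ruby my_suite.rb'.
    main?(__FILE__)
    Dir.mkdir(out_dir) unless File.directory?(out_dir)
  end

  def suite_teardown
    remove_file_or_dir(out_dir)   # runs once, after the last test
  end

  def method_setup; end      # before each individual test
  def method_teardown; end   # after each individual test

  def test_10_output_dir_exists
    assert(File.directory?(out_dir))
  end
end
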
@@ -0,0 +1,81 @@
+require_relative '../lib/tokn/tokenizer'
+
+class Example1
+
+  include Tokn
+
+  def dataPath(f)
+    File.dirname(__FILE__)+"/data/"+f
+  end
+
+  setTestDir()
+
+  def initialize
+    @sampleText = readTextFile(dataPath("sampletext.txt"))
+  end
+
+  def makeTok
+    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+    Tokenizer.new(@dfa, @sampleText, "WS")
+  end
+
+  def go
+    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+
+    t = makeTok
+
+    s = ''
+    while t.hasNext do
+
+      tk = t.peek
+
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          s << " ...read BROP DO ID sequence...\n"
+          lst.each{ |x| s << " #{d(x)}\n"}
+          next
+        else
+          s << " ...couldn't find sequence...\n"
+        end
+      end
+
+      tk = t.read
+      s << d(tk) << "\n"
+
+    end
+    exp =<<"EXP"
+(line 1, col 1) : speed
+(line 1, col 6) : =
+(line 1, col 7) : 42
+(line 1, col 9) : gravity
+(line 1, col 16) : =
+(line 1, col 17) : -9.80
+ ...couldn't find sequence...
+(line 1, col 22) : {
+(line 1, col 23) : color
+(line 1, col 29) : =
+(line 1, col 30) : green
+(line 1, col 35) : }
+(line 1, col 36) : title
+(line 1, col 41) : =
+(line 1, col 42) : 'This is a string with \' an escaped delimiter'
+(line 1, col 89) : if
+(line 1, col 91) : gravity
+(line 1, col 98) : ==
+(line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+(line 1, col 102) : {
+(line 1, col 103) : do
+(line 1, col 105) : something
+(line 1, col 114) : }
+(line 1, col 115) : do
+(line 1, col 117) : something_else
+EXP
+    assert(s == exp)
+
+  end
+
+end
+
+Example1.new.go
@@ -0,0 +1 @@
+{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
File without changes
File without changes
@@ -0,0 +1,584 @@
+require 'test/unit'
+
+require_relative '../lib/tokn/tools.rb'
+req('range_partition dfa dfa_builder tokenizer token_defn_parser')
+
+# Get access to Tokn namespace
+
+#setTestDir()
+
+#SINGLETEST = "test_ps_output_multi"
+if defined? SINGLETEST
+  if main?(__FILE__)
+    ARGV.concat("-n #{SINGLETEST}".split)
+  end
+end
+
+class TestTokn < MyTestSuite
+
+  include Tokn, ToknInternal
+
+#  def data_file(f)
+#    File.join("data",f)
+#    # File.dirname(__FILE__)+"/data/"+f
+#  end
+
+  def suite_setup
+
+    # Make current directory = the one containing this script
+    main?(__FILE__)
+
+    if !File.directory?(out_dir)
+      Dir.mkdir(out_dir)
+    end
+
+    @@sampleText = readTextFile("sampletext.txt")
+    @@sampleTokens = readTextFile("sampletokens.txt")
+  end
+
+#  def withinTestDir(f)
+#    File.join(@@testDir,"__source__")
+
+  def suite_teardown
+    remove_file_or_dir(out_dir)
+  end
+
+  def method_setup
+  end
+
+  def method_teardown
+  end
+
+  def add(lower, upper = nil)
+    @cs.add(lower,upper)
+  end
+
+  def remove(lower, upper = nil)
+    @cs.remove(lower,upper)
+  end
+
+  def swap
+    @ct = @cs
+    prep
+  end
+
+  def isect
+    @cs.intersect!(@ct)
+  end
+
+  def diff
+    @cs.difference!(@ct)
+  end
+
+  def equ(s, arr = nil)
+    arr ||= @cs.array
+    ia = s.split.map{|n| n.to_i}
+    assert_equal(ia,arr)
+  end
+
+  def test_100_add
+    prep
+
+    add(72,81)
+    equ '72 81'
+
+    add(50)
+    equ '50 51 72 81'
+
+    add(75,77)
+    equ '50 51 72 81'
+
+    add(72,78)
+    equ '50 51 72 81'
+
+    add(70,78)
+    equ '50 51 70 81'
+
+    add 60
+    equ '50 51 60 61 70 81'
+
+    add 40
+    equ '40 41 50 51 60 61 70 81'
+
+    add 41
+    equ '40 42 50 51 60 61 70 81'
+
+    add 81
+    equ '40 42 50 51 60 61 70 82'
+
+    add 83
+    equ '40 42 50 51 60 61 70 82 83 84'
+
+    add 49,84
+    equ '40 42 49 84'
+
+    add 39,86
+    equ '39 86'
+  end
+
+  def test_110_intersect
+    prep
+    add 39,86
+    swap
+    add 50,70
+    isect
+    equ '50 70'
+
+    swap
+    add 20,25
+    add 35,51
+    add 62,68
+    add 72,80
+    isect
+    equ '50 51 62 68'
+
+    prep
+    swap
+    add 50,70
+    isect
+    equ ''
+
+    add 50,70
+    swap
+    add 50,70
+    isect
+    equ '50 70'
+
+    prep
+    add 20,25
+    swap
+    add 25,30
+    isect
+    equ ''
+
+  end
+
+  def test_120_difference
+    prep
+    add 20,30
+    add 40,50
+    swap
+
+    add 20,80
+    diff
+    equ '30 40 50 80'
+
+    prep
+    add 19,32
+    diff
+    equ '19 20 30 32'
+
+    prep
+    add 30,40
+    diff
+    equ '30 40'
+
+    prep
+    add 20,30
+    add 40,50
+    diff
+    equ ''
+
+    prep
+    add 19,30
+    add 40,50
+    diff
+    equ '19 20'
+
+    prep
+    add 20,30
+    add 40,51
+    diff
+    equ '50 51'
+
+  end
+
+  def prep
+    @cs = CodeSet.new
+  end
+
+  def test_130_illegalRange
+    prep
+
+    assert_raise(RangeError) { add 60,50 }
+    assert_raise(RangeError) { add 60,60 }
+  end
+
+  def neg(lower, upper)
+    @cs.negate lower, upper
+  end
+
+  def test_140_negate
+    prep
+    add 10,15
+    add 20,25
+    add 30
+    add 40,45
+    equ '10 15 20 25 30 31 40 45'
+    neg 22,37
+    equ '10 15 20 22 25 30 31 37 40 45'
+    neg 25,27
+    equ '10 15 20 22 27 30 31 37 40 45'
+    neg 15,20
+    equ '10 22 27 30 31 37 40 45'
+
+    prep
+    add 10,22
+    @cs.negate
+    equ '0 10 22 1114112'
+
+    prep
+    add 10,20
+    neg 10,20
+    equ ''
+
+    prep
+    add 10,20
+    add 30,40
+    neg 5,10
+    equ '5 20 30 40'
+
+    prep
+    add 10,20
+    add 30,40
+    neg 25,30
+    equ '10 20 25 40'
+
+    prep
+    add 10,20
+    add 30,40
+    neg 40,50
+    equ '10 20 30 50'
+
+    prep
+    add 10,20
+    add 30,40
+    neg 41,50
+    equ '10 20 30 40 41 50'
+
+    prep
+    add 10,20
+    add 30,40
+    neg 15,35
+    equ '10 15 20 30 35 40'
+  end
+
+  def test_150_remove
+
+    prep
+    add 10,20
+    add 30,40
+    remove 29,41
+    equ '10 20'
+
+    add 30,40
+    equ '10 20 30 40'
+
+    remove 20,30
+    equ '10 20 30 40'
+
+    remove 15,35
+    equ '10 15 35 40'
+
+    remove 10,15
+    equ '35 40'
+    remove 35
+    equ '36 40'
+    remove 40
+    equ '36 40'
+    remove 38
+    equ '36 38 39 40'
+    remove 37,39
+    equ '36 37 39 40'
+
+  end
+
+  def dset(st)
+    s = ''
+    st.each{|x|
+      if s.length > 0
+        s+= ' '
+      end
+      s += d(x)
+    }
+    return s
+  end
+
+  def newpar
+    @par = RangePartition.new
+  end
+
+  def addset(lower, upper = nil)
+    upper ||= lower + 1
+    r = CodeSet.new(lower,upper)
+    @par.addSet(r)
+  end
+
+  def apply
+    list = @par.apply(@cs)
+    res = []
+    list.each do |x|
+      res.concat x.array
+    end
+    @parResult = res
+  end
+
+  def test_160_partition
+
+    newpar
+    addset(20,30)
+    addset(25,33)
+    addset(37)
+    addset(40,50)
+    @par.prepare
+
+    @par.generatePDF(out_dir)
+
+    prep
+    add 25,33
+
+    apply
+    equ('25 30 30 33', @parResult)
+
+    prep
+    add 37
+    apply
+    equ('37 38', @parResult)
+
+    prep
+    add 40,50
+    apply
+    equ('40 50', @parResult)
+
+  end
+
+  REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
+
+  TOKEN_SCRIPT2 = <<'END'
+sep: \s
+tku: a(a|b)*
+tkv: b(aa|b*)
+tkw: bbb
+END
+
+  def test_170_build_DFA
+
+    x = RegParse.new(REGEX_SCRIPT)
+    s = x.startState
+    x.endState.finalState = true
+
+    s.generatePDF(out_dir,"nfa")
+
+    r = s.reverseNFA()
+    r.generatePDF(out_dir,"reversed")
+
+    dfa = DFABuilder.nfa_to_dfa(s)
+    dfa.generatePDF(out_dir,"buildDFA")
+  end
+
+  def test_180_cvt_NFA_to_DFA
+
+    x = RegParse.new(REGEX_SCRIPT)
+    s = x.startState
+    x.endState.finalState = true
+
+    s.generatePDF(out_dir,"nfa")
+
+    dfa = DFABuilder.nfa_to_dfa(s)
+    dfa.generatePDF(out_dir,"dfa")
+
+    oldToNewMap, maxId2 = dfa.duplicateNFA(42)
+    dfa2 = oldToNewMap[dfa]
+    dfa2.generatePDF(out_dir,"dfa_duplicated")
+  end
+
+  def test_190_TokenDefParser
+
+    s = TOKEN_SCRIPT2
+
+    td = TokenDefParser.new(s)
+
+    tokDFA = td.dfa
+    tokDFA.startState.generatePDF(out_dir,"TokenDFA")
+
+  end
+
+  def makeTok
+    dfa = DFA.from_script(@@sampleTokens)
+    Tokenizer.new(dfa, @@sampleText)
+  end
+
+  def test_200_Tokenizer
+
+    tok = makeTok
+
+    tokList = []
+    while tok.hasNext
+      t = tok.read
+      tokList.push(t)
+    end
+
+    tok.unread(tokList.size)
+
+    tokList.each do |t1|
+      tName = tok.nameOf(t1)
+      t2 = tok.read(tName)
+    end
+  end
+
+  def test_210_Tokenizer_Missing_Expected
+
+    assert_raise TokenizerException do
+
+      tok = makeTok
+
+      tok.read
+      tok.read
+      tok.read
+      tok.read
+      tok.read("signedint")
+    end
+
+  end
+
+  def test_220_CompileDFAToDisk
+    tokScript = @@sampleTokens
+    testText = @@sampleText
+
+    destPath = out_path("sampletokens_dfa.txt")
+
+    if File.exist?(destPath)
+      File.delete(destPath)
+    end
+    assert(!File.exist?(destPath))
+
+    dfa = DFA.from_script(tokScript, destPath)
+    assert(File.exist?(destPath))
+
+    tok = Tokenizer.new(dfa, testText)
+
+  end
+
+  def prep2
+    testText = @@sampleText
+    dfa = DFA.from_file(out_path("sampletokens_dfa.txt"))
+    tok = Tokenizer.new(dfa, testText)
+  end
+
+  def test_230_readAndUnread
+    tok = prep2
+    unread = false
+    while tok.hasNext
+      t = tok.read
+      # pr("Read %-8s %s\n",tok.nameOf(t),d(t))
+
+      if !unread && tok.nameOf(t) == "DO"
+        # pr(" ...pushing back four tokens...\n")
+        tok.unread(4)
+        unread = true
+        # pr(" ...and resuming...\n")
+      end
+    end
+  end
+
+  def test_240_UnrecognizedToken
+    assert_raise TokenizerException do
+      tok = prep2
+      while tok.hasNext
+        t = tok.read
+        if tok.nameOf(t) == "DO"
+          tok.read("BRCL") # <== this should raise problem
+        end
+      end
+    end
+  end
+
+  def test_250_ReadPastEnd
+    assert_raise TokenizerException do
+      tok = prep2
+      while tok.hasNext
+        t = tok.read
+      end
+      tok.read
+    end
+  end
+
+  def test_260_UnreadBeforeStart
+
+    assert_raise TokenizerException do
+      tok = prep2
+      k = 0
+      while tok.hasNext
+        t = tok.read
+        k += 1
+        if k == 15
+          tok.unread(5)
+          tok.unread(7)
+          tok.read()
+          tok.unread(4)
+          tok.unread(3)
+        end
+      end
+      tok.read
+    end
+  end
+
+  def test_270_filter_ws
+
+    dfa = DFA.from_script_file("sampletokens.txt")
+    t = Tokenizer.new(dfa, readTextFile("sampletext.txt"), "WS")
+
+    s = ''
+    while t.hasNext do
+
+      tk = t.peek
+
+      if t.nameOf(tk) == 'BROP'
+        lst = t.readSequenceIf('BROP DO ID BRCL')
+        if lst
+          s << " ...read BROP DO ID sequence...\n"
+          lst.each{ |x| s << " #{d(x)}\n"}
+          next
+        else
+          s << " ...couldn't find sequence...\n"
+        end
+      end
+
+      tk = t.read
+      s << d(tk) << "\n"
+
+    end
+    exp =<<"EXP"
+(line 1, col 1) : speed
+(line 1, col 6) : =
+(line 1, col 7) : 42
+(line 1, col 9) : gravity
+(line 1, col 16) : =
+(line 1, col 17) : -9.80
+ ...couldn't find sequence...
+(line 1, col 22) : {
+(line 1, col 23) : color
+(line 1, col 29) : =
+(line 1, col 30) : green
+(line 1, col 35) : }
+(line 1, col 36) : title
+(line 1, col 41) : =
+(line 1, col 42) : 'This is a string with \\' an escaped delimiter'
+(line 1, col 89) : if
+(line 1, col 91) : gravity
+(line 1, col 98) : ==
+(line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+(line 1, col 102) : {
+(line 1, col 103) : do
+(line 1, col 105) : something
+(line 1, col 114) : }
+(line 1, col 115) : do
+(line 1, col 117) : something_else
+EXP
+
+    assert(s.strip == exp.strip)
+  end
+
+end
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tokn
 version: !ruby/object:Gem::Version
-  version: 0.0.9
+  version: 0.1.0
 platform: ruby
 authors:
 - Jeff Sember
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-07 00:00:00.000000000 Z
+date: 2013-04-09 00:00:00.000000000 Z
 dependencies: []
 description: "Given a script containing token descriptions (each a regular expression),
   \ntokn compiles an automaton which it can then use to efficiently convert a \ntext
@@ -34,13 +34,15 @@ files:
 - bin/tokncompile
 - bin/toknprocess
 - CHANGELOG.txt
-- README.txt
-- test/Example1.rb
-- test/data/compileddfa.txt
-- test/data/sampletext.txt
-- test/data/sampletokens.txt
-- test/test.rb
-- test/testcmds
+- README.md
+- test/_OLD_/Example1.rb
+- test/_OLD_/data/compileddfa.txt
+- test/_OLD_/data/sampletokens_dfa.txt
+- test/_OLD_/test.rb
+- test/_OLD_/testcmds
+- test/sampletext.txt
+- test/sampletokens.txt
+- test/test_tokn.rb
 homepage: http://www.cs.ubc.ca/~jpsember/
 licenses:
 - mit
@@ -66,6 +68,5 @@ signing_key:
 specification_version: 4
 summary: Extracts tokens from source files
 test_files:
-- test/Example1.rb
-- test/test.rb
+- test/test_tokn.rb
 has_rdoc:
@@ -1,50 +0,0 @@
-require_relative '../lib/tokn/tokenizer'
-
-class Example1
-
-  include Tokn
-
-  def dataPath(f)
-    File.dirname(__FILE__)+"/data/"+f
-  end
-
-  setTestDir()
-
-  def initialize
-    @sampleText = readTextFile(dataPath("sampletext.txt"))
-  end
-
-  def makeTok
-    @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
-    Tokenizer.new(@dfa, @sampleText, "WS")
-  end
-
-  def go
-    puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
-
-    t = makeTok
-
-    while t.hasNext do
-
-      tk = t.peek
-
-      if t.nameOf(tk) == 'BROP'
-        lst = t.readSequenceIf('BROP DO ID BRCL')
-        if lst
-          pr(" ...read BROP DO ID sequence...\n")
-          lst.each{ |x| pr(" %s\n",d(x))}
-          next
-        else
-          pr(" ...couldn't find sequence...\n")
-        end
-      end
-
-      tk = t.read
-      pr("%s\n",d(tk))
-
-    end
-  end
-
-end
-
-Example1.new.go