tokn 0.0.9 → 0.1.0

This diff shows the changes between two published versions of the package, as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 804ed12fc717a528758a7f1bc2ec03e92e829310
- data.tar.gz: 2d3df30d3525d0c0ef3d5b7aa1f16839db6fd786
+ metadata.gz: 84933fe2966d19908c447c84cbccb9179c4e351d
+ data.tar.gz: d1a6fae299cdd6c9b57961bfde7c879e1a786a48
  SHA512:
- metadata.gz: 80fb1504a1f42d95ebe2ac6b97d16f31d9b14e2f1b3d2e08ab45780584f8b0763a2f1a4a2da09614be68c0b67e1041b7a3d1a5db57a66705d9dbf57454265987
- data.tar.gz: 9f6a5c6304471df7a2b8f2cbb6b584c6f1bdcf425d94e0904bec630e45163f60a42eca8b4865ed1b0b87f7681edaf83eeac6181ce752863f673e4185f1666d48
+ metadata.gz: 022689701816eb3fb37690579b275194cbafc53d31591b7fb87d93a6cacafc50318dd964d28697ed1181ceb288d700214191cc41d6362cf4221e110e7c885531
+ data.tar.gz: 96ee32c79d3e12ba8b01cd27fdfb9ff0bfc6ff518972358dc2fbac04f4c8842fccd6f8722c994daba299327718915074f853cc3c9125c0e22dffe3ef19fd189d
CHANGELOG.txt CHANGED
@@ -11,3 +11,9 @@
 
  * Version 0.0.9
  * Fixed problem with README file
+
+ 2013-04-08
+ * Version 0.1.0
+ * Cleaned up test directory
+
+
README.md CHANGED
@@ -1,11 +1,10 @@
- # @markup markdown
-
  tokn
  =======
- A ruby gem for constructing DFAs and using them to tokenize text files.
+ Tokn is a ruby gem that generates automatons from regular expressions to extract tokens from text files.
 
- Written and (c) by Jeff Sember, March 2013.
+ Written by Jeff Sember, March 2013.
 
+ [Source code documentation can be found here.](http://rubydoc.info/gems/tokn/frames)
 
 
  Description of the problem
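The API the README alludes to is exercised by the example and tests later in this diff. A minimal usage sketch, assuming the installed gem's require path (the examples in this diff use `require_relative` instead) and the sample data files shipped under test/; `readTextFile` and `d` are helpers from tokn's tools:

```ruby
require 'tokn/tokenizer'   # assumed require path for the installed gem
include Tokn

# Compile a DFA from a token-definition script, then tokenize a file,
# filtering out whitespace ("WS") tokens.
dfa = DFA.from_script_file("sampletokens.txt")
tok = Tokenizer.new(dfa, readTextFile("sampletext.txt"), "WS")
while tok.hasNext
  t = tok.read
  puts "#{tok.nameOf(t)} => #{d(t)}"
end
```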
@@ -79,7 +79,7 @@ module ToknInternal
 
  # Generate a .dot file, and from that, a PDF, for debug purposes
  #
- def generatePDF(name = "partition")
+ def generatePDF(test_dir = nil, name = "partition")
  if !@prepared
  raise IllegalStateException
  end
@@ -103,7 +103,7 @@ module ToknInternal
  g += "\n}\n"
  g.gsub!( /'/, '"' )
 
- dotToPDF(g,name)
+ dotToPDF(g,name, test_dir)
 
  end
 
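Both changes in this hunk follow the release's theme: the output directory becomes an explicit argument, threaded through to dotToPDF, replacing the old global test directory. A minimal sketch of the new calling convention, based on the test code added later in this diff ("_output_" is the test suite's out_dir):

```ruby
include ToknInternal

par = RangePartition.new
par.addSet(CodeSet.new(20, 30))   # partition built from the code range [20,30)
par.prepare

# Old call: par.generatePDF("partition") wrote into a global test directory.
# New call: the caller supplies the destination directory explicitly.
par.generatePDF("_output_")       # emits _output_/__mygraph__partition.pdf
```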
@@ -102,11 +102,11 @@ module ToknInternal
  # Generate a PDF of the state machine;
  # Makes a system call to the dot utility to convert a .dot file to a .pdf
  #
- def generatePDF(title = "nfa")
+ def generatePDF(dir = nil, title = "nfa")
  stateList = {}
 
  startState = self
- genAux(stateList, startState)
+ genAux( stateList, startState)
 
  g = ""
  g += "digraph "+title+" {\n"
@@ -135,7 +135,7 @@ module ToknInternal
  g += "\n}\n"
  g.gsub!( /'/, '"' )
 
- dotToPDF(g,title)
+ dotToPDF(g,title,dir)
  end
 
 
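The state machine's generatePDF gets the same treatment: directory first, title second. A sketch of the new call, taken from the pattern in test_170_build_DFA below (the regular expression here is illustrative):

```ruby
x = RegParse.new("(a|b)*")        # illustrative pattern
s = x.startState
x.endState.finalState = true
s.generatePDF("_output_", "nfa")  # was: s.generatePDF("nfa")
```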
lib/tokn/tools.rb CHANGED
@@ -1,4 +1,5 @@
  require 'set'
+ require 'fileutils'
 
  # Various utility and debug convenience functions.
  #
@@ -45,38 +46,41 @@ def myAssert(cond, *msg)
  end
 
 
- # Set test directory. If nil, sets to home directory + "__test__"
- #
- def setTestDir(d = nil)
- $testDir = d || File.join(Dir.home,"__test__")
- end
+ ## Set test directory. If nil, sets to home directory + "__test__"
+ ##
+ #def setTestDir(d = nil)
+ # $testDir = d || File.join(Dir.home,"__test__")
+ #end
 
- # Get a path within the test directory;
- # create test directory if it doesn't exist.
- #
- # relPath : if nil, returns the test directory; else
- # returns the test directory joined to this one
- #
- def withinTestDir(relPath = nil)
- if !$testDir
- raise IllegalStateException, "No test directory has been defined"
- end
- if !File.directory?($testDir)
- Dir::mkdir($testDir)
- end
- relPath ? File.join($testDir,relPath) : $testDir
- end
+ ## Get a path within the test directory;
+ ## create test directory if it doesn't exist.
+ ##
+ ## relPath : if nil, returns the test directory; else
+ ## returns the test directory joined to this one
+ ##
+ #def withinTestDir(relPath = nil)
+ # if !$testDir
+ # raise IllegalStateException, "No test directory has been defined"
+ # end
+ # if !File.directory?($testDir)
+ # Dir::mkdir($testDir)
+ # end
+ # relPath ? File.join($testDir,relPath) : $testDir
+ #end
 
  # Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
  # in the test directory.
  #
  # It does this by making a system call to the 'dot' utility.
  #
- def dotToPDF(dotFile, name = "")
+ def dotToPDF(dotFile, name = "", test_dir = nil)
  gr = dotFile
- dotPath = withinTestDir(".__mygraph__.dot")
+
+ raise ArgumentError if !test_dir
+
+ dotPath = File.join(test_dir,".__mygraph__.dot")
  writeTextFile(dotPath,gr)
- destName = withinTestDir( "__mygraph__"+name+".pdf")
+ destName = File.join(test_dir,"__mygraph__"+name+".pdf")
  system("dot -Tpdf "+dotPath+" -o "+destName)
  end
 
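The net effect on dotToPDF: the implicit global test directory (setTestDir/withinTestDir, now commented out) is gone, and a nil test_dir fails fast. A small sketch of the new contract (the graph string is illustrative):

```ruby
graph = "digraph demo { a -> b }"

# Writes _output_/.__mygraph__.dot, then shells out to Graphviz:
#   dot -Tpdf _output_/.__mygraph__.dot -o _output_/__mygraph__demo.pdf
dotToPDF(graph, "demo", "_output_")

dotToPDF(graph, "demo")   # raises ArgumentError: no directory supplied
```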
@@ -190,6 +194,16 @@ def readTextFile(path)
  contents
  end
 
+ # Delete a file or directory, if it exists.
+ # Caution! If directory, deletes all files and subdirectories.
+ def remove_file_or_dir(pth)
+ if File.directory?(pth)
+ FileUtils.remove_dir(pth)
+ elsif File.file?(pth)
+ FileUtils.remove_file(pth)
+ end
+ end
+
  # Method that takes a code block as an argument to
  # achieve the same functionality as Java/C++'s
  # do {
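The new helper backs the test suite's cleanup (suite_teardown calls remove_file_or_dir(out_dir) later in this diff). Its behavior, sketched with illustrative paths:

```ruby
remove_file_or_dir("_output_")    # directory: removed recursively via FileUtils.remove_dir
remove_file_or_dir("stale.txt")   # regular file: removed via FileUtils.remove_file
remove_file_or_dir("no_such")     # neither exists: silently a no-op
```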
@@ -206,3 +220,143 @@ end
  class IllegalStateException < Exception
  end
 
+
+ # Convenience method to detect if a script is being run
+ # e.g. as a 'main' method (for debug purposes only).
+ # If so, it changes the current directory to the
+ # directory containing the script (if such a directory exists).
+ #
+ # @param file pass __FILE__ in here
+ # @return true if so
+ #
+ def main?(file)
+
+ scr = $0
+
+ # The test/unit framework seems to be adding a suffix ": xxx#xxx.."
+ # to the .rb filename, so adjust in this case
+ i = scr.index(".rb: ")
+ if i
+ scr = scr[0...i+3]
+ end
+
+ if (ret = (file == scr))
+ dr = File.dirname(file)
+ if File.directory?(dr)
+ Dir.chdir(dr)
+ end
+ end
+ ret
+ end
+
+ if defined? Test::Unit
+
+ # A simple extension to Ruby's Test::Unit class that provides
+ # suite-level setup/teardown methods.
+ #
+ # If test suite functionality is desired within a script,
+ # then require 'test/unit' before requiring 'tools.rb'.
+ # This will cause the following class, MyTestSuite, to be defined.
+ #
+ # The user's test script can define subclasses of this,
+ # and declare test methods with the name 'test_xxxx', where
+ # xxxx is lexicographically between 01 and zz.
+ #
+ # There are two levels of setup/teardown called : suite level, and
+ # method level. For example, if the user's test class performs two tests:
+ #
+ # def test_b ... end
+ # def test_c ... end
+ #
+ # Then the test framework will make these calls:
+ #
+ # suite_setup
+ #
+ # method_setup
+ # test_b
+ # method_teardown
+ #
+ # method_setup
+ # test_c
+ # method_teardown
+ #
+ # suite_teardown
+ #
+ # Notes
+ # -----
+ # 1) The usual setup / teardown methods should NOT be overridden; instead,
+ # use the method_xxx alternatives.
+ #
+ # 2) The base class implementations of method_/suite_xxx do nothing.
+ #
+ # 3) The number of test cases reported may be higher than you expect, since
+ # there are additional test methods defined by the TestSuite class to
+ # implement the suite setup / teardown functionality.
+ #
+ # 4) Avoid naming test methods that fall outside of test_01 ... test_zz.
+ #
+ class MyTestSuite < Test::Unit::TestCase
+
+ # This is named to be the FIRST test called. It
+ # will do suite-level setup, and nothing else.
+ def test_00_setup
+ @@suiteSetup = true
+ suite_setup()
+ end
+
+ # This is named to be the LAST test called. It
+ # will do suite-level teardown, and nothing else.
+ def test_zzzzzz_teardown
+ suite_teardown()
+ @@suiteSetup = false
+ end
+
+ # True if called within suite-level setup/teardown window
+ def _suite_active?
+ !(@__name__ == "test_00_setup" || @__name__ == "test_zzzzzz_teardown")
+ end
+
+ def setup
+ if _suite_active?
+ # If only a specific test was requested, the
+ # suite setup may not have run... if not, do it now.
+ if !defined? @@suiteSetup
+ suite_setup
+ end
+ return
+ end
+ method_setup
+ end
+
+ def out_dir
+ "_output_"
+ end
+
+ def out_path(f)
+ File.join(out_dir,f)
+ end
+
+ def teardown
+ if _suite_active?
+ if !defined? @@suiteSetup
+ suite_teardown
+ end
+ return
+ end
+ method_teardown
+ end
+
+ def suite_setup
+ end
+
+ def suite_teardown
+ end
+
+ def method_setup
+ end
+
+ def method_teardown
+ end
+ end
+ end
+
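The TestTokn class added below is a full example of this machinery; here is a pared-down sketch of the subclass contract described in the comment above (test_b/test_c are the names the comment itself uses):

```ruby
class DemoSuite < MyTestSuite
  # Suite-level hooks: run once, bracketed by the base class's sentinel
  # tests test_00_setup and test_zzzzzz_teardown.
  def suite_setup;    @@fixture = "expensive shared state" end
  def suite_teardown; @@fixture = nil                      end

  # Method-level hooks: run around each individual test_* method.
  def method_setup;    end
  def method_teardown; end

  def test_b; assert(@@fixture) end
  def test_c; assert(@@fixture) end
end
```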
test/_OLD_/Example1.rb ADDED
@@ -0,0 +1,81 @@
+ require_relative '../lib/tokn/tokenizer'
+
+ class Example1
+
+ include Tokn
+
+ def dataPath(f)
+ File.dirname(__FILE__)+"/data/"+f
+ end
+
+ setTestDir()
+
+ def initialize
+ @sampleText = readTextFile(dataPath("sampletext.txt"))
+ end
+
+ def makeTok
+ @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
+ Tokenizer.new(@dfa, @sampleText, "WS")
+ end
+
+ def go
+ puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
+
+ t = makeTok
+
+ s = ''
+ while t.hasNext do
+
+ tk = t.peek
+
+ if t.nameOf(tk) == 'BROP'
+ lst = t.readSequenceIf('BROP DO ID BRCL')
+ if lst
+ s << " ...read BROP DO ID sequence...\n"
+ lst.each{ |x| s << " #{d(x)}\n"}
+ next
+ else
+ s << " ...couldn't find sequence...\n"
+ end
+ end
+
+ tk = t.read
+ s << d(tk) << "\n"
+
+ end
+ exp =<<"EXP"
+ (line 1, col 1) : speed
+ (line 1, col 6) : =
+ (line 1, col 7) : 42
+ (line 1, col 9) : gravity
+ (line 1, col 16) : =
+ (line 1, col 17) : -9.80
+ ...couldn't find sequence...
+ (line 1, col 22) : {
+ (line 1, col 23) : color
+ (line 1, col 29) : =
+ (line 1, col 30) : green
+ (line 1, col 35) : }
+ (line 1, col 36) : title
+ (line 1, col 41) : =
+ (line 1, col 42) : 'This is a string with \' an escaped delimiter'
+ (line 1, col 89) : if
+ (line 1, col 91) : gravity
+ (line 1, col 98) : ==
+ (line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+ (line 1, col 102) : {
+ (line 1, col 103) : do
+ (line 1, col 105) : something
+ (line 1, col 114) : }
+ (line 1, col 115) : do
+ (line 1, col 117) : something_else
+ EXP
+ assert(s == exp)
+
+ end
+
+ end
+
+ Example1.new.go
@@ -0,0 +1 @@
+ {"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
File without changes
File without changes
test/test_tokn.rb ADDED
@@ -0,0 +1,584 @@
+ require 'test/unit'
+
+ require_relative '../lib/tokn/tools.rb'
+ req('range_partition dfa dfa_builder tokenizer token_defn_parser')
+
+ # Get access to Tokn namespace
+
+ #setTestDir()
+
+ #SINGLETEST = "test_ps_output_multi"
+ if defined? SINGLETEST
+ if main?(__FILE__)
+ ARGV.concat("-n #{SINGLETEST}".split)
+ end
+ end
+
+ class TestTokn < MyTestSuite
+
+ include Tokn, ToknInternal
+
+ # def data_file(f)
+ # File.join("data",f)
+ # # File.dirname(__FILE__)+"/data/"+f
+ # end
+
+ def suite_setup
+
+ # Make current directory = the one containing this script
+ main?(__FILE__)
+
+ if !File.directory?(out_dir)
+ Dir.mkdir(out_dir)
+ end
+
+ @@sampleText = readTextFile("sampletext.txt")
+ @@sampleTokens = readTextFile("sampletokens.txt")
+ end
+
+ # def withinTestDir(f)
+ # File.join(@@testDir,"__source__")
+
+ def suite_teardown
+ remove_file_or_dir(out_dir)
+ end
+
+ def method_setup
+ end
+
+ def method_teardown
+ end
+
+ def add(lower, upper = nil)
+ @cs.add(lower,upper)
+ end
+
+ def remove(lower, upper = nil)
+ @cs.remove(lower,upper)
+ end
+
+ def swap
+ @ct = @cs
+ prep
+ end
+
+ def isect
+ @cs.intersect!(@ct)
+ end
+
+ def diff
+ @cs.difference!(@ct)
+ end
+
+ def equ(s, arr = nil)
+ arr ||= @cs.array
+ ia = s.split.map{|n| n.to_i}
+ assert_equal(ia,arr)
+ end
+
+ def test_100_add
+ prep
+
+ add(72,81)
+ equ '72 81'
+
+ add(50)
+ equ '50 51 72 81'
+
+ add(75,77)
+ equ '50 51 72 81'
+
+ add(72,78)
+ equ '50 51 72 81'
+
+ add(70,78)
+ equ '50 51 70 81'
+
+ add 60
+ equ '50 51 60 61 70 81'
+
+ add 40
+ equ '40 41 50 51 60 61 70 81'
+
+ add 41
+ equ '40 42 50 51 60 61 70 81'
+
+ add 81
+ equ '40 42 50 51 60 61 70 82'
+
+ add 83
+ equ '40 42 50 51 60 61 70 82 83 84'
+
+ add 49,84
+ equ '40 42 49 84'
+
+ add 39,86
+ equ '39 86'
+ end
+
+ def test_110_intersect
+ prep
+ add 39,86
+ swap
+ add 50,70
+ isect
+ equ '50 70'
+
+ swap
+ add 20,25
+ add 35,51
+ add 62,68
+ add 72,80
+ isect
+ equ '50 51 62 68'
+
+ prep
+ swap
+ add 50,70
+ isect
+ equ ''
+
+ add 50,70
+ swap
+ add 50,70
+ isect
+ equ '50 70'
+
+ prep
+ add 20,25
+ swap
+ add 25,30
+ isect
+ equ ''
+
+ end
+
+ def test_120_difference
+ prep
+ add 20,30
+ add 40,50
+ swap
+
+ add 20,80
+ diff
+ equ '30 40 50 80'
+
+ prep
+ add 19,32
+ diff
+ equ '19 20 30 32'
+
+ prep
+ add 30,40
+ diff
+ equ '30 40'
+
+ prep
+ add 20,30
+ add 40,50
+ diff
+ equ ''
+
+ prep
+ add 19,30
+ add 40,50
+ diff
+ equ '19 20'
+
+ prep
+ add 20,30
+ add 40,51
+ diff
+ equ '50 51'
+
+ end
+
+ def prep
+ @cs = CodeSet.new
+ end
+
+ def test_130_illegalRange
+ prep
+
+ assert_raise(RangeError) { add 60,50 }
+ assert_raise(RangeError) { add 60,60 }
+ end
+
+ def neg(lower, upper)
+ @cs.negate lower, upper
+ end
+
+ def test_140_negate
+ prep
+ add 10,15
+ add 20,25
+ add 30
+ add 40,45
+ equ '10 15 20 25 30 31 40 45'
+ neg 22,37
+ equ '10 15 20 22 25 30 31 37 40 45'
+ neg 25,27
+ equ '10 15 20 22 27 30 31 37 40 45'
+ neg 15,20
+ equ '10 22 27 30 31 37 40 45'
+
+ prep
+ add 10,22
+ @cs.negate
+ equ '0 10 22 1114112'
+
+ prep
+ add 10,20
+ neg 10,20
+ equ ''
+
+ prep
+ add 10,20
+ add 30,40
+ neg 5,10
+ equ '5 20 30 40'
+
+ prep
+ add 10,20
+ add 30,40
+ neg 25,30
+ equ '10 20 25 40'
+
+ prep
+ add 10,20
+ add 30,40
+ neg 40,50
+ equ '10 20 30 50'
+
+ prep
+ add 10,20
+ add 30,40
+ neg 41,50
+ equ '10 20 30 40 41 50'
+
+ prep
+ add 10,20
+ add 30,40
+ neg 15,35
+ equ '10 15 20 30 35 40'
+ end
+
+ def test_150_remove
+
+ prep
+ add 10,20
+ add 30,40
+ remove 29,41
+ equ '10 20'
+
+ add 30,40
+ equ '10 20 30 40'
+
+ remove 20,30
+ equ '10 20 30 40'
+
+ remove 15,35
+ equ '10 15 35 40'
+
+ remove 10,15
+ equ '35 40'
+ remove 35
+ equ '36 40'
+ remove 40
+ equ '36 40'
+ remove 38
+ equ '36 38 39 40'
+ remove 37,39
+ equ '36 37 39 40'
+
+ end
+
+ def dset(st)
+ s = ''
+ st.each{|x|
+ if s.length > 0
+ s+= ' '
+ end
+ s += d(x)
+ }
+ return s
+ end
+
+ def newpar
+ @par = RangePartition.new
+ end
+
+ def addset(lower, upper = nil)
+ upper ||= lower + 1
+ r = CodeSet.new(lower,upper)
+ @par.addSet(r)
+ end
+
+ def apply
+ list = @par.apply(@cs)
+ res = []
+ list.each do |x|
+ res.concat x.array
+ end
+ @parResult = res
+ end
+
+ def test_160_partition
+
+ newpar
+ addset(20,30)
+ addset(25,33)
+ addset(37)
+ addset(40,50)
+ @par.prepare
+
+ @par.generatePDF(out_dir)
+
+ prep
+ add 25,33
+
+ apply
+ equ('25 30 30 33', @parResult)
+
+ prep
+ add 37
+ apply
+ equ('37 38', @parResult)
+
+ prep
+ add 40,50
+ apply
+ equ('40 50', @parResult)
+
+ end
+
+ REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
+
+ TOKEN_SCRIPT2 = <<'END'
+ sep: \s
+ tku: a(a|b)*
+ tkv: b(aa|b*)
+ tkw: bbb
+ END
+
+ def test_170_build_DFA
+
+ x = RegParse.new(REGEX_SCRIPT)
+ s = x.startState
+ x.endState.finalState = true
+
+ s.generatePDF(out_dir,"nfa")
+
+ r = s.reverseNFA()
+ r.generatePDF(out_dir,"reversed")
+
+ dfa = DFABuilder.nfa_to_dfa(s)
+ dfa.generatePDF(out_dir,"buildDFA")
+ end
+
+ def test_180_cvt_NFA_to_DFA
+
+ x = RegParse.new(REGEX_SCRIPT)
+ s = x.startState
+ x.endState.finalState = true
+
+ s.generatePDF(out_dir,"nfa")
+
+ dfa = DFABuilder.nfa_to_dfa(s)
+ dfa.generatePDF(out_dir,"dfa")
+
+ oldToNewMap, maxId2 = dfa.duplicateNFA(42)
+ dfa2 = oldToNewMap[dfa]
+ dfa2.generatePDF(out_dir,"dfa_duplicated")
+ end
+
+ def test_190_TokenDefParser
+
+ s = TOKEN_SCRIPT2
+
+ td = TokenDefParser.new(s)
+
+ tokDFA = td.dfa
+ tokDFA.startState.generatePDF(out_dir,"TokenDFA")
+
+ end
+
+ def makeTok
+ dfa = DFA.from_script(@@sampleTokens)
+ Tokenizer.new(dfa, @@sampleText)
+ end
+
+ def test_200_Tokenizer
+
+ tok = makeTok
+
+ tokList = []
+ while tok.hasNext
+ t = tok.read
+ tokList.push(t)
+ end
+
+ tok.unread(tokList.size)
+
+ tokList.each do |t1|
+ tName = tok.nameOf(t1)
+ t2 = tok.read(tName)
+ end
+ end
+
+ def test_210_Tokenizer_Missing_Expected
+
+ assert_raise TokenizerException do
+
+ tok = makeTok
+
+ tok.read
+ tok.read
+ tok.read
+ tok.read
+ tok.read("signedint")
+ end
+
+ end
+
+ def test_220_CompileDFAToDisk
+ tokScript = @@sampleTokens
+ testText = @@sampleText
+
+ destPath = out_path("sampletokens_dfa.txt")
+
+ if File.exist?(destPath)
+ File.delete(destPath)
+ end
+ assert(!File.exist?(destPath))
+
+ dfa = DFA.from_script(tokScript, destPath)
+ assert(File.exist?(destPath))
+
+ tok = Tokenizer.new(dfa, testText)
+
+ end
+
+ def prep2
+ testText = @@sampleText
+ dfa = DFA.from_file(out_path("sampletokens_dfa.txt"))
+ tok = Tokenizer.new(dfa, testText)
+ end
+
+ def test_230_readAndUnread
+ tok = prep2
+ unread = false
+ while tok.hasNext
+ t = tok.read
+ # pr("Read %-8s %s\n",tok.nameOf(t),d(t))
+
+ if !unread && tok.nameOf(t) == "DO"
+ # pr(" ...pushing back four tokens...\n")
+ tok.unread(4)
+ unread = true
+ # pr(" ...and resuming...\n")
+ end
+ end
+ end
+
+ def test_240_UnrecognizedToken
+ assert_raise TokenizerException do
+ tok = prep2
+ while tok.hasNext
+ t = tok.read
+ if tok.nameOf(t) == "DO"
+ tok.read("BRCL") # <== this should raise problem
+ end
+ end
+ end
+ end
+
+ def test_250_ReadPastEnd
+ assert_raise TokenizerException do
+ tok = prep2
+ while tok.hasNext
+ t = tok.read
+ end
+ tok.read
+ end
+ end
+
+ def test_260_UnreadBeforeStart
+
+ assert_raise TokenizerException do
+ tok = prep2
+ k = 0
+ while tok.hasNext
+ t = tok.read
+ k += 1
+ if k == 15
+ tok.unread(5)
+ tok.unread(7)
+ tok.read()
+ tok.unread(4)
+ tok.unread(3)
+ end
+ end
+ tok.read
+ end
+ end
+
+ def test_270_filter_ws
+
+ dfa = DFA.from_script_file("sampletokens.txt")
+ t = Tokenizer.new(dfa, readTextFile("sampletext.txt"), "WS")
+
+ s = ''
+ while t.hasNext do
+
+ tk = t.peek
+
+ if t.nameOf(tk) == 'BROP'
+ lst = t.readSequenceIf('BROP DO ID BRCL')
+ if lst
+ s << " ...read BROP DO ID sequence...\n"
+ lst.each{ |x| s << " #{d(x)}\n"}
+ next
+ else
+ s << " ...couldn't find sequence...\n"
+ end
+ end
+
+ tk = t.read
+ s << d(tk) << "\n"
+
+ end
+ exp =<<"EXP"
+ (line 1, col 1) : speed
+ (line 1, col 6) : =
+ (line 1, col 7) : 42
+ (line 1, col 9) : gravity
+ (line 1, col 16) : =
+ (line 1, col 17) : -9.80
+ ...couldn't find sequence...
+ (line 1, col 22) : {
+ (line 1, col 23) : color
+ (line 1, col 29) : =
+ (line 1, col 30) : green
+ (line 1, col 35) : }
+ (line 1, col 36) : title
+ (line 1, col 41) : =
+ (line 1, col 42) : 'This is a string with \\' an escaped delimiter'
+ (line 1, col 89) : if
+ (line 1, col 91) : gravity
+ (line 1, col 98) : ==
+ (line 1, col 100) : 12
+ ...read BROP DO ID sequence...
+ (line 1, col 102) : {
+ (line 1, col 103) : do
+ (line 1, col 105) : something
+ (line 1, col 114) : }
+ (line 1, col 115) : do
+ (line 1, col 117) : something_else
+ EXP
+
+ assert(s.strip == exp.strip)
+ end
+
+ end
+
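A reading aid for the CodeSet assertions in the test file above: ranges are half-open, the flattened array alternates range starts and ends, and a one-argument add covers a single code point. Distilled from test_100_add and test_140_negate:

```ruby
cs = CodeSet.new
cs.add(72, 81)    # covers codes 72..80 (upper bound exclusive)
cs.add(50)        # one-argument form: same as add(50, 51)
cs.array          # => [50, 51, 72, 81], i.e. the ranges [50,51) and [72,81)

cs2 = CodeSet.new
cs2.add(10, 22)
cs2.negate        # no bounds: complement over the full Unicode code range
cs2.array         # => [0, 10, 22, 1114112]
```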
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: tokn
  version: !ruby/object:Gem::Version
- version: 0.0.9
+ version: 0.1.0
  platform: ruby
  authors:
  - Jeff Sember
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-03-07 00:00:00.000000000 Z
+ date: 2013-04-09 00:00:00.000000000 Z
  dependencies: []
  description: "Given a script containing token descriptions (each a regular expression),
  \ntokn compiles an automaton which it can then use to efficiently convert a \ntext
@@ -34,13 +34,15 @@ files:
  - bin/tokncompile
  - bin/toknprocess
  - CHANGELOG.txt
- - README.txt
- - test/Example1.rb
- - test/data/compileddfa.txt
- - test/data/sampletext.txt
- - test/data/sampletokens.txt
- - test/test.rb
- - test/testcmds
+ - README.md
+ - test/_OLD_/Example1.rb
+ - test/_OLD_/data/compileddfa.txt
+ - test/_OLD_/data/sampletokens_dfa.txt
+ - test/_OLD_/test.rb
+ - test/_OLD_/testcmds
+ - test/sampletext.txt
+ - test/sampletokens.txt
+ - test/test_tokn.rb
  homepage: http://www.cs.ubc.ca/~jpsember/
  licenses:
  - mit
@@ -66,6 +68,5 @@ signing_key:
  specification_version: 4
  summary: Extracts tokens from source files
  test_files:
- - test/Example1.rb
- - test/test.rb
+ - test/test_tokn.rb
  has_rdoc:
test/Example1.rb DELETED
@@ -1,50 +0,0 @@
- require_relative '../lib/tokn/tokenizer'
-
- class Example1
-
- include Tokn
-
- def dataPath(f)
- File.dirname(__FILE__)+"/data/"+f
- end
-
- setTestDir()
-
- def initialize
- @sampleText = readTextFile(dataPath("sampletext.txt"))
- end
-
- def makeTok
- @dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
- Tokenizer.new(@dfa, @sampleText, "WS")
- end
-
- def go
- puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
-
- t = makeTok
-
- while t.hasNext do
-
- tk = t.peek
-
- if t.nameOf(tk) == 'BROP'
- lst = t.readSequenceIf('BROP DO ID BRCL')
- if lst
- pr(" ...read BROP DO ID sequence...\n")
- lst.each{ |x| pr(" %s\n",d(x))}
- next
- else
- pr(" ...couldn't find sequence...\n")
- end
- end
-
- tk = t.read
- pr("%s\n",d(tk))
-
- end
- end
-
- end
-
- Example1.new.go