tokn 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/test/test.rb ADDED
@@ -0,0 +1,519 @@
1
+ require 'test/unit'
2
+ require_relative '../lib/tokn/tools.rb'
3
+ req('range_partition dfa dfa_builder tokenizer')
4
+
5
+
6
# Build the path of a file inside the "data" directory that sits next to this script.
#
# f - file name, relative to the data directory
#
# Returns the path as a String.
def dataPath(f)
  # File.join inserts the separators for us instead of hand-concatenating "/data/"
  File.join(File.dirname(__FILE__), "data", f)
end
9
+
10
+ setTestDir()
11
+
12
+ # Various unit tests for state machines, character range sets, etc.
13
+ #
14
+ class TestComponent < Test::Unit::TestCase
15
+
16
+ SKIPMOST = false # skip most of the tests?
17
+
18
# Forward an add request to the working set @cs.
#
# lower - first argument handed to @cs.add
# upper - second argument handed to @cs.add (nil when omitted)
def add(lower, upper = nil)
  @cs.add(lower, upper)
end
21
+
22
# Forward a remove request to the working set @cs.
#
# lower - first argument handed to @cs.remove
# upper - second argument handed to @cs.remove (nil when omitted)
def remove(lower, upper = nil)
  @cs.remove(lower, upper)
end
25
+
26
# Park the current working set in @ct, then start a fresh working set via prep.
# @ct is the operand later used by isect and diff.
def swap
  @ct = @cs
  prep
end
30
+
31
# Intersect the working set @cs, in place, with the parked set @ct.
def isect
  @cs.intersect!(@ct)
end
34
+
35
# Subtract the parked set @ct from the working set @cs, in place.
def diff
  @cs.difference!(@ct)
end
38
+
39
# Assert that an array of integers matches a whitespace-separated expectation.
#
# s   - expected values as a String, e.g. '50 51 72 81' ('' means empty)
# arr - actual values; defaults to @cs.array when omitted
#
# Raises ArgumentError if s contains something that is not a base-10 integer,
# so a typo in an expectation cannot silently be compared as 0.
def equ(s, arr = nil)
  arr ||= @cs.array
  # Strict parsing: String#to_i would quietly map garbage to 0
  expected = s.split.map { |n| Integer(n, 10) }
  assert_equal(expected, arr)
end
44
+
45
# Apply a sequence of add calls to one working set and check the set's
# array after every step.
def test_add
  prep

  # Each row: arguments for add, then the expected contents afterwards
  [
    [[72, 81], '72 81'],
    [[50],     '50 51 72 81'],
    [[75, 77], '50 51 72 81'],
    [[72, 78], '50 51 72 81'],
    [[70, 78], '50 51 70 81'],
    [[60],     '50 51 60 61 70 81'],
    [[40],     '40 41 50 51 60 61 70 81'],
    [[41],     '40 42 50 51 60 61 70 81'],
    [[81],     '40 42 50 51 60 61 70 82'],
    [[83],     '40 42 50 51 60 61 70 82 83 84'],
    [[49, 84], '40 42 49 84'],
    [[39, 86], '39 86']
  ].each do |args, expected|
    add(*args)
    equ(expected)
  end
end
84
+
85
# Exercise in-place intersection of the working set with the parked set.
def test_intersect
  # One wide range intersected with a narrower one
  prep
  add(39, 86)
  swap
  add(50, 70)
  isect
  equ('50 70')

  # The previous result becomes the parked set
  swap
  add(20, 25)
  add(35, 51)
  add(62, 68)
  add(72, 80)
  isect
  equ('50 51 62 68')

  # Parked set is empty
  prep
  swap
  add(50, 70)
  isect
  equ('')

  # Identical ranges on both sides
  add(50, 70)
  swap
  add(50, 70)
  isect
  equ('50 70')

  # Ranges that only touch at 25
  prep
  add(20, 25)
  swap
  add(25, 30)
  isect
  equ('')
end
122
+
123
# Exercise in-place difference: each scenario subtracts the same parked set
# (built from add(20,30) and add(40,50)) from a different working set.
def test_difference
  prep
  add(20, 30)
  add(40, 50)
  swap

  # swap has already left a fresh working set, so no prep for the first scenario
  add(20, 80)
  diff
  equ('30 40 50 80')

  # Each row: ranges to add to a fresh working set, then the expected remainder
  [
    [[[19, 32]],           '19 20 30 32'],
    [[[30, 40]],           '30 40'],
    [[[20, 30], [40, 50]], ''],
    [[[19, 30], [40, 50]], '19 20'],
    [[[20, 30], [40, 51]], '50 51']
  ].each do |ranges, expected|
    prep
    ranges.each { |lower, upper| add(lower, upper) }
    diff
    equ(expected)
  end
end
164
+
165
# Replace the working set @cs with a newly constructed CodeSet.
def prep
  @cs = CodeSet.new
end
168
+
169
# add must raise RangeError when upper is below lower, or equal to it.
def test_illegalRange
  prep

  [[60, 50], [60, 60]].each do |lower, upper|
    assert_raise(RangeError) { add(lower, upper) }
  end
end
175
+
176
# Forward a negate request for the given bounds to the working set @cs.
def neg(lower, upper)
  @cs.negate(lower, upper)
end
179
+
180
# Exercise negate, both with explicit bounds and with no arguments.
def test_negate
  prep
  [[10, 15], [20, 25], [30], [40, 45]].each { |range| add(*range) }
  equ('10 15 20 25 30 31 40 45')

  # Successive negations applied to the same set
  [
    [22, 37, '10 15 20 22 25 30 31 37 40 45'],
    [25, 27, '10 15 20 22 27 30 31 37 40 45'],
    [15, 20, '10 22 27 30 31 37 40 45']
  ].each do |lower, upper, expected|
    neg(lower, upper)
    equ(expected)
  end

  # negate called without bounds
  prep
  add(10, 22)
  @cs.negate
  equ('0 10 22 1114112')

  # Negating exactly the range that is present
  prep
  add(10, 20)
  neg(10, 20)
  equ('')

  # Each row negates a different range of a fresh set built from add(10,20) and add(30,40)
  [
    [5, 10,  '5 20 30 40'],
    [25, 30, '10 20 25 40'],
    [40, 50, '10 20 30 50'],
    [41, 50, '10 20 30 40 41 50'],
    [15, 35, '10 15 20 30 35 40']
  ].each do |lower, upper, expected|
    prep
    add(10, 20)
    add(30, 40)
    neg(lower, upper)
    equ(expected)
  end
end
235
+
236
# Apply a sequence of remove calls to one working set and check the set's
# array after every step.
def test_remove
  prep
  add(10, 20)
  add(30, 40)
  remove(29, 41)
  equ('10 20')

  add(30, 40)
  equ('10 20 30 40')

  # Each row: arguments for remove, then the expected contents afterwards
  [
    [[20, 30], '10 20 30 40'],
    [[15, 35], '10 15 35 40'],
    [[10, 15], '35 40'],
    [[35],     '36 40'],
    [[40],     '36 40'],
    [[38],     '36 38 39 40'],
    [[37, 39], '36 37 39 40']
  ].each do |args, expected|
    remove(*args)
    equ(expected)
  end
end
265
+
266
+
267
# Describe every element of a collection, separated by single spaces.
#
# st - collection responding to each
#
# Returns a String made of d(x) for each element x, joined with ' '
# ('' for an empty collection).
def dset(st)
  # Gather the pieces and join once rather than growing a string with +=
  pieces = []
  st.each { |x| pieces << d(x) }
  pieces.join(' ')
end
277
+
278
+
279
# Replace @par with a newly constructed RangePartition.
def newpar
  @par = RangePartition.new
end
282
+
283
# Register a CodeSet built from the given bounds with the partition @par.
#
# lower - first argument for CodeSet.new
# upper - second argument for CodeSet.new; lower + 1 is used when omitted
def addset(lower, upper = nil)
  @par.addSet(CodeSet.new(lower, upper || lower + 1))
end
288
+
289
# Run the partition @par on the working set @cs and flatten the outcome.
#
# @par.apply(@cs) yields a list of objects; the array of each one is
# concatenated, in order, into a single flat Array.
#
# Stores the flat Array in @parResult and returns it.
def apply
  @parResult = @par.apply(@cs).flat_map(&:array)
end
297
+
298
# Build a partition from four sets, then check how several working sets are
# split up by it. Skipped when SKIPMOST is set.
def test_partition
  return if SKIPMOST

  newpar
  [[20, 30], [25, 33], [37], [40, 50]].each { |bounds| addset(*bounds) }
  @par.prepare

  @par.generatePDF

  # Each row: arguments for add on a fresh working set, then the expected flattened result
  [
    [[25, 33], '25 30 30 33'],
    [[37],     '37 38'],
    [[40, 50], '40 50']
  ].each do |args, expected|
    prep
    add(*args)
    apply
    equ(expected, @parResult)
  end
end
328
+
329
+
330
+ REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
331
+
332
+ TOKEN_SCRIPT2 = <<'END'
333
+ sep: \s
334
+ tku: a(a|b)*
335
+ tkv: b(aa|b*)
336
+ tkw: bbb
337
+ END
338
+
339
+
340
# Parse REGEX_SCRIPT, then emit PDFs for the NFA, its reversal, and the DFA
# built from it. Makes no assertions; it passes as long as nothing raises.
# Skipped when SKIPMOST is set.
def test_buildDFA
  return if SKIPMOST

  parser = RegParse.new(REGEX_SCRIPT)
  nfaStart = parser.startState
  parser.endState.finalState = true

  nfaStart.generatePDF("nfa")
  nfaStart.reverseNFA.generatePDF("reversed")
  DFABuilder.nfa_to_dfa(nfaStart).generatePDF("buildDFA")
end
355
+
356
# Parse REGEX_SCRIPT, convert the NFA to a DFA, duplicate the DFA, and emit a
# PDF at each stage. Makes no assertions; it passes as long as nothing raises.
# Skipped when SKIPMOST is set.
def test_cvtNFAToDFA
  return if SKIPMOST

  parser = RegParse.new(REGEX_SCRIPT)
  nfaStart = parser.startState
  parser.endState.finalState = true
  nfaStart.generatePDF("nfa")

  dfa = DFABuilder.nfa_to_dfa(nfaStart)
  dfa.generatePDF("dfa")

  # duplicateNFA hands back two values; only the old-to-new map is used here
  oldToNewMap, _maxId = dfa.duplicateNFA(42)
  oldToNewMap[dfa].generatePDF("dfa_duplicated")
end
372
+
373
+
374
# Parse TOKEN_SCRIPT2 and emit a PDF of the resulting DFA's start state.
# Makes no assertions; it passes as long as nothing raises.
# Skipped when SKIPMOST is set.
def test_TokenDefParser
  return if SKIPMOST

  parsed = TokenDefParser.new(TOKEN_SCRIPT2)
  parsed.dfa.startState.generatePDF("TokenDFA")
end
385
+
386
+
387
+
388
# Sample input text and sample token definitions, read from the data
# directory once, while the class body is evaluated.
@@sampleText, @@sampleTokens = %w[sampletext.txt sampletokens.txt].map { |name| readTextFile(dataPath(name)) }
390
+
391
# Compile the sample token definitions into a DFA and return a Tokenizer
# that runs that DFA over the sample text.
def makeTok
  Tokenizer.new(DFA.dfa_from_script(@@sampleTokens), @@sampleText)
end
395
+
396
+
397
# Read every token, push them all back, then read them again, this time
# passing each token's name to read. Makes no explicit assertions; it passes
# as long as nothing raises. Skipped when SKIPMOST is set.
def test_Tokenizer
  return if SKIPMOST

  tok = makeTok

  tokList = []
  tokList.push(tok.read) while tok.hasNext

  tok.unread(tokList.size)

  tokList.each { |t1| tok.read(tok.nameOf(t1)) }
end
415
+
416
+
417
+
418
+
419
# After four plain reads, a read that names "signedint" must raise
# TokenizerException. Skipped when SKIPMOST is set.
def test_TokenizerMissingExpected
  return if SKIPMOST

  assert_raise(TokenizerException) do
    tok = makeTok
    4.times { tok.read }
    tok.read("signedint")
  end
end
434
+
435
# Compile the sample token definitions with a destination path and check
# that the DFA file appears on disk, then build a Tokenizer from the result.
def test_CompileDFAToDisk
  destPath = withinTestDir("sampletokens_dfa.txt")

  # Start from a clean slate so the existence check below is meaningful
  File.delete(destPath) if File.exist?(destPath)
  assert(!File.exist?(destPath))

  dfa = DFA.dfa_from_script(@@sampleTokens, destPath)
  assert(File.exist?(destPath))

  Tokenizer.new(dfa, @@sampleText)
end
452
+
453
+
454
# Build a Tokenizer over the sample text from the DFA file stored in the test directory.
#
# That file is also written by test_CompileDFAToDisk. If it does not exist yet
# (for instance when one of the tests using this helper runs first, or alone),
# it is compiled here, so those tests no longer depend on execution order.
#
# Returns the new Tokenizer.
def prep2
  dfaPath = withinTestDir("sampletokens_dfa.txt")
  # dfa_from_script writes the compiled DFA to the path given as its second argument
  DFA.dfa_from_script(@@sampleTokens, dfaPath) unless File.exist?(dfaPath)
  Tokenizer.new(DFA.dfa_from_file(dfaPath), @@sampleText)
end
459
+
460
# Read and print every token; the first time a "DO" token is seen, push four
# tokens back and carry on reading. Makes no explicit assertions.
def test_readAndUnread
  tok = prep2
  pushedBack = false
  while tok.hasNext
    t = tok.read
    pr("Read %-8s %s\n", tok.nameOf(t), d(t))

    # Only push back once, and only when the token just read is a DO
    next if pushedBack
    next unless tok.nameOf(t) == "DO"

    pr(" ...pushing back four tokens...\n")
    tok.unread(4)
    pushedBack = true
    pr(" ...and resuming...\n")
  end
end
475
+
476
# Reading with the expected name "BRCL" right after a "DO" token must raise
# TokenizerException.
def test_UnrecognizedToken
  assert_raise(TokenizerException) do
    tok = prep2
    while tok.hasNext
      t = tok.read
      # This read is the one expected to raise
      tok.read("BRCL") if tok.nameOf(t) == "DO"
    end
  end
end
487
+
488
# One more read after hasNext has turned false must raise TokenizerException.
def test_ReadPastEnd
  assert_raise(TokenizerException) do
    tok = prep2
    tok.read while tok.hasNext
    tok.read
  end
end
497
+
498
# Pushing back more tokens than have been read must raise TokenizerException.
#
# After the 15th read the block unreads 5 and 7, reads 1, then unreads 4 and 3:
# 18 tokens pushed back against only 16 read.
def test_UnreadBeforeStart
  assert_raise(TokenizerException) do
    tok = prep2
    readCount = 0
    while tok.hasNext
      tok.read
      readCount += 1
      next unless readCount == 15

      tok.unread(5)
      tok.unread(7)
      tok.read
      tok.unread(4)
      tok.unread(3)
    end
    tok.read
  end
end
517
+ end
518
+
519
+
data/test/testcmds ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+ ruby tokncompile.rb < sampletokens.txt > __testdata__/compiled.txt
3
+ ruby toknprocess.rb __testdata__/compiled.txt sampletext.txt
4
+
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tokn
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ platform: ruby
6
+ authors:
7
+ - Jeff Sember
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-07 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'Given a script containing token descriptions (each a regular expression),
14
+ tokn compiles a DFA which it then uses to efficiently extract a sequence of tokens
15
+ from source files. '
16
+ email: jpsember@gmail.com
17
+ executables:
18
+ - tokncompile
19
+ - toknprocess
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - lib/tokn.rb
24
+ - lib/tokn/code_set.rb
25
+ - lib/tokn/dfa.rb
26
+ - lib/tokn/dfa_builder.rb
27
+ - lib/tokn/range_partition.rb
28
+ - lib/tokn/reg_parse.rb
29
+ - lib/tokn/state.rb
30
+ - lib/tokn/token_defn_parser.rb
31
+ - lib/tokn/tokenizer.rb
32
+ - lib/tokn/tokn_const.rb
33
+ - lib/tokn/tools.rb
34
+ - bin/tokncompile
35
+ - bin/toknprocess
36
+ - README.txt
37
+ - test/data/sampletext.txt
38
+ - test/data/sampletokens.txt
39
+ - test/simple.rb
40
+ - test/test.rb
41
+ - test/testcmds
42
+ - figures/sample_dfa.pdf
43
+ homepage:
44
+ licenses: []
45
+ metadata: {}
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 2.0.2
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: Extracts tokens from source files
66
+ test_files:
67
+ - test/simple.rb
68
+ - test/test.rb
69
+ has_rdoc: