tokn 0.0.4

data/test/test.rb ADDED
@@ -0,0 +1,519 @@
+ require 'test/unit'
+ require_relative '../lib/tokn/tools.rb'
+ req('range_partition dfa dfa_builder tokenizer')
+
+
+ def dataPath(f)
+   File.dirname(__FILE__)+"/data/"+f
+ end
+
+ setTestDir()
+
+ # Various unit tests for state machines, character range sets, etc.
+ #
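+ # As exercised by the tests below, a CodeSet stores a set of code points
+ # as a flat array of alternating bounds [lo1, hi1, lo2, hi2, ...], each
+ # lower bound inclusive and each upper bound exclusive (so add(50) yields
+ # the pair 50 51); the equ() helper compares this array against an
+ # expected space-separated list of bounds.
+ #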
+ class TestComponent < Test::Unit::TestCase
+
+   SKIPMOST = false # when true, skip most of the tests
+
+   def add(lower, upper = nil)
+     @cs.add(lower,upper)
+   end
+
+   def remove(lower, upper = nil)
+     @cs.remove(lower,upper)
+   end
+
+   def swap
+     @ct = @cs
+     prep
+   end
+
+   def isect
+     @cs.intersect!(@ct)
+   end
+
+   def diff
+     @cs.difference!(@ct)
+   end
+
+   def equ(s, arr = nil)
+     arr ||= @cs.array
+     ia = s.split.map{|n| n.to_i}
+     assert_equal(ia,arr)
+   end
+
+   def test_add
+     prep
+
+     add(72,81)
+     equ '72 81'
+
+     add(50)
+     equ '50 51 72 81'
+
+     add(75,77)
+     equ '50 51 72 81'
+
+     add(72,78)
+     equ '50 51 72 81'
+
+     add(70,78)
+     equ '50 51 70 81'
+
+     add 60
+     equ '50 51 60 61 70 81'
+
+     add 40
+     equ '40 41 50 51 60 61 70 81'
+
+     add 41
+     equ '40 42 50 51 60 61 70 81'
+
+     add 81
+     equ '40 42 50 51 60 61 70 82'
+
+     add 83
+     equ '40 42 50 51 60 61 70 82 83 84'
+
+     add 49,84
+     equ '40 42 49 84'
+
+     add 39,86
+     equ '39 86'
+   end
+
+   def test_intersect
+     prep
+     add 39,86
+     swap
+     add 50,70
+     isect
+     equ '50 70'
+
+     swap
+     add 20,25
+     add 35,51
+     add 62,68
+     add 72,80
+     isect
+     equ '50 51 62 68'
+
+     prep
+     swap
+     add 50,70
+     isect
+     equ ''
+
+     add 50,70
+     swap
+     add 50,70
+     isect
+     equ '50 70'
+
+     prep
+     add 20,25
+     swap
+     add 25,30
+     isect
+     equ ''
+   end
+
+   def test_difference
+     prep
+     add 20,30
+     add 40,50
+     swap
+
+     add 20,80
+     diff
+     equ '30 40 50 80'
+
+     prep
+     add 19,32
+     diff
+     equ '19 20 30 32'
+
+     prep
+     add 30,40
+     diff
+     equ '30 40'
+
+     prep
+     add 20,30
+     add 40,50
+     diff
+     equ ''
+
+     prep
+     add 19,30
+     add 40,50
+     diff
+     equ '19 20'
+
+     prep
+     add 20,30
+     add 40,51
+     diff
+     equ '50 51'
+   end
+
+   def prep
+     @cs = CodeSet.new
+   end
+
+   def test_illegalRange
+     prep
+
+     assert_raise(RangeError) { add 60,50 }
+     assert_raise(RangeError) { add 60,60 }
+   end
+
+   def neg lower, upper
+     @cs.negate lower, upper
+   end
+
+   def test_negate
+     prep
+     add 10,15
+     add 20,25
+     add 30
+     add 40,45
+     equ '10 15 20 25 30 31 40 45'
+     neg 22,37
+     equ '10 15 20 22 25 30 31 37 40 45'
+     neg 25,27
+     equ '10 15 20 22 27 30 31 37 40 45'
+     neg 15,20
+     equ '10 22 27 30 31 37 40 45'
+
+     prep
+     add 10,22
+     @cs.negate
+     equ '0 10 22 1114112'
+
+     prep
+     add 10,20
+     neg 10,20
+     equ ''
+
+     prep
+     add 10,20
+     add 30,40
+     neg 5,10
+     equ '5 20 30 40'
+
+     prep
+     add 10,20
+     add 30,40
+     neg 25,30
+     equ '10 20 25 40'
+
+     prep
+     add 10,20
+     add 30,40
+     neg 40,50
+     equ '10 20 30 50'
+
+     prep
+     add 10,20
+     add 30,40
+     neg 41,50
+     equ '10 20 30 40 41 50'
+
+     prep
+     add 10,20
+     add 30,40
+     neg 15,35
+     equ '10 15 20 30 35 40'
+   end
+
+   def test_remove
+     prep
+     add 10,20
+     add 30,40
+     remove 29,41
+     equ '10 20'
+
+     add 30,40
+     equ '10 20 30 40'
+
+     remove 20,30
+     equ '10 20 30 40'
+
+     remove 15,35
+     equ '10 15 35 40'
+
+     remove 10,15
+     equ '35 40'
+     remove 35
+     equ '36 40'
+     remove 40
+     equ '36 40'
+     remove 38
+     equ '36 38 39 40'
+     remove 37,39
+     equ '36 37 39 40'
+   end
+
+   def dset(st)
+     s = ''
+     st.each{|x|
+       if s.length > 0
+         s += ' '
+       end
+       s += d(x)
+     }
+     return s
+   end
+
+   def newpar
+     @par = RangePartition.new
+   end
+
+   def addset(lower, upper = nil)
+     upper ||= lower + 1
+     r = CodeSet.new(lower,upper)
+     @par.addSet(r)
+   end
+
+   def apply
+     list = @par.apply(@cs)
+     res = []
+     list.each do |x|
+       res.concat x.array
+     end
+     @parResult = res
+   end
+
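+   # A RangePartition splits any CodeSet passed to apply() along the
+   # boundaries of the sets registered via addSet(); e.g. with [20,30)
+   # registered, applying [25,33) yields the pieces [25,30) and [30,33).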
+   def test_partition
+     return if SKIPMOST
+
+     newpar
+     addset(20,30)
+     addset(25,33)
+     addset(37)
+     addset(40,50)
+     @par.prepare
+
+     @par.generatePDF()
+
+     prep
+     add 25,33
+     apply
+     equ('25 30 30 33', @parResult)
+
+     prep
+     add 37
+     apply
+     equ('37 38', @parResult)
+
+     prep
+     add 40,50
+     apply
+     equ('40 50', @parResult)
+   end
+
+   REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
+
+   TOKEN_SCRIPT2 = <<'END'
+ sep: \s
+ tku: a(a|b)*
+ tkv: b(aa|b*)
+ tkw: bbb
+ END
+
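+   # A token-definition script declares one token per line in the form
+   # "name: regexp"; TOKEN_SCRIPT2 above declares four such tokens.
+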
+   def test_buildDFA
+     return if SKIPMOST
+
+     x = RegParse.new(REGEX_SCRIPT)
+     s = x.startState
+     x.endState.finalState = true
+
+     s.generatePDF("nfa")
+
+     r = s.reverseNFA()
+     r.generatePDF("reversed")
+
+     dfa = DFABuilder.nfa_to_dfa(s)
+     dfa.generatePDF("buildDFA")
+   end
+
+   def test_cvtNFAToDFA
+     return if SKIPMOST
+
+     x = RegParse.new(REGEX_SCRIPT)
+     s = x.startState
+     x.endState.finalState = true
+
+     s.generatePDF("nfa")
+
+     dfa = DFABuilder.nfa_to_dfa(s)
+     dfa.generatePDF("dfa")
+
+     oldToNewMap, maxId2 = dfa.duplicateNFA(42)
+     dfa2 = oldToNewMap[dfa]
+     dfa2.generatePDF("dfa_duplicated")
+   end
+
374
+ def test_TokenDefParser
375
+ return if SKIPMOST
376
+
377
+ s = TOKEN_SCRIPT2
378
+
379
+ td = TokenDefParser.new(s)
380
+
381
+ tokDFA = td.dfa
382
+ tokDFA.startState.generatePDF("TokenDFA")
383
+
384
+ end
385
+
386
+
387
+
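+   # Sample source text and token definitions shared by the tokenizer
+   # tests below; loaded once, when the class body is executed.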
+   @@sampleText = readTextFile(dataPath("sampletext.txt"))
+   @@sampleTokens = readTextFile(dataPath("sampletokens.txt"))
+
+   def makeTok
+     dfa = DFA.dfa_from_script(@@sampleTokens)
+     Tokenizer.new(dfa, @@sampleText)
+   end
+
+   def test_Tokenizer
+     return if SKIPMOST
+
+     tok = makeTok
+
+     tokList = []
+     while tok.hasNext
+       t = tok.read
+       tokList.push(t)
+     end
+
+     tok.unread(tokList.size)
+
+     tokList.each do |t1|
+       tName = tok.nameOf(t1)
+       t2 = tok.read(tName)
+     end
+   end
+
+   def test_TokenizerMissingExpected
+     return if SKIPMOST
+
+     assert_raise TokenizerException do
+       tok = makeTok
+
+       tok.read
+       tok.read
+       tok.read
+       tok.read
+       tok.read("signedint")
+     end
+   end
+
+   def test_CompileDFAToDisk
+     tokScript = @@sampleTokens
+     testText = @@sampleText
+
+     destPath = withinTestDir("sampletokens_dfa.txt")
+
+     if File.exist?(destPath)
+       File.delete(destPath)
+     end
+     assert(!File.exist?(destPath))
+
+     dfa = DFA.dfa_from_script(tokScript, destPath)
+     assert(File.exist?(destPath))
+
+     tok = Tokenizer.new(dfa, testText)
+   end
+
+   def prep2
+     testText = @@sampleText
+     dfa = DFA.dfa_from_file(withinTestDir("sampletokens_dfa.txt"))
+     tok = Tokenizer.new(dfa, testText)
+   end
+
+   def test_readAndUnread
+     tok = prep2
+     unread = false
+     while tok.hasNext
+       t = tok.read
+       pr("Read %-8s %s\n",tok.nameOf(t),d(t))
+
+       if !unread && tok.nameOf(t) == "DO"
+         pr(" ...pushing back four tokens...\n")
+         tok.unread(4)
+         unread = true
+         pr(" ...and resuming...\n")
+       end
+     end
+   end
+
+   def test_UnrecognizedToken
+     assert_raise TokenizerException do
+       tok = prep2
+       while tok.hasNext
+         t = tok.read
+         if tok.nameOf(t) == "DO"
+           tok.read("BRCL") # expecting a BRCL token here should raise the exception
+         end
+       end
+     end
+   end
+
+   def test_ReadPastEnd
+     assert_raise TokenizerException do
+       tok = prep2
+       while tok.hasNext
+         t = tok.read
+       end
+       tok.read
+     end
+   end
+
+   def test_UnreadBeforeStart
+     assert_raise TokenizerException do
+       tok = prep2
+       k = 0
+       while tok.hasNext
+         t = tok.read
+         k += 1
+         if k == 15
+           tok.unread(5)
+           tok.unread(7)
+           tok.read()
+           tok.unread(4)
+           tok.unread(3)
+         end
+       end
+       tok.read
+     end
+   end
+ end
+
data/test/testcmds ADDED
@@ -0,0 +1,4 @@
+ #!/bin/bash
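+ # Compile the token definitions into a DFA, then run the tokenizer over
+ # the sample text using the compiled DFA.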
+ ruby tokncompile.rb < sampletokens.txt > __testdata__/compiled.txt
+ ruby toknprocess.rb __testdata__/compiled.txt sampletext.txt
+
metadata ADDED
@@ -0,0 +1,69 @@
+ --- !ruby/object:Gem::Specification
+ name: tokn
+ version: !ruby/object:Gem::Version
+   version: 0.0.4
+ platform: ruby
+ authors:
+ - Jeff Sember
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-03-07 00:00:00.000000000 Z
+ dependencies: []
+ description: 'Given a script containing token descriptions (each a regular expression),
+   tokn compiles a DFA which it then uses to efficiently extract a sequence of tokens
+   from source files. '
+ email: jpsember@gmail.com
+ executables:
+ - tokncompile
+ - toknprocess
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/tokn.rb
+ - lib/tokn/code_set.rb
+ - lib/tokn/dfa.rb
+ - lib/tokn/dfa_builder.rb
+ - lib/tokn/range_partition.rb
+ - lib/tokn/reg_parse.rb
+ - lib/tokn/state.rb
+ - lib/tokn/token_defn_parser.rb
+ - lib/tokn/tokenizer.rb
+ - lib/tokn/tokn_const.rb
+ - lib/tokn/tools.rb
+ - bin/tokncompile
+ - bin/toknprocess
+ - README.txt
+ - test/data/sampletext.txt
+ - test/data/sampletokens.txt
+ - test/simple.rb
+ - test/test.rb
+ - test/testcmds
+ - figures/sample_dfa.pdf
+ homepage:
+ licenses: []
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.0.2
+ signing_key:
+ specification_version: 4
+ summary: Extracts tokens from source files
+ test_files:
+ - test/simple.rb
+ - test/test.rb
+ has_rdoc:
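
For context, the gem description above corresponds to the flow exercised in test/test.rb. A minimal usage sketch, assuming only names that appear in those tests (DFA.dfa_from_script, Tokenizer#hasNext, #read, #nameOf) and hypothetical input files:

    require 'tokn'

    # Build a DFA from a token-definition script ("name: regexp" per line),
    # then tokenize a source file with it, printing each token's name.
    dfa = DFA.dfa_from_script(File.read("sampletokens.txt"))
    tok = Tokenizer.new(dfa, File.read("sampletext.txt"))
    while tok.hasNext
      t = tok.read
      puts tok.nameOf(t)
    end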