tokn 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +6 -0
- data/{README.txt → README.md} +3 -4
- data/lib/tokn/range_partition.rb +2 -2
- data/lib/tokn/state.rb +3 -3
- data/lib/tokn/tools.rb +177 -23
- data/test/_OLD_/Example1.rb +81 -0
- data/test/{data → _OLD_/data}/compileddfa.txt +0 -0
- data/test/_OLD_/data/sampletokens_dfa.txt +1 -0
- data/test/{test.rb → _OLD_/test.rb} +0 -0
- data/test/{testcmds → _OLD_/testcmds} +0 -0
- data/test/{data/sampletext.txt → sampletext.txt} +0 -0
- data/test/{data/sampletokens.txt → sampletokens.txt} +0 -0
- data/test/test_tokn.rb +584 -0
- metadata +12 -11
- data/test/Example1.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84933fe2966d19908c447c84cbccb9179c4e351d
|
4
|
+
data.tar.gz: d1a6fae299cdd6c9b57961bfde7c879e1a786a48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 022689701816eb3fb37690579b275194cbafc53d31591b7fb87d93a6cacafc50318dd964d28697ed1181ceb288d700214191cc41d6362cf4221e110e7c885531
|
7
|
+
data.tar.gz: 96ee32c79d3e12ba8b01cd27fdfb9ff0bfc6ff518972358dc2fbac04f4c8842fccd6f8722c994daba299327718915074f853cc3c9125c0e22dffe3ef19fd189d
|
data/CHANGELOG.txt
CHANGED
data/{README.txt → README.md}
RENAMED
@@ -1,11 +1,10 @@
|
|
1
|
-
# @markup markdown
|
2
|
-
|
3
1
|
tokn
|
4
2
|
=======
|
5
|
-
|
3
|
+
Tokn is a ruby gem that generates automatons from regular expressions to extract tokens from text files.
|
6
4
|
|
7
|
-
Written
|
5
|
+
Written by Jeff Sember, March 2013.
|
8
6
|
|
7
|
+
[Source code documentation can be found here.](http://rubydoc.info/gems/tokn/frames)
|
9
8
|
|
10
9
|
|
11
10
|
Description of the problem
|
data/lib/tokn/range_partition.rb
CHANGED
@@ -79,7 +79,7 @@ module ToknInternal
|
|
79
79
|
|
80
80
|
# Generate a .dot file, and from that, a PDF, for debug purposes
|
81
81
|
#
|
82
|
-
def generatePDF(name = "partition")
|
82
|
+
def generatePDF(test_dir = nil, name = "partition")
|
83
83
|
if !@prepared
|
84
84
|
raise IllegalStateException
|
85
85
|
end
|
@@ -103,7 +103,7 @@ module ToknInternal
|
|
103
103
|
g += "\n}\n"
|
104
104
|
g.gsub!( /'/, '"' )
|
105
105
|
|
106
|
-
dotToPDF(g,name)
|
106
|
+
dotToPDF(g,name, test_dir)
|
107
107
|
|
108
108
|
end
|
109
109
|
|
data/lib/tokn/state.rb
CHANGED
@@ -102,11 +102,11 @@ module ToknInternal
|
|
102
102
|
# Generate a PDF of the state machine;
|
103
103
|
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
104
104
|
#
|
105
|
-
def generatePDF(title = "nfa")
|
105
|
+
def generatePDF(dir = nil, title = "nfa")
|
106
106
|
stateList = {}
|
107
107
|
|
108
108
|
startState = self
|
109
|
-
genAux(stateList, startState)
|
109
|
+
genAux( stateList, startState)
|
110
110
|
|
111
111
|
g = ""
|
112
112
|
g += "digraph "+title+" {\n"
|
@@ -135,7 +135,7 @@ module ToknInternal
|
|
135
135
|
g += "\n}\n"
|
136
136
|
g.gsub!( /'/, '"' )
|
137
137
|
|
138
|
-
dotToPDF(g,title)
|
138
|
+
dotToPDF(g,title,dir)
|
139
139
|
end
|
140
140
|
|
141
141
|
|
data/lib/tokn/tools.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'set'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
# Various utility and debug convenience functions.
|
4
5
|
#
|
@@ -45,38 +46,41 @@ def myAssert(cond, *msg)
|
|
45
46
|
end
|
46
47
|
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
def setTestDir(d = nil)
|
51
|
-
$testDir = d || File.join(Dir.home,"__test__")
|
52
|
-
end
|
49
|
+
## Set test directory. If nil, sets to home directory + "__test__"
|
50
|
+
##
|
51
|
+
#def setTestDir(d = nil)
|
52
|
+
# $testDir = d || File.join(Dir.home,"__test__")
|
53
|
+
#end
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
def withinTestDir(relPath = nil)
|
61
|
-
if !$testDir
|
62
|
-
raise IllegalStateException, "No test directory has been defined"
|
63
|
-
end
|
64
|
-
if !File.directory?($testDir)
|
65
|
-
Dir::mkdir($testDir)
|
66
|
-
end
|
67
|
-
relPath ? File.join($testDir,relPath) : $testDir
|
68
|
-
end
|
55
|
+
## Get a path within the test directory;
|
56
|
+
## create test directory if it doesn't exist.
|
57
|
+
##
|
58
|
+
## relPath : if nil, returns the test directory; else
|
59
|
+
## returns the test directory joined to this one
|
60
|
+
##
|
61
|
+
#def withinTestDir(relPath = nil)
|
62
|
+
# if !$testDir
|
63
|
+
# raise IllegalStateException, "No test directory has been defined"
|
64
|
+
# end
|
65
|
+
# if !File.directory?($testDir)
|
66
|
+
# Dir::mkdir($testDir)
|
67
|
+
# end
|
68
|
+
# relPath ? File.join($testDir,relPath) : $testDir
|
69
|
+
#end
|
69
70
|
|
70
71
|
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
71
72
|
# in the test directory.
|
72
73
|
#
|
73
74
|
# It does this by making a system call to the 'dot' utility.
|
74
75
|
#
|
75
|
-
def dotToPDF(dotFile, name = "")
|
76
|
+
def dotToPDF(dotFile, name = "", test_dir = nil)
|
76
77
|
gr = dotFile
|
77
|
-
|
78
|
+
|
79
|
+
raise ArgumentError if !test_dir
|
80
|
+
|
81
|
+
dotPath = File.join(test_dir,".__mygraph__.dot")
|
78
82
|
writeTextFile(dotPath,gr)
|
79
|
-
destName =
|
83
|
+
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
80
84
|
system("dot -Tpdf "+dotPath+" -o "+destName)
|
81
85
|
end
|
82
86
|
|
@@ -190,6 +194,16 @@ def readTextFile(path)
|
|
190
194
|
contents
|
191
195
|
end
|
192
196
|
|
197
|
+
# Delete a file or directory, if it exists.
|
198
|
+
# Caution! If directory, deletes all files and subdirectories.
|
199
|
+
def remove_file_or_dir(pth)
|
200
|
+
if File.directory?(pth)
|
201
|
+
FileUtils.remove_dir(pth)
|
202
|
+
elsif File.file?(pth)
|
203
|
+
FileUtils.remove_file(pth)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
193
207
|
# Method that takes a code block as an argument to
|
194
208
|
# achieve the same functionality as Java/C++'s
|
195
209
|
# do {
|
@@ -206,3 +220,143 @@ end
|
|
206
220
|
class IllegalStateException < Exception
|
207
221
|
end
|
208
222
|
|
223
|
+
|
224
|
+
# Convenience method to detect if a script is being run
|
225
|
+
# e.g. as a 'main' method (for debug purposes only).
|
226
|
+
# If so, it changes the current directory to the
|
227
|
+
# directory containing the script (if such a directory exists).
|
228
|
+
#
|
229
|
+
# @param file pass __FILE__ in here
|
230
|
+
# @return true if so
|
231
|
+
#
|
232
|
+
def main?(file)
|
233
|
+
|
234
|
+
scr = $0
|
235
|
+
|
236
|
+
# The test/unit framework seems to be adding a suffix ": xxx#xxx.."
|
237
|
+
# to the .rb filename, so adjust in this case
|
238
|
+
i = scr.index(".rb: ")
|
239
|
+
if i
|
240
|
+
scr = scr[0...i+3]
|
241
|
+
end
|
242
|
+
|
243
|
+
if (ret = (file == scr))
|
244
|
+
dr = File.dirname(file)
|
245
|
+
if File.directory?(dr)
|
246
|
+
Dir.chdir(dr)
|
247
|
+
end
|
248
|
+
end
|
249
|
+
ret
|
250
|
+
end
|
251
|
+
|
252
|
+
if defined? Test::Unit
|
253
|
+
|
254
|
+
# A simple extension to Ruby's Test::Unit class that provides
|
255
|
+
# suite-level setup/teardown methods.
|
256
|
+
#
|
257
|
+
# If test suite functionality is desired within a script,
|
258
|
+
# then require 'test/unit' before requiring 'tools.rb'.
|
259
|
+
# This will cause the following class, MyTestSuite, to be defined.
|
260
|
+
#
|
261
|
+
# The user's test script can define subclasses of this,
|
262
|
+
# and declare test methods with the name 'test_xxxx', where
|
263
|
+
# xxxx is lexicographically between 01 and zz.
|
264
|
+
#
|
265
|
+
# There are two levels of setup/teardown called : suite level, and
|
266
|
+
# method level. For example, if the user's test class performs two tests:
|
267
|
+
#
|
268
|
+
# def test_b ... end
|
269
|
+
# def test_c ... end
|
270
|
+
#
|
271
|
+
# Then the test framework will make these calls:
|
272
|
+
#
|
273
|
+
# suite_setup
|
274
|
+
#
|
275
|
+
# method_setup
|
276
|
+
# test_b
|
277
|
+
# method_teardown
|
278
|
+
#
|
279
|
+
# method_setup
|
280
|
+
# test_c
|
281
|
+
# method_teardown
|
282
|
+
#
|
283
|
+
# suite_teardown
|
284
|
+
#
|
285
|
+
# Notes
|
286
|
+
# -----
|
287
|
+
# 1) The usual setup / teardown methods should NOT be overridden; instead,
|
288
|
+
# use the method_xxx alternatives.
|
289
|
+
#
|
290
|
+
# 2) The base class implementations of method_/suite_xxx do nothing.
|
291
|
+
#
|
292
|
+
# 3) The number of test cases reported may be higher than you expect, since
|
293
|
+
# there are additional test methods defined by the TestSuite class to
|
294
|
+
# implement the suite setup / teardown functionality.
|
295
|
+
#
|
296
|
+
# 4) Avoid naming test methods that fall outside of test_01 ... test_zz.
|
297
|
+
#
|
298
|
+
class MyTestSuite < Test::Unit::TestCase
|
299
|
+
|
300
|
+
# This is named to be the FIRST test called. It
|
301
|
+
# will do suite-level setup, and nothing else.
|
302
|
+
def test_00_setup
|
303
|
+
@@suiteSetup = true
|
304
|
+
suite_setup()
|
305
|
+
end
|
306
|
+
|
307
|
+
# This is named to be the LAST test called. It
|
308
|
+
# will do suite-level teardown, and nothing else.
|
309
|
+
def test_zzzzzz_teardown
|
310
|
+
suite_teardown()
|
311
|
+
@@suiteSetup = false
|
312
|
+
end
|
313
|
+
|
314
|
+
# True if called within suite-level setup/teardown window
|
315
|
+
def _suite_active?
|
316
|
+
!(@__name__ == "test_00_setup" || @__name__ == "test_zzzzzz_teardown")
|
317
|
+
end
|
318
|
+
|
319
|
+
def setup
|
320
|
+
if _suite_active?
|
321
|
+
# If only a specific test was requested, the
|
322
|
+
# suite setup may not have run... if not, do it now.
|
323
|
+
if !defined? @@suiteSetup
|
324
|
+
suite_setup
|
325
|
+
end
|
326
|
+
return
|
327
|
+
end
|
328
|
+
method_setup
|
329
|
+
end
|
330
|
+
|
331
|
+
def out_dir
|
332
|
+
"_output_"
|
333
|
+
end
|
334
|
+
|
335
|
+
def out_path(f)
|
336
|
+
File.join(out_dir,f)
|
337
|
+
end
|
338
|
+
|
339
|
+
def teardown
|
340
|
+
if _suite_active?
|
341
|
+
if !defined? @@suiteSetup
|
342
|
+
suite_teardown
|
343
|
+
end
|
344
|
+
return
|
345
|
+
end
|
346
|
+
method_teardown
|
347
|
+
end
|
348
|
+
|
349
|
+
def suite_setup
|
350
|
+
end
|
351
|
+
|
352
|
+
def suite_teardown
|
353
|
+
end
|
354
|
+
|
355
|
+
def method_setup
|
356
|
+
end
|
357
|
+
|
358
|
+
def method_teardown
|
359
|
+
end
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require_relative '../lib/tokn/tokenizer'
|
2
|
+
|
3
|
+
class Example1
|
4
|
+
|
5
|
+
include Tokn
|
6
|
+
|
7
|
+
def dataPath(f)
|
8
|
+
File.dirname(__FILE__)+"/data/"+f
|
9
|
+
end
|
10
|
+
|
11
|
+
setTestDir()
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@sampleText = readTextFile(dataPath("sampletext.txt"))
|
15
|
+
end
|
16
|
+
|
17
|
+
def makeTok
|
18
|
+
@dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
|
19
|
+
Tokenizer.new(@dfa, @sampleText, "WS")
|
20
|
+
end
|
21
|
+
|
22
|
+
def go
|
23
|
+
puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
|
24
|
+
|
25
|
+
t = makeTok
|
26
|
+
|
27
|
+
s = ''
|
28
|
+
while t.hasNext do
|
29
|
+
|
30
|
+
tk = t.peek
|
31
|
+
|
32
|
+
if t.nameOf(tk) == 'BROP'
|
33
|
+
lst = t.readSequenceIf('BROP DO ID BRCL')
|
34
|
+
if lst
|
35
|
+
s << " ...read BROP DO ID sequence...\n"
|
36
|
+
lst.each{ |x| s << " #{d(x)}\n"}
|
37
|
+
next
|
38
|
+
else
|
39
|
+
s << " ...couldn't find sequence...\n"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
tk = t.read
|
44
|
+
s << d(tk) << "\n"
|
45
|
+
|
46
|
+
end
|
47
|
+
exp =<<"EXP"
|
48
|
+
(line 1, col 1) : speed
|
49
|
+
(line 1, col 6) : =
|
50
|
+
(line 1, col 7) : 42
|
51
|
+
(line 1, col 9) : gravity
|
52
|
+
(line 1, col 16) : =
|
53
|
+
(line 1, col 17) : -9.80
|
54
|
+
...couldn't find sequence...
|
55
|
+
(line 1, col 22) : {
|
56
|
+
(line 1, col 23) : color
|
57
|
+
(line 1, col 29) : =
|
58
|
+
(line 1, col 30) : green
|
59
|
+
(line 1, col 35) : }
|
60
|
+
(line 1, col 36) : title
|
61
|
+
(line 1, col 41) : =
|
62
|
+
(line 1, col 42) : 'This is a string with \' an escaped delimiter'
|
63
|
+
(line 1, col 89) : if
|
64
|
+
(line 1, col 91) : gravity
|
65
|
+
(line 1, col 98) : ==
|
66
|
+
(line 1, col 100) : 12
|
67
|
+
...read BROP DO ID sequence...
|
68
|
+
(line 1, col 102) : {
|
69
|
+
(line 1, col 103) : do
|
70
|
+
(line 1, col 105) : something
|
71
|
+
(line 1, col 114) : }
|
72
|
+
(line 1, col 115) : do
|
73
|
+
(line 1, col 117) : something_else
|
74
|
+
EXP
|
75
|
+
assert(s == exp)
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
Example1.new.go
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/test/test_tokn.rb
ADDED
@@ -0,0 +1,584 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
require_relative '../lib/tokn/tools.rb'
|
4
|
+
req('range_partition dfa dfa_builder tokenizer token_defn_parser')
|
5
|
+
|
6
|
+
# Get access to Tokn namespace
|
7
|
+
|
8
|
+
#setTestDir()
|
9
|
+
|
10
|
+
#SINGLETEST = "test_ps_output_multi"
|
11
|
+
if defined? SINGLETEST
|
12
|
+
if main?(__FILE__)
|
13
|
+
ARGV.concat("-n #{SINGLETEST}".split)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class TestTokn < MyTestSuite
|
18
|
+
|
19
|
+
include Tokn, ToknInternal
|
20
|
+
|
21
|
+
# def data_file(f)
|
22
|
+
# File.join("data",f)
|
23
|
+
# # File.dirname(__FILE__)+"/data/"+f
|
24
|
+
# end
|
25
|
+
|
26
|
+
def suite_setup
|
27
|
+
|
28
|
+
# Make current directory = the one containing this script
|
29
|
+
main?(__FILE__)
|
30
|
+
|
31
|
+
if !File.directory?(out_dir)
|
32
|
+
Dir.mkdir(out_dir)
|
33
|
+
end
|
34
|
+
|
35
|
+
@@sampleText = readTextFile("sampletext.txt")
|
36
|
+
@@sampleTokens = readTextFile("sampletokens.txt")
|
37
|
+
end
|
38
|
+
|
39
|
+
# def withinTestDir(f)
|
40
|
+
# File.join(@@testDir,"__source__")
|
41
|
+
|
42
|
+
def suite_teardown
|
43
|
+
remove_file_or_dir(out_dir)
|
44
|
+
end
|
45
|
+
|
46
|
+
def method_setup
|
47
|
+
end
|
48
|
+
|
49
|
+
def method_teardown
|
50
|
+
end
|
51
|
+
|
52
|
+
def add(lower, upper = nil)
|
53
|
+
@cs.add(lower,upper)
|
54
|
+
end
|
55
|
+
|
56
|
+
def remove(lower, upper = nil)
|
57
|
+
@cs.remove(lower,upper)
|
58
|
+
end
|
59
|
+
|
60
|
+
def swap
|
61
|
+
@ct = @cs
|
62
|
+
prep
|
63
|
+
end
|
64
|
+
|
65
|
+
def isect
|
66
|
+
@cs.intersect!(@ct)
|
67
|
+
end
|
68
|
+
|
69
|
+
def diff
|
70
|
+
@cs.difference!(@ct)
|
71
|
+
end
|
72
|
+
|
73
|
+
def equ(s, arr = nil)
|
74
|
+
arr ||= @cs.array
|
75
|
+
ia = s.split.map{|n| n.to_i}
|
76
|
+
assert_equal(ia,arr)
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_100_add
|
80
|
+
prep
|
81
|
+
|
82
|
+
add(72,81)
|
83
|
+
equ '72 81'
|
84
|
+
|
85
|
+
add(50)
|
86
|
+
equ '50 51 72 81'
|
87
|
+
|
88
|
+
add(75,77)
|
89
|
+
equ '50 51 72 81'
|
90
|
+
|
91
|
+
add(72,78)
|
92
|
+
equ '50 51 72 81'
|
93
|
+
|
94
|
+
add(70,78)
|
95
|
+
equ '50 51 70 81'
|
96
|
+
|
97
|
+
add 60
|
98
|
+
equ '50 51 60 61 70 81'
|
99
|
+
|
100
|
+
add 40
|
101
|
+
equ '40 41 50 51 60 61 70 81'
|
102
|
+
|
103
|
+
add 41
|
104
|
+
equ '40 42 50 51 60 61 70 81'
|
105
|
+
|
106
|
+
add 81
|
107
|
+
equ '40 42 50 51 60 61 70 82'
|
108
|
+
|
109
|
+
add 83
|
110
|
+
equ '40 42 50 51 60 61 70 82 83 84'
|
111
|
+
|
112
|
+
add 49,84
|
113
|
+
equ '40 42 49 84'
|
114
|
+
|
115
|
+
add 39,86
|
116
|
+
equ '39 86'
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_110_intersect
|
120
|
+
prep
|
121
|
+
add 39,86
|
122
|
+
swap
|
123
|
+
add 50,70
|
124
|
+
isect
|
125
|
+
equ '50 70'
|
126
|
+
|
127
|
+
swap
|
128
|
+
add 20,25
|
129
|
+
add 35,51
|
130
|
+
add 62,68
|
131
|
+
add 72,80
|
132
|
+
isect
|
133
|
+
equ '50 51 62 68'
|
134
|
+
|
135
|
+
prep
|
136
|
+
swap
|
137
|
+
add 50,70
|
138
|
+
isect
|
139
|
+
equ ''
|
140
|
+
|
141
|
+
add 50,70
|
142
|
+
swap
|
143
|
+
add 50,70
|
144
|
+
isect
|
145
|
+
equ '50 70'
|
146
|
+
|
147
|
+
prep
|
148
|
+
add 20,25
|
149
|
+
swap
|
150
|
+
add 25,30
|
151
|
+
isect
|
152
|
+
equ ''
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
def test_120_difference
|
157
|
+
prep
|
158
|
+
add 20,30
|
159
|
+
add 40,50
|
160
|
+
swap
|
161
|
+
|
162
|
+
add 20,80
|
163
|
+
diff
|
164
|
+
equ '30 40 50 80'
|
165
|
+
|
166
|
+
prep
|
167
|
+
add 19,32
|
168
|
+
diff
|
169
|
+
equ '19 20 30 32'
|
170
|
+
|
171
|
+
prep
|
172
|
+
add 30,40
|
173
|
+
diff
|
174
|
+
equ '30 40'
|
175
|
+
|
176
|
+
prep
|
177
|
+
add 20,30
|
178
|
+
add 40,50
|
179
|
+
diff
|
180
|
+
equ ''
|
181
|
+
|
182
|
+
prep
|
183
|
+
add 19,30
|
184
|
+
add 40,50
|
185
|
+
diff
|
186
|
+
equ '19 20'
|
187
|
+
|
188
|
+
prep
|
189
|
+
add 20,30
|
190
|
+
add 40,51
|
191
|
+
diff
|
192
|
+
equ '50 51'
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
def prep
|
197
|
+
@cs = CodeSet.new
|
198
|
+
end
|
199
|
+
|
200
|
+
def test_130_illegalRange
|
201
|
+
prep
|
202
|
+
|
203
|
+
assert_raise(RangeError) { add 60,50 }
|
204
|
+
assert_raise(RangeError) { add 60,60 }
|
205
|
+
end
|
206
|
+
|
207
|
+
def neg(lower, upper)
|
208
|
+
@cs.negate lower, upper
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_140_negate
|
212
|
+
prep
|
213
|
+
add 10,15
|
214
|
+
add 20,25
|
215
|
+
add 30
|
216
|
+
add 40,45
|
217
|
+
equ '10 15 20 25 30 31 40 45'
|
218
|
+
neg 22,37
|
219
|
+
equ '10 15 20 22 25 30 31 37 40 45'
|
220
|
+
neg 25,27
|
221
|
+
equ '10 15 20 22 27 30 31 37 40 45'
|
222
|
+
neg 15,20
|
223
|
+
equ '10 22 27 30 31 37 40 45'
|
224
|
+
|
225
|
+
prep
|
226
|
+
add 10,22
|
227
|
+
@cs.negate
|
228
|
+
equ '0 10 22 1114112'
|
229
|
+
|
230
|
+
prep
|
231
|
+
add 10,20
|
232
|
+
neg 10,20
|
233
|
+
equ ''
|
234
|
+
|
235
|
+
prep
|
236
|
+
add 10,20
|
237
|
+
add 30,40
|
238
|
+
neg 5,10
|
239
|
+
equ '5 20 30 40'
|
240
|
+
|
241
|
+
prep
|
242
|
+
add 10,20
|
243
|
+
add 30,40
|
244
|
+
neg 25,30
|
245
|
+
equ '10 20 25 40'
|
246
|
+
|
247
|
+
prep
|
248
|
+
add 10,20
|
249
|
+
add 30,40
|
250
|
+
neg 40,50
|
251
|
+
equ '10 20 30 50'
|
252
|
+
|
253
|
+
prep
|
254
|
+
add 10,20
|
255
|
+
add 30,40
|
256
|
+
neg 41,50
|
257
|
+
equ '10 20 30 40 41 50'
|
258
|
+
|
259
|
+
prep
|
260
|
+
add 10,20
|
261
|
+
add 30,40
|
262
|
+
neg 15,35
|
263
|
+
equ '10 15 20 30 35 40'
|
264
|
+
end
|
265
|
+
|
266
|
+
def test_150_remove
|
267
|
+
|
268
|
+
prep
|
269
|
+
add 10,20
|
270
|
+
add 30,40
|
271
|
+
remove 29,41
|
272
|
+
equ '10 20'
|
273
|
+
|
274
|
+
add 30,40
|
275
|
+
equ '10 20 30 40'
|
276
|
+
|
277
|
+
remove 20,30
|
278
|
+
equ '10 20 30 40'
|
279
|
+
|
280
|
+
remove 15,35
|
281
|
+
equ '10 15 35 40'
|
282
|
+
|
283
|
+
remove 10,15
|
284
|
+
equ '35 40'
|
285
|
+
remove 35
|
286
|
+
equ '36 40'
|
287
|
+
remove 40
|
288
|
+
equ '36 40'
|
289
|
+
remove 38
|
290
|
+
equ '36 38 39 40'
|
291
|
+
remove 37,39
|
292
|
+
equ '36 37 39 40'
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
def dset(st)
|
297
|
+
s = ''
|
298
|
+
st.each{|x|
|
299
|
+
if s.length > 0
|
300
|
+
s+= ' '
|
301
|
+
end
|
302
|
+
s += d(x)
|
303
|
+
}
|
304
|
+
return s
|
305
|
+
end
|
306
|
+
|
307
|
+
def newpar
|
308
|
+
@par = RangePartition.new
|
309
|
+
end
|
310
|
+
|
311
|
+
def addset(lower, upper = nil)
|
312
|
+
upper ||= lower + 1
|
313
|
+
r = CodeSet.new(lower,upper)
|
314
|
+
@par.addSet(r)
|
315
|
+
end
|
316
|
+
|
317
|
+
def apply
|
318
|
+
list = @par.apply(@cs)
|
319
|
+
res = []
|
320
|
+
list.each do |x|
|
321
|
+
res.concat x.array
|
322
|
+
end
|
323
|
+
@parResult = res
|
324
|
+
end
|
325
|
+
|
326
|
+
def test_160_partition
|
327
|
+
|
328
|
+
newpar
|
329
|
+
addset(20,30)
|
330
|
+
addset(25,33)
|
331
|
+
addset(37)
|
332
|
+
addset(40,50)
|
333
|
+
@par.prepare
|
334
|
+
|
335
|
+
@par.generatePDF(out_dir)
|
336
|
+
|
337
|
+
prep
|
338
|
+
add 25,33
|
339
|
+
|
340
|
+
apply
|
341
|
+
equ('25 30 30 33', @parResult)
|
342
|
+
|
343
|
+
prep
|
344
|
+
add 37
|
345
|
+
apply
|
346
|
+
equ('37 38', @parResult)
|
347
|
+
|
348
|
+
prep
|
349
|
+
add 40,50
|
350
|
+
apply
|
351
|
+
equ('40 50', @parResult)
|
352
|
+
|
353
|
+
end
|
354
|
+
|
355
|
+
REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
|
356
|
+
|
357
|
+
TOKEN_SCRIPT2 = <<'END'
|
358
|
+
sep: \s
|
359
|
+
tku: a(a|b)*
|
360
|
+
tkv: b(aa|b*)
|
361
|
+
tkw: bbb
|
362
|
+
END
|
363
|
+
|
364
|
+
def test_170_build_DFA
|
365
|
+
|
366
|
+
x = RegParse.new(REGEX_SCRIPT)
|
367
|
+
s = x.startState
|
368
|
+
x.endState.finalState = true
|
369
|
+
|
370
|
+
s.generatePDF(out_dir,"nfa")
|
371
|
+
|
372
|
+
r = s.reverseNFA()
|
373
|
+
r.generatePDF(out_dir,"reversed")
|
374
|
+
|
375
|
+
dfa = DFABuilder.nfa_to_dfa(s)
|
376
|
+
dfa.generatePDF(out_dir,"buildDFA")
|
377
|
+
end
|
378
|
+
|
379
|
+
def test_180_cvt_NFA_to_DFA
|
380
|
+
|
381
|
+
x = RegParse.new(REGEX_SCRIPT)
|
382
|
+
s = x.startState
|
383
|
+
x.endState.finalState = true
|
384
|
+
|
385
|
+
s.generatePDF(out_dir,"nfa")
|
386
|
+
|
387
|
+
dfa = DFABuilder.nfa_to_dfa(s)
|
388
|
+
dfa.generatePDF(out_dir,"dfa")
|
389
|
+
|
390
|
+
oldToNewMap, maxId2 = dfa.duplicateNFA(42)
|
391
|
+
dfa2 = oldToNewMap[dfa]
|
392
|
+
dfa2.generatePDF(out_dir,"dfa_duplicated")
|
393
|
+
end
|
394
|
+
|
395
|
+
def test_190_TokenDefParser
|
396
|
+
|
397
|
+
s = TOKEN_SCRIPT2
|
398
|
+
|
399
|
+
td = TokenDefParser.new(s)
|
400
|
+
|
401
|
+
tokDFA = td.dfa
|
402
|
+
tokDFA.startState.generatePDF(out_dir,"TokenDFA")
|
403
|
+
|
404
|
+
end
|
405
|
+
|
406
|
+
def makeTok
|
407
|
+
dfa = DFA.from_script(@@sampleTokens)
|
408
|
+
Tokenizer.new(dfa, @@sampleText)
|
409
|
+
end
|
410
|
+
|
411
|
+
def test_200_Tokenizer
|
412
|
+
|
413
|
+
tok = makeTok
|
414
|
+
|
415
|
+
tokList = []
|
416
|
+
while tok.hasNext
|
417
|
+
t = tok.read
|
418
|
+
tokList.push(t)
|
419
|
+
end
|
420
|
+
|
421
|
+
tok.unread(tokList.size)
|
422
|
+
|
423
|
+
tokList.each do |t1|
|
424
|
+
tName = tok.nameOf(t1)
|
425
|
+
t2 = tok.read(tName)
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
def test_210_Tokenizer_Missing_Expected
|
430
|
+
|
431
|
+
assert_raise TokenizerException do
|
432
|
+
|
433
|
+
tok = makeTok
|
434
|
+
|
435
|
+
tok.read
|
436
|
+
tok.read
|
437
|
+
tok.read
|
438
|
+
tok.read
|
439
|
+
tok.read("signedint")
|
440
|
+
end
|
441
|
+
|
442
|
+
end
|
443
|
+
|
444
|
+
def test_220_CompileDFAToDisk
|
445
|
+
tokScript = @@sampleTokens
|
446
|
+
testText = @@sampleText
|
447
|
+
|
448
|
+
destPath = out_path("sampletokens_dfa.txt")
|
449
|
+
|
450
|
+
if File.exist?(destPath)
|
451
|
+
File.delete(destPath)
|
452
|
+
end
|
453
|
+
assert(!File.exist?(destPath))
|
454
|
+
|
455
|
+
dfa = DFA.from_script(tokScript, destPath)
|
456
|
+
assert(File.exist?(destPath))
|
457
|
+
|
458
|
+
tok = Tokenizer.new(dfa, testText)
|
459
|
+
|
460
|
+
end
|
461
|
+
|
462
|
+
def prep2
|
463
|
+
testText = @@sampleText
|
464
|
+
dfa = DFA.from_file(out_path("sampletokens_dfa.txt"))
|
465
|
+
tok = Tokenizer.new(dfa, testText)
|
466
|
+
end
|
467
|
+
|
468
|
+
def test_230_readAndUnread
|
469
|
+
tok = prep2
|
470
|
+
unread = false
|
471
|
+
while tok.hasNext
|
472
|
+
t = tok.read
|
473
|
+
# pr("Read %-8s %s\n",tok.nameOf(t),d(t))
|
474
|
+
|
475
|
+
if !unread && tok.nameOf(t) == "DO"
|
476
|
+
# pr(" ...pushing back four tokens...\n")
|
477
|
+
tok.unread(4)
|
478
|
+
unread = true
|
479
|
+
# pr(" ...and resuming...\n")
|
480
|
+
end
|
481
|
+
end
|
482
|
+
end
|
483
|
+
|
484
|
+
def test_240_UnrecognizedToken
|
485
|
+
assert_raise TokenizerException do
|
486
|
+
tok = prep2
|
487
|
+
while tok.hasNext
|
488
|
+
t = tok.read
|
489
|
+
if tok.nameOf(t) == "DO"
|
490
|
+
tok.read("BRCL") # <== this should raise problem
|
491
|
+
end
|
492
|
+
end
|
493
|
+
end
|
494
|
+
end
|
495
|
+
|
496
|
+
def test_250_ReadPastEnd
|
497
|
+
assert_raise TokenizerException do
|
498
|
+
tok = prep2
|
499
|
+
while tok.hasNext
|
500
|
+
t = tok.read
|
501
|
+
end
|
502
|
+
tok.read
|
503
|
+
end
|
504
|
+
end
|
505
|
+
|
506
|
+
def test_260_UnreadBeforeStart
|
507
|
+
|
508
|
+
assert_raise TokenizerException do
|
509
|
+
tok = prep2
|
510
|
+
k = 0
|
511
|
+
while tok.hasNext
|
512
|
+
t = tok.read
|
513
|
+
k += 1
|
514
|
+
if k == 15
|
515
|
+
tok.unread(5)
|
516
|
+
tok.unread(7)
|
517
|
+
tok.read()
|
518
|
+
tok.unread(4)
|
519
|
+
tok.unread(3)
|
520
|
+
end
|
521
|
+
end
|
522
|
+
tok.read
|
523
|
+
end
|
524
|
+
end
|
525
|
+
|
526
|
+
def test_270_filter_ws
|
527
|
+
|
528
|
+
dfa = DFA.from_script_file("sampletokens.txt")
|
529
|
+
t = Tokenizer.new(dfa, readTextFile("sampletext.txt"), "WS")
|
530
|
+
|
531
|
+
s = ''
|
532
|
+
while t.hasNext do
|
533
|
+
|
534
|
+
tk = t.peek
|
535
|
+
|
536
|
+
if t.nameOf(tk) == 'BROP'
|
537
|
+
lst = t.readSequenceIf('BROP DO ID BRCL')
|
538
|
+
if lst
|
539
|
+
s << " ...read BROP DO ID sequence...\n"
|
540
|
+
lst.each{ |x| s << " #{d(x)}\n"}
|
541
|
+
next
|
542
|
+
else
|
543
|
+
s << " ...couldn't find sequence...\n"
|
544
|
+
end
|
545
|
+
end
|
546
|
+
|
547
|
+
tk = t.read
|
548
|
+
s << d(tk) << "\n"
|
549
|
+
|
550
|
+
end
|
551
|
+
exp =<<"EXP"
|
552
|
+
(line 1, col 1) : speed
|
553
|
+
(line 1, col 6) : =
|
554
|
+
(line 1, col 7) : 42
|
555
|
+
(line 1, col 9) : gravity
|
556
|
+
(line 1, col 16) : =
|
557
|
+
(line 1, col 17) : -9.80
|
558
|
+
...couldn't find sequence...
|
559
|
+
(line 1, col 22) : {
|
560
|
+
(line 1, col 23) : color
|
561
|
+
(line 1, col 29) : =
|
562
|
+
(line 1, col 30) : green
|
563
|
+
(line 1, col 35) : }
|
564
|
+
(line 1, col 36) : title
|
565
|
+
(line 1, col 41) : =
|
566
|
+
(line 1, col 42) : 'This is a string with \\' an escaped delimiter'
|
567
|
+
(line 1, col 89) : if
|
568
|
+
(line 1, col 91) : gravity
|
569
|
+
(line 1, col 98) : ==
|
570
|
+
(line 1, col 100) : 12
|
571
|
+
...read BROP DO ID sequence...
|
572
|
+
(line 1, col 102) : {
|
573
|
+
(line 1, col 103) : do
|
574
|
+
(line 1, col 105) : something
|
575
|
+
(line 1, col 114) : }
|
576
|
+
(line 1, col 115) : do
|
577
|
+
(line 1, col 117) : something_else
|
578
|
+
EXP
|
579
|
+
|
580
|
+
assert(s.strip == exp.strip)
|
581
|
+
end
|
582
|
+
|
583
|
+
end
|
584
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Sember
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "Given a script containing token descriptions (each a regular expression),
|
14
14
|
\ntokn compiles an automaton which it can then use to efficiently convert a \ntext
|
@@ -34,13 +34,15 @@ files:
|
|
34
34
|
- bin/tokncompile
|
35
35
|
- bin/toknprocess
|
36
36
|
- CHANGELOG.txt
|
37
|
-
- README.txt
|
38
|
-
- test/Example1.rb
|
39
|
-
- test/data/compileddfa.txt
|
40
|
-
- test/data/
|
41
|
-
- test/
|
42
|
-
- test/
|
43
|
-
- test/
|
37
|
+
- README.md
|
38
|
+
- test/_OLD_/Example1.rb
|
39
|
+
- test/_OLD_/data/compileddfa.txt
|
40
|
+
- test/_OLD_/data/sampletokens_dfa.txt
|
41
|
+
- test/_OLD_/test.rb
|
42
|
+
- test/_OLD_/testcmds
|
43
|
+
- test/sampletext.txt
|
44
|
+
- test/sampletokens.txt
|
45
|
+
- test/test_tokn.rb
|
44
46
|
homepage: http://www.cs.ubc.ca/~jpsember/
|
45
47
|
licenses:
|
46
48
|
- mit
|
@@ -66,6 +68,5 @@ signing_key:
|
|
66
68
|
specification_version: 4
|
67
69
|
summary: Extracts tokens from source files
|
68
70
|
test_files:
|
69
|
-
- test/
|
70
|
-
- test/test.rb
|
71
|
+
- test/test_tokn.rb
|
71
72
|
has_rdoc:
|
data/test/Example1.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require_relative '../lib/tokn/tokenizer'
|
2
|
-
|
3
|
-
class Example1
|
4
|
-
|
5
|
-
include Tokn
|
6
|
-
|
7
|
-
def dataPath(f)
|
8
|
-
File.dirname(__FILE__)+"/data/"+f
|
9
|
-
end
|
10
|
-
|
11
|
-
setTestDir()
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@sampleText = readTextFile(dataPath("sampletext.txt"))
|
15
|
-
end
|
16
|
-
|
17
|
-
def makeTok
|
18
|
-
@dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
|
19
|
-
Tokenizer.new(@dfa, @sampleText, "WS")
|
20
|
-
end
|
21
|
-
|
22
|
-
def go
|
23
|
-
puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
|
24
|
-
|
25
|
-
t = makeTok
|
26
|
-
|
27
|
-
while t.hasNext do
|
28
|
-
|
29
|
-
tk = t.peek
|
30
|
-
|
31
|
-
if t.nameOf(tk) == 'BROP'
|
32
|
-
lst = t.readSequenceIf('BROP DO ID BRCL')
|
33
|
-
if lst
|
34
|
-
pr(" ...read BROP DO ID sequence...\n")
|
35
|
-
lst.each{ |x| pr(" %s\n",d(x))}
|
36
|
-
next
|
37
|
-
else
|
38
|
-
pr(" ...couldn't find sequence...\n")
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
tk = t.read
|
43
|
-
pr("%s\n",d(tk))
|
44
|
-
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
Example1.new.go
|