tokn 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +6 -0
- data/{README.txt → README.md} +3 -4
- data/lib/tokn/range_partition.rb +2 -2
- data/lib/tokn/state.rb +3 -3
- data/lib/tokn/tools.rb +177 -23
- data/test/_OLD_/Example1.rb +81 -0
- data/test/{data → _OLD_/data}/compileddfa.txt +0 -0
- data/test/_OLD_/data/sampletokens_dfa.txt +1 -0
- data/test/{test.rb → _OLD_/test.rb} +0 -0
- data/test/{testcmds → _OLD_/testcmds} +0 -0
- data/test/{data/sampletext.txt → sampletext.txt} +0 -0
- data/test/{data/sampletokens.txt → sampletokens.txt} +0 -0
- data/test/test_tokn.rb +584 -0
- metadata +12 -11
- data/test/Example1.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84933fe2966d19908c447c84cbccb9179c4e351d
|
4
|
+
data.tar.gz: d1a6fae299cdd6c9b57961bfde7c879e1a786a48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 022689701816eb3fb37690579b275194cbafc53d31591b7fb87d93a6cacafc50318dd964d28697ed1181ceb288d700214191cc41d6362cf4221e110e7c885531
|
7
|
+
data.tar.gz: 96ee32c79d3e12ba8b01cd27fdfb9ff0bfc6ff518972358dc2fbac04f4c8842fccd6f8722c994daba299327718915074f853cc3c9125c0e22dffe3ef19fd189d
|
data/CHANGELOG.txt
CHANGED
data/{README.txt → README.md}
RENAMED
@@ -1,11 +1,10 @@
|
|
1
|
-
# @markup markdown
|
2
|
-
|
3
1
|
tokn
|
4
2
|
=======
|
5
|
-
|
3
|
+
Tokn is a ruby gem that generates automatons from regular expressions to extract tokens from text files.
|
6
4
|
|
7
|
-
Written
|
5
|
+
Written by Jeff Sember, March 2013.
|
8
6
|
|
7
|
+
[Source code documentation can be found here.](http://rubydoc.info/gems/tokn/frames)
|
9
8
|
|
10
9
|
|
11
10
|
Description of the problem
|
data/lib/tokn/range_partition.rb
CHANGED
@@ -79,7 +79,7 @@ module ToknInternal
|
|
79
79
|
|
80
80
|
# Generate a .dot file, and from that, a PDF, for debug purposes
|
81
81
|
#
|
82
|
-
def generatePDF(name = "partition")
|
82
|
+
def generatePDF(test_dir = nil, name = "partition")
|
83
83
|
if !@prepared
|
84
84
|
raise IllegalStateException
|
85
85
|
end
|
@@ -103,7 +103,7 @@ module ToknInternal
|
|
103
103
|
g += "\n}\n"
|
104
104
|
g.gsub!( /'/, '"' )
|
105
105
|
|
106
|
-
dotToPDF(g,name)
|
106
|
+
dotToPDF(g,name, test_dir)
|
107
107
|
|
108
108
|
end
|
109
109
|
|
data/lib/tokn/state.rb
CHANGED
@@ -102,11 +102,11 @@ module ToknInternal
|
|
102
102
|
# Generate a PDF of the state machine;
|
103
103
|
# Makes a system call to the dot utility to convert a .dot file to a .pdf
|
104
104
|
#
|
105
|
-
def generatePDF(title = "nfa")
|
105
|
+
def generatePDF(dir = nil, title = "nfa")
|
106
106
|
stateList = {}
|
107
107
|
|
108
108
|
startState = self
|
109
|
-
genAux(stateList, startState)
|
109
|
+
genAux( stateList, startState)
|
110
110
|
|
111
111
|
g = ""
|
112
112
|
g += "digraph "+title+" {\n"
|
@@ -135,7 +135,7 @@ module ToknInternal
|
|
135
135
|
g += "\n}\n"
|
136
136
|
g.gsub!( /'/, '"' )
|
137
137
|
|
138
|
-
dotToPDF(g,title)
|
138
|
+
dotToPDF(g,title,dir)
|
139
139
|
end
|
140
140
|
|
141
141
|
|
data/lib/tokn/tools.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'set'
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
# Various utility and debug convenience functions.
|
4
5
|
#
|
@@ -45,38 +46,41 @@ def myAssert(cond, *msg)
|
|
45
46
|
end
|
46
47
|
|
47
48
|
|
48
|
-
|
49
|
-
|
50
|
-
def setTestDir(d = nil)
|
51
|
-
$testDir = d || File.join(Dir.home,"__test__")
|
52
|
-
end
|
49
|
+
## Set test directory. If nil, sets to home directory + "__test__"
|
50
|
+
##
|
51
|
+
#def setTestDir(d = nil)
|
52
|
+
# $testDir = d || File.join(Dir.home,"__test__")
|
53
|
+
#end
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
def withinTestDir(relPath = nil)
|
61
|
-
if !$testDir
|
62
|
-
raise IllegalStateException, "No test directory has been defined"
|
63
|
-
end
|
64
|
-
if !File.directory?($testDir)
|
65
|
-
Dir::mkdir($testDir)
|
66
|
-
end
|
67
|
-
relPath ? File.join($testDir,relPath) : $testDir
|
68
|
-
end
|
55
|
+
## Get a path within the test directory;
|
56
|
+
## create test directory if it doesn't exist.
|
57
|
+
##
|
58
|
+
## relPath : if nil, returns the test directory; else
|
59
|
+
## returns the test directory joined to this one
|
60
|
+
##
|
61
|
+
#def withinTestDir(relPath = nil)
|
62
|
+
# if !$testDir
|
63
|
+
# raise IllegalStateException, "No test directory has been defined"
|
64
|
+
# end
|
65
|
+
# if !File.directory?($testDir)
|
66
|
+
# Dir::mkdir($testDir)
|
67
|
+
# end
|
68
|
+
# relPath ? File.join($testDir,relPath) : $testDir
|
69
|
+
#end
|
69
70
|
|
70
71
|
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
71
72
|
# in the test directory.
|
72
73
|
#
|
73
74
|
# It does this by making a system call to the 'dot' utility.
|
74
75
|
#
|
75
|
-
def dotToPDF(dotFile, name = "")
|
76
|
+
def dotToPDF(dotFile, name = "", test_dir = nil)
|
76
77
|
gr = dotFile
|
77
|
-
|
78
|
+
|
79
|
+
raise ArgumentError if !test_dir
|
80
|
+
|
81
|
+
dotPath = File.join(test_dir,".__mygraph__.dot")
|
78
82
|
writeTextFile(dotPath,gr)
|
79
|
-
destName =
|
83
|
+
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
80
84
|
system("dot -Tpdf "+dotPath+" -o "+destName)
|
81
85
|
end
|
82
86
|
|
@@ -190,6 +194,16 @@ def readTextFile(path)
|
|
190
194
|
contents
|
191
195
|
end
|
192
196
|
|
197
|
+
# Delete a file or directory, if it exists.
|
198
|
+
# Caution! If directory, deletes all files and subdirectories.
|
199
|
+
def remove_file_or_dir(pth)
|
200
|
+
if File.directory?(pth)
|
201
|
+
FileUtils.remove_dir(pth)
|
202
|
+
elsif File.file?(pth)
|
203
|
+
FileUtils.remove_file(pth)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
193
207
|
# Method that takes a code block as an argument to
|
194
208
|
# achieve the same functionality as Java/C++'s
|
195
209
|
# do {
|
@@ -206,3 +220,143 @@ end
|
|
206
220
|
class IllegalStateException < Exception
|
207
221
|
end
|
208
222
|
|
223
|
+
|
224
|
+
# Convenience method to detect if a script is being run
|
225
|
+
# e.g. as a 'main' method (for debug purposes only).
|
226
|
+
# If so, it changes the current directory to the
|
227
|
+
# directory containing the script (if such a directory exists).
|
228
|
+
#
|
229
|
+
# @param file pass __FILE__ in here
|
230
|
+
# @return true if so
|
231
|
+
#
|
232
|
+
def main?(file)
|
233
|
+
|
234
|
+
scr = $0
|
235
|
+
|
236
|
+
# The test/unit framework seems to be adding a suffix ": xxx#xxx.."
|
237
|
+
# to the .rb filename, so adjust in this case
|
238
|
+
i = scr.index(".rb: ")
|
239
|
+
if i
|
240
|
+
scr = scr[0...i+3]
|
241
|
+
end
|
242
|
+
|
243
|
+
if (ret = (file == scr))
|
244
|
+
dr = File.dirname(file)
|
245
|
+
if File.directory?(dr)
|
246
|
+
Dir.chdir(dr)
|
247
|
+
end
|
248
|
+
end
|
249
|
+
ret
|
250
|
+
end
|
251
|
+
|
252
|
+
if defined? Test::Unit
|
253
|
+
|
254
|
+
# A simple extension to Ruby's Test::Unit class that provides
|
255
|
+
# suite-level setup/teardown methods.
|
256
|
+
#
|
257
|
+
# If test suite functionality is desired within a script,
|
258
|
+
# then require 'test/unit' before requiring 'tools.rb'.
|
259
|
+
# This will cause the following class, MyTestSuite, to be defined.
|
260
|
+
#
|
261
|
+
# The user's test script can define subclasses of this,
|
262
|
+
# and declare test methods with the name 'test_xxxx', where
|
263
|
+
# xxxx is lexicographically between 01 and zz.
|
264
|
+
#
|
265
|
+
# There are two levels of setup/teardown called : suite level, and
|
266
|
+
# method level. For example, if the user's test class performs two tests:
|
267
|
+
#
|
268
|
+
# def test_b ... end
|
269
|
+
# def test_c ... end
|
270
|
+
#
|
271
|
+
# Then the test framework will make these calls:
|
272
|
+
#
|
273
|
+
# suite_setup
|
274
|
+
#
|
275
|
+
# method_setup
|
276
|
+
# test_b
|
277
|
+
# method_teardown
|
278
|
+
#
|
279
|
+
# method_setup
|
280
|
+
# test_c
|
281
|
+
# method_teardown
|
282
|
+
#
|
283
|
+
# suite_teardown
|
284
|
+
#
|
285
|
+
# Notes
|
286
|
+
# -----
|
287
|
+
# 1) The usual setup / teardown methods should NOT be overridden; instead,
|
288
|
+
# use the method_xxx alternatives.
|
289
|
+
#
|
290
|
+
# 2) The base class implementations of method_/suite_xxx do nothing.
|
291
|
+
#
|
292
|
+
# 3) The number of test cases reported may be higher than you expect, since
|
293
|
+
# there are additional test methods defined by the TestSuite class to
|
294
|
+
# implement the suite setup / teardown functionality.
|
295
|
+
#
|
296
|
+
# 4) Avoid naming test methods that fall outside of test_01 ... test_zz.
|
297
|
+
#
|
298
|
+
class MyTestSuite < Test::Unit::TestCase
|
299
|
+
|
300
|
+
# This is named to be the FIRST test called. It
|
301
|
+
# will do suite-level setup, and nothing else.
|
302
|
+
def test_00_setup
|
303
|
+
@@suiteSetup = true
|
304
|
+
suite_setup()
|
305
|
+
end
|
306
|
+
|
307
|
+
# This is named to be the LAST test called. It
|
308
|
+
# will do suite-level teardown, and nothing else.
|
309
|
+
def test_zzzzzz_teardown
|
310
|
+
suite_teardown()
|
311
|
+
@@suiteSetup = false
|
312
|
+
end
|
313
|
+
|
314
|
+
# True if called within suite-level setup/teardown window
|
315
|
+
def _suite_active?
|
316
|
+
!(@__name__ == "test_00_setup" || @__name__ == "test_zzzzzz_teardown")
|
317
|
+
end
|
318
|
+
|
319
|
+
def setup
|
320
|
+
if _suite_active?
|
321
|
+
# If only a specific test was requested, the
|
322
|
+
# suite setup may not have run... if not, do it now.
|
323
|
+
if !defined? @@suiteSetup
|
324
|
+
suite_setup
|
325
|
+
end
|
326
|
+
return
|
327
|
+
end
|
328
|
+
method_setup
|
329
|
+
end
|
330
|
+
|
331
|
+
def out_dir
|
332
|
+
"_output_"
|
333
|
+
end
|
334
|
+
|
335
|
+
def out_path(f)
|
336
|
+
File.join(out_dir,f)
|
337
|
+
end
|
338
|
+
|
339
|
+
def teardown
|
340
|
+
if _suite_active?
|
341
|
+
if !defined? @@suiteSetup
|
342
|
+
suite_teardown
|
343
|
+
end
|
344
|
+
return
|
345
|
+
end
|
346
|
+
method_teardown
|
347
|
+
end
|
348
|
+
|
349
|
+
def suite_setup
|
350
|
+
end
|
351
|
+
|
352
|
+
def suite_teardown
|
353
|
+
end
|
354
|
+
|
355
|
+
def method_setup
|
356
|
+
end
|
357
|
+
|
358
|
+
def method_teardown
|
359
|
+
end
|
360
|
+
end
|
361
|
+
end
|
362
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require_relative '../lib/tokn/tokenizer'
|
2
|
+
|
3
|
+
class Example1
|
4
|
+
|
5
|
+
include Tokn
|
6
|
+
|
7
|
+
def dataPath(f)
|
8
|
+
File.dirname(__FILE__)+"/data/"+f
|
9
|
+
end
|
10
|
+
|
11
|
+
setTestDir()
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@sampleText = readTextFile(dataPath("sampletext.txt"))
|
15
|
+
end
|
16
|
+
|
17
|
+
def makeTok
|
18
|
+
@dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
|
19
|
+
Tokenizer.new(@dfa, @sampleText, "WS")
|
20
|
+
end
|
21
|
+
|
22
|
+
def go
|
23
|
+
puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
|
24
|
+
|
25
|
+
t = makeTok
|
26
|
+
|
27
|
+
s = ''
|
28
|
+
while t.hasNext do
|
29
|
+
|
30
|
+
tk = t.peek
|
31
|
+
|
32
|
+
if t.nameOf(tk) == 'BROP'
|
33
|
+
lst = t.readSequenceIf('BROP DO ID BRCL')
|
34
|
+
if lst
|
35
|
+
s << " ...read BROP DO ID sequence...\n"
|
36
|
+
lst.each{ |x| s << " #{d(x)}\n"}
|
37
|
+
next
|
38
|
+
else
|
39
|
+
s << " ...couldn't find sequence...\n"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
tk = t.read
|
44
|
+
s << d(tk) << "\n"
|
45
|
+
|
46
|
+
end
|
47
|
+
exp =<<"EXP"
|
48
|
+
(line 1, col 1) : speed
|
49
|
+
(line 1, col 6) : =
|
50
|
+
(line 1, col 7) : 42
|
51
|
+
(line 1, col 9) : gravity
|
52
|
+
(line 1, col 16) : =
|
53
|
+
(line 1, col 17) : -9.80
|
54
|
+
...couldn't find sequence...
|
55
|
+
(line 1, col 22) : {
|
56
|
+
(line 1, col 23) : color
|
57
|
+
(line 1, col 29) : =
|
58
|
+
(line 1, col 30) : green
|
59
|
+
(line 1, col 35) : }
|
60
|
+
(line 1, col 36) : title
|
61
|
+
(line 1, col 41) : =
|
62
|
+
(line 1, col 42) : 'This is a string with \' an escaped delimiter'
|
63
|
+
(line 1, col 89) : if
|
64
|
+
(line 1, col 91) : gravity
|
65
|
+
(line 1, col 98) : ==
|
66
|
+
(line 1, col 100) : 12
|
67
|
+
...read BROP DO ID sequence...
|
68
|
+
(line 1, col 102) : {
|
69
|
+
(line 1, col 103) : do
|
70
|
+
(line 1, col 105) : something
|
71
|
+
(line 1, col 114) : }
|
72
|
+
(line 1, col 115) : do
|
73
|
+
(line 1, col 117) : something_else
|
74
|
+
EXP
|
75
|
+
assert(s == exp)
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
Example1.new.go
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/test/test_tokn.rb
ADDED
@@ -0,0 +1,584 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
require_relative '../lib/tokn/tools.rb'
|
4
|
+
req('range_partition dfa dfa_builder tokenizer token_defn_parser')
|
5
|
+
|
6
|
+
# Get access to Tokn namespace
|
7
|
+
|
8
|
+
#setTestDir()
|
9
|
+
|
10
|
+
#SINGLETEST = "test_ps_output_multi"
|
11
|
+
if defined? SINGLETEST
|
12
|
+
if main?(__FILE__)
|
13
|
+
ARGV.concat("-n #{SINGLETEST}".split)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class TestTokn < MyTestSuite
|
18
|
+
|
19
|
+
include Tokn, ToknInternal
|
20
|
+
|
21
|
+
# def data_file(f)
|
22
|
+
# File.join("data",f)
|
23
|
+
# # File.dirname(__FILE__)+"/data/"+f
|
24
|
+
# end
|
25
|
+
|
26
|
+
def suite_setup
|
27
|
+
|
28
|
+
# Make current directory = the one containing this script
|
29
|
+
main?(__FILE__)
|
30
|
+
|
31
|
+
if !File.directory?(out_dir)
|
32
|
+
Dir.mkdir(out_dir)
|
33
|
+
end
|
34
|
+
|
35
|
+
@@sampleText = readTextFile("sampletext.txt")
|
36
|
+
@@sampleTokens = readTextFile("sampletokens.txt")
|
37
|
+
end
|
38
|
+
|
39
|
+
# def withinTestDir(f)
|
40
|
+
# File.join(@@testDir,"__source__")
|
41
|
+
|
42
|
+
def suite_teardown
|
43
|
+
remove_file_or_dir(out_dir)
|
44
|
+
end
|
45
|
+
|
46
|
+
def method_setup
|
47
|
+
end
|
48
|
+
|
49
|
+
def method_teardown
|
50
|
+
end
|
51
|
+
|
52
|
+
def add(lower, upper = nil)
|
53
|
+
@cs.add(lower,upper)
|
54
|
+
end
|
55
|
+
|
56
|
+
def remove(lower, upper = nil)
|
57
|
+
@cs.remove(lower,upper)
|
58
|
+
end
|
59
|
+
|
60
|
+
def swap
|
61
|
+
@ct = @cs
|
62
|
+
prep
|
63
|
+
end
|
64
|
+
|
65
|
+
def isect
|
66
|
+
@cs.intersect!(@ct)
|
67
|
+
end
|
68
|
+
|
69
|
+
def diff
|
70
|
+
@cs.difference!(@ct)
|
71
|
+
end
|
72
|
+
|
73
|
+
def equ(s, arr = nil)
|
74
|
+
arr ||= @cs.array
|
75
|
+
ia = s.split.map{|n| n.to_i}
|
76
|
+
assert_equal(ia,arr)
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_100_add
|
80
|
+
prep
|
81
|
+
|
82
|
+
add(72,81)
|
83
|
+
equ '72 81'
|
84
|
+
|
85
|
+
add(50)
|
86
|
+
equ '50 51 72 81'
|
87
|
+
|
88
|
+
add(75,77)
|
89
|
+
equ '50 51 72 81'
|
90
|
+
|
91
|
+
add(72,78)
|
92
|
+
equ '50 51 72 81'
|
93
|
+
|
94
|
+
add(70,78)
|
95
|
+
equ '50 51 70 81'
|
96
|
+
|
97
|
+
add 60
|
98
|
+
equ '50 51 60 61 70 81'
|
99
|
+
|
100
|
+
add 40
|
101
|
+
equ '40 41 50 51 60 61 70 81'
|
102
|
+
|
103
|
+
add 41
|
104
|
+
equ '40 42 50 51 60 61 70 81'
|
105
|
+
|
106
|
+
add 81
|
107
|
+
equ '40 42 50 51 60 61 70 82'
|
108
|
+
|
109
|
+
add 83
|
110
|
+
equ '40 42 50 51 60 61 70 82 83 84'
|
111
|
+
|
112
|
+
add 49,84
|
113
|
+
equ '40 42 49 84'
|
114
|
+
|
115
|
+
add 39,86
|
116
|
+
equ '39 86'
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_110_intersect
|
120
|
+
prep
|
121
|
+
add 39,86
|
122
|
+
swap
|
123
|
+
add 50,70
|
124
|
+
isect
|
125
|
+
equ '50 70'
|
126
|
+
|
127
|
+
swap
|
128
|
+
add 20,25
|
129
|
+
add 35,51
|
130
|
+
add 62,68
|
131
|
+
add 72,80
|
132
|
+
isect
|
133
|
+
equ '50 51 62 68'
|
134
|
+
|
135
|
+
prep
|
136
|
+
swap
|
137
|
+
add 50,70
|
138
|
+
isect
|
139
|
+
equ ''
|
140
|
+
|
141
|
+
add 50,70
|
142
|
+
swap
|
143
|
+
add 50,70
|
144
|
+
isect
|
145
|
+
equ '50 70'
|
146
|
+
|
147
|
+
prep
|
148
|
+
add 20,25
|
149
|
+
swap
|
150
|
+
add 25,30
|
151
|
+
isect
|
152
|
+
equ ''
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
def test_120_difference
|
157
|
+
prep
|
158
|
+
add 20,30
|
159
|
+
add 40,50
|
160
|
+
swap
|
161
|
+
|
162
|
+
add 20,80
|
163
|
+
diff
|
164
|
+
equ '30 40 50 80'
|
165
|
+
|
166
|
+
prep
|
167
|
+
add 19,32
|
168
|
+
diff
|
169
|
+
equ '19 20 30 32'
|
170
|
+
|
171
|
+
prep
|
172
|
+
add 30,40
|
173
|
+
diff
|
174
|
+
equ '30 40'
|
175
|
+
|
176
|
+
prep
|
177
|
+
add 20,30
|
178
|
+
add 40,50
|
179
|
+
diff
|
180
|
+
equ ''
|
181
|
+
|
182
|
+
prep
|
183
|
+
add 19,30
|
184
|
+
add 40,50
|
185
|
+
diff
|
186
|
+
equ '19 20'
|
187
|
+
|
188
|
+
prep
|
189
|
+
add 20,30
|
190
|
+
add 40,51
|
191
|
+
diff
|
192
|
+
equ '50 51'
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
def prep
|
197
|
+
@cs = CodeSet.new
|
198
|
+
end
|
199
|
+
|
200
|
+
def test_130_illegalRange
|
201
|
+
prep
|
202
|
+
|
203
|
+
assert_raise(RangeError) { add 60,50 }
|
204
|
+
assert_raise(RangeError) { add 60,60 }
|
205
|
+
end
|
206
|
+
|
207
|
+
def neg(lower, upper)
|
208
|
+
@cs.negate lower, upper
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_140_negate
|
212
|
+
prep
|
213
|
+
add 10,15
|
214
|
+
add 20,25
|
215
|
+
add 30
|
216
|
+
add 40,45
|
217
|
+
equ '10 15 20 25 30 31 40 45'
|
218
|
+
neg 22,37
|
219
|
+
equ '10 15 20 22 25 30 31 37 40 45'
|
220
|
+
neg 25,27
|
221
|
+
equ '10 15 20 22 27 30 31 37 40 45'
|
222
|
+
neg 15,20
|
223
|
+
equ '10 22 27 30 31 37 40 45'
|
224
|
+
|
225
|
+
prep
|
226
|
+
add 10,22
|
227
|
+
@cs.negate
|
228
|
+
equ '0 10 22 1114112'
|
229
|
+
|
230
|
+
prep
|
231
|
+
add 10,20
|
232
|
+
neg 10,20
|
233
|
+
equ ''
|
234
|
+
|
235
|
+
prep
|
236
|
+
add 10,20
|
237
|
+
add 30,40
|
238
|
+
neg 5,10
|
239
|
+
equ '5 20 30 40'
|
240
|
+
|
241
|
+
prep
|
242
|
+
add 10,20
|
243
|
+
add 30,40
|
244
|
+
neg 25,30
|
245
|
+
equ '10 20 25 40'
|
246
|
+
|
247
|
+
prep
|
248
|
+
add 10,20
|
249
|
+
add 30,40
|
250
|
+
neg 40,50
|
251
|
+
equ '10 20 30 50'
|
252
|
+
|
253
|
+
prep
|
254
|
+
add 10,20
|
255
|
+
add 30,40
|
256
|
+
neg 41,50
|
257
|
+
equ '10 20 30 40 41 50'
|
258
|
+
|
259
|
+
prep
|
260
|
+
add 10,20
|
261
|
+
add 30,40
|
262
|
+
neg 15,35
|
263
|
+
equ '10 15 20 30 35 40'
|
264
|
+
end
|
265
|
+
|
266
|
+
def test_150_remove
|
267
|
+
|
268
|
+
prep
|
269
|
+
add 10,20
|
270
|
+
add 30,40
|
271
|
+
remove 29,41
|
272
|
+
equ '10 20'
|
273
|
+
|
274
|
+
add 30,40
|
275
|
+
equ '10 20 30 40'
|
276
|
+
|
277
|
+
remove 20,30
|
278
|
+
equ '10 20 30 40'
|
279
|
+
|
280
|
+
remove 15,35
|
281
|
+
equ '10 15 35 40'
|
282
|
+
|
283
|
+
remove 10,15
|
284
|
+
equ '35 40'
|
285
|
+
remove 35
|
286
|
+
equ '36 40'
|
287
|
+
remove 40
|
288
|
+
equ '36 40'
|
289
|
+
remove 38
|
290
|
+
equ '36 38 39 40'
|
291
|
+
remove 37,39
|
292
|
+
equ '36 37 39 40'
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
def dset(st)
|
297
|
+
s = ''
|
298
|
+
st.each{|x|
|
299
|
+
if s.length > 0
|
300
|
+
s+= ' '
|
301
|
+
end
|
302
|
+
s += d(x)
|
303
|
+
}
|
304
|
+
return s
|
305
|
+
end
|
306
|
+
|
307
|
+
def newpar
|
308
|
+
@par = RangePartition.new
|
309
|
+
end
|
310
|
+
|
311
|
+
def addset(lower, upper = nil)
|
312
|
+
upper ||= lower + 1
|
313
|
+
r = CodeSet.new(lower,upper)
|
314
|
+
@par.addSet(r)
|
315
|
+
end
|
316
|
+
|
317
|
+
def apply
|
318
|
+
list = @par.apply(@cs)
|
319
|
+
res = []
|
320
|
+
list.each do |x|
|
321
|
+
res.concat x.array
|
322
|
+
end
|
323
|
+
@parResult = res
|
324
|
+
end
|
325
|
+
|
326
|
+
def test_160_partition
|
327
|
+
|
328
|
+
newpar
|
329
|
+
addset(20,30)
|
330
|
+
addset(25,33)
|
331
|
+
addset(37)
|
332
|
+
addset(40,50)
|
333
|
+
@par.prepare
|
334
|
+
|
335
|
+
@par.generatePDF(out_dir)
|
336
|
+
|
337
|
+
prep
|
338
|
+
add 25,33
|
339
|
+
|
340
|
+
apply
|
341
|
+
equ('25 30 30 33', @parResult)
|
342
|
+
|
343
|
+
prep
|
344
|
+
add 37
|
345
|
+
apply
|
346
|
+
equ('37 38', @parResult)
|
347
|
+
|
348
|
+
prep
|
349
|
+
add 40,50
|
350
|
+
apply
|
351
|
+
equ('40 50', @parResult)
|
352
|
+
|
353
|
+
end
|
354
|
+
|
355
|
+
REGEX_SCRIPT = "(\\-?[0-9]+)|[_a-zA-Z][_a-zA-Z0-9]*|333q"
|
356
|
+
|
357
|
+
TOKEN_SCRIPT2 = <<'END'
|
358
|
+
sep: \s
|
359
|
+
tku: a(a|b)*
|
360
|
+
tkv: b(aa|b*)
|
361
|
+
tkw: bbb
|
362
|
+
END
|
363
|
+
|
364
|
+
def test_170_build_DFA
|
365
|
+
|
366
|
+
x = RegParse.new(REGEX_SCRIPT)
|
367
|
+
s = x.startState
|
368
|
+
x.endState.finalState = true
|
369
|
+
|
370
|
+
s.generatePDF(out_dir,"nfa")
|
371
|
+
|
372
|
+
r = s.reverseNFA()
|
373
|
+
r.generatePDF(out_dir,"reversed")
|
374
|
+
|
375
|
+
dfa = DFABuilder.nfa_to_dfa(s)
|
376
|
+
dfa.generatePDF(out_dir,"buildDFA")
|
377
|
+
end
|
378
|
+
|
379
|
+
def test_180_cvt_NFA_to_DFA
|
380
|
+
|
381
|
+
x = RegParse.new(REGEX_SCRIPT)
|
382
|
+
s = x.startState
|
383
|
+
x.endState.finalState = true
|
384
|
+
|
385
|
+
s.generatePDF(out_dir,"nfa")
|
386
|
+
|
387
|
+
dfa = DFABuilder.nfa_to_dfa(s)
|
388
|
+
dfa.generatePDF(out_dir,"dfa")
|
389
|
+
|
390
|
+
oldToNewMap, maxId2 = dfa.duplicateNFA(42)
|
391
|
+
dfa2 = oldToNewMap[dfa]
|
392
|
+
dfa2.generatePDF(out_dir,"dfa_duplicated")
|
393
|
+
end
|
394
|
+
|
395
|
+
def test_190_TokenDefParser
|
396
|
+
|
397
|
+
s = TOKEN_SCRIPT2
|
398
|
+
|
399
|
+
td = TokenDefParser.new(s)
|
400
|
+
|
401
|
+
tokDFA = td.dfa
|
402
|
+
tokDFA.startState.generatePDF(out_dir,"TokenDFA")
|
403
|
+
|
404
|
+
end
|
405
|
+
|
406
|
+
def makeTok
|
407
|
+
dfa = DFA.from_script(@@sampleTokens)
|
408
|
+
Tokenizer.new(dfa, @@sampleText)
|
409
|
+
end
|
410
|
+
|
411
|
+
def test_200_Tokenizer
|
412
|
+
|
413
|
+
tok = makeTok
|
414
|
+
|
415
|
+
tokList = []
|
416
|
+
while tok.hasNext
|
417
|
+
t = tok.read
|
418
|
+
tokList.push(t)
|
419
|
+
end
|
420
|
+
|
421
|
+
tok.unread(tokList.size)
|
422
|
+
|
423
|
+
tokList.each do |t1|
|
424
|
+
tName = tok.nameOf(t1)
|
425
|
+
t2 = tok.read(tName)
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
def test_210_Tokenizer_Missing_Expected
|
430
|
+
|
431
|
+
assert_raise TokenizerException do
|
432
|
+
|
433
|
+
tok = makeTok
|
434
|
+
|
435
|
+
tok.read
|
436
|
+
tok.read
|
437
|
+
tok.read
|
438
|
+
tok.read
|
439
|
+
tok.read("signedint")
|
440
|
+
end
|
441
|
+
|
442
|
+
end
|
443
|
+
|
444
|
+
def test_220_CompileDFAToDisk
|
445
|
+
tokScript = @@sampleTokens
|
446
|
+
testText = @@sampleText
|
447
|
+
|
448
|
+
destPath = out_path("sampletokens_dfa.txt")
|
449
|
+
|
450
|
+
if File.exist?(destPath)
|
451
|
+
File.delete(destPath)
|
452
|
+
end
|
453
|
+
assert(!File.exist?(destPath))
|
454
|
+
|
455
|
+
dfa = DFA.from_script(tokScript, destPath)
|
456
|
+
assert(File.exist?(destPath))
|
457
|
+
|
458
|
+
tok = Tokenizer.new(dfa, testText)
|
459
|
+
|
460
|
+
end
|
461
|
+
|
462
|
+
def prep2
|
463
|
+
testText = @@sampleText
|
464
|
+
dfa = DFA.from_file(out_path("sampletokens_dfa.txt"))
|
465
|
+
tok = Tokenizer.new(dfa, testText)
|
466
|
+
end
|
467
|
+
|
468
|
+
def test_230_readAndUnread
|
469
|
+
tok = prep2
|
470
|
+
unread = false
|
471
|
+
while tok.hasNext
|
472
|
+
t = tok.read
|
473
|
+
# pr("Read %-8s %s\n",tok.nameOf(t),d(t))
|
474
|
+
|
475
|
+
if !unread && tok.nameOf(t) == "DO"
|
476
|
+
# pr(" ...pushing back four tokens...\n")
|
477
|
+
tok.unread(4)
|
478
|
+
unread = true
|
479
|
+
# pr(" ...and resuming...\n")
|
480
|
+
end
|
481
|
+
end
|
482
|
+
end
|
483
|
+
|
484
|
+
def test_240_UnrecognizedToken
|
485
|
+
assert_raise TokenizerException do
|
486
|
+
tok = prep2
|
487
|
+
while tok.hasNext
|
488
|
+
t = tok.read
|
489
|
+
if tok.nameOf(t) == "DO"
|
490
|
+
tok.read("BRCL") # <== this should raise problem
|
491
|
+
end
|
492
|
+
end
|
493
|
+
end
|
494
|
+
end
|
495
|
+
|
496
|
+
def test_250_ReadPastEnd
|
497
|
+
assert_raise TokenizerException do
|
498
|
+
tok = prep2
|
499
|
+
while tok.hasNext
|
500
|
+
t = tok.read
|
501
|
+
end
|
502
|
+
tok.read
|
503
|
+
end
|
504
|
+
end
|
505
|
+
|
506
|
+
def test_260_UnreadBeforeStart
|
507
|
+
|
508
|
+
assert_raise TokenizerException do
|
509
|
+
tok = prep2
|
510
|
+
k = 0
|
511
|
+
while tok.hasNext
|
512
|
+
t = tok.read
|
513
|
+
k += 1
|
514
|
+
if k == 15
|
515
|
+
tok.unread(5)
|
516
|
+
tok.unread(7)
|
517
|
+
tok.read()
|
518
|
+
tok.unread(4)
|
519
|
+
tok.unread(3)
|
520
|
+
end
|
521
|
+
end
|
522
|
+
tok.read
|
523
|
+
end
|
524
|
+
end
|
525
|
+
|
526
|
+
def test_270_filter_ws
|
527
|
+
|
528
|
+
dfa = DFA.from_script_file("sampletokens.txt")
|
529
|
+
t = Tokenizer.new(dfa, readTextFile("sampletext.txt"), "WS")
|
530
|
+
|
531
|
+
s = ''
|
532
|
+
while t.hasNext do
|
533
|
+
|
534
|
+
tk = t.peek
|
535
|
+
|
536
|
+
if t.nameOf(tk) == 'BROP'
|
537
|
+
lst = t.readSequenceIf('BROP DO ID BRCL')
|
538
|
+
if lst
|
539
|
+
s << " ...read BROP DO ID sequence...\n"
|
540
|
+
lst.each{ |x| s << " #{d(x)}\n"}
|
541
|
+
next
|
542
|
+
else
|
543
|
+
s << " ...couldn't find sequence...\n"
|
544
|
+
end
|
545
|
+
end
|
546
|
+
|
547
|
+
tk = t.read
|
548
|
+
s << d(tk) << "\n"
|
549
|
+
|
550
|
+
end
|
551
|
+
exp =<<"EXP"
|
552
|
+
(line 1, col 1) : speed
|
553
|
+
(line 1, col 6) : =
|
554
|
+
(line 1, col 7) : 42
|
555
|
+
(line 1, col 9) : gravity
|
556
|
+
(line 1, col 16) : =
|
557
|
+
(line 1, col 17) : -9.80
|
558
|
+
...couldn't find sequence...
|
559
|
+
(line 1, col 22) : {
|
560
|
+
(line 1, col 23) : color
|
561
|
+
(line 1, col 29) : =
|
562
|
+
(line 1, col 30) : green
|
563
|
+
(line 1, col 35) : }
|
564
|
+
(line 1, col 36) : title
|
565
|
+
(line 1, col 41) : =
|
566
|
+
(line 1, col 42) : 'This is a string with \\' an escaped delimiter'
|
567
|
+
(line 1, col 89) : if
|
568
|
+
(line 1, col 91) : gravity
|
569
|
+
(line 1, col 98) : ==
|
570
|
+
(line 1, col 100) : 12
|
571
|
+
...read BROP DO ID sequence...
|
572
|
+
(line 1, col 102) : {
|
573
|
+
(line 1, col 103) : do
|
574
|
+
(line 1, col 105) : something
|
575
|
+
(line 1, col 114) : }
|
576
|
+
(line 1, col 115) : do
|
577
|
+
(line 1, col 117) : something_else
|
578
|
+
EXP
|
579
|
+
|
580
|
+
assert(s.strip == exp.strip)
|
581
|
+
end
|
582
|
+
|
583
|
+
end
|
584
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Sember
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "Given a script containing token descriptions (each a regular expression),
|
14
14
|
\ntokn compiles an automaton which it can then use to efficiently convert a \ntext
|
@@ -34,13 +34,15 @@ files:
|
|
34
34
|
- bin/tokncompile
|
35
35
|
- bin/toknprocess
|
36
36
|
- CHANGELOG.txt
|
37
|
-
- README.
|
38
|
-
- test/Example1.rb
|
39
|
-
- test/data/compileddfa.txt
|
40
|
-
- test/data/
|
41
|
-
- test/
|
42
|
-
- test/
|
43
|
-
- test/
|
37
|
+
- README.md
|
38
|
+
- test/_OLD_/Example1.rb
|
39
|
+
- test/_OLD_/data/compileddfa.txt
|
40
|
+
- test/_OLD_/data/sampletokens_dfa.txt
|
41
|
+
- test/_OLD_/test.rb
|
42
|
+
- test/_OLD_/testcmds
|
43
|
+
- test/sampletext.txt
|
44
|
+
- test/sampletokens.txt
|
45
|
+
- test/test_tokn.rb
|
44
46
|
homepage: http://www.cs.ubc.ca/~jpsember/
|
45
47
|
licenses:
|
46
48
|
- mit
|
@@ -66,6 +68,5 @@ signing_key:
|
|
66
68
|
specification_version: 4
|
67
69
|
summary: Extracts tokens from source files
|
68
70
|
test_files:
|
69
|
-
- test/
|
70
|
-
- test/test.rb
|
71
|
+
- test/test_tokn.rb
|
71
72
|
has_rdoc:
|
data/test/Example1.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require_relative '../lib/tokn/tokenizer'
|
2
|
-
|
3
|
-
class Example1
|
4
|
-
|
5
|
-
include Tokn
|
6
|
-
|
7
|
-
def dataPath(f)
|
8
|
-
File.dirname(__FILE__)+"/data/"+f
|
9
|
-
end
|
10
|
-
|
11
|
-
setTestDir()
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@sampleText = readTextFile(dataPath("sampletext.txt"))
|
15
|
-
end
|
16
|
-
|
17
|
-
def makeTok
|
18
|
-
@dfa = DFA.from_script_file(dataPath("sampletokens.txt"))
|
19
|
-
Tokenizer.new(@dfa, @sampleText, "WS")
|
20
|
-
end
|
21
|
-
|
22
|
-
def go
|
23
|
-
puts "Tokenizing the 'sampletext.txt' file, filtering out whitespace (WS) tokens...\n\n"
|
24
|
-
|
25
|
-
t = makeTok
|
26
|
-
|
27
|
-
while t.hasNext do
|
28
|
-
|
29
|
-
tk = t.peek
|
30
|
-
|
31
|
-
if t.nameOf(tk) == 'BROP'
|
32
|
-
lst = t.readSequenceIf('BROP DO ID BRCL')
|
33
|
-
if lst
|
34
|
-
pr(" ...read BROP DO ID sequence...\n")
|
35
|
-
lst.each{ |x| pr(" %s\n",d(x))}
|
36
|
-
next
|
37
|
-
else
|
38
|
-
pr(" ...couldn't find sequence...\n")
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
tk = t.read
|
43
|
-
pr("%s\n",d(tk))
|
44
|
-
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
end
|
49
|
-
|
50
|
-
Example1.new.go
|