tokn 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +2 -1
- data/lib/tokn/dfa.rb +4 -4
- data/lib/tokn/tokn_const.rb +15 -0
- data/lib/tokn/tools.rb +473 -83
- data/test/_misc_/__mygraph__TokenDFA.pdf +0 -0
- data/test/_misc_/__mygraph__buildDFA.pdf +0 -0
- data/test/_misc_/__mygraph__dfa.pdf +0 -0
- data/test/_misc_/__mygraph__dfa_duplicated.pdf +0 -0
- data/test/_misc_/__mygraph__nfa.pdf +0 -0
- data/test/_misc_/__mygraph__partition.pdf +0 -0
- data/test/_misc_/__mygraph__reversed.pdf +0 -0
- data/test/_misc_/sampletokens_dfa.txt +1 -0
- data/test/_output_270_filter_ws.txt +26 -0
- data/test/test_tokn.rb +30 -63
- metadata +11 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84244706589683dd8fa77c96355a4494d6585561
|
4
|
+
data.tar.gz: cf5dcd362908eaad20371f9f62193000fc986d8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2a6659f5c4819088ff95dda3a48b42b2a7deb7dcd46f01149b45121bff298778850e42597e63a3cd8ec5bcc94f9aba41c90b914c2b5d88f6cf13ad9d64dd4cf
|
7
|
+
data.tar.gz: 059e6c6601c8e35aa7f7e465fd408dc227a23e899458962592a12bdd4bcc9004bb802bf0ca4e580e8502c6a2ba2ebe4ea8e23705903198efd431e30833783cb6
|
data/CHANGELOG.txt
CHANGED
data/lib/tokn/dfa.rb
CHANGED
@@ -20,7 +20,7 @@ module Tokn
|
|
20
20
|
def self.from_script(script, persistPath = nil)
|
21
21
|
|
22
22
|
if persistPath and File.exist?(persistPath)
|
23
|
-
return extractDFA(
|
23
|
+
return extractDFA(read_text_file(persistPath))
|
24
24
|
end
|
25
25
|
|
26
26
|
req('token_defn_parser')
|
@@ -29,7 +29,7 @@ module Tokn
|
|
29
29
|
dfa = td.dfa
|
30
30
|
|
31
31
|
if persistPath
|
32
|
-
|
32
|
+
write_text_file(persistPath, dfa.serialize())
|
33
33
|
end
|
34
34
|
|
35
35
|
dfa
|
@@ -39,14 +39,14 @@ module Tokn
|
|
39
39
|
# the file at scriptPath.
|
40
40
|
#
|
41
41
|
def self.from_script_file(scriptPath, persistPath = nil)
|
42
|
-
self.from_script(
|
42
|
+
self.from_script(read_text_file(scriptPath), persistPath)
|
43
43
|
end
|
44
44
|
|
45
45
|
# Compile a Tokenizer DFA from a text file (that contains a
|
46
46
|
# JSON string)
|
47
47
|
#
|
48
48
|
def self.from_file(path)
|
49
|
-
from_json(
|
49
|
+
from_json(read_text_file(path))
|
50
50
|
end
|
51
51
|
|
52
52
|
# Compile a Tokenizer DFA from a JSON string
|
data/lib/tokn/tokn_const.rb
CHANGED
@@ -32,3 +32,18 @@ module ToknInternal
|
|
32
32
|
EPSILON-1-edgeLabel
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
37
|
+
#
|
38
|
+
# It does this by making a system call to the 'dot' utility.
|
39
|
+
#
|
40
|
+
def dotToPDF(dotFile, name = "", test_dir = nil)
|
41
|
+
gr = dotFile
|
42
|
+
|
43
|
+
raise ArgumentError if !test_dir
|
44
|
+
|
45
|
+
dotPath = File.join(test_dir,".__mygraph__.dot")
|
46
|
+
write_text_file(dotPath,gr)
|
47
|
+
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
48
|
+
system("dot -Tpdf "+dotPath+" -o "+destName)
|
49
|
+
end
|
data/lib/tokn/tools.rb
CHANGED
@@ -1,13 +1,31 @@
|
|
1
1
|
require 'set'
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
|
+
###############################################################
|
5
|
+
#
|
4
6
|
# Various utility and debug convenience functions.
|
5
7
|
#
|
8
|
+
###############################################################
|
6
9
|
|
7
|
-
#
|
10
|
+
# Exception class for objects in illegal states
|
8
11
|
#
|
9
|
-
|
10
|
-
|
12
|
+
class IllegalStateException < Exception
|
13
|
+
end
|
14
|
+
|
15
|
+
# A string containing a single zero, with ASCII 8-bit encoding (i.e., plain old bytes)
|
16
|
+
ZERO_CHAR = "\0".force_encoding("ASCII-8BIT")
|
17
|
+
|
18
|
+
# Construct a string of zeros
|
19
|
+
# @param count number of zeros
|
20
|
+
#
|
21
|
+
def zero_bytes(count)
|
22
|
+
ZERO_CHAR * count
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convenience method to perform 'require_relative' on a set of files
|
26
|
+
#
|
27
|
+
# @param fileListStr space-delimited file/path items, without .rb extensions
|
28
|
+
# @param subdir optional path to files relative to this file
|
11
29
|
#
|
12
30
|
def req(fileListStr,subdir = nil)
|
13
31
|
fileListStr.split(' ').each do |x|
|
@@ -21,67 +39,114 @@ end
|
|
21
39
|
|
22
40
|
# Shorthand for printf(...)
|
23
41
|
#
|
24
|
-
|
25
|
-
|
42
|
+
alias :pr :printf
|
43
|
+
|
44
|
+
# Convert an object to a human-readable string,
|
45
|
+
# or <nil>; should be considered a debug-only feature
|
46
|
+
#
|
47
|
+
def d(arg)
|
48
|
+
arg.nil? ? "<nil>" : arg.inspect
|
26
49
|
end
|
27
50
|
|
51
|
+
# Convert an object to a human-readable string,
|
52
|
+
# by calling a type-appropriate function: da, dh, or just d.
|
53
|
+
# @param arg object
|
54
|
+
# @param indent optional indentation for pretty printing; if result
|
55
|
+
# spans multiple lines, each line should be indented by this amount
|
56
|
+
#
|
57
|
+
def d2(arg, indent = 0)
|
58
|
+
return da(arg, indent) if arg.is_a? Array
|
59
|
+
return dh(arg, indent) if arg.is_a? Hash
|
60
|
+
return df(arg) if arg.class == FalseClass || arg.class == TrueClass
|
61
|
+
return d(arg)
|
62
|
+
end
|
28
63
|
|
29
|
-
# Convert an object to a human-readable string
|
30
|
-
# should be considered a debug-only feature
|
64
|
+
# Convert an object to a human-readable string, prefixed with its type
|
31
65
|
#
|
32
|
-
def
|
33
|
-
arg.nil?
|
66
|
+
def dt(arg)
|
67
|
+
if arg.nil?
|
68
|
+
return "<nil>"
|
69
|
+
end
|
70
|
+
s = arg.class.to_s
|
71
|
+
s << ':'
|
72
|
+
s << arg.inspect
|
73
|
+
s
|
74
|
+
end
|
75
|
+
|
76
|
+
# Append a particular number of spaces to a string
|
77
|
+
def add_sp(s, indent = 0)
|
78
|
+
s << ' ' * indent
|
79
|
+
end
|
80
|
+
|
81
|
+
# Pretty-print an array,
|
82
|
+
# one element to a line
|
83
|
+
# @param indent indentation of each line, in spaces
|
84
|
+
def da(array, indent = 0)
|
85
|
+
return d(array) if !array
|
86
|
+
s = 'Array ['
|
87
|
+
indent += 2
|
88
|
+
array.each do |x|
|
89
|
+
s << "\n"
|
90
|
+
add_sp(s,indent)
|
91
|
+
s2 = d2(x, indent + 2)
|
92
|
+
s << s2
|
93
|
+
end
|
94
|
+
s << " ]"
|
95
|
+
s
|
96
|
+
end
|
97
|
+
|
98
|
+
# Pretty-print a hash,
|
99
|
+
# one element to a line
|
100
|
+
# @param indent indentation of each line, in spaces
|
101
|
+
def dh(hash, indent = 0)
|
102
|
+
return d(hash) if !hash
|
103
|
+
s = 'Hash {'
|
104
|
+
indent += 2
|
105
|
+
hash.each_pair do |key,val|
|
106
|
+
s2 = d(key)
|
107
|
+
s3 = d2(val, indent + 4)
|
108
|
+
s << "\n "
|
109
|
+
add_sp(s,indent)
|
110
|
+
s << s2.chomp << " => " << s3.chomp
|
111
|
+
end
|
112
|
+
s << " }"
|
113
|
+
s
|
114
|
+
end
|
115
|
+
|
116
|
+
# Generate debug description of a boolean value
|
117
|
+
# @param flag value to interpret as a boolean; prints 'T' iff not nil
|
118
|
+
# @param label optional label
|
119
|
+
def df(flag, label=nil)
|
120
|
+
s = ''
|
121
|
+
if label
|
122
|
+
s << label << ':'
|
123
|
+
end
|
124
|
+
s << (flag ? "T" : "F")
|
125
|
+
s << ' '
|
126
|
+
s
|
34
127
|
end
|
35
128
|
|
36
129
|
# Assert that a value is true. Should be considered a
|
37
130
|
# very temporary, debug-only option; it is slow and
|
38
131
|
# generates a warning that it is being called.
|
39
|
-
#
|
40
|
-
|
41
|
-
|
132
|
+
# @param cond condition
|
133
|
+
# @param msg generates additional message using printf(), if these arguments exist
|
134
|
+
def assert!(cond, *msg)
|
135
|
+
one_time_alert("warning",0,"Checking assertion")
|
42
136
|
if not cond
|
43
137
|
str = (msg.size == 0) ? "assertion error" : sprintf(*msg)
|
44
138
|
raise Exception, str
|
45
139
|
end
|
46
140
|
end
|
47
141
|
|
48
|
-
|
49
|
-
## Set test directory. If nil, sets to home directory + "__test__"
|
50
|
-
##
|
51
|
-
#def setTestDir(d = nil)
|
52
|
-
# $testDir = d || File.join(Dir.home,"__test__")
|
53
|
-
#end
|
54
|
-
|
55
|
-
## Get a path within the test directory;
|
56
|
-
## create test directory if it doesn't exist.
|
57
|
-
##
|
58
|
-
## relPath : if nil, returns the test directory; else
|
59
|
-
## returns the test directory joined to this one
|
60
|
-
##
|
61
|
-
#def withinTestDir(relPath = nil)
|
62
|
-
# if !$testDir
|
63
|
-
# raise IllegalStateException, "No test directory has been defined"
|
64
|
-
# end
|
65
|
-
# if !File.directory?($testDir)
|
66
|
-
# Dir::mkdir($testDir)
|
67
|
-
# end
|
68
|
-
# relPath ? File.join($testDir,relPath) : $testDir
|
69
|
-
#end
|
70
|
-
|
71
|
-
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
72
|
-
# in the test directory.
|
73
|
-
#
|
74
|
-
# It does this by making a system call to the 'dot' utility.
|
142
|
+
# Abort with message about unimplemented code
|
75
143
|
#
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
writeTextFile(dotPath,gr)
|
83
|
-
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
84
|
-
system("dot -Tpdf "+dotPath+" -o "+destName)
|
144
|
+
def unimp!(msg = nil)
|
145
|
+
msg2 = "Unimplemented code"
|
146
|
+
if msg
|
147
|
+
msg2 << ": " << msg
|
148
|
+
end
|
149
|
+
raise Exception, msg2
|
85
150
|
end
|
86
151
|
|
87
152
|
# Extensions to the Enumerable module
|
@@ -106,13 +171,12 @@ module Enumerable
|
|
106
171
|
end
|
107
172
|
end
|
108
173
|
|
109
|
-
|
110
174
|
# Get a nice, concise description of the file and line
|
111
175
|
# of some caller within the stack.
|
112
176
|
#
|
113
|
-
#
|
177
|
+
# @param nSkip the number of items deep in the call stack to look
|
114
178
|
#
|
115
|
-
def
|
179
|
+
def get_caller_location(nSkip = 2)
|
116
180
|
|
117
181
|
filename = nil
|
118
182
|
linenumber = nil
|
@@ -130,11 +194,11 @@ def getCallerLocation(nSkip = 2)
|
|
130
194
|
if pth.size
|
131
195
|
filename = pth[-1]
|
132
196
|
end
|
133
|
-
linenumber = fi[i+1,j-i-1]
|
197
|
+
linenumber = fi[i+1,j-i-1]
|
134
198
|
end
|
135
199
|
end
|
136
200
|
if filename && linenumber
|
137
|
-
loc = filename + " ("+linenumber
|
201
|
+
loc = filename + " ("+linenumber+")"
|
138
202
|
else
|
139
203
|
loc = "(UNKNOWN LOCATION)"
|
140
204
|
end
|
@@ -149,12 +213,12 @@ $AlertStrings = Set.new
|
|
149
213
|
# Print a message if it hasn't yet been printed,
|
150
214
|
# which includes the caller's location
|
151
215
|
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
216
|
+
# @param typeString e.g., "warning", "unimplemented"
|
217
|
+
# @param nSkip the number of levels deep that the caller is in the stack
|
218
|
+
# @param args if present, calls sprintf(...) with these to append to the message
|
155
219
|
#
|
156
|
-
def
|
157
|
-
loc =
|
220
|
+
def one_time_alert(typeString, nSkip, *args)
|
221
|
+
loc = get_caller_location(nSkip + 2)
|
158
222
|
s = "*** "+typeString+" " + loc
|
159
223
|
if args && args.size
|
160
224
|
s2 = sprintf(args[0], *args[1..-1])
|
@@ -169,33 +233,160 @@ def oneTimeAlert(typeString, nSkip, *args)
|
|
169
233
|
end
|
170
234
|
|
171
235
|
# Print a 'warning' alert, one time only
|
172
|
-
#
|
236
|
+
# @param args if present, calls printf() with these
|
173
237
|
def warn(*args)
|
174
|
-
|
238
|
+
one_time_alert("warning",0, *args)
|
239
|
+
end
|
240
|
+
|
241
|
+
# Convenience method for setting 'db' true within methods,
|
242
|
+
# and to print a one-time warning if so.
|
243
|
+
# @param val value to set db to; it is convenient to disable
|
244
|
+
# debug printing quickly by adding a zero, e.g., 'warndb 0'
|
245
|
+
#
|
246
|
+
def warndb(val = true)
|
247
|
+
if !val || val == 0
|
248
|
+
return false
|
249
|
+
end
|
250
|
+
one_time_alert("warning",1,"Debug printing enabled")
|
251
|
+
true
|
175
252
|
end
|
176
253
|
|
177
254
|
# Print an 'unimplemented' alert, one time only
|
178
|
-
#
|
255
|
+
# @param args if present, calls printf() with these
|
179
256
|
def unimp(*args)
|
180
|
-
|
257
|
+
one_time_alert("unimplemented", 0, *args)
|
181
258
|
end
|
182
259
|
|
183
260
|
# Write a string to a text file
|
184
261
|
#
|
185
|
-
def
|
262
|
+
def write_text_file(path, contents)
|
186
263
|
File.open(path, "wb") {|f| f.write(contents) }
|
187
264
|
end
|
188
265
|
|
189
266
|
# Read a file's contents, return as a string
|
190
267
|
#
|
191
|
-
def
|
268
|
+
def read_text_file(path)
|
192
269
|
contents = nil
|
193
270
|
File.open(path,"rb") {|f| contents = f.read }
|
194
271
|
contents
|
195
272
|
end
|
196
273
|
|
274
|
+
# Method that takes a code block as an argument to
|
275
|
+
# achieve the same functionality as Java/C++'s
|
276
|
+
# do {
|
277
|
+
# ...
|
278
|
+
# ... possibly with 'break' to jump to the end ...
|
279
|
+
# } while (false);
|
280
|
+
#
|
281
|
+
def block
|
282
|
+
yield
|
283
|
+
end
|
284
|
+
|
285
|
+
# Construct hex representation of value
|
286
|
+
# @param value integer value
|
287
|
+
# @param num_digits number of hex digits
|
288
|
+
#
|
289
|
+
def to_hex(value, num_digits=4)
|
290
|
+
s = sprintf("%x", value)
|
291
|
+
s.rjust(num_digits,'0')
|
292
|
+
end
|
293
|
+
|
294
|
+
# Hex dump a string or byte array
|
295
|
+
# @param byte_array_or_string
|
296
|
+
# @param title
|
297
|
+
# @param offset offset to first value within array
|
298
|
+
# @param length number of values to dump
|
299
|
+
# @param bytes_per_row
|
300
|
+
# @param with_text if true, displays ASCII values to right of hex dump
|
301
|
+
#
|
302
|
+
def hex_dump(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
|
303
|
+
ss = hex_dump_to_string(byte_array_or_string, title, offset, length, bytes_per_row, with_text)
|
304
|
+
puts ss
|
305
|
+
end
|
306
|
+
|
307
|
+
# Hex dump a string or byte array to a string; see hex_dump for parameter descriptions
|
308
|
+
#
|
309
|
+
def hex_dump_to_string(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
|
310
|
+
|
311
|
+
byte_array = byte_array_or_string
|
312
|
+
if byte_array.is_a? String
|
313
|
+
byte_array = byte_array.bytes.to_a
|
314
|
+
end
|
315
|
+
|
316
|
+
ss = ''
|
317
|
+
|
318
|
+
if title
|
319
|
+
ss << title << ":\n"
|
320
|
+
end
|
321
|
+
|
322
|
+
if length < 0
|
323
|
+
length = byte_array.size - offset
|
324
|
+
end
|
325
|
+
|
326
|
+
length = [length, byte_array.size - offset].min
|
327
|
+
|
328
|
+
max_addr = offset + length - 1
|
329
|
+
num_digits = 4
|
330
|
+
while (1 << (4 * num_digits)) <= max_addr
|
331
|
+
num_digits += 1
|
332
|
+
end
|
333
|
+
|
334
|
+
while true
|
335
|
+
ss << to_hex(offset, num_digits)
|
336
|
+
ss << ': '
|
337
|
+
|
338
|
+
chunk = [length, bytes_per_row].min
|
339
|
+
bytes_per_row.times do |i|
|
340
|
+
if i % 4 == 0
|
341
|
+
ss << ' '
|
342
|
+
end
|
343
|
+
|
344
|
+
if i < chunk
|
345
|
+
v = byte_array[offset + i]
|
346
|
+
ss << ((v != 0) ? to_hex(v,2) : '..')
|
347
|
+
ss << ' '
|
348
|
+
else
|
349
|
+
ss << ' '
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
if with_text
|
354
|
+
ss << ' |'
|
355
|
+
bytes_per_row.times do |i|
|
356
|
+
if i < chunk
|
357
|
+
v = byte_array[offset + i]
|
358
|
+
ss << ((v >= 32 && v < 127) ? v : '_')
|
359
|
+
end
|
360
|
+
end
|
361
|
+
ss << '|'
|
362
|
+
end
|
363
|
+
ss << "\n"
|
364
|
+
|
365
|
+
length -= chunk
|
366
|
+
offset += chunk
|
367
|
+
break if length <= 0
|
368
|
+
end
|
369
|
+
ss
|
370
|
+
end
|
371
|
+
|
372
|
+
$prevTime = nil
|
373
|
+
|
374
|
+
# Calculate time elapsed, in seconds, from last call to this function;
|
375
|
+
# if it's never been called, returns zero
|
376
|
+
#
|
377
|
+
def elapsed
|
378
|
+
curr = Time.now.to_f
|
379
|
+
elap = 0
|
380
|
+
if $prevTime
|
381
|
+
elap = curr - $prevTime
|
382
|
+
end
|
383
|
+
$prevTime = curr
|
384
|
+
elap
|
385
|
+
end
|
386
|
+
|
197
387
|
# Delete a file or directory, if it exists.
|
198
388
|
# Caution! If directory, deletes all files and subdirectories.
|
389
|
+
#
|
199
390
|
def remove_file_or_dir(pth)
|
200
391
|
if File.directory?(pth)
|
201
392
|
FileUtils.remove_dir(pth)
|
@@ -204,22 +395,96 @@ def remove_file_or_dir(pth)
|
|
204
395
|
end
|
205
396
|
end
|
206
397
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
#
|
398
|
+
require 'stringio'
|
399
|
+
|
400
|
+
$IODest = nil
|
401
|
+
$OldStdOut = nil
|
402
|
+
|
403
|
+
# Redirect standard output to an internal string
|
213
404
|
#
|
214
|
-
def
|
215
|
-
|
405
|
+
def capture_begin
|
406
|
+
raise IllegalStateException if $IODest
|
407
|
+
$IODest = StringIO.new
|
408
|
+
$OldStdOut, $stdout = $stdout, $IODest
|
216
409
|
end
|
217
410
|
|
218
|
-
#
|
411
|
+
# Restore standard output; return captured text
|
412
|
+
# @return text that was redirected
|
219
413
|
#
|
220
|
-
|
414
|
+
def capture_end
|
415
|
+
raise IllegalStateException if !$IODest
|
416
|
+
$stdout = $OldStdOut
|
417
|
+
ret = $IODest.string
|
418
|
+
$IODest = nil
|
419
|
+
ret
|
420
|
+
end
|
421
|
+
|
422
|
+
# Compare a string with disk file; abort if different. Disk filename is derived
|
423
|
+
# from caller function name; e.g., test_xxx produces filename _output_xxx
|
424
|
+
#
|
425
|
+
# @param str if not nil, string to compare; if nil, calls capture_end to get string
|
426
|
+
#
|
427
|
+
def match_expected_output(str = nil)
|
428
|
+
|
429
|
+
if !str
|
430
|
+
str = capture_end
|
431
|
+
end
|
432
|
+
|
433
|
+
cl_method = caller[0][/`.*'/][1..-2]
|
434
|
+
if (cl_method.start_with?("test_"))
|
435
|
+
cl_method = cl_method[5..-1]
|
436
|
+
end
|
437
|
+
path = "_output_" + cl_method + ".txt"
|
438
|
+
|
439
|
+
if !File.file?(path)
|
440
|
+
printf("no such file #{path} exists, writing it...\n")
|
441
|
+
write_text_file(path,str)
|
442
|
+
else
|
443
|
+
exp_cont = read_text_file(path)
|
444
|
+
if str != exp_cont
|
445
|
+
d1 = str
|
446
|
+
d2 = exp_cont
|
447
|
+
|
448
|
+
# Find location where they differ
|
449
|
+
lines1 = d1.split("\n")
|
450
|
+
lines2 = d2.split("\n")
|
451
|
+
j = [lines1.size, lines2.size].max
|
452
|
+
|
453
|
+
s = "???"
|
454
|
+
found_diff = false
|
455
|
+
hist = []
|
456
|
+
|
457
|
+
found_count = 0
|
458
|
+
j.times do |i|
|
459
|
+
found_diff ||= (i >= lines1.size || i >= lines2.size || lines1[i] != lines2[i])
|
460
|
+
s = sprintf("%3d:",i)
|
461
|
+
if !found_diff
|
462
|
+
hist << "#{s} #{lines1[i]}\n #{lines2[i]}\n"
|
463
|
+
else
|
464
|
+
if found_count < 3
|
465
|
+
if i < lines1.size
|
466
|
+
s << " #{lines1[i]}\n"
|
467
|
+
else
|
468
|
+
s << " ---END---\n"
|
469
|
+
end
|
470
|
+
if i < lines2.size
|
471
|
+
s << " #{lines2[i]}\n"
|
472
|
+
else
|
473
|
+
s << " ---END---\n"
|
474
|
+
end
|
475
|
+
hist << s
|
476
|
+
end
|
477
|
+
found_count += 1
|
478
|
+
end
|
479
|
+
while hist.size > 6
|
480
|
+
hist.shift
|
481
|
+
end
|
482
|
+
end
|
483
|
+
dash = "-" * 95 + "\n"
|
484
|
+
raise IllegalStateException,"output did not match expected:\n#{dash}#{hist.join('')}#{dash}"
|
485
|
+
end
|
486
|
+
end
|
221
487
|
end
|
222
|
-
|
223
488
|
|
224
489
|
# Convenience method to detect if a script is being run
|
225
490
|
# e.g. as a 'main' method (for debug purposes only).
|
@@ -328,14 +593,6 @@ if defined? Test::Unit
|
|
328
593
|
method_setup
|
329
594
|
end
|
330
595
|
|
331
|
-
def out_dir
|
332
|
-
"_output_"
|
333
|
-
end
|
334
|
-
|
335
|
-
def out_path(f)
|
336
|
-
File.join(out_dir,f)
|
337
|
-
end
|
338
|
-
|
339
596
|
def teardown
|
340
597
|
if _suite_active?
|
341
598
|
if !defined? @@suiteSetup
|
@@ -360,3 +617,136 @@ if defined? Test::Unit
|
|
360
617
|
end
|
361
618
|
end
|
362
619
|
|
620
|
+
# Construct a string from an array of bytes
|
621
|
+
# @param byte_array array of bytes, or string (in which case it
|
622
|
+
# returns it unchanged)
|
623
|
+
#
|
624
|
+
def bytes_to_str(byte_array)
|
625
|
+
return byte_array if byte_array.is_a? String
|
626
|
+
|
627
|
+
byte_array.pack('C*')
|
628
|
+
end
|
629
|
+
|
630
|
+
# Construct an array of bytes from a string
|
631
|
+
# @param str string, or array of bytes (in which case it
|
632
|
+
# returns it unchanged)
|
633
|
+
#
|
634
|
+
def str_to_bytes(str)
|
635
|
+
return str if str.is_a? Array
|
636
|
+
str.bytes
|
637
|
+
end
|
638
|
+
|
639
|
+
# Get directory entries, excluding '.' and '..'
|
640
|
+
#
|
641
|
+
def dir_entries(path)
|
642
|
+
ents = Dir.entries(path)
|
643
|
+
ents.reject!{|entry| entry == '.' || entry == '..'}
|
644
|
+
end
|
645
|
+
|
646
|
+
def int_to_bytes(x)
|
647
|
+
[(x >> 24) & 0xff, (x >> 16) & 0xff, (x >> 8) & 0xff, x & 0xff]
|
648
|
+
end
|
649
|
+
|
650
|
+
def short_to_bytes(x)
|
651
|
+
[(x >> 8) & 0xff, x & 0xff]
|
652
|
+
end
|
653
|
+
|
654
|
+
# Decode a short from an array of bytes (big-endian).
|
655
|
+
# @param ba array of bytes
|
656
|
+
# @param offset offset of first (most significant) byte
|
657
|
+
#
|
658
|
+
def short_from_bytes(ba, offset=0)
|
659
|
+
(ba[offset] << 8) | ba[offset + 1]
|
660
|
+
end
|
661
|
+
|
662
|
+
# Decode an int from an array of bytes (big-endian).
|
663
|
+
# @param ba array of bytes
|
664
|
+
# @param offset offset of first (most significant) byte
|
665
|
+
#
|
666
|
+
def int_from_bytes(ba, offset=0)
|
667
|
+
(((((ba[offset] << 8) | ba[offset + 1]) << 8) | \
|
668
|
+
ba[offset + 2]) << 8) | ba[offset + 3]
|
669
|
+
end
|
670
|
+
|
671
|
+
# Transform string to 8-bit ASCII (i.e., just treat each byte as-is)
|
672
|
+
#
|
673
|
+
def to_ascii8(str)
|
674
|
+
str.force_encoding("ASCII-8BIT")
|
675
|
+
end
|
676
|
+
|
677
|
+
# Verify that a string is encoded as ASCII-8BIT
|
678
|
+
def simple_str(s)
|
679
|
+
if s.encoding.name != 'ASCII-8BIT' && s.encoding.name != 'UTF-8'
|
680
|
+
pr("string [%s]\n encoding is %s,\n expected ASCII-8BIT\n",s,s.encoding.name)
|
681
|
+
assert!(false)
|
682
|
+
end
|
683
|
+
end
|
684
|
+
|
685
|
+
# Truncate or pad string so it has a particular size
|
686
|
+
#
|
687
|
+
# @param s input string
|
688
|
+
# @param size
|
689
|
+
# @param pad padding character to use if string needs to grow
|
690
|
+
# @return modified string
|
691
|
+
#
|
692
|
+
def str_sized(s, size, pad="\0")
|
693
|
+
s[0...size].ljust(size,pad)
|
694
|
+
end
|
695
|
+
|
696
|
+
# Determine if running on the Windows operating system.
|
697
|
+
# Note: there is some debate about the best way to do this.
|
698
|
+
#
|
699
|
+
def windows?
|
700
|
+
if !defined? $__windows__
|
701
|
+
$__windows__ = (RUBY_PLATFORM =~ /mswin/)
|
702
|
+
end
|
703
|
+
$__windows__
|
704
|
+
end
|
705
|
+
|
706
|
+
# Mark all constants ending with '_' as private constants
|
707
|
+
#
|
708
|
+
# @param entity the class to examine
|
709
|
+
# @param add_non_suffix_versions if true, for each constant ABC_ found, also
|
710
|
+
# defines a constant ABC with the same value that is also private
|
711
|
+
#
|
712
|
+
def privatize(entity, add_non_suffix_versions = false)
|
713
|
+
|
714
|
+
db = false
|
715
|
+
|
716
|
+
# First command defines constants ABC = n for each constant ABC_ = n;
|
717
|
+
# Second declares both versions to be private
|
718
|
+
|
719
|
+
cmd1 = nil
|
720
|
+
cmd2 = nil
|
721
|
+
|
722
|
+
entity.constants.each do |c|
|
723
|
+
nm = c.to_s
|
724
|
+
|
725
|
+
if nm.end_with?('_')
|
726
|
+
nm_small = nm[0..-2]
|
727
|
+
|
728
|
+
if !cmd2
|
729
|
+
if add_non_suffix_versions
|
730
|
+
cmd1 = ''
|
731
|
+
end
|
732
|
+
cmd2 = 'private_constant '
|
733
|
+
else
|
734
|
+
cmd2 << ','
|
735
|
+
end
|
736
|
+
|
737
|
+
|
738
|
+
!cmd1 || cmd1 << entity.to_s << '::' << nm_small << '=' << entity.const_get(c).to_s << "\n"
|
739
|
+
!cmd1 || cmd2 << ':' << nm_small << ','
|
740
|
+
cmd2 << ':' << nm
|
741
|
+
end
|
742
|
+
end
|
743
|
+
|
744
|
+
if cmd2
|
745
|
+
if cmd1
|
746
|
+
!db || pr("about to eval:\n%s\n",cmd1)
|
747
|
+
eval(cmd1)
|
748
|
+
end
|
749
|
+
!db || pr("about to eval:\n%s\n",cmd2)
|
750
|
+
eval(cmd2)
|
751
|
+
end
|
752
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
(line 1, col 1) : speed
|
2
|
+
(line 1, col 6) : =
|
3
|
+
(line 1, col 7) : 42
|
4
|
+
(line 1, col 9) : gravity
|
5
|
+
(line 1, col 16) : =
|
6
|
+
(line 1, col 17) : -9.80
|
7
|
+
...couldn't find sequence...
|
8
|
+
(line 1, col 22) : {
|
9
|
+
(line 1, col 23) : color
|
10
|
+
(line 1, col 29) : =
|
11
|
+
(line 1, col 30) : green
|
12
|
+
(line 1, col 35) : }
|
13
|
+
(line 1, col 36) : title
|
14
|
+
(line 1, col 41) : =
|
15
|
+
(line 1, col 42) : 'This is a string with \' an escaped delimiter'
|
16
|
+
(line 1, col 89) : if
|
17
|
+
(line 1, col 91) : gravity
|
18
|
+
(line 1, col 98) : ==
|
19
|
+
(line 1, col 100) : 12
|
20
|
+
...read BROP DO ID sequence...
|
21
|
+
(line 1, col 102) : {
|
22
|
+
(line 1, col 103) : do
|
23
|
+
(line 1, col 105) : something
|
24
|
+
(line 1, col 114) : }
|
25
|
+
(line 1, col 115) : do
|
26
|
+
(line 1, col 117) : something_else
|
data/test/test_tokn.rb
CHANGED
@@ -3,11 +3,8 @@ require 'test/unit'
|
|
3
3
|
require_relative '../lib/tokn/tools.rb'
|
4
4
|
req('range_partition dfa dfa_builder tokenizer token_defn_parser')
|
5
5
|
|
6
|
-
#
|
6
|
+
#SINGLETEST = "test_100_add"
|
7
7
|
|
8
|
-
#setTestDir()
|
9
|
-
|
10
|
-
#SINGLETEST = "test_ps_output_multi"
|
11
8
|
if defined? SINGLETEST
|
12
9
|
if main?(__FILE__)
|
13
10
|
ARGV.concat("-n #{SINGLETEST}".split)
|
@@ -18,29 +15,27 @@ class TestTokn < MyTestSuite
|
|
18
15
|
|
19
16
|
include Tokn, ToknInternal
|
20
17
|
|
21
|
-
# def data_file(f)
|
22
|
-
# File.join("data",f)
|
23
|
-
# # File.dirname(__FILE__)+"/data/"+f
|
24
|
-
# end
|
25
|
-
|
26
18
|
def suite_setup
|
27
|
-
|
19
|
+
|
28
20
|
# Make current directory = the one containing this script
|
29
21
|
main?(__FILE__)
|
30
22
|
|
31
|
-
|
32
|
-
|
23
|
+
@@out_dir = File.absolute_path(File.join(Dir.pwd,"_misc_"))
|
24
|
+
|
25
|
+
if !File.directory?(@@out_dir)
|
26
|
+
Dir.mkdir(@@out_dir)
|
33
27
|
end
|
34
28
|
|
35
|
-
@@sampleText =
|
36
|
-
@@sampleTokens =
|
29
|
+
@@sampleText = read_text_file("sampletext.txt")
|
30
|
+
@@sampleTokens = read_text_file("sampletokens.txt")
|
37
31
|
end
|
38
32
|
|
39
|
-
|
40
|
-
|
41
|
-
|
33
|
+
def out_path(f)
|
34
|
+
File.join(@@out_dir,f)
|
35
|
+
end
|
36
|
+
|
42
37
|
def suite_teardown
|
43
|
-
remove_file_or_dir(out_dir)
|
38
|
+
# remove_file_or_dir(@@out_dir)
|
44
39
|
end
|
45
40
|
|
46
41
|
def method_setup
|
@@ -332,7 +327,7 @@ class TestTokn < MyTestSuite
|
|
332
327
|
addset(40,50)
|
333
328
|
@par.prepare
|
334
329
|
|
335
|
-
@par.generatePDF(out_dir)
|
330
|
+
@par.generatePDF(@@out_dir)
|
336
331
|
|
337
332
|
prep
|
338
333
|
add 25,33
|
@@ -367,13 +362,13 @@ END
|
|
367
362
|
s = x.startState
|
368
363
|
x.endState.finalState = true
|
369
364
|
|
370
|
-
s.generatePDF(out_dir,"nfa")
|
365
|
+
s.generatePDF(@@out_dir,"nfa")
|
371
366
|
|
372
367
|
r = s.reverseNFA()
|
373
|
-
r.generatePDF(out_dir,"reversed")
|
368
|
+
r.generatePDF(@@out_dir,"reversed")
|
374
369
|
|
375
370
|
dfa = DFABuilder.nfa_to_dfa(s)
|
376
|
-
dfa.generatePDF(out_dir,"buildDFA")
|
371
|
+
dfa.generatePDF(@@out_dir,"buildDFA")
|
377
372
|
end
|
378
373
|
|
379
374
|
def test_180_cvt_NFA_to_DFA
|
@@ -382,14 +377,14 @@ END
|
|
382
377
|
s = x.startState
|
383
378
|
x.endState.finalState = true
|
384
379
|
|
385
|
-
s.generatePDF(out_dir,"nfa")
|
380
|
+
s.generatePDF(@@out_dir,"nfa")
|
386
381
|
|
387
382
|
dfa = DFABuilder.nfa_to_dfa(s)
|
388
|
-
dfa.generatePDF(out_dir,"dfa")
|
383
|
+
dfa.generatePDF(@@out_dir,"dfa")
|
389
384
|
|
390
385
|
oldToNewMap, maxId2 = dfa.duplicateNFA(42)
|
391
386
|
dfa2 = oldToNewMap[dfa]
|
392
|
-
dfa2.generatePDF(out_dir,"dfa_duplicated")
|
387
|
+
dfa2.generatePDF(@@out_dir,"dfa_duplicated")
|
393
388
|
end
|
394
389
|
|
395
390
|
def test_190_TokenDefParser
|
@@ -399,7 +394,7 @@ END
|
|
399
394
|
td = TokenDefParser.new(s)
|
400
395
|
|
401
396
|
tokDFA = td.dfa
|
402
|
-
tokDFA.startState.generatePDF(out_dir,"TokenDFA")
|
397
|
+
tokDFA.startState.generatePDF(@@out_dir,"TokenDFA")
|
403
398
|
|
404
399
|
end
|
405
400
|
|
@@ -525,10 +520,11 @@ END
|
|
525
520
|
|
526
521
|
def test_270_filter_ws
|
527
522
|
|
523
|
+
capture_begin
|
524
|
+
|
528
525
|
dfa = DFA.from_script_file("sampletokens.txt")
|
529
|
-
t = Tokenizer.new(dfa,
|
526
|
+
t = Tokenizer.new(dfa, read_text_file("sampletext.txt"), "WS")
|
530
527
|
|
531
|
-
s = ''
|
532
528
|
while t.hasNext do
|
533
529
|
|
534
530
|
tk = t.peek
|
@@ -536,48 +532,19 @@ END
|
|
536
532
|
if t.nameOf(tk) == 'BROP'
|
537
533
|
lst = t.readSequenceIf('BROP DO ID BRCL')
|
538
534
|
if lst
|
539
|
-
|
540
|
-
lst.each{ |x|
|
535
|
+
puts " ...read BROP DO ID sequence..."
|
536
|
+
lst.each{ |x| puts " #{d(x)}"}
|
541
537
|
next
|
542
538
|
else
|
543
|
-
|
539
|
+
puts " ...couldn't find sequence..."
|
544
540
|
end
|
545
541
|
end
|
546
542
|
|
547
543
|
tk = t.read
|
548
|
-
|
549
|
-
|
544
|
+
puts d(tk)
|
550
545
|
end
|
551
|
-
|
552
|
-
|
553
|
-
(line 1, col 6) : =
|
554
|
-
(line 1, col 7) : 42
|
555
|
-
(line 1, col 9) : gravity
|
556
|
-
(line 1, col 16) : =
|
557
|
-
(line 1, col 17) : -9.80
|
558
|
-
...couldn't find sequence...
|
559
|
-
(line 1, col 22) : {
|
560
|
-
(line 1, col 23) : color
|
561
|
-
(line 1, col 29) : =
|
562
|
-
(line 1, col 30) : green
|
563
|
-
(line 1, col 35) : }
|
564
|
-
(line 1, col 36) : title
|
565
|
-
(line 1, col 41) : =
|
566
|
-
(line 1, col 42) : 'This is a string with \\' an escaped delimiter'
|
567
|
-
(line 1, col 89) : if
|
568
|
-
(line 1, col 91) : gravity
|
569
|
-
(line 1, col 98) : ==
|
570
|
-
(line 1, col 100) : 12
|
571
|
-
...read BROP DO ID sequence...
|
572
|
-
(line 1, col 102) : {
|
573
|
-
(line 1, col 103) : do
|
574
|
-
(line 1, col 105) : something
|
575
|
-
(line 1, col 114) : }
|
576
|
-
(line 1, col 115) : do
|
577
|
-
(line 1, col 117) : something_else
|
578
|
-
EXP
|
579
|
-
|
580
|
-
assert(s.strip == exp.strip)
|
546
|
+
|
547
|
+
match_expected_output
|
581
548
|
end
|
582
549
|
|
583
550
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Sember
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "Given a script containing token descriptions (each a regular expression),
|
14
14
|
\ntokn compiles an automaton which it can then use to efficiently convert a \ntext
|
@@ -40,6 +40,15 @@ files:
|
|
40
40
|
- test/_OLD_/data/sampletokens_dfa.txt
|
41
41
|
- test/_OLD_/test.rb
|
42
42
|
- test/_OLD_/testcmds
|
43
|
+
- test/_misc_/__mygraph__TokenDFA.pdf
|
44
|
+
- test/_misc_/__mygraph__buildDFA.pdf
|
45
|
+
- test/_misc_/__mygraph__dfa.pdf
|
46
|
+
- test/_misc_/__mygraph__dfa_duplicated.pdf
|
47
|
+
- test/_misc_/__mygraph__nfa.pdf
|
48
|
+
- test/_misc_/__mygraph__partition.pdf
|
49
|
+
- test/_misc_/__mygraph__reversed.pdf
|
50
|
+
- test/_misc_/sampletokens_dfa.txt
|
51
|
+
- test/_output_270_filter_ws.txt
|
43
52
|
- test/sampletext.txt
|
44
53
|
- test/sampletokens.txt
|
45
54
|
- test/test_tokn.rb
|