tokn 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.txt +2 -1
- data/lib/tokn/dfa.rb +4 -4
- data/lib/tokn/tokn_const.rb +15 -0
- data/lib/tokn/tools.rb +473 -83
- data/test/_misc_/__mygraph__TokenDFA.pdf +0 -0
- data/test/_misc_/__mygraph__buildDFA.pdf +0 -0
- data/test/_misc_/__mygraph__dfa.pdf +0 -0
- data/test/_misc_/__mygraph__dfa_duplicated.pdf +0 -0
- data/test/_misc_/__mygraph__nfa.pdf +0 -0
- data/test/_misc_/__mygraph__partition.pdf +0 -0
- data/test/_misc_/__mygraph__reversed.pdf +0 -0
- data/test/_misc_/sampletokens_dfa.txt +1 -0
- data/test/_output_270_filter_ws.txt +26 -0
- data/test/test_tokn.rb +30 -63
- metadata +11 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84244706589683dd8fa77c96355a4494d6585561
|
4
|
+
data.tar.gz: cf5dcd362908eaad20371f9f62193000fc986d8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2a6659f5c4819088ff95dda3a48b42b2a7deb7dcd46f01149b45121bff298778850e42597e63a3cd8ec5bcc94f9aba41c90b914c2b5d88f6cf13ad9d64dd4cf
|
7
|
+
data.tar.gz: 059e6c6601c8e35aa7f7e465fd408dc227a23e899458962592a12bdd4bcc9004bb802bf0ca4e580e8502c6a2ba2ebe4ea8e23705903198efd431e30833783cb6
|
data/CHANGELOG.txt
CHANGED
data/lib/tokn/dfa.rb
CHANGED
@@ -20,7 +20,7 @@ module Tokn
|
|
20
20
|
def self.from_script(script, persistPath = nil)
|
21
21
|
|
22
22
|
if persistPath and File.exist?(persistPath)
|
23
|
-
return extractDFA(
|
23
|
+
return extractDFA(read_text_file(persistPath))
|
24
24
|
end
|
25
25
|
|
26
26
|
req('token_defn_parser')
|
@@ -29,7 +29,7 @@ module Tokn
|
|
29
29
|
dfa = td.dfa
|
30
30
|
|
31
31
|
if persistPath
|
32
|
-
|
32
|
+
write_text_file(persistPath, dfa.serialize())
|
33
33
|
end
|
34
34
|
|
35
35
|
dfa
|
@@ -39,14 +39,14 @@ module Tokn
|
|
39
39
|
# the file at scriptPath.
|
40
40
|
#
|
41
41
|
def self.from_script_file(scriptPath, persistPath = nil)
|
42
|
-
self.from_script(
|
42
|
+
self.from_script(read_text_file(scriptPath), persistPath)
|
43
43
|
end
|
44
44
|
|
45
45
|
# Compile a Tokenizer DFA from a text file (that contains a
|
46
46
|
# JSON string)
|
47
47
|
#
|
48
48
|
def self.from_file(path)
|
49
|
-
from_json(
|
49
|
+
from_json(read_text_file(path))
|
50
50
|
end
|
51
51
|
|
52
52
|
# Compile a Tokenizer DFA from a JSON string
|
data/lib/tokn/tokn_const.rb
CHANGED
@@ -32,3 +32,18 @@ module ToknInternal
|
|
32
32
|
EPSILON-1-edgeLabel
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
37
|
+
#
|
38
|
+
# It does this by making a system call to the 'dot' utility.
|
39
|
+
#
|
40
|
+
def dotToPDF(dotFile, name = "", test_dir = nil)
|
41
|
+
gr = dotFile
|
42
|
+
|
43
|
+
raise ArgumentError if !test_dir
|
44
|
+
|
45
|
+
dotPath = File.join(test_dir,".__mygraph__.dot")
|
46
|
+
write_text_file(dotPath,gr)
|
47
|
+
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
48
|
+
system("dot -Tpdf "+dotPath+" -o "+destName)
|
49
|
+
end
|
data/lib/tokn/tools.rb
CHANGED
@@ -1,13 +1,31 @@
|
|
1
1
|
require 'set'
|
2
2
|
require 'fileutils'
|
3
3
|
|
4
|
+
###############################################################
|
5
|
+
#
|
4
6
|
# Various utility and debug convenience functions.
|
5
7
|
#
|
8
|
+
###############################################################
|
6
9
|
|
7
|
-
#
|
10
|
+
# Exception class for objects in illegal states
|
8
11
|
#
|
9
|
-
|
10
|
-
|
12
|
+
class IllegalStateException < Exception
|
13
|
+
end
|
14
|
+
|
15
|
+
# A string containing a single zero, with ASCII 8-bit encoding (i.e., plain old bytes)
|
16
|
+
ZERO_CHAR = "\0".force_encoding("ASCII-8BIT")
|
17
|
+
|
18
|
+
# Construct a string of zeros
|
19
|
+
# @param count number of zeros
|
20
|
+
#
|
21
|
+
def zero_bytes(count)
|
22
|
+
ZERO_CHAR * count
|
23
|
+
end
|
24
|
+
|
25
|
+
# Convenience method to perform 'require_relative' on a set of files
|
26
|
+
#
|
27
|
+
# @param fileListStr space-delimited file/path items, without .rb extensions
|
28
|
+
# @param subdir optional path to files relative to this file
|
11
29
|
#
|
12
30
|
def req(fileListStr,subdir = nil)
|
13
31
|
fileListStr.split(' ').each do |x|
|
@@ -21,67 +39,114 @@ end
|
|
21
39
|
|
22
40
|
# Shorthand for printf(...)
|
23
41
|
#
|
24
|
-
|
25
|
-
|
42
|
+
alias :pr :printf
|
43
|
+
|
44
|
+
# Convert an object to a human-readable string,
|
45
|
+
# or <nil>; should be considered a debug-only feature
|
46
|
+
#
|
47
|
+
def d(arg)
|
48
|
+
arg.nil? ? "<nil>" : arg.inspect
|
26
49
|
end
|
27
50
|
|
51
|
+
# Convert an object to a human-readable string,
|
52
|
+
# by calling a type-appropriate function: da, dh, or just d.
|
53
|
+
# @param arg object
|
54
|
+
# @param indent optional indentation for pretty printing; if result
|
55
|
+
# spans multiple lines, each line should be indented by this amount
|
56
|
+
#
|
57
|
+
def d2(arg, indent = 0)
|
58
|
+
return da(arg, indent) if arg.is_a? Array
|
59
|
+
return dh(arg, indent) if arg.is_a? Hash
|
60
|
+
return df(arg) if arg.class == FalseClass || arg.class == TrueClass
|
61
|
+
return d(arg)
|
62
|
+
end
|
28
63
|
|
29
|
-
# Convert an object to a human-readable string
|
30
|
-
# should be considered a debug-only feature
|
64
|
+
# Convert an object to a human-readable string, prefixed with its type
|
31
65
|
#
|
32
|
-
def
|
33
|
-
arg.nil?
|
66
|
+
def dt(arg)
|
67
|
+
if arg.nil?
|
68
|
+
return "<nil>"
|
69
|
+
end
|
70
|
+
s = arg.class.to_s
|
71
|
+
s << ':'
|
72
|
+
s << arg.inspect
|
73
|
+
s
|
74
|
+
end
|
75
|
+
|
76
|
+
# Append a particular number of spaces to a string
|
77
|
+
def add_sp(s, indent = 0)
|
78
|
+
s << ' ' * indent
|
79
|
+
end
|
80
|
+
|
81
|
+
# Pretty-print an array,
|
82
|
+
# one element to a line
|
83
|
+
# @param indent indentation of each line, in spaces
|
84
|
+
def da(array, indent = 0)
|
85
|
+
return d(array) if !array
|
86
|
+
s = 'Array ['
|
87
|
+
indent += 2
|
88
|
+
array.each do |x|
|
89
|
+
s << "\n"
|
90
|
+
add_sp(s,indent)
|
91
|
+
s2 = d2(x, indent + 2)
|
92
|
+
s << s2
|
93
|
+
end
|
94
|
+
s << " ]"
|
95
|
+
s
|
96
|
+
end
|
97
|
+
|
98
|
+
# Pretty-print a hash,
|
99
|
+
# one element to a line
|
100
|
+
# @param indent indentation of each line, in spaces
|
101
|
+
def dh(hash, indent = 0)
|
102
|
+
return d(hash) if !hash
|
103
|
+
s = 'Hash {'
|
104
|
+
indent += 2
|
105
|
+
hash.each_pair do |key,val|
|
106
|
+
s2 = d(key)
|
107
|
+
s3 = d2(val, indent + 4)
|
108
|
+
s << "\n "
|
109
|
+
add_sp(s,indent)
|
110
|
+
s << s2.chomp << " => " << s3.chomp
|
111
|
+
end
|
112
|
+
s << " }"
|
113
|
+
s
|
114
|
+
end
|
115
|
+
|
116
|
+
# Generate debug description of a boolean value
|
117
|
+
# @param flag value to interpret as a boolean; prints 'T' iff not nil
|
118
|
+
# @param label optional label
|
119
|
+
def df(flag, label=nil)
|
120
|
+
s = ''
|
121
|
+
if label
|
122
|
+
s << label << ':'
|
123
|
+
end
|
124
|
+
s << (flag ? "T" : "F")
|
125
|
+
s << ' '
|
126
|
+
s
|
34
127
|
end
|
35
128
|
|
36
129
|
# Assert that a value is true. Should be considered a
|
37
130
|
# very temporary, debug-only option; it is slow and
|
38
131
|
# generates a warning that it is being called.
|
39
|
-
#
|
40
|
-
|
41
|
-
|
132
|
+
# @param cond condition
|
133
|
+
# @param msg generates additional message using printf(), if these arguments exist
|
134
|
+
def assert!(cond, *msg)
|
135
|
+
one_time_alert("warning",0,"Checking assertion")
|
42
136
|
if not cond
|
43
137
|
str = (msg.size == 0) ? "assertion error" : sprintf(*msg)
|
44
138
|
raise Exception, str
|
45
139
|
end
|
46
140
|
end
|
47
141
|
|
48
|
-
|
49
|
-
## Set test directory. If nil, sets to home directory + "__test__"
|
50
|
-
##
|
51
|
-
#def setTestDir(d = nil)
|
52
|
-
# $testDir = d || File.join(Dir.home,"__test__")
|
53
|
-
#end
|
54
|
-
|
55
|
-
## Get a path within the test directory;
|
56
|
-
## create test directory if it doesn't exist.
|
57
|
-
##
|
58
|
-
## relPath : if nil, returns the test directory; else
|
59
|
-
## returns the test directory joined to this one
|
60
|
-
##
|
61
|
-
#def withinTestDir(relPath = nil)
|
62
|
-
# if !$testDir
|
63
|
-
# raise IllegalStateException, "No test directory has been defined"
|
64
|
-
# end
|
65
|
-
# if !File.directory?($testDir)
|
66
|
-
# Dir::mkdir($testDir)
|
67
|
-
# end
|
68
|
-
# relPath ? File.join($testDir,relPath) : $testDir
|
69
|
-
#end
|
70
|
-
|
71
|
-
# Convert a .dot file (string) to a PDF file "__mygraph__nnn.pdf"
|
72
|
-
# in the test directory.
|
73
|
-
#
|
74
|
-
# It does this by making a system call to the 'dot' utility.
|
142
|
+
# Abort with message about unimplemented code
|
75
143
|
#
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
writeTextFile(dotPath,gr)
|
83
|
-
destName = File.join(test_dir,"__mygraph__"+name+".pdf")
|
84
|
-
system("dot -Tpdf "+dotPath+" -o "+destName)
|
144
|
+
def unimp!(msg = nil)
|
145
|
+
msg2 = "Unimplemented code"
|
146
|
+
if msg
|
147
|
+
msg2 << ": " << msg
|
148
|
+
end
|
149
|
+
raise Exception, msg2
|
85
150
|
end
|
86
151
|
|
87
152
|
# Extensions to the Enumerable module
|
@@ -106,13 +171,12 @@ module Enumerable
|
|
106
171
|
end
|
107
172
|
end
|
108
173
|
|
109
|
-
|
110
174
|
# Get a nice, concise description of the file and line
|
111
175
|
# of some caller within the stack.
|
112
176
|
#
|
113
|
-
#
|
177
|
+
# @param nSkip the number of items deep in the call stack to look
|
114
178
|
#
|
115
|
-
def
|
179
|
+
def get_caller_location(nSkip = 2)
|
116
180
|
|
117
181
|
filename = nil
|
118
182
|
linenumber = nil
|
@@ -130,11 +194,11 @@ def getCallerLocation(nSkip = 2)
|
|
130
194
|
if pth.size
|
131
195
|
filename = pth[-1]
|
132
196
|
end
|
133
|
-
linenumber = fi[i+1,j-i-1]
|
197
|
+
linenumber = fi[i+1,j-i-1]
|
134
198
|
end
|
135
199
|
end
|
136
200
|
if filename && linenumber
|
137
|
-
loc = filename + " ("+linenumber
|
201
|
+
loc = filename + " ("+linenumber+")"
|
138
202
|
else
|
139
203
|
loc = "(UNKNOWN LOCATION)"
|
140
204
|
end
|
@@ -149,12 +213,12 @@ $AlertStrings = Set.new
|
|
149
213
|
# Print a message if it hasn't yet been printed,
|
150
214
|
# which includes the caller's location
|
151
215
|
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
216
|
+
# @param typeString e.g., "warning", "unimplemented"
|
217
|
+
# @param nSkip the number of levels deep that the caller is in the stack
|
218
|
+
# @param args if present, calls sprintf(...) with these to append to the message
|
155
219
|
#
|
156
|
-
def
|
157
|
-
loc =
|
220
|
+
def one_time_alert(typeString, nSkip, *args)
|
221
|
+
loc = get_caller_location(nSkip + 2)
|
158
222
|
s = "*** "+typeString+" " + loc
|
159
223
|
if args && args.size
|
160
224
|
s2 = sprintf(args[0], *args[1..-1])
|
@@ -169,33 +233,160 @@ def oneTimeAlert(typeString, nSkip, *args)
|
|
169
233
|
end
|
170
234
|
|
171
235
|
# Print a 'warning' alert, one time only
|
172
|
-
#
|
236
|
+
# @param args if present, calls printf() with these
|
173
237
|
def warn(*args)
|
174
|
-
|
238
|
+
one_time_alert("warning",0, *args)
|
239
|
+
end
|
240
|
+
|
241
|
+
# Convenience method for setting 'db' true within methods,
|
242
|
+
# and to print a one-time warning if so.
|
243
|
+
# @param val value to set db to; it is convenient to disable
|
244
|
+
# debug printing quickly by adding a zero, e.g., 'warndb 0'
|
245
|
+
#
|
246
|
+
def warndb(val = true)
|
247
|
+
if !val || val == 0
|
248
|
+
return false
|
249
|
+
end
|
250
|
+
one_time_alert("warning",1,"Debug printing enabled")
|
251
|
+
true
|
175
252
|
end
|
176
253
|
|
177
254
|
# Print an 'unimplemented' alert, one time only
|
178
|
-
#
|
255
|
+
# @param args if present, calls printf() with these
|
179
256
|
def unimp(*args)
|
180
|
-
|
257
|
+
one_time_alert("unimplemented", 0, *args)
|
181
258
|
end
|
182
259
|
|
183
260
|
# Write a string to a text file
|
184
261
|
#
|
185
|
-
def
|
262
|
+
def write_text_file(path, contents)
|
186
263
|
File.open(path, "wb") {|f| f.write(contents) }
|
187
264
|
end
|
188
265
|
|
189
266
|
# Read a file's contents, return as a string
|
190
267
|
#
|
191
|
-
def
|
268
|
+
def read_text_file(path)
|
192
269
|
contents = nil
|
193
270
|
File.open(path,"rb") {|f| contents = f.read }
|
194
271
|
contents
|
195
272
|
end
|
196
273
|
|
274
|
+
# Method that takes a code block as an argument to
|
275
|
+
# achieve the same functionality as Java/C++'s
|
276
|
+
# do {
|
277
|
+
# ...
|
278
|
+
# ... possibly with 'break' to jump to the end ...
|
279
|
+
# } while (false);
|
280
|
+
#
|
281
|
+
def block
|
282
|
+
yield
|
283
|
+
end
|
284
|
+
|
285
|
+
# Construct hex representation of value
|
286
|
+
# @param value integer value
|
287
|
+
# @param num_digits number of hex digits
|
288
|
+
#
|
289
|
+
def to_hex(value, num_digits=4)
|
290
|
+
s = sprintf("%x", value)
|
291
|
+
s.rjust(num_digits,'0')
|
292
|
+
end
|
293
|
+
|
294
|
+
# Hex dump a string or byte array
|
295
|
+
# @param byte_array_or_string
|
296
|
+
# @param title
|
297
|
+
# @param offset offset to first value within array
|
298
|
+
# @param length number of values to dump
|
299
|
+
# @param bytes_per_row
|
300
|
+
# @param with_text if true, displays ASCII values to right of hex dump
|
301
|
+
#
|
302
|
+
def hex_dump(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
|
303
|
+
ss = hex_dump_to_string(byte_array_or_string, title, offset, length, bytes_per_row, with_text)
|
304
|
+
puts ss
|
305
|
+
end
|
306
|
+
|
307
|
+
# Hex dump a string or byte array to a string; see hex_dump for parameter descriptions
|
308
|
+
#
|
309
|
+
def hex_dump_to_string(byte_array_or_string, title=nil, offset=0, length= -1, bytes_per_row=16, with_text=true)
|
310
|
+
|
311
|
+
byte_array = byte_array_or_string
|
312
|
+
if byte_array.is_a? String
|
313
|
+
byte_array = byte_array.bytes.to_a
|
314
|
+
end
|
315
|
+
|
316
|
+
ss = ''
|
317
|
+
|
318
|
+
if title
|
319
|
+
ss << title << ":\n"
|
320
|
+
end
|
321
|
+
|
322
|
+
if length < 0
|
323
|
+
length = byte_array.size - offset
|
324
|
+
end
|
325
|
+
|
326
|
+
length = [length, byte_array.size - offset].min
|
327
|
+
|
328
|
+
max_addr = offset + length - 1
|
329
|
+
num_digits = 4
|
330
|
+
while (1 << (4 * num_digits)) <= max_addr
|
331
|
+
num_digits += 1
|
332
|
+
end
|
333
|
+
|
334
|
+
while true
|
335
|
+
ss << to_hex(offset, num_digits)
|
336
|
+
ss << ': '
|
337
|
+
|
338
|
+
chunk = [length, bytes_per_row].min
|
339
|
+
bytes_per_row.times do |i|
|
340
|
+
if i % 4 == 0
|
341
|
+
ss << ' '
|
342
|
+
end
|
343
|
+
|
344
|
+
if i < chunk
|
345
|
+
v = byte_array[offset + i]
|
346
|
+
ss << ((v != 0) ? to_hex(v,2) : '..')
|
347
|
+
ss << ' '
|
348
|
+
else
|
349
|
+
ss << ' '
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
if with_text
|
354
|
+
ss << ' |'
|
355
|
+
bytes_per_row.times do |i|
|
356
|
+
if i < chunk
|
357
|
+
v = byte_array[offset + i]
|
358
|
+
ss << ((v >= 32 && v < 127) ? v : '_')
|
359
|
+
end
|
360
|
+
end
|
361
|
+
ss << '|'
|
362
|
+
end
|
363
|
+
ss << "\n"
|
364
|
+
|
365
|
+
length -= chunk
|
366
|
+
offset += chunk
|
367
|
+
break if length <= 0
|
368
|
+
end
|
369
|
+
ss
|
370
|
+
end
|
371
|
+
|
372
|
+
$prevTime = nil
|
373
|
+
|
374
|
+
# Calculate time elapsed, in seconds, from last call to this function;
|
375
|
+
# if it's never been called, returns zero
|
376
|
+
#
|
377
|
+
def elapsed
|
378
|
+
curr = Time.now.to_f
|
379
|
+
elap = 0
|
380
|
+
if $prevTime
|
381
|
+
elap = curr - $prevTime
|
382
|
+
end
|
383
|
+
$prevTime = curr
|
384
|
+
elap
|
385
|
+
end
|
386
|
+
|
197
387
|
# Delete a file or directory, if it exists.
|
198
388
|
# Caution! If directory, deletes all files and subdirectories.
|
389
|
+
#
|
199
390
|
def remove_file_or_dir(pth)
|
200
391
|
if File.directory?(pth)
|
201
392
|
FileUtils.remove_dir(pth)
|
@@ -204,22 +395,96 @@ def remove_file_or_dir(pth)
|
|
204
395
|
end
|
205
396
|
end
|
206
397
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
#
|
398
|
+
require 'stringio'
|
399
|
+
|
400
|
+
$IODest = nil
|
401
|
+
$OldStdOut = nil
|
402
|
+
|
403
|
+
# Redirect standard output to an internal string
|
213
404
|
#
|
214
|
-
def
|
215
|
-
|
405
|
+
def capture_begin
|
406
|
+
raise IllegalStateException if $IODest
|
407
|
+
$IODest = StringIO.new
|
408
|
+
$OldStdOut, $stdout = $stdout, $IODest
|
216
409
|
end
|
217
410
|
|
218
|
-
#
|
411
|
+
# Restore standard output; return captured text
|
412
|
+
# @return text that was redirected
|
219
413
|
#
|
220
|
-
|
414
|
+
def capture_end
|
415
|
+
raise IllegalStateException if !$IODest
|
416
|
+
$stdout = $OldStdOut
|
417
|
+
ret = $IODest.string
|
418
|
+
$IODest = nil
|
419
|
+
ret
|
420
|
+
end
|
421
|
+
|
422
|
+
# Compare a string with disk file; abort if different. Disk filename is derived
|
423
|
+
# from caller function name; e.g., test_xxx produces filename _output_xxx
|
424
|
+
#
|
425
|
+
# @param str if not nil, string to compare; if nil, calls capture_end to get string
|
426
|
+
#
|
427
|
+
def match_expected_output(str = nil)
|
428
|
+
|
429
|
+
if !str
|
430
|
+
str = capture_end
|
431
|
+
end
|
432
|
+
|
433
|
+
cl_method = caller[0][/`.*'/][1..-2]
|
434
|
+
if (cl_method.start_with?("test_"))
|
435
|
+
cl_method = cl_method[5..-1]
|
436
|
+
end
|
437
|
+
path = "_output_" + cl_method + ".txt"
|
438
|
+
|
439
|
+
if !File.file?(path)
|
440
|
+
printf("no such file #{path} exists, writing it...\n")
|
441
|
+
write_text_file(path,str)
|
442
|
+
else
|
443
|
+
exp_cont = read_text_file(path)
|
444
|
+
if str != exp_cont
|
445
|
+
d1 = str
|
446
|
+
d2 = exp_cont
|
447
|
+
|
448
|
+
# Find location where they differ
|
449
|
+
lines1 = d1.split("\n")
|
450
|
+
lines2 = d2.split("\n")
|
451
|
+
j = [lines1.size, lines2.size].max
|
452
|
+
|
453
|
+
s = "???"
|
454
|
+
found_diff = false
|
455
|
+
hist = []
|
456
|
+
|
457
|
+
found_count = 0
|
458
|
+
j.times do |i|
|
459
|
+
found_diff ||= (i >= lines1.size || i >= lines2.size || lines1[i] != lines2[i])
|
460
|
+
s = sprintf("%3d:",i)
|
461
|
+
if !found_diff
|
462
|
+
hist << "#{s} #{lines1[i]}\n #{lines2[i]}\n"
|
463
|
+
else
|
464
|
+
if found_count < 3
|
465
|
+
if i < lines1.size
|
466
|
+
s << " #{lines1[i]}\n"
|
467
|
+
else
|
468
|
+
s << " ---END---\n"
|
469
|
+
end
|
470
|
+
if i < lines2.size
|
471
|
+
s << " #{lines2[i]}\n"
|
472
|
+
else
|
473
|
+
s << " ---END---\n"
|
474
|
+
end
|
475
|
+
hist << s
|
476
|
+
end
|
477
|
+
found_count += 1
|
478
|
+
end
|
479
|
+
while hist.size > 6
|
480
|
+
hist.shift
|
481
|
+
end
|
482
|
+
end
|
483
|
+
dash = "-" * 95 + "\n"
|
484
|
+
raise IllegalStateException,"output did not match expected:\n#{dash}#{hist.join('')}#{dash}"
|
485
|
+
end
|
486
|
+
end
|
221
487
|
end
|
222
|
-
|
223
488
|
|
224
489
|
# Convenience method to detect if a script is being run
|
225
490
|
# e.g. as a 'main' method (for debug purposes only).
|
@@ -328,14 +593,6 @@ if defined? Test::Unit
|
|
328
593
|
method_setup
|
329
594
|
end
|
330
595
|
|
331
|
-
def out_dir
|
332
|
-
"_output_"
|
333
|
-
end
|
334
|
-
|
335
|
-
def out_path(f)
|
336
|
-
File.join(out_dir,f)
|
337
|
-
end
|
338
|
-
|
339
596
|
def teardown
|
340
597
|
if _suite_active?
|
341
598
|
if !defined? @@suiteSetup
|
@@ -360,3 +617,136 @@ if defined? Test::Unit
|
|
360
617
|
end
|
361
618
|
end
|
362
619
|
|
620
|
+
# Construct a string from an array of bytes
|
621
|
+
# @param byte_array array of bytes, or string (in which case it
|
622
|
+
# returns it unchanged)
|
623
|
+
#
|
624
|
+
def bytes_to_str(byte_array)
|
625
|
+
return byte_array if byte_array.is_a? String
|
626
|
+
|
627
|
+
byte_array.pack('C*')
|
628
|
+
end
|
629
|
+
|
630
|
+
# Construct an array of bytes from a string
|
631
|
+
# @param str string, or array of bytes (in which case it
|
632
|
+
# returns it unchanged)
|
633
|
+
#
|
634
|
+
def str_to_bytes(str)
|
635
|
+
return str if str.is_a? Array
|
636
|
+
str.bytes
|
637
|
+
end
|
638
|
+
|
639
|
+
# Get directory entries, excluding '.' and '..'
|
640
|
+
#
|
641
|
+
def dir_entries(path)
|
642
|
+
ents = Dir.entries(path)
|
643
|
+
ents.reject!{|entry| entry == '.' || entry == '..'}
|
644
|
+
end
|
645
|
+
|
646
|
+
def int_to_bytes(x)
|
647
|
+
[(x >> 24) & 0xff, (x >> 16) & 0xff, (x >> 8) & 0xff, x & 0xff]
|
648
|
+
end
|
649
|
+
|
650
|
+
def short_to_bytes(x)
|
651
|
+
[(x >> 8) & 0xff, x & 0xff]
|
652
|
+
end
|
653
|
+
|
654
|
+
# Decode a short from an array of bytes (big-endian).
|
655
|
+
# @param ba array of bytes
|
656
|
+
# @param offset offset of first (most significant) byte
|
657
|
+
#
|
658
|
+
def short_from_bytes(ba, offset=0)
|
659
|
+
(ba[offset] << 8) | ba[offset + 1]
|
660
|
+
end
|
661
|
+
|
662
|
+
# Decode an int from an array of bytes (big-endian).
|
663
|
+
# @param ba array of bytes
|
664
|
+
# @param offset offset of first (most significant) byte
|
665
|
+
#
|
666
|
+
def int_from_bytes(ba, offset=0)
|
667
|
+
(((((ba[offset] << 8) | ba[offset + 1]) << 8) | \
|
668
|
+
ba[offset + 2]) << 8) | ba[offset + 3]
|
669
|
+
end
|
670
|
+
|
671
|
+
# Transform string to 8-bit ASCII (i.e., just treat each byte as-is)
|
672
|
+
#
|
673
|
+
def to_ascii8(str)
|
674
|
+
str.force_encoding("ASCII-8BIT")
|
675
|
+
end
|
676
|
+
|
677
|
+
# Verify that a string is encoded as ASCII-8BIT
|
678
|
+
def simple_str(s)
|
679
|
+
if s.encoding.name != 'ASCII-8BIT' && s.encoding.name != 'UTF-8'
|
680
|
+
pr("string [%s]\n encoding is %s,\n expected ASCII-8BIT\n",s,s.encoding.name)
|
681
|
+
assert!(false)
|
682
|
+
end
|
683
|
+
end
|
684
|
+
|
685
|
+
# Truncate or pad string so it has a particular size
|
686
|
+
#
|
687
|
+
# @param s input string
|
688
|
+
# @param size
|
689
|
+
# @param pad padding character to use if string needs to grow
|
690
|
+
# @return modified string
|
691
|
+
#
|
692
|
+
def str_sized(s, size, pad="\0")
|
693
|
+
s[0...size].ljust(size,pad)
|
694
|
+
end
|
695
|
+
|
696
|
+
# Determine if running on the Windows operating system.
|
697
|
+
# Note: there is some debate about the best way to do this.
|
698
|
+
#
|
699
|
+
def windows?
|
700
|
+
if !defined? $__windows__
|
701
|
+
$__windows__ = (RUBY_PLATFORM =~ /mswin/)
|
702
|
+
end
|
703
|
+
$__windows__
|
704
|
+
end
|
705
|
+
|
706
|
+
# Mark all constants ending with '_' as private constants
|
707
|
+
#
|
708
|
+
# @param entity the class to examine
|
709
|
+
# @param add_non_suffix_versions if true, for each constant ABC_ found, also
|
710
|
+
# defines a constant ABC with the same value that is also private
|
711
|
+
#
|
712
|
+
def privatize(entity, add_non_suffix_versions = false)
|
713
|
+
|
714
|
+
db = false
|
715
|
+
|
716
|
+
# First command defines constants ABC = n for each constant ABC_ = n;
|
717
|
+
# Second declares both versions to be private
|
718
|
+
|
719
|
+
cmd1 = nil
|
720
|
+
cmd2 = nil
|
721
|
+
|
722
|
+
entity.constants.each do |c|
|
723
|
+
nm = c.to_s
|
724
|
+
|
725
|
+
if nm.end_with?('_')
|
726
|
+
nm_small = nm[0..-2]
|
727
|
+
|
728
|
+
if !cmd2
|
729
|
+
if add_non_suffix_versions
|
730
|
+
cmd1 = ''
|
731
|
+
end
|
732
|
+
cmd2 = 'private_constant '
|
733
|
+
else
|
734
|
+
cmd2 << ','
|
735
|
+
end
|
736
|
+
|
737
|
+
|
738
|
+
!cmd1 || cmd1 << entity.to_s << '::' << nm_small << '=' << entity.const_get(c).to_s << "\n"
|
739
|
+
!cmd1 || cmd2 << ':' << nm_small << ','
|
740
|
+
cmd2 << ':' << nm
|
741
|
+
end
|
742
|
+
end
|
743
|
+
|
744
|
+
if cmd2
|
745
|
+
if cmd1
|
746
|
+
!db || pr("about to eval:\n%s\n",cmd1)
|
747
|
+
eval(cmd1)
|
748
|
+
end
|
749
|
+
!db || pr("about to eval:\n%s\n",cmd2)
|
750
|
+
eval(cmd2)
|
751
|
+
end
|
752
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":1.0,"tokens":["WS","DBL","INT","LBL","ID","ASSIGN","EQUIV","IF","DO","BROP","BRCL"],"states":[[false,[[[125,126],1],[[123,124],2],[[100,101],3],[[105,106],4],[[61,62],5],[[65,91,95,96,97,100,101,105,106,123],6],[[39,40],7],[[48,58],8],[[45,46],9],[[46,47],10],[[9,11,12,13,32,33,92,93],11],[[47,48],12]]],[false,[[[-12,-11],14]]],[false,[[[-11,-10],14]]],[false,[[[48,58,65,91,95,96,97,111,112,123],6],[[-6,-5],14],[[111,112],22]]],[false,[[[48,58,65,91,95,96,97,102,103,123],6],[[-6,-5],14],[[102,103],21]]],[false,[[[-7,-6],14],[[61,62],20]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-6,-5],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[39,40],17],[[92,93],18]]],[false,[[[48,58],8],[[46,47],10],[[-4,-3],14]]],[false,[[[48,58],8],[[46,47],10]]],[false,[[[48,58],16]]],[false,[[[9,11,12,13,32,33,92,93],11],[[-2,-1],14]]],[false,[[[47,48],13]]],[false,[[[0,10,11,1114112],13],[[-2,-1],14],[[10,11],15]]],[true,[]],[false,[[[-2,-1],14]]],[false,[[[-3,-2],14],[[48,58],16]]],[false,[[[-5,-4],14]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[92,93],18],[[39,40],19]]],[false,[[[0,10,11,39,40,92,93,1114112],7],[[-5,-4],14],[[39,40],17],[[92,93],18]]],[false,[[[-8,-7],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-9,-8],14]]],[false,[[[48,58,65,91,95,96,97,123],6],[[-10,-9],14]]]]}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
(line 1, col 1) : speed
|
2
|
+
(line 1, col 6) : =
|
3
|
+
(line 1, col 7) : 42
|
4
|
+
(line 1, col 9) : gravity
|
5
|
+
(line 1, col 16) : =
|
6
|
+
(line 1, col 17) : -9.80
|
7
|
+
...couldn't find sequence...
|
8
|
+
(line 1, col 22) : {
|
9
|
+
(line 1, col 23) : color
|
10
|
+
(line 1, col 29) : =
|
11
|
+
(line 1, col 30) : green
|
12
|
+
(line 1, col 35) : }
|
13
|
+
(line 1, col 36) : title
|
14
|
+
(line 1, col 41) : =
|
15
|
+
(line 1, col 42) : 'This is a string with \' an escaped delimiter'
|
16
|
+
(line 1, col 89) : if
|
17
|
+
(line 1, col 91) : gravity
|
18
|
+
(line 1, col 98) : ==
|
19
|
+
(line 1, col 100) : 12
|
20
|
+
...read BROP DO ID sequence...
|
21
|
+
(line 1, col 102) : {
|
22
|
+
(line 1, col 103) : do
|
23
|
+
(line 1, col 105) : something
|
24
|
+
(line 1, col 114) : }
|
25
|
+
(line 1, col 115) : do
|
26
|
+
(line 1, col 117) : something_else
|
data/test/test_tokn.rb
CHANGED
@@ -3,11 +3,8 @@ require 'test/unit'
|
|
3
3
|
require_relative '../lib/tokn/tools.rb'
|
4
4
|
req('range_partition dfa dfa_builder tokenizer token_defn_parser')
|
5
5
|
|
6
|
-
#
|
6
|
+
#SINGLETEST = "test_100_add"
|
7
7
|
|
8
|
-
#setTestDir()
|
9
|
-
|
10
|
-
#SINGLETEST = "test_ps_output_multi"
|
11
8
|
if defined? SINGLETEST
|
12
9
|
if main?(__FILE__)
|
13
10
|
ARGV.concat("-n #{SINGLETEST}".split)
|
@@ -18,29 +15,27 @@ class TestTokn < MyTestSuite
|
|
18
15
|
|
19
16
|
include Tokn, ToknInternal
|
20
17
|
|
21
|
-
# def data_file(f)
|
22
|
-
# File.join("data",f)
|
23
|
-
# # File.dirname(__FILE__)+"/data/"+f
|
24
|
-
# end
|
25
|
-
|
26
18
|
def suite_setup
|
27
|
-
|
19
|
+
|
28
20
|
# Make current directory = the one containing this script
|
29
21
|
main?(__FILE__)
|
30
22
|
|
31
|
-
|
32
|
-
|
23
|
+
@@out_dir = File.absolute_path(File.join(Dir.pwd,"_misc_"))
|
24
|
+
|
25
|
+
if !File.directory?(@@out_dir)
|
26
|
+
Dir.mkdir(@@out_dir)
|
33
27
|
end
|
34
28
|
|
35
|
-
@@sampleText =
|
36
|
-
@@sampleTokens =
|
29
|
+
@@sampleText = read_text_file("sampletext.txt")
|
30
|
+
@@sampleTokens = read_text_file("sampletokens.txt")
|
37
31
|
end
|
38
32
|
|
39
|
-
|
40
|
-
|
41
|
-
|
33
|
+
def out_path(f)
|
34
|
+
File.join(@@out_dir,f)
|
35
|
+
end
|
36
|
+
|
42
37
|
def suite_teardown
|
43
|
-
remove_file_or_dir(out_dir)
|
38
|
+
# remove_file_or_dir(@@out_dir)
|
44
39
|
end
|
45
40
|
|
46
41
|
def method_setup
|
@@ -332,7 +327,7 @@ class TestTokn < MyTestSuite
|
|
332
327
|
addset(40,50)
|
333
328
|
@par.prepare
|
334
329
|
|
335
|
-
@par.generatePDF(out_dir)
|
330
|
+
@par.generatePDF(@@out_dir)
|
336
331
|
|
337
332
|
prep
|
338
333
|
add 25,33
|
@@ -367,13 +362,13 @@ END
|
|
367
362
|
s = x.startState
|
368
363
|
x.endState.finalState = true
|
369
364
|
|
370
|
-
s.generatePDF(out_dir,"nfa")
|
365
|
+
s.generatePDF(@@out_dir,"nfa")
|
371
366
|
|
372
367
|
r = s.reverseNFA()
|
373
|
-
r.generatePDF(out_dir,"reversed")
|
368
|
+
r.generatePDF(@@out_dir,"reversed")
|
374
369
|
|
375
370
|
dfa = DFABuilder.nfa_to_dfa(s)
|
376
|
-
dfa.generatePDF(out_dir,"buildDFA")
|
371
|
+
dfa.generatePDF(@@out_dir,"buildDFA")
|
377
372
|
end
|
378
373
|
|
379
374
|
def test_180_cvt_NFA_to_DFA
|
@@ -382,14 +377,14 @@ END
|
|
382
377
|
s = x.startState
|
383
378
|
x.endState.finalState = true
|
384
379
|
|
385
|
-
s.generatePDF(out_dir,"nfa")
|
380
|
+
s.generatePDF(@@out_dir,"nfa")
|
386
381
|
|
387
382
|
dfa = DFABuilder.nfa_to_dfa(s)
|
388
|
-
dfa.generatePDF(out_dir,"dfa")
|
383
|
+
dfa.generatePDF(@@out_dir,"dfa")
|
389
384
|
|
390
385
|
oldToNewMap, maxId2 = dfa.duplicateNFA(42)
|
391
386
|
dfa2 = oldToNewMap[dfa]
|
392
|
-
dfa2.generatePDF(out_dir,"dfa_duplicated")
|
387
|
+
dfa2.generatePDF(@@out_dir,"dfa_duplicated")
|
393
388
|
end
|
394
389
|
|
395
390
|
def test_190_TokenDefParser
|
@@ -399,7 +394,7 @@ END
|
|
399
394
|
td = TokenDefParser.new(s)
|
400
395
|
|
401
396
|
tokDFA = td.dfa
|
402
|
-
tokDFA.startState.generatePDF(out_dir,"TokenDFA")
|
397
|
+
tokDFA.startState.generatePDF(@@out_dir,"TokenDFA")
|
403
398
|
|
404
399
|
end
|
405
400
|
|
@@ -525,10 +520,11 @@ END
|
|
525
520
|
|
526
521
|
def test_270_filter_ws
|
527
522
|
|
523
|
+
capture_begin
|
524
|
+
|
528
525
|
dfa = DFA.from_script_file("sampletokens.txt")
|
529
|
-
t = Tokenizer.new(dfa,
|
526
|
+
t = Tokenizer.new(dfa, read_text_file("sampletext.txt"), "WS")
|
530
527
|
|
531
|
-
s = ''
|
532
528
|
while t.hasNext do
|
533
529
|
|
534
530
|
tk = t.peek
|
@@ -536,48 +532,19 @@ END
|
|
536
532
|
if t.nameOf(tk) == 'BROP'
|
537
533
|
lst = t.readSequenceIf('BROP DO ID BRCL')
|
538
534
|
if lst
|
539
|
-
|
540
|
-
lst.each{ |x|
|
535
|
+
puts " ...read BROP DO ID sequence..."
|
536
|
+
lst.each{ |x| puts " #{d(x)}"}
|
541
537
|
next
|
542
538
|
else
|
543
|
-
|
539
|
+
puts " ...couldn't find sequence..."
|
544
540
|
end
|
545
541
|
end
|
546
542
|
|
547
543
|
tk = t.read
|
548
|
-
|
549
|
-
|
544
|
+
puts d(tk)
|
550
545
|
end
|
551
|
-
|
552
|
-
|
553
|
-
(line 1, col 6) : =
|
554
|
-
(line 1, col 7) : 42
|
555
|
-
(line 1, col 9) : gravity
|
556
|
-
(line 1, col 16) : =
|
557
|
-
(line 1, col 17) : -9.80
|
558
|
-
...couldn't find sequence...
|
559
|
-
(line 1, col 22) : {
|
560
|
-
(line 1, col 23) : color
|
561
|
-
(line 1, col 29) : =
|
562
|
-
(line 1, col 30) : green
|
563
|
-
(line 1, col 35) : }
|
564
|
-
(line 1, col 36) : title
|
565
|
-
(line 1, col 41) : =
|
566
|
-
(line 1, col 42) : 'This is a string with \\' an escaped delimiter'
|
567
|
-
(line 1, col 89) : if
|
568
|
-
(line 1, col 91) : gravity
|
569
|
-
(line 1, col 98) : ==
|
570
|
-
(line 1, col 100) : 12
|
571
|
-
...read BROP DO ID sequence...
|
572
|
-
(line 1, col 102) : {
|
573
|
-
(line 1, col 103) : do
|
574
|
-
(line 1, col 105) : something
|
575
|
-
(line 1, col 114) : }
|
576
|
-
(line 1, col 115) : do
|
577
|
-
(line 1, col 117) : something_else
|
578
|
-
EXP
|
579
|
-
|
580
|
-
assert(s.strip == exp.strip)
|
546
|
+
|
547
|
+
match_expected_output
|
581
548
|
end
|
582
549
|
|
583
550
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Sember
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-04-
|
11
|
+
date: 2013-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "Given a script containing token descriptions (each a regular expression),
|
14
14
|
\ntokn compiles an automaton which it can then use to efficiently convert a \ntext
|
@@ -40,6 +40,15 @@ files:
|
|
40
40
|
- test/_OLD_/data/sampletokens_dfa.txt
|
41
41
|
- test/_OLD_/test.rb
|
42
42
|
- test/_OLD_/testcmds
|
43
|
+
- test/_misc_/__mygraph__TokenDFA.pdf
|
44
|
+
- test/_misc_/__mygraph__buildDFA.pdf
|
45
|
+
- test/_misc_/__mygraph__dfa.pdf
|
46
|
+
- test/_misc_/__mygraph__dfa_duplicated.pdf
|
47
|
+
- test/_misc_/__mygraph__nfa.pdf
|
48
|
+
- test/_misc_/__mygraph__partition.pdf
|
49
|
+
- test/_misc_/__mygraph__reversed.pdf
|
50
|
+
- test/_misc_/sampletokens_dfa.txt
|
51
|
+
- test/_output_270_filter_ws.txt
|
43
52
|
- test/sampletext.txt
|
44
53
|
- test/sampletokens.txt
|
45
54
|
- test/test_tokn.rb
|