tokn 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d9e7bb7fabdbd657fefe13b23eeddfff4493dd76
4
- data.tar.gz: e1c7c681b4a4928002de7472ab7c8bc0132c2b4e
3
+ metadata.gz: 75d7206c10817a05dbbd9e5ff36b25f20ef5ad18
4
+ data.tar.gz: d1d84da2be85c05b567b476b53471ac2a7c6a04f
5
5
  SHA512:
6
- metadata.gz: 3d217315f1b9559b58b4bef86c92402076f67323cd7715693425aee26fceb8c5ca2dfa021b4678303103cb678c9fe47f5978d2cbc2dc07e6060c0678e11883c2
7
- data.tar.gz: 15d5d9577c4402a593c19fb522738b996a4cdb7b01ee4c7d576a64f07aa74f31c40917cdefdd48b35c28608abf6425bfcf59cf59e243774cc17e01259cb2c8bd
6
+ metadata.gz: 47a078062419175ef5fd1d66a46bae95b423c1b69cd8d27a28875589b13abdcdd5474c81e70a9b09f11a27b302f9f9038302fd1ad9121bf81d7db42cf20cea96
7
+ data.tar.gz: 2bd489cb22b1b68d0a365fd70c337e7cc3bfd1872d501a88e735fe23fe3780a77415262be265263cd906eb7cf32afea7f5e1c8cb849c6feef44567ef80b4717b
data/README.txt CHANGED
@@ -66,7 +66,9 @@ definitions shown earlier.
66
66
 
67
67
  require "Tokenizer"
68
68
 
69
- dfa = dfa_from_script(readTextFile("tokendefs.txt"))
69
+ include Tokn # Avoids having to prefix things with 'Tokn::'
70
+
71
+ dfa = DFA.from_script(readTextFile("tokendefs.txt"))
70
72
 
71
73
  t = Tokenizer.new(dfa, readTextFile("source.txt"))
72
74
 
@@ -92,7 +94,7 @@ The module has two utility scripts: tokncompile, and toknprocess. These can be
92
94
  found in the bin/ directory.
93
95
 
94
96
  The tokncompile script reads a token definition script from standard input, and
95
- compiles it to a DFA. For example, if you are in the tokn directory, you can
97
+ compiles it to a DFA. For example, if you are in the tokn/test/data directory, you can
96
98
  type:
97
99
 
98
100
  tokncompile < sampletokens.txt > compileddfa.txt
@@ -189,6 +191,3 @@ only reads Ruby characters from the input, which I believe are only 8 bits wide.
189
191
 
190
192
  Well, I can be reached as jpsember at gmail dot com.
191
193
 
192
-
193
-
194
-
data/bin/tokncompile CHANGED
@@ -11,6 +11,6 @@
11
11
 
12
12
  require 'tokn'
13
13
 
14
- puts dfa_from_script(ARGF.read).serialize()
14
+ puts Tokn::DFA.from_script(ARGF.read).serialize()
15
15
 
16
16
 
data/bin/toknprocess CHANGED
@@ -11,14 +11,20 @@
11
11
  #
12
12
 
13
13
  require 'tokn'
14
+ include Tokn
14
15
 
15
- if ARGV.size != 2
16
- puts "Usage: toknprocess <dfa file> <source file>"
16
+ if ARGV.size < 2 || ARGV.size > 3
17
+ puts "Usage: toknprocess <dfa file> <source file> [<skip token name>]"
17
18
  abort
18
19
  end
19
20
 
20
- dfa = dfa_from_file(ARGV[0])
21
- tk = Tokenizer.new(dfa, readTextFile(ARGV[1]))
21
+ dfa = DFA.from_file(ARGV[0])
22
+ skipName = nil
23
+ if ARGV.size >= 3
24
+ skipName = ARGV[2]
25
+ end
26
+
27
+ tk = Tokenizer.new(dfa, readTextFile(ARGV[1]), skipName)
22
28
 
23
29
  while tk.hasNext()
24
30
  t = tk.read
data/lib/tokn/code_set.rb CHANGED
@@ -1,392 +1,387 @@
1
1
  require_relative 'tools'
2
2
 
3
- req('tokn_const')
4
-
5
-
6
- # A CodeSet is an ordered set of character or token codes that
7
- # are used as labels on DFA edges.
8
- #
9
- # In addition to unicode character codes 0...0x10ffff, they
10
- # also represent epsilon transitions (-1), or token identifiers ( < -1).
11
- #
12
- # Each CodeSet is represented as an array with 2n elements;
13
- # each pair represents a closed lower and open upper range of values.
14
- #
15
- # Thus a value x is within the set [a1,a2,b1,b2,..]
16
- # iff (a1 <= x < a2) or (b1 <= x < b2) or ...
17
- #
18
- class CodeSet
19
-
20
- include Tokn
21
-
22
- # Construct a copy of this set
23
- #
24
- def makeCopy
25
- c = CodeSet.new
26
- c.setTo(self)
27
- c
28
- end
3
+ module ToknInternal
29
4
 
30
- # Initialize set; optionally add an initial contiguous range
31
- #
32
- def initialize(lower = nil, upper = nil)
33
- @elem = []
34
- if lower
35
- add(lower,upper)
36
- end
37
- end
38
-
39
- # Replace this set with a copy of another
40
- #
41
- def setTo(otherSet)
42
- @elem.replace(otherSet.array)
43
- end
44
-
45
- # Get the array containing the code set range pairs
46
- #
47
- def array
48
- return @elem
49
- end
50
-
51
- # Replace this set's array
52
- # @param a array to point to (does not make a copy of it)
53
- #
54
- def setArray(a)
55
- @elem = a
56
- end
57
-
58
-
59
- def hash
60
- return @elem.hash
61
- end
62
-
63
- # Determine if this set is equivalent to another
5
+ # A CodeSet is an ordered set of character or token codes that
6
+ # are used as labels on DFA edges.
64
7
  #
65
- def eql?(other)
66
- @elem == other.array
67
- end
68
-
69
-
70
- # Add a contiguous range of values to the set
71
- # @param lower min value in range
72
- # @param upper one plus max value in range
8
+ # In addition to unicode character codes 0...0x10ffff, they
9
+ # also represent epsilon transitions (-1), or token identifiers ( < -1).
73
10
  #
74
- def add(lower, upper = nil)
75
- if not upper
76
- upper = lower + 1
77
- end
78
-
79
- if lower >= upper
80
- raise RangeError
81
- end
82
-
83
- newSet = []
84
- i = 0
85
- while i < @elem.size and @elem[i] < lower
86
- newSet.push(@elem[i])
87
- i += 1
88
- end
89
-
90
- if (i & 1) == 0
91
- newSet.push(lower)
92
- end
93
-
94
- while i < @elem.size and @elem[i] <= upper
95
- i += 1
96
- end
97
-
98
- if (i & 1) == 0
99
- newSet.push(upper)
100
- end
101
-
102
- while i < @elem.size
103
- newSet.push(@elem[i])
104
- i += 1
105
- end
106
-
107
- @elem = newSet
108
-
109
- end
110
-
111
-
112
-
113
-
114
-
115
-
116
- # Remove a contiguous range of values from the set
117
- # @param lower min value in range
118
- # @param upper one plus max value in range
11
+ # Each CodeSet is represented as an array with 2n elements;
12
+ # each pair represents a closed lower and open upper range of values.
13
+ #
14
+ # Thus a value x is within the set [a1,a2,b1,b2,..]
15
+ # iff (a1 <= x < a2) or (b1 <= x < b2) or ...
119
16
  #
120
- def remove(lower, upper = nil)
121
- if not upper
122
- upper = lower + 1
17
+ class CodeSet
18
+
19
+ # Construct a copy of this set
20
+ #
21
+ def makeCopy
22
+ c = CodeSet.new
23
+ c.setTo(self)
24
+ c
123
25
  end
124
26
 
125
- if lower >= upper
126
- raise RangeError
27
+ # Initialize set; optionally add an initial contiguous range
28
+ #
29
+ def initialize(lower = nil, upper = nil)
30
+ @elem = []
31
+ if lower
32
+ add(lower,upper)
33
+ end
127
34
  end
128
35
 
129
- newSet = []
130
- i = 0
131
- while i < @elem.size and @elem[i] < lower
132
- newSet.push(@elem[i])
133
- i += 1
134
- end
135
-
136
- if (i & 1) == 1
137
- newSet.push(lower)
36
+ # Replace this set with a copy of another
37
+ #
38
+ def setTo(otherSet)
39
+ @elem.replace(otherSet.array)
138
40
  end
139
41
 
140
- while i < @elem.size and @elem[i] <= upper
141
- i += 1
42
+ # Get the array containing the code set range pairs
43
+ #
44
+ def array
45
+ @elem
142
46
  end
143
47
 
144
- if (i & 1) == 1
145
- newSet.push(upper)
48
+ # Replace this set's array
49
+ # @param a array to point to (does not make a copy of it)
50
+ #
51
+ def setArray(a)
52
+ @elem = a
146
53
  end
147
-
148
- while i < @elem.size
149
- newSet.push(@elem[i])
150
- i += 1
54
+
55
+ # Get hash code; just uses hash code of the contained array
56
+ def hash
57
+ @elem.hash
151
58
  end
152
-
153
- setArray(newSet)
154
-
155
- end
156
-
157
- # Replace this set with itself minus another
158
- #
159
- def difference!(s)
160
- setTo(difference(s))
161
- end
162
59
 
163
- # Calculate difference of this set minus another
164
- def difference(s)
165
- combineWith(s, 'd')
166
- end
167
-
168
- # Calculate the intersection of this set and another
169
- def intersect(s)
170
- combineWith(s, 'i')
171
- end
172
-
173
-
174
-
175
- # Set this set equal to its intersection with another
176
- def intersect!(s)
177
- setTo(intersect(s))
178
- end
60
+ # Determine if this set is equivalent to another, by
61
+ # comparing the contained arrays
62
+ #
63
+ def eql?(other)
64
+ @elem == other.array
65
+ end
179
66
 
180
- # Add every value from another CodeSet to this one
181
- def addSet(s)
182
- sa = s.array
183
-
184
- (0 ... sa.length).step(2) {
185
- |i| add(sa[i],sa[i+1])
186
- }
187
- end
188
67
 
189
- # Determine if this set contains a particular value
190
- def contains?(val)
191
- ret = false
192
- i = 0
193
- while i < @elem.size
194
- if val < @elem[i]
195
- break
68
+ # Add a contiguous range of values to the set
69
+ # @param lower min value in range
70
+ # @param upper one plus max value in range
71
+ #
72
+ def add(lower, upper = nil)
73
+ if not upper
74
+ upper = lower + 1
75
+ end
76
+
77
+ if lower >= upper
78
+ raise RangeError
79
+ end
80
+
81
+ newSet = []
82
+ i = 0
83
+ while i < @elem.size and @elem[i] < lower
84
+ newSet.push(@elem[i])
85
+ i += 1
196
86
  end
197
- if val < @elem[i+1]
198
- ret = true
199
- break
87
+
88
+ if (i & 1) == 0
89
+ newSet.push(lower)
200
90
  end
201
- i += 2
202
- end
203
-
204
- ret
205
-
206
- end
207
-
208
- # Get string representation of set, treating them (where
209
- # possible) as printable ASCII characters
210
- #
211
- def to_s
212
- s = ''
213
- i = 0
214
- while i < @elem.size
215
- if s.size
216
- s += ' '
91
+
92
+ while i < @elem.size and @elem[i] <= upper
93
+ i += 1
217
94
  end
218
95
 
219
- lower = @elem[i]
220
- upper = @elem[i+1]
221
- s += dbStr(lower)
222
- if upper != 1+lower
223
- s += '..' + dbStr(upper-1)
96
+ if (i & 1) == 0
97
+ newSet.push(upper)
224
98
  end
225
- i += 2
99
+
100
+ while i < @elem.size
101
+ newSet.push(@elem[i])
102
+ i += 1
103
+ end
104
+
105
+ @elem = newSet
106
+
226
107
  end
227
- return s
228
- end
229
108
 
230
- def inspect
231
- to_s
232
- end
233
109
 
234
- # Get string representation of set, treating them
235
- # as integers
236
- #
237
- def to_s_alt
238
- s = ''
239
- i = 0
240
- while i < @elem.size
241
- if s.length > 0
242
- s += ' '
110
+ # Remove a contiguous range of values from the set
111
+ # @param lower min value in range
112
+ # @param upper one plus max value in range
113
+ #
114
+ def remove(lower, upper = nil)
115
+ if not upper
116
+ upper = lower + 1
243
117
  end
244
- low = @elem[i]
245
- upr = @elem[i+1]
246
- s += low.to_s
247
- if upr > low+1
248
- s += '..'
249
- s += (upr-1).to_s
118
+
119
+ if lower >= upper
120
+ raise RangeError
250
121
  end
251
- i += 2
122
+
123
+ newSet = []
124
+ i = 0
125
+ while i < @elem.size and @elem[i] < lower
126
+ newSet.push(@elem[i])
127
+ i += 1
128
+ end
129
+
130
+ if (i & 1) == 1
131
+ newSet.push(lower)
132
+ end
133
+
134
+ while i < @elem.size and @elem[i] <= upper
135
+ i += 1
136
+ end
137
+
138
+ if (i & 1) == 1
139
+ newSet.push(upper)
140
+ end
141
+
142
+ while i < @elem.size
143
+ newSet.push(@elem[i])
144
+ i += 1
145
+ end
146
+
147
+ setArray(newSet)
148
+
252
149
  end
253
- return s
254
- end
255
-
256
150
 
257
- # Negate the inclusion of a contiguous range of values
258
- #
259
- # @param lower min value in range
260
- # @param upper one plus max value in range
261
- #
262
- def negate(lower = 0, upper = CODEMAX)
263
- s2 = CodeSet.new(lower,upper)
264
- if lower >= upper
265
- raise RangeError
151
+ # Replace this set with itself minus another
152
+ #
153
+ def difference!(s)
154
+ setTo(difference(s))
266
155
  end
267
156
 
268
- newSet = []
269
- i = 0
270
- while i < @elem.size and @elem[i] <= lower
271
- newSet.push(@elem[i])
272
- i += 1
273
- end
274
-
275
- if i > 0 and newSet[i-1] == lower
276
- newSet.pop
277
- else
278
- newSet.push(lower)
157
+ # Calculate difference of this set minus another
158
+ def difference(s)
159
+ combineWith(s, 'd')
279
160
  end
280
-
281
- while i < @elem.size and @elem[i] <= upper
282
- newSet.push(@elem[i])
283
- i += 1
284
- end
285
-
286
-
287
- if newSet.length > 0 and newSet.last == upper
288
- newSet.pop
289
- else
290
- newSet.push(upper)
291
- end
292
-
293
- while i < @elem.size
294
- newSet.push(@elem[i])
295
- i += 1
296
- end
297
-
298
- @elem = newSet
299
-
300
- end
301
161
 
302
- # Determine how many distinct values are represented by this set
303
- def cardinality
304
- c = 0
305
- i = 0
306
- while i < @elem.length
307
- c += @elem[i+1] - @elem[i]
308
- i += 2
162
+ # Calculate the intersection of this set and another
163
+ def intersect(s)
164
+ combineWith(s, 'i')
309
165
  end
310
- c
311
- end
312
166
 
313
- # Determine if this set is empty
314
- #
315
- def empty?
316
- @elem.empty?
317
- end
318
167
 
319
- private
320
-
321
- # Get a debug description of a value within a CodeSet, suitable
322
- # for including within a .dot label
323
- #
324
- def dbStr(charCode)
168
+ # Set this set equal to its intersection with another
169
+ def intersect!(s)
170
+ setTo(intersect(s))
171
+ end
325
172
 
326
- # Unless it corresponds to a non-confusing printable ASCII value,
327
- # just print its decimal equivalent
173
+ # Add every value from another CodeSet to this one
174
+ def addSet(s)
175
+ sa = s.array
176
+
177
+ (0 ... sa.length).step(2) {
178
+ |i| add(sa[i],sa[i+1])
179
+ }
180
+ end
181
+
182
+ # Determine if this set contains a particular value
183
+ def contains?(val)
184
+ ret = false
185
+ i = 0
186
+ while i < @elem.size
187
+ if val < @elem[i]
188
+ break
189
+ end
190
+ if val < @elem[i+1]
191
+ ret = true
192
+ break
193
+ end
194
+ i += 2
195
+ end
196
+
197
+ ret
198
+
199
+ end
328
200
 
329
- s = charCode.to_s
201
+ # Get string representation of set, treating them (where
202
+ # possible) as printable ASCII characters
203
+ #
204
+ def to_s
205
+ s = ''
206
+ i = 0
207
+ while i < @elem.size
208
+ if s.size
209
+ s += ' '
210
+ end
211
+
212
+ lower = @elem[i]
213
+ upper = @elem[i+1]
214
+ s += dbStr(lower)
215
+ if upper != 1+lower
216
+ s += '..' + dbStr(upper-1)
217
+ end
218
+ i += 2
219
+ end
220
+ return s
221
+ end
330
222
 
331
- if charCode == EPSILON
332
- s = "(e)"
333
- elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
334
- s = charCode.chr
335
- end
336
- return s
337
- end
338
-
339
- # Combine this range (a) with another (b) according to particular operation
340
- # > s other range (b)
341
- # > oper 'i': intersection, a^b
342
- # 'd': difference, a-b
343
- # 'n': negation, (a & !b) | (!a & b)
344
- #
345
- def combineWith(s, oper)
346
- sa = array
347
- sb = s.array
223
+ # Calls to_s
224
+ def inspect
225
+ to_s
226
+ end
348
227
 
349
- i = 0
350
- j = 0
351
- c = []
228
+ # Get string representation of set, treating them
229
+ # as integers
230
+ #
231
+ def to_s_alt
232
+ s = ''
233
+ i = 0
234
+ while i < @elem.size
235
+ if s.length > 0
236
+ s += ' '
237
+ end
238
+ low = @elem[i]
239
+ upr = @elem[i+1]
240
+ s += low.to_s
241
+ if upr > low+1
242
+ s += '..'
243
+ s += (upr-1).to_s
244
+ end
245
+ i += 2
246
+ end
247
+ return s
248
+ end
352
249
 
353
- wasInside = false
354
250
 
355
- while i < sa.length || j < sb.length
251
+ # Negate the inclusion of a contiguous range of values
252
+ #
253
+ # @param lower min value in range
254
+ # @param upper one plus max value in range
255
+ #
256
+ def negate(lower = 0, upper = CODEMAX)
257
+ s2 = CodeSet.new(lower,upper)
258
+ if lower >= upper
259
+ raise RangeError
260
+ end
356
261
 
357
- if i == sa.length
358
- v = sb[j]
359
- elsif j == sb.length
360
- v = sa[i]
262
+ newSet = []
263
+ i = 0
264
+ while i < @elem.size and @elem[i] <= lower
265
+ newSet.push(@elem[i])
266
+ i += 1
267
+ end
268
+
269
+ if i > 0 and newSet[i-1] == lower
270
+ newSet.pop
361
271
  else
362
- v = [sa[i],sb[j]].min
272
+ newSet.push(lower)
363
273
  end
364
-
365
- if i < sa.length && v == sa[i]
274
+
275
+ while i < @elem.size and @elem[i] <= upper
276
+ newSet.push(@elem[i])
366
277
  i += 1
367
- end
368
- if j < sb.length && v == sb[j]
369
- j += 1
370
- end
371
-
372
- case oper
373
- when 'i'
374
- inside = ((i & 1) == 1) && ((j & 1) == 1)
375
- when 'd'
376
- inside = ((i & 1) == 1) && ((j & 1) == 0)
278
+ end
279
+
280
+
281
+ if newSet.length > 0 and newSet.last == upper
282
+ newSet.pop
377
283
  else
378
- raise Exception, "illegal"
284
+ newSet.push(upper)
285
+ end
286
+
287
+ while i < @elem.size
288
+ newSet.push(@elem[i])
289
+ i += 1
290
+ end
291
+
292
+ @elem = newSet
293
+
294
+ end
295
+
296
+ # Determine how many distinct values are represented by this set
297
+ def cardinality
298
+ c = 0
299
+ i = 0
300
+ while i < @elem.length
301
+ c += @elem[i+1] - @elem[i]
302
+ i += 2
379
303
  end
304
+ c
305
+ end
306
+
307
+ # Determine if this set is empty
308
+ #
309
+ def empty?
310
+ @elem.empty?
311
+ end
312
+
313
+ private
314
+
315
+ # Get a debug description of a value within a CodeSet, suitable
316
+ # for including within a .dot label
317
+ #
318
+ def dbStr(charCode)
380
319
 
381
- if inside != wasInside
382
- c.push v
383
- wasInside = inside
320
+ # Unless it corresponds to a non-confusing printable ASCII value,
321
+ # just print its decimal equivalent
322
+
323
+ s = charCode.to_s
324
+
325
+ if charCode == EPSILON
326
+ s = "(e)"
327
+ elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
328
+ s = charCode.chr
329
+ end
330
+ return s
331
+ end
332
+
333
+ # Combine this range (a) with another (b) according to particular operation
334
+ # > s other range (b)
335
+ # > oper 'i': intersection, a^b
336
+ # 'd': difference, a-b
337
+ # 'n': negation, (a & !b) | (!a & b)
338
+ #
339
+ def combineWith(s, oper)
340
+ sa = array
341
+ sb = s.array
342
+
343
+ i = 0
344
+ j = 0
345
+ c = []
346
+
347
+ wasInside = false
348
+
349
+ while i < sa.length || j < sb.length
350
+
351
+ if i == sa.length
352
+ v = sb[j]
353
+ elsif j == sb.length
354
+ v = sa[i]
355
+ else
356
+ v = [sa[i],sb[j]].min
357
+ end
358
+
359
+ if i < sa.length && v == sa[i]
360
+ i += 1
361
+ end
362
+ if j < sb.length && v == sb[j]
363
+ j += 1
364
+ end
365
+
366
+ case oper
367
+ when 'i'
368
+ inside = ((i & 1) == 1) && ((j & 1) == 1)
369
+ when 'd'
370
+ inside = ((i & 1) == 1) && ((j & 1) == 0)
371
+ else
372
+ raise Exception, "illegal"
373
+ end
374
+
375
+ if inside != wasInside
376
+ c.push v
377
+ wasInside = inside
378
+ end
384
379
  end
380
+ ret = CodeSet.new()
381
+ ret.setArray(c)
382
+ ret
385
383
  end
386
- ret = CodeSet.new()
387
- ret.setArray(c)
388
- ret
384
+
389
385
  end
390
386
 
391
- end
392
-
387
+ end