tokn 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d9e7bb7fabdbd657fefe13b23eeddfff4493dd76
4
- data.tar.gz: e1c7c681b4a4928002de7472ab7c8bc0132c2b4e
3
+ metadata.gz: 75d7206c10817a05dbbd9e5ff36b25f20ef5ad18
4
+ data.tar.gz: d1d84da2be85c05b567b476b53471ac2a7c6a04f
5
5
  SHA512:
6
- metadata.gz: 3d217315f1b9559b58b4bef86c92402076f67323cd7715693425aee26fceb8c5ca2dfa021b4678303103cb678c9fe47f5978d2cbc2dc07e6060c0678e11883c2
7
- data.tar.gz: 15d5d9577c4402a593c19fb522738b996a4cdb7b01ee4c7d576a64f07aa74f31c40917cdefdd48b35c28608abf6425bfcf59cf59e243774cc17e01259cb2c8bd
6
+ metadata.gz: 47a078062419175ef5fd1d66a46bae95b423c1b69cd8d27a28875589b13abdcdd5474c81e70a9b09f11a27b302f9f9038302fd1ad9121bf81d7db42cf20cea96
7
+ data.tar.gz: 2bd489cb22b1b68d0a365fd70c337e7cc3bfd1872d501a88e735fe23fe3780a77415262be265263cd906eb7cf32afea7f5e1c8cb849c6feef44567ef80b4717b
data/README.txt CHANGED
@@ -66,7 +66,9 @@ definitions shown earlier.
66
66
 
67
67
  require "Tokenizer"
68
68
 
69
- dfa = dfa_from_script(readTextFile("tokendefs.txt"))
69
+ include Tokn # Avoids having to prefix things with 'Tokn::'
70
+
71
+ dfa = DFA.from_script(readTextFile("tokendefs.txt"))
70
72
 
71
73
  t = Tokenizer.new(dfa, readTextFile("source.txt"))
72
74
 
@@ -92,7 +94,7 @@ The module has two utility scripts: tokncompile, and toknprocess. These can be
92
94
  found in the bin/ directory.
93
95
 
94
96
  The tokncompile script reads a token definition script from standard input, and
95
- compiles it to a DFA. For example, if you are in the tokn directory, you can
97
+ compiles it to a DFA. For example, if you are in the tokn/test/data directory, you can
96
98
  type:
97
99
 
98
100
  tokncompile < sampletokens.txt > compileddfa.txt
@@ -189,6 +191,3 @@ only reads Ruby characters from the input, which I believe are only 8 bits wide.
189
191
 
190
192
  Well, I can be reached as jpsember at gmail dot com.
191
193
 
192
-
193
-
194
-
data/bin/tokncompile CHANGED
@@ -11,6 +11,6 @@
11
11
 
12
12
  require 'tokn'
13
13
 
14
- puts dfa_from_script(ARGF.read).serialize()
14
+ puts Tokn::DFA.from_script(ARGF.read).serialize()
15
15
 
16
16
 
data/bin/toknprocess CHANGED
@@ -11,14 +11,20 @@
11
11
  #
12
12
 
13
13
  require 'tokn'
14
+ include Tokn
14
15
 
15
- if ARGV.size != 2
16
- puts "Usage: toknprocess <dfa file> <source file>"
16
+ if ARGV.size < 2 || ARGV.size > 3
17
+ puts "Usage: toknprocess <dfa file> <source file> [<skip token name>]"
17
18
  abort
18
19
  end
19
20
 
20
- dfa = dfa_from_file(ARGV[0])
21
- tk = Tokenizer.new(dfa, readTextFile(ARGV[1]))
21
+ dfa = DFA.from_file(ARGV[0])
22
+ skipName = nil
23
+ if ARGV.size >= 3
24
+ skipName = ARGV[2]
25
+ end
26
+
27
+ tk = Tokenizer.new(dfa, readTextFile(ARGV[1]), skipName)
22
28
 
23
29
  while tk.hasNext()
24
30
  t = tk.read
data/lib/tokn/code_set.rb CHANGED
@@ -1,392 +1,387 @@
1
1
  require_relative 'tools'
2
2
 
3
- req('tokn_const')
4
-
5
-
6
- # A CodeSet is an ordered set of character or token codes that
7
- # are used as labels on DFA edges.
8
- #
9
- # In addition to unicode character codes 0...0x10ffff, they
10
- # also represent epsilon transitions (-1), or token identifiers ( < -1).
11
- #
12
- # Each CodeSet is represented as an array with 2n elements;
13
- # each pair represents a closed lower and open upper range of values.
14
- #
15
- # Thus a value x is within the set [a1,a2,b1,b2,..]
16
- # iff (a1 <= x < a2) or (b1 <= x < b2) or ...
17
- #
18
- class CodeSet
19
-
20
- include Tokn
21
-
22
- # Construct a copy of this set
23
- #
24
- def makeCopy
25
- c = CodeSet.new
26
- c.setTo(self)
27
- c
28
- end
3
+ module ToknInternal
29
4
 
30
- # Initialize set; optionally add an initial contiguous range
31
- #
32
- def initialize(lower = nil, upper = nil)
33
- @elem = []
34
- if lower
35
- add(lower,upper)
36
- end
37
- end
38
-
39
- # Replace this set with a copy of another
40
- #
41
- def setTo(otherSet)
42
- @elem.replace(otherSet.array)
43
- end
44
-
45
- # Get the array containing the code set range pairs
46
- #
47
- def array
48
- return @elem
49
- end
50
-
51
- # Replace this set's array
52
- # @param a array to point to (does not make a copy of it)
53
- #
54
- def setArray(a)
55
- @elem = a
56
- end
57
-
58
-
59
- def hash
60
- return @elem.hash
61
- end
62
-
63
- # Determine if this set is equivalent to another
5
+ # A CodeSet is an ordered set of character or token codes that
6
+ # are used as labels on DFA edges.
64
7
  #
65
- def eql?(other)
66
- @elem == other.array
67
- end
68
-
69
-
70
- # Add a contiguous range of values to the set
71
- # @param lower min value in range
72
- # @param upper one plus max value in range
8
+ # In addition to unicode character codes 0...0x10ffff, they
9
+ # also represent epsilon transitions (-1), or token identifiers ( < -1).
73
10
  #
74
- def add(lower, upper = nil)
75
- if not upper
76
- upper = lower + 1
77
- end
78
-
79
- if lower >= upper
80
- raise RangeError
81
- end
82
-
83
- newSet = []
84
- i = 0
85
- while i < @elem.size and @elem[i] < lower
86
- newSet.push(@elem[i])
87
- i += 1
88
- end
89
-
90
- if (i & 1) == 0
91
- newSet.push(lower)
92
- end
93
-
94
- while i < @elem.size and @elem[i] <= upper
95
- i += 1
96
- end
97
-
98
- if (i & 1) == 0
99
- newSet.push(upper)
100
- end
101
-
102
- while i < @elem.size
103
- newSet.push(@elem[i])
104
- i += 1
105
- end
106
-
107
- @elem = newSet
108
-
109
- end
110
-
111
-
112
-
113
-
114
-
115
-
116
- # Remove a contiguous range of values from the set
117
- # @param lower min value in range
118
- # @param upper one plus max value in range
11
+ # Each CodeSet is represented as an array with 2n elements;
12
+ # each pair represents a closed lower and open upper range of values.
13
+ #
14
+ # Thus a value x is within the set [a1,a2,b1,b2,..]
15
+ # iff (a1 <= x < a2) or (b1 <= x < b2) or ...
119
16
  #
120
- def remove(lower, upper = nil)
121
- if not upper
122
- upper = lower + 1
17
+ class CodeSet
18
+
19
+ # Construct a copy of this set
20
+ #
21
+ def makeCopy
22
+ c = CodeSet.new
23
+ c.setTo(self)
24
+ c
123
25
  end
124
26
 
125
- if lower >= upper
126
- raise RangeError
27
+ # Initialize set; optionally add an initial contiguous range
28
+ #
29
+ def initialize(lower = nil, upper = nil)
30
+ @elem = []
31
+ if lower
32
+ add(lower,upper)
33
+ end
127
34
  end
128
35
 
129
- newSet = []
130
- i = 0
131
- while i < @elem.size and @elem[i] < lower
132
- newSet.push(@elem[i])
133
- i += 1
134
- end
135
-
136
- if (i & 1) == 1
137
- newSet.push(lower)
36
+ # Replace this set with a copy of another
37
+ #
38
+ def setTo(otherSet)
39
+ @elem.replace(otherSet.array)
138
40
  end
139
41
 
140
- while i < @elem.size and @elem[i] <= upper
141
- i += 1
42
+ # Get the array containing the code set range pairs
43
+ #
44
+ def array
45
+ @elem
142
46
  end
143
47
 
144
- if (i & 1) == 1
145
- newSet.push(upper)
48
+ # Replace this set's array
49
+ # @param a array to point to (does not make a copy of it)
50
+ #
51
+ def setArray(a)
52
+ @elem = a
146
53
  end
147
-
148
- while i < @elem.size
149
- newSet.push(@elem[i])
150
- i += 1
54
+
55
+ # Get hash code; just uses hash code of the contained array
56
+ def hash
57
+ @elem.hash
151
58
  end
152
-
153
- setArray(newSet)
154
-
155
- end
156
-
157
- # Replace this set with itself minus another
158
- #
159
- def difference!(s)
160
- setTo(difference(s))
161
- end
162
59
 
163
- # Calculate difference of this set minus another
164
- def difference(s)
165
- combineWith(s, 'd')
166
- end
167
-
168
- # Calculate the intersection of this set and another
169
- def intersect(s)
170
- combineWith(s, 'i')
171
- end
172
-
173
-
174
-
175
- # Set this set equal to its intersection with another
176
- def intersect!(s)
177
- setTo(intersect(s))
178
- end
60
+ # Determine if this set is equivalent to another, by
61
+ # comparing the contained arrays
62
+ #
63
+ def eql?(other)
64
+ @elem == other.array
65
+ end
179
66
 
180
- # Add every value from another CodeSet to this one
181
- def addSet(s)
182
- sa = s.array
183
-
184
- (0 ... sa.length).step(2) {
185
- |i| add(sa[i],sa[i+1])
186
- }
187
- end
188
67
 
189
- # Determine if this set contains a particular value
190
- def contains?(val)
191
- ret = false
192
- i = 0
193
- while i < @elem.size
194
- if val < @elem[i]
195
- break
68
+ # Add a contiguous range of values to the set
69
+ # @param lower min value in range
70
+ # @param upper one plus max value in range
71
+ #
72
+ def add(lower, upper = nil)
73
+ if not upper
74
+ upper = lower + 1
75
+ end
76
+
77
+ if lower >= upper
78
+ raise RangeError
79
+ end
80
+
81
+ newSet = []
82
+ i = 0
83
+ while i < @elem.size and @elem[i] < lower
84
+ newSet.push(@elem[i])
85
+ i += 1
196
86
  end
197
- if val < @elem[i+1]
198
- ret = true
199
- break
87
+
88
+ if (i & 1) == 0
89
+ newSet.push(lower)
200
90
  end
201
- i += 2
202
- end
203
-
204
- ret
205
-
206
- end
207
-
208
- # Get string representation of set, treating them (where
209
- # possible) as printable ASCII characters
210
- #
211
- def to_s
212
- s = ''
213
- i = 0
214
- while i < @elem.size
215
- if s.size
216
- s += ' '
91
+
92
+ while i < @elem.size and @elem[i] <= upper
93
+ i += 1
217
94
  end
218
95
 
219
- lower = @elem[i]
220
- upper = @elem[i+1]
221
- s += dbStr(lower)
222
- if upper != 1+lower
223
- s += '..' + dbStr(upper-1)
96
+ if (i & 1) == 0
97
+ newSet.push(upper)
224
98
  end
225
- i += 2
99
+
100
+ while i < @elem.size
101
+ newSet.push(@elem[i])
102
+ i += 1
103
+ end
104
+
105
+ @elem = newSet
106
+
226
107
  end
227
- return s
228
- end
229
108
 
230
- def inspect
231
- to_s
232
- end
233
109
 
234
- # Get string representation of set, treating them
235
- # as integers
236
- #
237
- def to_s_alt
238
- s = ''
239
- i = 0
240
- while i < @elem.size
241
- if s.length > 0
242
- s += ' '
110
+ # Remove a contiguous range of values from the set
111
+ # @param lower min value in range
112
+ # @param upper one plus max value in range
113
+ #
114
+ def remove(lower, upper = nil)
115
+ if not upper
116
+ upper = lower + 1
243
117
  end
244
- low = @elem[i]
245
- upr = @elem[i+1]
246
- s += low.to_s
247
- if upr > low+1
248
- s += '..'
249
- s += (upr-1).to_s
118
+
119
+ if lower >= upper
120
+ raise RangeError
250
121
  end
251
- i += 2
122
+
123
+ newSet = []
124
+ i = 0
125
+ while i < @elem.size and @elem[i] < lower
126
+ newSet.push(@elem[i])
127
+ i += 1
128
+ end
129
+
130
+ if (i & 1) == 1
131
+ newSet.push(lower)
132
+ end
133
+
134
+ while i < @elem.size and @elem[i] <= upper
135
+ i += 1
136
+ end
137
+
138
+ if (i & 1) == 1
139
+ newSet.push(upper)
140
+ end
141
+
142
+ while i < @elem.size
143
+ newSet.push(@elem[i])
144
+ i += 1
145
+ end
146
+
147
+ setArray(newSet)
148
+
252
149
  end
253
- return s
254
- end
255
-
256
150
 
257
- # Negate the inclusion of a contiguous range of values
258
- #
259
- # @param lower min value in range
260
- # @param upper one plus max value in range
261
- #
262
- def negate(lower = 0, upper = CODEMAX)
263
- s2 = CodeSet.new(lower,upper)
264
- if lower >= upper
265
- raise RangeError
151
+ # Replace this set with itself minus another
152
+ #
153
+ def difference!(s)
154
+ setTo(difference(s))
266
155
  end
267
156
 
268
- newSet = []
269
- i = 0
270
- while i < @elem.size and @elem[i] <= lower
271
- newSet.push(@elem[i])
272
- i += 1
273
- end
274
-
275
- if i > 0 and newSet[i-1] == lower
276
- newSet.pop
277
- else
278
- newSet.push(lower)
157
+ # Calculate difference of this set minus another
158
+ def difference(s)
159
+ combineWith(s, 'd')
279
160
  end
280
-
281
- while i < @elem.size and @elem[i] <= upper
282
- newSet.push(@elem[i])
283
- i += 1
284
- end
285
-
286
-
287
- if newSet.length > 0 and newSet.last == upper
288
- newSet.pop
289
- else
290
- newSet.push(upper)
291
- end
292
-
293
- while i < @elem.size
294
- newSet.push(@elem[i])
295
- i += 1
296
- end
297
-
298
- @elem = newSet
299
-
300
- end
301
161
 
302
- # Determine how many distinct values are represented by this set
303
- def cardinality
304
- c = 0
305
- i = 0
306
- while i < @elem.length
307
- c += @elem[i+1] - @elem[i]
308
- i += 2
162
+ # Calculate the intersection of this set and another
163
+ def intersect(s)
164
+ combineWith(s, 'i')
309
165
  end
310
- c
311
- end
312
166
 
313
- # Determine if this set is empty
314
- #
315
- def empty?
316
- @elem.empty?
317
- end
318
167
 
319
- private
320
-
321
- # Get a debug description of a value within a CodeSet, suitable
322
- # for including within a .dot label
323
- #
324
- def dbStr(charCode)
168
+ # Set this set equal to its intersection with another
169
+ def intersect!(s)
170
+ setTo(intersect(s))
171
+ end
325
172
 
326
- # Unless it corresponds to a non-confusing printable ASCII value,
327
- # just print its decimal equivalent
173
+ # Add every value from another CodeSet to this one
174
+ def addSet(s)
175
+ sa = s.array
176
+
177
+ (0 ... sa.length).step(2) {
178
+ |i| add(sa[i],sa[i+1])
179
+ }
180
+ end
181
+
182
+ # Determine if this set contains a particular value
183
+ def contains?(val)
184
+ ret = false
185
+ i = 0
186
+ while i < @elem.size
187
+ if val < @elem[i]
188
+ break
189
+ end
190
+ if val < @elem[i+1]
191
+ ret = true
192
+ break
193
+ end
194
+ i += 2
195
+ end
196
+
197
+ ret
198
+
199
+ end
328
200
 
329
- s = charCode.to_s
201
+ # Get string representation of set, treating them (where
202
+ # possible) as printable ASCII characters
203
+ #
204
+ def to_s
205
+ s = ''
206
+ i = 0
207
+ while i < @elem.size
208
+ if s.size
209
+ s += ' '
210
+ end
211
+
212
+ lower = @elem[i]
213
+ upper = @elem[i+1]
214
+ s += dbStr(lower)
215
+ if upper != 1+lower
216
+ s += '..' + dbStr(upper-1)
217
+ end
218
+ i += 2
219
+ end
220
+ return s
221
+ end
330
222
 
331
- if charCode == EPSILON
332
- s = "(e)"
333
- elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
334
- s = charCode.chr
335
- end
336
- return s
337
- end
338
-
339
- # Combine this range (a) with another (b) according to particular operation
340
- # > s other range (b)
341
- # > oper 'i': intersection, a^b
342
- # 'd': difference, a-b
343
- # 'n': negation, (a & !b) | (!a & b)
344
- #
345
- def combineWith(s, oper)
346
- sa = array
347
- sb = s.array
223
+ # Calls to_s
224
+ def inspect
225
+ to_s
226
+ end
348
227
 
349
- i = 0
350
- j = 0
351
- c = []
228
+ # Get string representation of set, treating them
229
+ # as integers
230
+ #
231
+ def to_s_alt
232
+ s = ''
233
+ i = 0
234
+ while i < @elem.size
235
+ if s.length > 0
236
+ s += ' '
237
+ end
238
+ low = @elem[i]
239
+ upr = @elem[i+1]
240
+ s += low.to_s
241
+ if upr > low+1
242
+ s += '..'
243
+ s += (upr-1).to_s
244
+ end
245
+ i += 2
246
+ end
247
+ return s
248
+ end
352
249
 
353
- wasInside = false
354
250
 
355
- while i < sa.length || j < sb.length
251
+ # Negate the inclusion of a contiguous range of values
252
+ #
253
+ # @param lower min value in range
254
+ # @param upper one plus max value in range
255
+ #
256
+ def negate(lower = 0, upper = CODEMAX)
257
+ s2 = CodeSet.new(lower,upper)
258
+ if lower >= upper
259
+ raise RangeError
260
+ end
356
261
 
357
- if i == sa.length
358
- v = sb[j]
359
- elsif j == sb.length
360
- v = sa[i]
262
+ newSet = []
263
+ i = 0
264
+ while i < @elem.size and @elem[i] <= lower
265
+ newSet.push(@elem[i])
266
+ i += 1
267
+ end
268
+
269
+ if i > 0 and newSet[i-1] == lower
270
+ newSet.pop
361
271
  else
362
- v = [sa[i],sb[j]].min
272
+ newSet.push(lower)
363
273
  end
364
-
365
- if i < sa.length && v == sa[i]
274
+
275
+ while i < @elem.size and @elem[i] <= upper
276
+ newSet.push(@elem[i])
366
277
  i += 1
367
- end
368
- if j < sb.length && v == sb[j]
369
- j += 1
370
- end
371
-
372
- case oper
373
- when 'i'
374
- inside = ((i & 1) == 1) && ((j & 1) == 1)
375
- when 'd'
376
- inside = ((i & 1) == 1) && ((j & 1) == 0)
278
+ end
279
+
280
+
281
+ if newSet.length > 0 and newSet.last == upper
282
+ newSet.pop
377
283
  else
378
- raise Exception, "illegal"
284
+ newSet.push(upper)
285
+ end
286
+
287
+ while i < @elem.size
288
+ newSet.push(@elem[i])
289
+ i += 1
290
+ end
291
+
292
+ @elem = newSet
293
+
294
+ end
295
+
296
+ # Determine how many distinct values are represented by this set
297
+ def cardinality
298
+ c = 0
299
+ i = 0
300
+ while i < @elem.length
301
+ c += @elem[i+1] - @elem[i]
302
+ i += 2
379
303
  end
304
+ c
305
+ end
306
+
307
+ # Determine if this set is empty
308
+ #
309
+ def empty?
310
+ @elem.empty?
311
+ end
312
+
313
+ private
314
+
315
+ # Get a debug description of a value within a CodeSet, suitable
316
+ # for including within a .dot label
317
+ #
318
+ def dbStr(charCode)
380
319
 
381
- if inside != wasInside
382
- c.push v
383
- wasInside = inside
320
+ # Unless it corresponds to a non-confusing printable ASCII value,
321
+ # just print its decimal equivalent
322
+
323
+ s = charCode.to_s
324
+
325
+ if charCode == EPSILON
326
+ s = "(e)"
327
+ elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
328
+ s = charCode.chr
329
+ end
330
+ return s
331
+ end
332
+
333
+ # Combine this range (a) with another (b) according to particular operation
334
+ # > s other range (b)
335
+ # > oper 'i': intersection, a^b
336
+ # 'd': difference, a-b
337
+ # 'n': negation, (a & !b) | (!a & b)
338
+ #
339
+ def combineWith(s, oper)
340
+ sa = array
341
+ sb = s.array
342
+
343
+ i = 0
344
+ j = 0
345
+ c = []
346
+
347
+ wasInside = false
348
+
349
+ while i < sa.length || j < sb.length
350
+
351
+ if i == sa.length
352
+ v = sb[j]
353
+ elsif j == sb.length
354
+ v = sa[i]
355
+ else
356
+ v = [sa[i],sb[j]].min
357
+ end
358
+
359
+ if i < sa.length && v == sa[i]
360
+ i += 1
361
+ end
362
+ if j < sb.length && v == sb[j]
363
+ j += 1
364
+ end
365
+
366
+ case oper
367
+ when 'i'
368
+ inside = ((i & 1) == 1) && ((j & 1) == 1)
369
+ when 'd'
370
+ inside = ((i & 1) == 1) && ((j & 1) == 0)
371
+ else
372
+ raise Exception, "illegal"
373
+ end
374
+
375
+ if inside != wasInside
376
+ c.push v
377
+ wasInside = inside
378
+ end
384
379
  end
380
+ ret = CodeSet.new()
381
+ ret.setArray(c)
382
+ ret
385
383
  end
386
- ret = CodeSet.new()
387
- ret.setArray(c)
388
- ret
384
+
389
385
  end
390
386
 
391
- end
392
-
387
+ end