tokn 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.txt +4 -5
- data/bin/tokncompile +1 -1
- data/bin/toknprocess +10 -4
- data/lib/tokn/code_set.rb +332 -337
- data/lib/tokn/dfa.rb +187 -162
- data/lib/tokn/dfa_builder.rb +218 -220
- data/lib/tokn/range_partition.rb +205 -203
- data/lib/tokn/reg_parse.rb +336 -331
- data/lib/tokn/state.rb +267 -270
- data/lib/tokn/token_defn_parser.rb +144 -139
- data/lib/tokn/tokenizer.rb +243 -175
- data/lib/tokn/tokn_const.rb +11 -6
- data/lib/tokn/tools.rb +42 -20
- data/test/Example1.rb +50 -0
- data/test/data/compileddfa.txt +1 -0
- data/test/data/sampletext.txt +6 -1
- data/test/test.rb +17 -12
- metadata +7 -6
- data/test/simple.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75d7206c10817a05dbbd9e5ff36b25f20ef5ad18
|
4
|
+
data.tar.gz: d1d84da2be85c05b567b476b53471ac2a7c6a04f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47a078062419175ef5fd1d66a46bae95b423c1b69cd8d27a28875589b13abdcdd5474c81e70a9b09f11a27b302f9f9038302fd1ad9121bf81d7db42cf20cea96
|
7
|
+
data.tar.gz: 2bd489cb22b1b68d0a365fd70c337e7cc3bfd1872d501a88e735fe23fe3780a77415262be265263cd906eb7cf32afea7f5e1c8cb849c6feef44567ef80b4717b
|
data/README.txt
CHANGED
@@ -66,7 +66,9 @@ definitions shown earlier.
|
|
66
66
|
|
67
67
|
require "Tokenizer"
|
68
68
|
|
69
|
-
|
69
|
+
include Tokn # Avoids having to prefix things with 'Tokn::'
|
70
|
+
|
71
|
+
dfa = DFA.from_script(readTextFile("tokendefs.txt"))
|
70
72
|
|
71
73
|
t = Tokenizer.new(dfa, readTextFile("source.txt"))
|
72
74
|
|
@@ -92,7 +94,7 @@ The module has two utility scripts: tokncompile, and toknprocess. These can be
|
|
92
94
|
found in the bin/ directory.
|
93
95
|
|
94
96
|
The tokncompile script reads a token definition script from standard input, and
|
95
|
-
compiles it to a DFA. For example, if you are in the tokn directory, you can
|
97
|
+
compiles it to a DFA. For example, if you are in the tokn/test/data directory, you can
|
96
98
|
type:
|
97
99
|
|
98
100
|
tokncompile < sampletokens.txt > compileddfa.txt
|
@@ -189,6 +191,3 @@ only reads Ruby characters from the input, which I believe are only 8 bits wide.
|
|
189
191
|
|
190
192
|
Well, I can be reached as jpsember at gmail dot com.
|
191
193
|
|
192
|
-
|
193
|
-
|
194
|
-
|
data/bin/tokncompile
CHANGED
data/bin/toknprocess
CHANGED
@@ -11,14 +11,20 @@
|
|
11
11
|
#
|
12
12
|
|
13
13
|
require 'tokn'
|
14
|
+
include Tokn
|
14
15
|
|
15
|
-
if ARGV.size
|
16
|
-
puts "Usage: toknprocess <dfa file> <source file>"
|
16
|
+
if ARGV.size < 2 || ARGV.size > 3
|
17
|
+
puts "Usage: toknprocess <dfa file> <source file> [<skip token name>]"
|
17
18
|
abort
|
18
19
|
end
|
19
20
|
|
20
|
-
dfa =
|
21
|
-
|
21
|
+
dfa = DFA.from_file(ARGV[0])
|
22
|
+
skipName = nil
|
23
|
+
if ARGV.size >= 3
|
24
|
+
skipName = ARGV[2]
|
25
|
+
end
|
26
|
+
|
27
|
+
tk = Tokenizer.new(dfa, readTextFile(ARGV[1]), skipName)
|
22
28
|
|
23
29
|
while tk.hasNext()
|
24
30
|
t = tk.read
|
data/lib/tokn/code_set.rb
CHANGED
@@ -1,392 +1,387 @@
|
|
1
1
|
require_relative 'tools'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
# A CodeSet is an ordered set of character or token codes that
|
7
|
-
# are used as labels on DFA edges.
|
8
|
-
#
|
9
|
-
# In addition to unicode character codes 0...0x10ffff, they
|
10
|
-
# also represent epsilon transitions (-1), or token identifiers ( < -1).
|
11
|
-
#
|
12
|
-
# Each CodeSet is represented as an array with 2n elements;
|
13
|
-
# each pair represents a closed lower and open upper range of values.
|
14
|
-
#
|
15
|
-
# Thus a value x is within the set [a1,a2,b1,b2,..]
|
16
|
-
# iff (a1 <= x < a2) or (b1 <= x < b2) or ...
|
17
|
-
#
|
18
|
-
class CodeSet
|
19
|
-
|
20
|
-
include Tokn
|
21
|
-
|
22
|
-
# Construct a copy of this set
|
23
|
-
#
|
24
|
-
def makeCopy
|
25
|
-
c = CodeSet.new
|
26
|
-
c.setTo(self)
|
27
|
-
c
|
28
|
-
end
|
3
|
+
module ToknInternal
|
29
4
|
|
30
|
-
#
|
31
|
-
#
|
32
|
-
def initialize(lower = nil, upper = nil)
|
33
|
-
@elem = []
|
34
|
-
if lower
|
35
|
-
add(lower,upper)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Replace this set with a copy of another
|
40
|
-
#
|
41
|
-
def setTo(otherSet)
|
42
|
-
@elem.replace(otherSet.array)
|
43
|
-
end
|
44
|
-
|
45
|
-
# Get the array containing the code set range pairs
|
46
|
-
#
|
47
|
-
def array
|
48
|
-
return @elem
|
49
|
-
end
|
50
|
-
|
51
|
-
# Replace this set's array
|
52
|
-
# @param a array to point to (does not make a copy of it)
|
53
|
-
#
|
54
|
-
def setArray(a)
|
55
|
-
@elem = a
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
def hash
|
60
|
-
return @elem.hash
|
61
|
-
end
|
62
|
-
|
63
|
-
# Determine if this set is equivalent to another
|
5
|
+
# A CodeSet is an ordered set of character or token codes that
|
6
|
+
# are used as labels on DFA edges.
|
64
7
|
#
|
65
|
-
|
66
|
-
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
# Add a contiguous range of values to the set
|
71
|
-
# @param lower min value in range
|
72
|
-
# @param upper one plus max value in range
|
8
|
+
# In addition to unicode character codes 0...0x10ffff, they
|
9
|
+
# also represent epsilon transitions (-1), or token identifiers ( < -1).
|
73
10
|
#
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
if lower >= upper
|
80
|
-
raise RangeError
|
81
|
-
end
|
82
|
-
|
83
|
-
newSet = []
|
84
|
-
i = 0
|
85
|
-
while i < @elem.size and @elem[i] < lower
|
86
|
-
newSet.push(@elem[i])
|
87
|
-
i += 1
|
88
|
-
end
|
89
|
-
|
90
|
-
if (i & 1) == 0
|
91
|
-
newSet.push(lower)
|
92
|
-
end
|
93
|
-
|
94
|
-
while i < @elem.size and @elem[i] <= upper
|
95
|
-
i += 1
|
96
|
-
end
|
97
|
-
|
98
|
-
if (i & 1) == 0
|
99
|
-
newSet.push(upper)
|
100
|
-
end
|
101
|
-
|
102
|
-
while i < @elem.size
|
103
|
-
newSet.push(@elem[i])
|
104
|
-
i += 1
|
105
|
-
end
|
106
|
-
|
107
|
-
@elem = newSet
|
108
|
-
|
109
|
-
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
# Remove a contiguous range of values from the set
|
117
|
-
# @param lower min value in range
|
118
|
-
# @param upper one plus max value in range
|
11
|
+
# Each CodeSet is represented as an array with 2n elements;
|
12
|
+
# each pair represents a closed lower and open upper range of values.
|
13
|
+
#
|
14
|
+
# Thus a value x is within the set [a1,a2,b1,b2,..]
|
15
|
+
# iff (a1 <= x < a2) or (b1 <= x < b2) or ...
|
119
16
|
#
|
120
|
-
|
121
|
-
|
122
|
-
|
17
|
+
class CodeSet
|
18
|
+
|
19
|
+
# Construct a copy of this set
|
20
|
+
#
|
21
|
+
def makeCopy
|
22
|
+
c = CodeSet.new
|
23
|
+
c.setTo(self)
|
24
|
+
c
|
123
25
|
end
|
124
26
|
|
125
|
-
|
126
|
-
|
27
|
+
# Initialize set; optionally add an initial contiguous range
|
28
|
+
#
|
29
|
+
def initialize(lower = nil, upper = nil)
|
30
|
+
@elem = []
|
31
|
+
if lower
|
32
|
+
add(lower,upper)
|
33
|
+
end
|
127
34
|
end
|
128
35
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
i += 1
|
134
|
-
end
|
135
|
-
|
136
|
-
if (i & 1) == 1
|
137
|
-
newSet.push(lower)
|
36
|
+
# Replace this set with a copy of another
|
37
|
+
#
|
38
|
+
def setTo(otherSet)
|
39
|
+
@elem.replace(otherSet.array)
|
138
40
|
end
|
139
41
|
|
140
|
-
|
141
|
-
|
42
|
+
# Get the array containing the code set range pairs
|
43
|
+
#
|
44
|
+
def array
|
45
|
+
@elem
|
142
46
|
end
|
143
47
|
|
144
|
-
|
145
|
-
|
48
|
+
# Replace this set's array
|
49
|
+
# @param a array to point to (does not make a copy of it)
|
50
|
+
#
|
51
|
+
def setArray(a)
|
52
|
+
@elem = a
|
146
53
|
end
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
54
|
+
|
55
|
+
# Get hash code; just uses hash code of the contained array
|
56
|
+
def hash
|
57
|
+
@elem.hash
|
151
58
|
end
|
152
|
-
|
153
|
-
setArray(newSet)
|
154
|
-
|
155
|
-
end
|
156
|
-
|
157
|
-
# Replace this set with itself minus another
|
158
|
-
#
|
159
|
-
def difference!(s)
|
160
|
-
setTo(difference(s))
|
161
|
-
end
|
162
59
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
def intersect(s)
|
170
|
-
combineWith(s, 'i')
|
171
|
-
end
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
# Set this set equal to its intersection with another
|
176
|
-
def intersect!(s)
|
177
|
-
setTo(intersect(s))
|
178
|
-
end
|
60
|
+
# Determine if this set is equivalent to another, by
|
61
|
+
# comparing the contained arrays
|
62
|
+
#
|
63
|
+
def eql?(other)
|
64
|
+
@elem == other.array
|
65
|
+
end
|
179
66
|
|
180
|
-
# Add every value from another CodeSet to this one
|
181
|
-
def addSet(s)
|
182
|
-
sa = s.array
|
183
|
-
|
184
|
-
(0 ... sa.length).step(2) {
|
185
|
-
|i| add(sa[i],sa[i+1])
|
186
|
-
}
|
187
|
-
end
|
188
67
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
if
|
195
|
-
|
68
|
+
# Add a contiguous range of values to the set
|
69
|
+
# @param lower min value in range
|
70
|
+
# @param upper one plus max value in range
|
71
|
+
#
|
72
|
+
def add(lower, upper = nil)
|
73
|
+
if not upper
|
74
|
+
upper = lower + 1
|
75
|
+
end
|
76
|
+
|
77
|
+
if lower >= upper
|
78
|
+
raise RangeError
|
79
|
+
end
|
80
|
+
|
81
|
+
newSet = []
|
82
|
+
i = 0
|
83
|
+
while i < @elem.size and @elem[i] < lower
|
84
|
+
newSet.push(@elem[i])
|
85
|
+
i += 1
|
196
86
|
end
|
197
|
-
|
198
|
-
|
199
|
-
|
87
|
+
|
88
|
+
if (i & 1) == 0
|
89
|
+
newSet.push(lower)
|
200
90
|
end
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
ret
|
205
|
-
|
206
|
-
end
|
207
|
-
|
208
|
-
# Get string representation of set, treating them (where
|
209
|
-
# possible) as printable ASCII characters
|
210
|
-
#
|
211
|
-
def to_s
|
212
|
-
s = ''
|
213
|
-
i = 0
|
214
|
-
while i < @elem.size
|
215
|
-
if s.size
|
216
|
-
s += ' '
|
91
|
+
|
92
|
+
while i < @elem.size and @elem[i] <= upper
|
93
|
+
i += 1
|
217
94
|
end
|
218
95
|
|
219
|
-
|
220
|
-
|
221
|
-
s += dbStr(lower)
|
222
|
-
if upper != 1+lower
|
223
|
-
s += '..' + dbStr(upper-1)
|
96
|
+
if (i & 1) == 0
|
97
|
+
newSet.push(upper)
|
224
98
|
end
|
225
|
-
|
99
|
+
|
100
|
+
while i < @elem.size
|
101
|
+
newSet.push(@elem[i])
|
102
|
+
i += 1
|
103
|
+
end
|
104
|
+
|
105
|
+
@elem = newSet
|
106
|
+
|
226
107
|
end
|
227
|
-
return s
|
228
|
-
end
|
229
108
|
|
230
|
-
def inspect
|
231
|
-
to_s
|
232
|
-
end
|
233
109
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
if s.length > 0
|
242
|
-
s += ' '
|
110
|
+
# Remove a contiguous range of values from the set
|
111
|
+
# @param lower min value in range
|
112
|
+
# @param upper one plus max value in range
|
113
|
+
#
|
114
|
+
def remove(lower, upper = nil)
|
115
|
+
if not upper
|
116
|
+
upper = lower + 1
|
243
117
|
end
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
if upr > low+1
|
248
|
-
s += '..'
|
249
|
-
s += (upr-1).to_s
|
118
|
+
|
119
|
+
if lower >= upper
|
120
|
+
raise RangeError
|
250
121
|
end
|
251
|
-
|
122
|
+
|
123
|
+
newSet = []
|
124
|
+
i = 0
|
125
|
+
while i < @elem.size and @elem[i] < lower
|
126
|
+
newSet.push(@elem[i])
|
127
|
+
i += 1
|
128
|
+
end
|
129
|
+
|
130
|
+
if (i & 1) == 1
|
131
|
+
newSet.push(lower)
|
132
|
+
end
|
133
|
+
|
134
|
+
while i < @elem.size and @elem[i] <= upper
|
135
|
+
i += 1
|
136
|
+
end
|
137
|
+
|
138
|
+
if (i & 1) == 1
|
139
|
+
newSet.push(upper)
|
140
|
+
end
|
141
|
+
|
142
|
+
while i < @elem.size
|
143
|
+
newSet.push(@elem[i])
|
144
|
+
i += 1
|
145
|
+
end
|
146
|
+
|
147
|
+
setArray(newSet)
|
148
|
+
|
252
149
|
end
|
253
|
-
return s
|
254
|
-
end
|
255
|
-
|
256
150
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
#
|
262
|
-
def negate(lower = 0, upper = CODEMAX)
|
263
|
-
s2 = CodeSet.new(lower,upper)
|
264
|
-
if lower >= upper
|
265
|
-
raise RangeError
|
151
|
+
# Replace this set with itself minus another
|
152
|
+
#
|
153
|
+
def difference!(s)
|
154
|
+
setTo(difference(s))
|
266
155
|
end
|
267
156
|
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
newSet.push(@elem[i])
|
272
|
-
i += 1
|
273
|
-
end
|
274
|
-
|
275
|
-
if i > 0 and newSet[i-1] == lower
|
276
|
-
newSet.pop
|
277
|
-
else
|
278
|
-
newSet.push(lower)
|
157
|
+
# Calculate difference of this set minus another
|
158
|
+
def difference(s)
|
159
|
+
combineWith(s, 'd')
|
279
160
|
end
|
280
|
-
|
281
|
-
while i < @elem.size and @elem[i] <= upper
|
282
|
-
newSet.push(@elem[i])
|
283
|
-
i += 1
|
284
|
-
end
|
285
|
-
|
286
|
-
|
287
|
-
if newSet.length > 0 and newSet.last == upper
|
288
|
-
newSet.pop
|
289
|
-
else
|
290
|
-
newSet.push(upper)
|
291
|
-
end
|
292
|
-
|
293
|
-
while i < @elem.size
|
294
|
-
newSet.push(@elem[i])
|
295
|
-
i += 1
|
296
|
-
end
|
297
|
-
|
298
|
-
@elem = newSet
|
299
|
-
|
300
|
-
end
|
301
161
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
i = 0
|
306
|
-
while i < @elem.length
|
307
|
-
c += @elem[i+1] - @elem[i]
|
308
|
-
i += 2
|
162
|
+
# Calculate the intersection of this set and another
|
163
|
+
def intersect(s)
|
164
|
+
combineWith(s, 'i')
|
309
165
|
end
|
310
|
-
c
|
311
|
-
end
|
312
166
|
|
313
|
-
# Determine if this set is empty
|
314
|
-
#
|
315
|
-
def empty?
|
316
|
-
@elem.empty?
|
317
|
-
end
|
318
167
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
#
|
324
|
-
def dbStr(charCode)
|
168
|
+
# Set this set equal to its intersection with another
|
169
|
+
def intersect!(s)
|
170
|
+
setTo(intersect(s))
|
171
|
+
end
|
325
172
|
|
326
|
-
#
|
327
|
-
|
173
|
+
# Add every value from another CodeSet to this one
|
174
|
+
def addSet(s)
|
175
|
+
sa = s.array
|
176
|
+
|
177
|
+
(0 ... sa.length).step(2) {
|
178
|
+
|i| add(sa[i],sa[i+1])
|
179
|
+
}
|
180
|
+
end
|
181
|
+
|
182
|
+
# Determine if this set contains a particular value
|
183
|
+
def contains?(val)
|
184
|
+
ret = false
|
185
|
+
i = 0
|
186
|
+
while i < @elem.size
|
187
|
+
if val < @elem[i]
|
188
|
+
break
|
189
|
+
end
|
190
|
+
if val < @elem[i+1]
|
191
|
+
ret = true
|
192
|
+
break
|
193
|
+
end
|
194
|
+
i += 2
|
195
|
+
end
|
196
|
+
|
197
|
+
ret
|
198
|
+
|
199
|
+
end
|
328
200
|
|
329
|
-
|
201
|
+
# Get string representation of set, treating them (where
|
202
|
+
# possible) as printable ASCII characters
|
203
|
+
#
|
204
|
+
def to_s
|
205
|
+
s = ''
|
206
|
+
i = 0
|
207
|
+
while i < @elem.size
|
208
|
+
if s.size
|
209
|
+
s += ' '
|
210
|
+
end
|
211
|
+
|
212
|
+
lower = @elem[i]
|
213
|
+
upper = @elem[i+1]
|
214
|
+
s += dbStr(lower)
|
215
|
+
if upper != 1+lower
|
216
|
+
s += '..' + dbStr(upper-1)
|
217
|
+
end
|
218
|
+
i += 2
|
219
|
+
end
|
220
|
+
return s
|
221
|
+
end
|
330
222
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
end
|
336
|
-
return s
|
337
|
-
end
|
338
|
-
|
339
|
-
# Combine this range (a) with another (b) according to particular operation
|
340
|
-
# > s other range (b)
|
341
|
-
# > oper 'i': intersection, a^b
|
342
|
-
# 'd': difference, a-b
|
343
|
-
# 'n': negation, (a & !b) | (!a & b)
|
344
|
-
#
|
345
|
-
def combineWith(s, oper)
|
346
|
-
sa = array
|
347
|
-
sb = s.array
|
223
|
+
# Calls to_s
|
224
|
+
def inspect
|
225
|
+
to_s
|
226
|
+
end
|
348
227
|
|
349
|
-
|
350
|
-
|
351
|
-
|
228
|
+
# Get string representation of set, treating them
|
229
|
+
# as integers
|
230
|
+
#
|
231
|
+
def to_s_alt
|
232
|
+
s = ''
|
233
|
+
i = 0
|
234
|
+
while i < @elem.size
|
235
|
+
if s.length > 0
|
236
|
+
s += ' '
|
237
|
+
end
|
238
|
+
low = @elem[i]
|
239
|
+
upr = @elem[i+1]
|
240
|
+
s += low.to_s
|
241
|
+
if upr > low+1
|
242
|
+
s += '..'
|
243
|
+
s += (upr-1).to_s
|
244
|
+
end
|
245
|
+
i += 2
|
246
|
+
end
|
247
|
+
return s
|
248
|
+
end
|
352
249
|
|
353
|
-
wasInside = false
|
354
250
|
|
355
|
-
|
251
|
+
# Negate the inclusion of a contiguous range of values
|
252
|
+
#
|
253
|
+
# @param lower min value in range
|
254
|
+
# @param upper one plus max value in range
|
255
|
+
#
|
256
|
+
def negate(lower = 0, upper = CODEMAX)
|
257
|
+
s2 = CodeSet.new(lower,upper)
|
258
|
+
if lower >= upper
|
259
|
+
raise RangeError
|
260
|
+
end
|
356
261
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
262
|
+
newSet = []
|
263
|
+
i = 0
|
264
|
+
while i < @elem.size and @elem[i] <= lower
|
265
|
+
newSet.push(@elem[i])
|
266
|
+
i += 1
|
267
|
+
end
|
268
|
+
|
269
|
+
if i > 0 and newSet[i-1] == lower
|
270
|
+
newSet.pop
|
361
271
|
else
|
362
|
-
|
272
|
+
newSet.push(lower)
|
363
273
|
end
|
364
|
-
|
365
|
-
|
274
|
+
|
275
|
+
while i < @elem.size and @elem[i] <= upper
|
276
|
+
newSet.push(@elem[i])
|
366
277
|
i += 1
|
367
|
-
end
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
case oper
|
373
|
-
when 'i'
|
374
|
-
inside = ((i & 1) == 1) && ((j & 1) == 1)
|
375
|
-
when 'd'
|
376
|
-
inside = ((i & 1) == 1) && ((j & 1) == 0)
|
278
|
+
end
|
279
|
+
|
280
|
+
|
281
|
+
if newSet.length > 0 and newSet.last == upper
|
282
|
+
newSet.pop
|
377
283
|
else
|
378
|
-
|
284
|
+
newSet.push(upper)
|
285
|
+
end
|
286
|
+
|
287
|
+
while i < @elem.size
|
288
|
+
newSet.push(@elem[i])
|
289
|
+
i += 1
|
290
|
+
end
|
291
|
+
|
292
|
+
@elem = newSet
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
# Determine how many distinct values are represented by this set
|
297
|
+
def cardinality
|
298
|
+
c = 0
|
299
|
+
i = 0
|
300
|
+
while i < @elem.length
|
301
|
+
c += @elem[i+1] - @elem[i]
|
302
|
+
i += 2
|
379
303
|
end
|
304
|
+
c
|
305
|
+
end
|
306
|
+
|
307
|
+
# Determine if this set is empty
|
308
|
+
#
|
309
|
+
def empty?
|
310
|
+
@elem.empty?
|
311
|
+
end
|
312
|
+
|
313
|
+
private
|
314
|
+
|
315
|
+
# Get a debug description of a value within a CodeSet, suitable
|
316
|
+
# for including within a .dot label
|
317
|
+
#
|
318
|
+
def dbStr(charCode)
|
380
319
|
|
381
|
-
|
382
|
-
|
383
|
-
|
320
|
+
# Unless it corresponds to a non-confusing printable ASCII value,
|
321
|
+
# just print its decimal equivalent
|
322
|
+
|
323
|
+
s = charCode.to_s
|
324
|
+
|
325
|
+
if charCode == EPSILON
|
326
|
+
s = "(e)"
|
327
|
+
elsif (charCode > 32 && charCode < 0x7f && !"'\"\\[]{}()".index(charCode.chr))
|
328
|
+
s = charCode.chr
|
329
|
+
end
|
330
|
+
return s
|
331
|
+
end
|
332
|
+
|
333
|
+
# Combine this range (a) with another (b) according to particular operation
|
334
|
+
# > s other range (b)
|
335
|
+
# > oper 'i': intersection, a^b
|
336
|
+
# 'd': difference, a-b
|
337
|
+
# 'n': negation, (a & !b) | (!a & b)
|
338
|
+
#
|
339
|
+
def combineWith(s, oper)
|
340
|
+
sa = array
|
341
|
+
sb = s.array
|
342
|
+
|
343
|
+
i = 0
|
344
|
+
j = 0
|
345
|
+
c = []
|
346
|
+
|
347
|
+
wasInside = false
|
348
|
+
|
349
|
+
while i < sa.length || j < sb.length
|
350
|
+
|
351
|
+
if i == sa.length
|
352
|
+
v = sb[j]
|
353
|
+
elsif j == sb.length
|
354
|
+
v = sa[i]
|
355
|
+
else
|
356
|
+
v = [sa[i],sb[j]].min
|
357
|
+
end
|
358
|
+
|
359
|
+
if i < sa.length && v == sa[i]
|
360
|
+
i += 1
|
361
|
+
end
|
362
|
+
if j < sb.length && v == sb[j]
|
363
|
+
j += 1
|
364
|
+
end
|
365
|
+
|
366
|
+
case oper
|
367
|
+
when 'i'
|
368
|
+
inside = ((i & 1) == 1) && ((j & 1) == 1)
|
369
|
+
when 'd'
|
370
|
+
inside = ((i & 1) == 1) && ((j & 1) == 0)
|
371
|
+
else
|
372
|
+
raise Exception, "illegal"
|
373
|
+
end
|
374
|
+
|
375
|
+
if inside != wasInside
|
376
|
+
c.push v
|
377
|
+
wasInside = inside
|
378
|
+
end
|
384
379
|
end
|
380
|
+
ret = CodeSet.new()
|
381
|
+
ret.setArray(c)
|
382
|
+
ret
|
385
383
|
end
|
386
|
-
|
387
|
-
ret.setArray(c)
|
388
|
-
ret
|
384
|
+
|
389
385
|
end
|
390
386
|
|
391
|
-
end
|
392
|
-
|
387
|
+
end
|