chars 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +1 -1
- data/.gemspec +0 -0
- data/.github/workflows/ruby.yml +28 -0
- data/.gitignore +8 -0
- data/.yardopts +1 -1
- data/ChangeLog.md +30 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +1 -1
- data/README.md +79 -46
- data/Rakefile +7 -30
- data/benchmarks/compare.rb +16 -0
- data/benchmarks/strings_in.rb +23 -0
- data/chars.gemspec +58 -7
- data/gemspec.yml +10 -4
- data/lib/chars/char_set.rb +193 -94
- data/lib/chars/chars.rb +78 -38
- data/lib/chars/extensions/integer.rb +51 -34
- data/lib/chars/extensions/string.rb +34 -17
- data/lib/chars/version.rb +2 -2
- data/spec/char_set_spec.rb +236 -133
- data/spec/chars_spec.rb +14 -73
- data/spec/{integer_spec.rb → extensions/integer_spec.rb} +19 -20
- data/spec/{string_spec.rb → extensions/string_spec.rb} +20 -19
- data/spec/spec_helper.rb +0 -1
- metadata +56 -98
data/lib/chars/char_set.rb
CHANGED
@@ -1,28 +1,85 @@
|
|
1
1
|
require 'set'
|
2
2
|
|
3
3
|
module Chars
|
4
|
-
class CharSet <
|
4
|
+
class CharSet < Set
|
5
5
|
|
6
6
|
#
|
7
7
|
# Creates a new CharSet object.
|
8
8
|
#
|
9
|
-
# @param [Array<String, Integer,
|
9
|
+
# @param [Array<String, Integer, Enumerable>] arguments
|
10
10
|
# The chars for the CharSet.
|
11
11
|
#
|
12
|
-
|
12
|
+
# @raise [TypeError]
|
13
|
+
# One of the arguments was not a {String}, {Integer} or `Enumerable`.
|
14
|
+
#
|
15
|
+
def initialize(*arguments)
|
13
16
|
super()
|
14
17
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
@chars = Hash.new { |hash,key| hash[key] = key.chr(Encoding::UTF_8) }
|
19
|
+
|
20
|
+
arguments.each do |subset|
|
21
|
+
case subset
|
22
|
+
when String, Integer
|
23
|
+
self << subset
|
24
|
+
when Enumerable
|
25
|
+
subset.each { |char| self << char }
|
26
|
+
else
|
27
|
+
raise(TypeError,"arguments must be a String, Integer or Enumerable")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Initializes the copy of another {CharSet} object.
|
34
|
+
#
|
35
|
+
# @param [CharSet] other
|
36
|
+
# The other {CharSet} object.
|
37
|
+
#
|
38
|
+
def initialize_copy(other)
|
39
|
+
@chars = other.instance_variable_get('@chars').dup
|
40
|
+
end
|
41
|
+
|
42
|
+
#
|
43
|
+
# Creates a new {CharSet}.
|
44
|
+
#
|
45
|
+
# @see #initialize
|
46
|
+
#
|
47
|
+
# @since 0.2.1
|
48
|
+
#
|
49
|
+
def self.[](*arguments)
|
50
|
+
new(*arguments)
|
51
|
+
end
|
52
|
+
|
53
|
+
#
|
54
|
+
# Adds a character to the set.
|
55
|
+
#
|
56
|
+
# @param [String, Integer] other
|
57
|
+
# The character(s) or byte to add.
|
58
|
+
#
|
59
|
+
# @return [CharSet]
|
60
|
+
# The modified character set.
|
61
|
+
#
|
62
|
+
# @raise [TypeError]
|
63
|
+
# The argument was not a {String} or {Integer}.
|
64
|
+
#
|
65
|
+
# @since 0.2.1
|
66
|
+
#
|
67
|
+
def <<(other)
|
68
|
+
case other
|
69
|
+
when String
|
70
|
+
other.each_char do |char|
|
71
|
+
byte = char.ord
|
72
|
+
|
73
|
+
@chars[byte] = char
|
74
|
+
super(byte)
|
22
75
|
end
|
23
|
-
}
|
24
76
|
|
25
|
-
|
77
|
+
return self
|
78
|
+
when Integer
|
79
|
+
super(other)
|
80
|
+
else
|
81
|
+
raise(TypeError,"can only append Strings and Integers")
|
82
|
+
end
|
26
83
|
end
|
27
84
|
|
28
85
|
alias include_byte? include?
|
@@ -32,100 +89,104 @@ module Chars
|
|
32
89
|
alias map_bytes map
|
33
90
|
|
34
91
|
#
|
35
|
-
# Determines if a character is contained within the
|
92
|
+
# Determines if a character is contained within the {CharSet}.
|
36
93
|
#
|
37
94
|
# @param [String] char
|
38
95
|
# The character to search for.
|
39
96
|
#
|
40
97
|
# @return [Boolean]
|
41
98
|
# Specifies whether the character is contained within the
|
42
|
-
#
|
99
|
+
# {CharSet}.
|
43
100
|
#
|
44
101
|
def include_char?(char)
|
45
|
-
|
46
|
-
|
102
|
+
unless char.empty?
|
103
|
+
@chars.has_value?(char) || include_byte?(char.ord)
|
47
104
|
else
|
48
105
|
false
|
49
106
|
end
|
50
107
|
end
|
51
108
|
|
52
109
|
#
|
53
|
-
# The characters within the
|
110
|
+
# The characters within the {CharSet}.
|
54
111
|
#
|
55
112
|
# @return [Array<String>]
|
56
|
-
# All the characters within the
|
113
|
+
# All the characters within the {CharSet}.
|
57
114
|
#
|
58
115
|
def chars
|
59
|
-
map { |
|
116
|
+
map { |byte| @chars[byte] }
|
60
117
|
end
|
61
118
|
|
62
119
|
#
|
63
|
-
# Iterates over every character within the
|
120
|
+
# Iterates over every character within the {CharSet}.
|
64
121
|
#
|
65
122
|
# @yield [char]
|
66
123
|
# If a block is given, it will be passed each character in the
|
67
|
-
#
|
124
|
+
# {CharSet}.
|
68
125
|
#
|
69
126
|
# @yieldparam [String] char
|
70
|
-
# Each character in the
|
127
|
+
# Each character in the {CharSet}.
|
71
128
|
#
|
72
129
|
# @return [Enumerator]
|
73
130
|
# If no block is given, an enumerator object will be returned.
|
74
131
|
#
|
75
132
|
def each_char
|
76
|
-
return enum_for(
|
133
|
+
return enum_for(__method__) unless block_given?
|
77
134
|
|
78
|
-
each { |
|
135
|
+
each { |byte| yield @chars[byte] }
|
79
136
|
end
|
80
137
|
|
81
138
|
#
|
82
|
-
# Selects characters from the
|
139
|
+
# Selects characters from the {CharSet}.
|
83
140
|
#
|
84
141
|
# @yield [char]
|
85
142
|
# If a block is given, it will be used to select the characters
|
86
|
-
# from the
|
143
|
+
# from the {CharSet}.
|
87
144
|
#
|
88
145
|
# @yieldparam [String] char
|
89
146
|
# The character to select or reject.
|
90
147
|
#
|
91
148
|
# @return [Array<String>]
|
92
|
-
# The selected characters from the
|
149
|
+
# The selected characters from the {CharSet}.
|
93
150
|
#
|
94
151
|
def select_chars(&block)
|
95
|
-
|
152
|
+
each_char.select(&block)
|
96
153
|
end
|
97
154
|
|
98
155
|
#
|
99
|
-
# Maps the characters of the
|
156
|
+
# Maps the characters of the {CharSet}.
|
100
157
|
#
|
101
158
|
# @yield [char]
|
102
159
|
# The given block will be used to transform the characters within
|
103
|
-
# the
|
160
|
+
# the {CharSet}.
|
104
161
|
#
|
105
162
|
# @yieldparam [String] char
|
106
|
-
# Each character in the
|
163
|
+
# Each character in the {CharSet}.
|
107
164
|
#
|
108
165
|
# @return [Array<String>]
|
109
|
-
# The mapped characters of the
|
166
|
+
# The mapped characters of the {CharSet}.
|
110
167
|
#
|
111
168
|
def map_chars(&block)
|
112
|
-
|
169
|
+
each_char.map(&block)
|
113
170
|
end
|
114
171
|
|
172
|
+
#
|
173
|
+
# Returns a random byte from the {CharSet}.
|
115
174
|
#
|
116
175
|
# @return [Integer]
|
117
|
-
# A random byte
|
176
|
+
# A random byte value.
|
118
177
|
#
|
119
178
|
def random_byte
|
120
179
|
self.entries[rand(self.length)]
|
121
180
|
end
|
122
181
|
|
182
|
+
#
|
183
|
+
# Returns a random character from the {CharSet}.
|
123
184
|
#
|
124
185
|
# @return [String]
|
125
|
-
# A random char
|
186
|
+
# A random char value.
|
126
187
|
#
|
127
188
|
def random_char
|
128
|
-
random_byte
|
189
|
+
@chars[random_byte]
|
129
190
|
end
|
130
191
|
|
131
192
|
#
|
@@ -138,13 +199,13 @@ module Chars
|
|
138
199
|
# The block will receive the random bytes.
|
139
200
|
#
|
140
201
|
# @yieldparam [Integer] byte
|
141
|
-
# The random byte from the
|
202
|
+
# The random byte from the {CharSet}.
|
142
203
|
#
|
143
204
|
# @return [Enumerator]
|
144
205
|
# If no block is given, an enumerator object will be returned.
|
145
206
|
#
|
146
207
|
def each_random_byte(n,&block)
|
147
|
-
return enum_for(
|
208
|
+
return enum_for(__method__,n) unless block_given?
|
148
209
|
|
149
210
|
n.times { yield random_byte }
|
150
211
|
return nil
|
@@ -160,19 +221,19 @@ module Chars
|
|
160
221
|
# The block will receive the random characters.
|
161
222
|
#
|
162
223
|
# @yieldparam [String] char
|
163
|
-
# The random character from the
|
224
|
+
# The random character from the {CharSet}.
|
164
225
|
#
|
165
226
|
# @return [Enumerator]
|
166
227
|
# If no block is given, an enumerator object will be returned.
|
167
228
|
#
|
168
229
|
def each_random_char(n,&block)
|
169
|
-
return enum_for(
|
230
|
+
return enum_for(__method__,n) unless block_given?
|
170
231
|
|
171
|
-
each_random_byte(n) { |
|
232
|
+
each_random_byte(n) { |byte| yield @chars[byte] }
|
172
233
|
end
|
173
234
|
|
174
235
|
#
|
175
|
-
# Creates an Array of random bytes from the
|
236
|
+
# Creates an Array of random bytes from the {CharSet}.
|
176
237
|
#
|
177
238
|
# @param [Integer, Array, Range] length
|
178
239
|
# The length of the Array of random bytes.
|
@@ -181,15 +242,18 @@ module Chars
|
|
181
242
|
# The randomly selected bytes.
|
182
243
|
#
|
183
244
|
def random_bytes(length)
|
184
|
-
|
185
|
-
|
245
|
+
case length
|
246
|
+
when Array
|
247
|
+
Array.new(length.sample) { random_byte }
|
248
|
+
when Range
|
249
|
+
Array.new(rand(length)) { random_byte }
|
186
250
|
else
|
187
251
|
Array.new(length) { random_byte }
|
188
252
|
end
|
189
253
|
end
|
190
254
|
|
191
255
|
#
|
192
|
-
# Creates an Array of random non-repeating bytes from the
|
256
|
+
# Creates an Array of random non-repeating bytes from the {CharSet}.
|
193
257
|
#
|
194
258
|
# @param [Integer, Array, Range] length
|
195
259
|
# The length of the Array of random non-repeating bytes.
|
@@ -198,15 +262,20 @@ module Chars
|
|
198
262
|
# The randomly selected non-repeating bytes.
|
199
263
|
#
|
200
264
|
def random_distinct_bytes(length)
|
201
|
-
|
202
|
-
|
265
|
+
shuffled_bytes = bytes.shuffle
|
266
|
+
|
267
|
+
case length
|
268
|
+
when Array
|
269
|
+
shuffled_bytes[0,length.sample]
|
270
|
+
when Range
|
271
|
+
shuffled_bytes[0,rand(length)]
|
203
272
|
else
|
204
|
-
|
273
|
+
shuffled_bytes[0,length]
|
205
274
|
end
|
206
275
|
end
|
207
276
|
|
208
277
|
#
|
209
|
-
# Creates an Array of random characters from the
|
278
|
+
# Creates an Array of random characters from the {CharSet}.
|
210
279
|
#
|
211
280
|
# @param [Integer, Array, Range] length
|
212
281
|
# The length of the Array of random characters.
|
@@ -215,12 +284,12 @@ module Chars
|
|
215
284
|
# The randomly selected characters.
|
216
285
|
#
|
217
286
|
def random_chars(length)
|
218
|
-
random_bytes(length).map { |
|
287
|
+
random_bytes(length).map { |byte| @chars[byte] }
|
219
288
|
end
|
220
289
|
|
221
290
|
#
|
222
291
|
# Creates a String containing randomly selected characters from the
|
223
|
-
#
|
292
|
+
# {CharSet}.
|
224
293
|
#
|
225
294
|
# @param [Integer, Array, Range] length
|
226
295
|
# The length of the String of random characters.
|
@@ -236,7 +305,7 @@ module Chars
|
|
236
305
|
|
237
306
|
#
|
238
307
|
# Creates an Array of random non-repeating characters from the
|
239
|
-
#
|
308
|
+
# {CharSet}.
|
240
309
|
#
|
241
310
|
# @param [Integer, Array, Range] length
|
242
311
|
# The length of the Array of random non-repeating characters.
|
@@ -245,12 +314,12 @@ module Chars
|
|
245
314
|
# The randomly selected non-repeating characters.
|
246
315
|
#
|
247
316
|
def random_distinct_chars(length)
|
248
|
-
random_distinct_bytes(length).map { |
|
317
|
+
random_distinct_bytes(length).map { |byte| @chars[byte] }
|
249
318
|
end
|
250
319
|
|
251
320
|
#
|
252
321
|
# Creates a String containing randomly selected non-repeating
|
253
|
-
# characters from the
|
322
|
+
# characters from the {CharSet}.
|
254
323
|
#
|
255
324
|
# @param [Integer, Array, Range] length
|
256
325
|
# The length of the String of random non-repeating characters.
|
@@ -264,10 +333,9 @@ module Chars
|
|
264
333
|
random_distinct_chars(length).join
|
265
334
|
end
|
266
335
|
|
267
|
-
|
268
336
|
#
|
269
337
|
# Finds sub-strings within given data that are made of characters within
|
270
|
-
# the
|
338
|
+
# the {CharSet}.
|
271
339
|
#
|
272
340
|
# @param [String] data
|
273
341
|
# The data to find sub-strings within.
|
@@ -283,77 +351,108 @@ module Chars
|
|
283
351
|
# sub-strings within the data, or to just return the matched
|
284
352
|
# sub-strings themselves.
|
285
353
|
#
|
286
|
-
|
287
|
-
|
354
|
+
# @yield [match,(index)]
|
355
|
+
# The given block will be passed every matched sub-string, and the
|
356
|
+
# optional index.
|
357
|
+
#
|
358
|
+
# @yieldparam [String] match
|
359
|
+
# A sub-string containing the characters from the {CharSet}.
|
360
|
+
#
|
361
|
+
# @yieldparam [Integer] index
|
362
|
+
# The index the sub-string was found at.
|
363
|
+
#
|
364
|
+
# @return [Array, Hash]
|
365
|
+
# If no block is given, an Array or Hash of sub-strings is returned.
|
366
|
+
#
|
367
|
+
def strings_in(data,options={},&block)
|
368
|
+
unless block
|
369
|
+
if options[:offsets]
|
370
|
+
found = {}
|
371
|
+
block = lambda { |offset,substring| found[offset] = substring }
|
372
|
+
else
|
373
|
+
found = []
|
374
|
+
block = lambda { |substring| found << substring }
|
375
|
+
end
|
288
376
|
|
289
|
-
|
290
|
-
found
|
291
|
-
found_substring = lambda { |offset,substring|
|
292
|
-
found[offset] = substring
|
293
|
-
}
|
294
|
-
else
|
295
|
-
found = []
|
296
|
-
found_substring = lambda { |offset,substring|
|
297
|
-
found << substring
|
298
|
-
}
|
377
|
+
strings_in(data,options,&block)
|
378
|
+
return found
|
299
379
|
end
|
300
380
|
|
301
|
-
|
381
|
+
min_length = options.fetch(:length,4)
|
382
|
+
return if data.length < min_length
|
302
383
|
|
303
384
|
index = 0
|
304
385
|
|
305
386
|
while index <= (data.length - min_length)
|
306
|
-
if self === data[index
|
387
|
+
if self === data[index,min_length]
|
307
388
|
sub_index = (index + min_length)
|
308
389
|
|
309
|
-
while self.include_char?(data[sub_index
|
390
|
+
while self.include_char?(data[sub_index,1])
|
310
391
|
sub_index += 1
|
311
392
|
end
|
312
393
|
|
313
|
-
|
394
|
+
match = data[index...sub_index]
|
395
|
+
|
396
|
+
case block.arity
|
397
|
+
when 2
|
398
|
+
yield match, index
|
399
|
+
else
|
400
|
+
yield match
|
401
|
+
end
|
402
|
+
|
314
403
|
index = sub_index
|
315
404
|
else
|
316
405
|
index += 1
|
317
406
|
end
|
318
407
|
end
|
319
|
-
|
320
|
-
return found
|
321
408
|
end
|
322
409
|
|
323
410
|
#
|
324
|
-
# Creates a new CharSet object by unioning the
|
325
|
-
#
|
411
|
+
# Creates a new CharSet object by unioning the {CharSet} with another
|
412
|
+
# {CharSet}.
|
326
413
|
#
|
327
|
-
# @param [CharSet, Array, Range]
|
328
|
-
# The other
|
414
|
+
# @param [CharSet, Array, Range] set
|
415
|
+
# The other {CharSet} to union with.
|
329
416
|
#
|
330
417
|
# @return [CharSet]
|
331
|
-
# The unioned
|
418
|
+
# The unioned {CharSet}.
|
332
419
|
#
|
333
|
-
def |(
|
334
|
-
|
420
|
+
def |(set)
|
421
|
+
set = CharSet.new(set) unless set.kind_of?(CharSet)
|
422
|
+
|
423
|
+
return super(set)
|
335
424
|
end
|
336
425
|
|
337
426
|
alias + |
|
338
427
|
|
339
428
|
#
|
340
429
|
# Compares the bytes within a given string with the bytes of the
|
341
|
-
#
|
430
|
+
# {CharSet}.
|
342
431
|
#
|
343
|
-
# @param [String]
|
344
|
-
# The string to compare with the
|
432
|
+
# @param [String, Enumerable] other
|
433
|
+
# The string to compare with the {CharSet}.
|
345
434
|
#
|
346
435
|
# @return [Boolean]
|
347
436
|
# Specifies whether all of the bytes within the given string are
|
348
|
-
# included in the
|
437
|
+
# included in the {CharSet}.
|
349
438
|
#
|
350
439
|
# @example
|
351
440
|
# Chars.alpha === "hello"
|
352
441
|
# # => true
|
353
442
|
#
|
354
|
-
def ===(
|
355
|
-
|
356
|
-
|
443
|
+
def ===(other)
|
444
|
+
case other
|
445
|
+
when String
|
446
|
+
other.each_char.all? { |char| include_char?(char) }
|
447
|
+
when Enumerable
|
448
|
+
other.all? do |element|
|
449
|
+
case element
|
450
|
+
when String
|
451
|
+
include_char?(element)
|
452
|
+
when Integer
|
453
|
+
include_byte?(element)
|
454
|
+
end
|
455
|
+
end
|
357
456
|
else
|
358
457
|
false
|
359
458
|
end
|
@@ -362,22 +461,22 @@ module Chars
|
|
362
461
|
alias =~ ===
|
363
462
|
|
364
463
|
#
|
365
|
-
# Inspects the
|
464
|
+
# Inspects the {CharSet}.
|
366
465
|
#
|
367
466
|
# @return [String]
|
368
|
-
# The inspected
|
467
|
+
# The inspected {CharSet}.
|
369
468
|
#
|
370
469
|
def inspect
|
371
|
-
"#<#{self.class.name}: {" + map { |
|
372
|
-
case
|
470
|
+
"#<#{self.class.name}: {" + map { |byte|
|
471
|
+
case byte
|
373
472
|
when (0x07..0x0d), (0x20..0x7e)
|
374
|
-
|
473
|
+
@chars[byte].dump
|
375
474
|
when 0x00
|
376
475
|
# sly hack to make char-sets more friendly
|
377
476
|
# to us C programmers
|
378
477
|
'"\0"'
|
379
478
|
else
|
380
|
-
"0x%02x"
|
479
|
+
sprintf("0x%02x",byte)
|
381
480
|
end
|
382
481
|
}.join(', ') + "}>"
|
383
482
|
end
|