chars 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +1 -1
- data/.github/workflows/ruby.yml +28 -0
- data/.gitignore +8 -0
- data/.yardopts +1 -1
- data/ChangeLog.md +33 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +1 -1
- data/README.md +130 -50
- data/Rakefile +6 -30
- data/benchmarks/compare.rb +16 -0
- data/benchmarks/substrings.rb +25 -0
- data/chars.gemspec +39 -105
- data/gemspec.yml +9 -5
- data/lib/chars/char_set.rb +371 -162
- data/lib/chars/chars.rb +86 -21
- data/lib/chars/extensions/integer.rb +34 -17
- data/lib/chars/extensions/string.rb +34 -17
- data/lib/chars/string_enumerator.rb +98 -0
- data/lib/chars/version.rb +2 -2
- data/spec/char_set_spec.rb +623 -110
- data/spec/chars_spec.rb +183 -27
- data/spec/extensions/integer_spec.rb +18 -18
- data/spec/extensions/string_spec.rb +20 -18
- data/spec/spec_helper.rb +0 -2
- data/spec/string_enumerator_spec.rb +99 -0
- metadata +57 -77
data/lib/chars/char_set.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require 'chars/string_enumerator'
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
|
3
5
|
module Chars
|
4
|
-
class CharSet <
|
6
|
+
class CharSet < Set
|
5
7
|
|
6
8
|
#
|
7
9
|
# Creates a new CharSet object.
|
@@ -9,10 +11,13 @@ module Chars
|
|
9
11
|
# @param [Array<String, Integer, Enumerable>] arguments
|
10
12
|
# The chars for the CharSet.
|
11
13
|
#
|
14
|
+
# @raise [TypeError]
|
15
|
+
# One of the arguments was not a {String}, {Integer} or `Enumerable`.
|
16
|
+
#
|
12
17
|
def initialize(*arguments)
|
13
18
|
super()
|
14
19
|
|
15
|
-
@chars = Hash.new { |hash,key| hash[key] =
|
20
|
+
@chars = Hash.new { |hash,key| hash[key] = key.chr(Encoding::UTF_8) }
|
16
21
|
|
17
22
|
arguments.each do |subset|
|
18
23
|
case subset
|
@@ -21,13 +26,23 @@ module Chars
|
|
21
26
|
when Enumerable
|
22
27
|
subset.each { |char| self << char }
|
23
28
|
else
|
24
|
-
raise(
|
29
|
+
raise(TypeError,"arguments must be a String, Integer or Enumerable")
|
25
30
|
end
|
26
31
|
end
|
27
32
|
end
|
28
|
-
|
33
|
+
|
34
|
+
#
|
35
|
+
# Initializes the copy of another {CharSet} object.
|
36
|
+
#
|
37
|
+
# @param [CharSet] other
|
38
|
+
# The other {CharSet} object.
|
39
|
+
#
|
40
|
+
def initialize_copy(other)
|
41
|
+
@chars = other.instance_variable_get('@chars').dup
|
42
|
+
end
|
43
|
+
|
29
44
|
#
|
30
|
-
# Creates a new
|
45
|
+
# Creates a new {CharSet}.
|
31
46
|
#
|
32
47
|
# @see #initialize
|
33
48
|
#
|
@@ -46,7 +61,7 @@ module Chars
|
|
46
61
|
# @return [CharSet]
|
47
62
|
# The modified character set.
|
48
63
|
#
|
49
|
-
# @raise [
|
64
|
+
# @raise [TypeError]
|
50
65
|
# The argument was not a {String} or {Integer}.
|
51
66
|
#
|
52
67
|
# @since 0.2.1
|
@@ -55,7 +70,7 @@ module Chars
|
|
55
70
|
case other
|
56
71
|
when String
|
57
72
|
other.each_char do |char|
|
58
|
-
byte =
|
73
|
+
byte = char.ord
|
59
74
|
|
60
75
|
@chars[byte] = char
|
61
76
|
super(byte)
|
@@ -65,7 +80,7 @@ module Chars
|
|
65
80
|
when Integer
|
66
81
|
super(other)
|
67
82
|
else
|
68
|
-
raise(
|
83
|
+
raise(TypeError,"can only append Strings and Integers")
|
69
84
|
end
|
70
85
|
end
|
71
86
|
|
@@ -76,100 +91,113 @@ module Chars
|
|
76
91
|
alias map_bytes map
|
77
92
|
|
78
93
|
#
|
79
|
-
# Determines if a character is contained within the
|
94
|
+
# Determines if a character is contained within the {CharSet}.
|
80
95
|
#
|
81
96
|
# @param [String] char
|
82
97
|
# The character to search for.
|
83
98
|
#
|
84
99
|
# @return [Boolean]
|
85
100
|
# Specifies whether the character is contained within the
|
86
|
-
#
|
101
|
+
# {CharSet}.
|
87
102
|
#
|
88
103
|
def include_char?(char)
|
89
104
|
unless char.empty?
|
90
|
-
@chars.has_value?(char) || include_byte?(
|
105
|
+
@chars.has_value?(char) || include_byte?(char.ord)
|
91
106
|
else
|
92
107
|
false
|
93
108
|
end
|
94
109
|
end
|
95
110
|
|
96
111
|
#
|
97
|
-
# The characters within the
|
112
|
+
# The characters within the {CharSet}.
|
98
113
|
#
|
99
114
|
# @return [Array<String>]
|
100
|
-
# All the characters within the
|
115
|
+
# All the characters within the {CharSet}.
|
101
116
|
#
|
102
117
|
def chars
|
103
118
|
map { |byte| @chars[byte] }
|
104
119
|
end
|
105
120
|
|
106
121
|
#
|
107
|
-
# Iterates over every character within the
|
122
|
+
# Iterates over every character within the {CharSet}.
|
108
123
|
#
|
109
124
|
# @yield [char]
|
110
125
|
# If a block is given, it will be passed each character in the
|
111
|
-
#
|
126
|
+
# {CharSet}.
|
112
127
|
#
|
113
128
|
# @yieldparam [String] char
|
114
|
-
# Each character in the
|
129
|
+
# Each character in the {CharSet}.
|
115
130
|
#
|
116
131
|
# @return [Enumerator]
|
117
132
|
# If no block is given, an enumerator object will be returned.
|
118
133
|
#
|
119
134
|
def each_char
|
120
|
-
return enum_for(
|
135
|
+
return enum_for(__method__) unless block_given?
|
121
136
|
|
122
137
|
each { |byte| yield @chars[byte] }
|
123
138
|
end
|
124
139
|
|
125
140
|
#
|
126
|
-
# Selects characters from the
|
141
|
+
# Selects characters from the {CharSet}.
|
127
142
|
#
|
128
143
|
# @yield [char]
|
129
144
|
# If a block is given, it will be used to select the characters
|
130
|
-
# from the
|
145
|
+
# from the {CharSet}.
|
131
146
|
#
|
132
147
|
# @yieldparam [String] char
|
133
148
|
# The character to select or reject.
|
134
149
|
#
|
135
150
|
# @return [Array<String>]
|
136
|
-
# The selected characters from the
|
151
|
+
# The selected characters from the {CharSet}.
|
137
152
|
#
|
138
153
|
def select_chars(&block)
|
139
154
|
each_char.select(&block)
|
140
155
|
end
|
141
156
|
|
142
157
|
#
|
143
|
-
# Maps the characters of the
|
158
|
+
# Maps the characters of the {CharSet}.
|
144
159
|
#
|
145
160
|
# @yield [char]
|
146
161
|
# The given block will be used to transform the characters within
|
147
|
-
# the
|
162
|
+
# the {CharSet}.
|
148
163
|
#
|
149
164
|
# @yieldparam [String] char
|
150
|
-
# Each character in the
|
165
|
+
# Each character in the {CharSet}.
|
151
166
|
#
|
152
167
|
# @return [Array<String>]
|
153
|
-
# The mapped characters of the
|
168
|
+
# The mapped characters of the {CharSet}.
|
154
169
|
#
|
155
170
|
def map_chars(&block)
|
156
171
|
each_char.map(&block)
|
157
172
|
end
|
158
173
|
|
174
|
+
#
|
175
|
+
# Returns a random byte from the {CharSet}.
|
176
|
+
#
|
177
|
+
# @param [Random, SecureRandom] random
|
178
|
+
# The random number generator to use.
|
159
179
|
#
|
160
180
|
# @return [Integer]
|
161
|
-
# A random byte
|
181
|
+
# A random byte value.
|
162
182
|
#
|
163
|
-
def random_byte
|
164
|
-
self.entries[rand(self.length)]
|
183
|
+
def random_byte(random: Random)
|
184
|
+
self.entries[random.rand(self.length)]
|
165
185
|
end
|
166
186
|
|
187
|
+
#
|
188
|
+
# Returns a random character from the {CharSet}.
|
189
|
+
#
|
190
|
+
# @param [Hash{Symbol => Object}] kwargs
|
191
|
+
# Additional keyword arguments.
|
192
|
+
#
|
193
|
+
# @option kwargs [Random, SecureRandom] :random
|
194
|
+
# The random number generator to use.
|
167
195
|
#
|
168
196
|
# @return [String]
|
169
|
-
# A random char
|
197
|
+
# A random char value.
|
170
198
|
#
|
171
|
-
def random_char
|
172
|
-
@chars[random_byte]
|
199
|
+
def random_char(**kwargs)
|
200
|
+
@chars[random_byte(**kwargs)]
|
173
201
|
end
|
174
202
|
|
175
203
|
#
|
@@ -178,19 +206,27 @@ module Chars
|
|
178
206
|
# @param [Integer] n
|
179
207
|
# Specifies how many times to pass a random byte to the block.
|
180
208
|
#
|
209
|
+
# @param [Hash{Symbol => Object}] kwargs
|
210
|
+
# Additional keyword arguments.
|
211
|
+
#
|
212
|
+
# @option kwargs [Random, SecureRandom] :random
|
213
|
+
# The random number generator to use.
|
214
|
+
#
|
181
215
|
# @yield [byte]
|
182
216
|
# The block will receive the random bytes.
|
183
217
|
#
|
184
218
|
# @yieldparam [Integer] byte
|
185
|
-
# The random byte from the
|
219
|
+
# The random byte from the {CharSet}.
|
186
220
|
#
|
187
221
|
# @return [Enumerator]
|
188
222
|
# If no block is given, an enumerator object will be returned.
|
189
223
|
#
|
190
|
-
def each_random_byte(n,&block)
|
191
|
-
return enum_for(
|
224
|
+
def each_random_byte(n,**kwargs,&block)
|
225
|
+
return enum_for(__method__,n,**kwargs) unless block_given?
|
192
226
|
|
193
|
-
n.times
|
227
|
+
n.times do
|
228
|
+
yield random_byte(**kwargs)
|
229
|
+
end
|
194
230
|
return nil
|
195
231
|
end
|
196
232
|
|
@@ -200,117 +236,309 @@ module Chars
|
|
200
236
|
# @param [Integer] n
|
201
237
|
# Specifies how many times to pass a random character to the block.
|
202
238
|
#
|
239
|
+
# @param [Hash{Symbol => Object}] kwargs
|
240
|
+
# Additional keyword arguments.
|
241
|
+
#
|
242
|
+
# @option kwargs [Random, SecureRandom] :random
|
243
|
+
# The random number generator to use.
|
244
|
+
#
|
203
245
|
# @yield [char]
|
204
246
|
# The block will receive the random characters.
|
205
247
|
#
|
206
248
|
# @yieldparam [String] char
|
207
|
-
# The random character from the
|
249
|
+
# The random character from the {CharSet}.
|
208
250
|
#
|
209
251
|
# @return [Enumerator]
|
210
252
|
# If no block is given, an enumerator object will be returned.
|
211
253
|
#
|
212
|
-
def each_random_char(n,&block)
|
213
|
-
return enum_for(
|
254
|
+
def each_random_char(n,**kwargs,&block)
|
255
|
+
return enum_for(__method__,n,**kwargs) unless block_given?
|
214
256
|
|
215
|
-
each_random_byte(n)
|
257
|
+
each_random_byte(n,**kwargs) do |byte|
|
258
|
+
yield @chars[byte]
|
259
|
+
end
|
216
260
|
end
|
217
261
|
|
218
262
|
#
|
219
|
-
# Creates an Array of random bytes from the
|
263
|
+
# Creates an Array of random bytes from the {CharSet}.
|
220
264
|
#
|
221
265
|
# @param [Integer, Array, Range] length
|
222
266
|
# The length of the Array of random bytes.
|
223
267
|
#
|
268
|
+
# @param [Random, SecureRandom] random
|
269
|
+
# The random number generator to use.
|
270
|
+
#
|
224
271
|
# @return [Array<Integer>]
|
225
272
|
# The randomly selected bytes.
|
226
273
|
#
|
227
|
-
def random_bytes(length)
|
228
|
-
|
229
|
-
|
274
|
+
def random_bytes(length, random: Random)
|
275
|
+
case length
|
276
|
+
when Array
|
277
|
+
Array.new(length.sample(random: random)) do
|
278
|
+
random_byte(random: random)
|
279
|
+
end
|
280
|
+
when Range
|
281
|
+
Array.new(random.rand(length)) do
|
282
|
+
random_byte(random: random)
|
283
|
+
end
|
230
284
|
else
|
231
|
-
Array.new(length) { random_byte }
|
285
|
+
Array.new(length) { random_byte(random: random) }
|
232
286
|
end
|
233
287
|
end
|
234
288
|
|
235
289
|
#
|
236
|
-
# Creates an Array of random non-repeating bytes from the
|
290
|
+
# Creates an Array of random non-repeating bytes from the {CharSet}.
|
237
291
|
#
|
238
292
|
# @param [Integer, Array, Range] length
|
239
293
|
# The length of the Array of random non-repeating bytes.
|
240
294
|
#
|
295
|
+
# @param [Random, SecureRandom] random
|
296
|
+
# The random number generator to use.
|
297
|
+
#
|
241
298
|
# @return [Array<Integer>]
|
242
299
|
# The randomly selected non-repeating bytes.
|
243
300
|
#
|
244
|
-
def random_distinct_bytes(length)
|
245
|
-
|
246
|
-
|
301
|
+
def random_distinct_bytes(length, random: Random)
|
302
|
+
shuffled_bytes = bytes.shuffle(random: random)
|
303
|
+
|
304
|
+
case length
|
305
|
+
when Array
|
306
|
+
shuffled_bytes[0,length.sample(random: random)]
|
307
|
+
when Range
|
308
|
+
shuffled_bytes[0,random.rand(length)]
|
247
309
|
else
|
248
|
-
|
310
|
+
shuffled_bytes[0,length]
|
249
311
|
end
|
250
312
|
end
|
251
313
|
|
252
314
|
#
|
253
|
-
# Creates an Array of random characters from the
|
315
|
+
# Creates an Array of random characters from the {CharSet}.
|
254
316
|
#
|
255
317
|
# @param [Integer, Array, Range] length
|
256
318
|
# The length of the Array of random characters.
|
257
319
|
#
|
320
|
+
# @param [Hash{Symbol => Object}] kwargs
|
321
|
+
# Additional keyword arguments.
|
322
|
+
#
|
323
|
+
# @option kwargs [Random, SecureRandom] :random
|
324
|
+
# The random number generator to use.
|
325
|
+
#
|
258
326
|
# @return [Array<String>]
|
259
327
|
# The randomly selected characters.
|
260
328
|
#
|
261
|
-
def random_chars(length)
|
262
|
-
random_bytes(length).map { |byte| @chars[byte] }
|
329
|
+
def random_chars(length,**kwargs)
|
330
|
+
random_bytes(length,**kwargs).map { |byte| @chars[byte] }
|
263
331
|
end
|
264
332
|
|
265
333
|
#
|
266
334
|
# Creates a String containing randomly selected characters from the
|
267
|
-
#
|
335
|
+
# {CharSet}.
|
268
336
|
#
|
269
337
|
# @param [Integer, Array, Range] length
|
270
338
|
# The length of the String of random characters.
|
271
339
|
#
|
340
|
+
# @param [Hash{Symbol => Object}] kwargs
|
341
|
+
# Additional keyword arguments.
|
342
|
+
#
|
343
|
+
# @option kwargs [Random, SecureRandom] :random
|
344
|
+
# The random number generator to use.
|
345
|
+
#
|
272
346
|
# @return [String]
|
273
347
|
# The String of randomly selected characters.
|
274
348
|
#
|
275
349
|
# @see random_chars
|
276
350
|
#
|
277
|
-
def random_string(length)
|
278
|
-
random_chars(length).join
|
351
|
+
def random_string(length,**kwargs)
|
352
|
+
random_chars(length,**kwargs).join
|
279
353
|
end
|
280
354
|
|
281
355
|
#
|
282
356
|
# Creates an Array of random non-repeating characters from the
|
283
|
-
#
|
357
|
+
# {CharSet}.
|
284
358
|
#
|
285
359
|
# @param [Integer, Array, Range] length
|
286
360
|
# The length of the Array of random non-repeating characters.
|
287
361
|
#
|
362
|
+
# @param [Hash{Symbol => Object}] kwargs
|
363
|
+
# Additional keyword arguments.
|
364
|
+
#
|
365
|
+
# @option kwargs [Random, SecureRandom] :random
|
366
|
+
# The random number generator to use.
|
367
|
+
#
|
288
368
|
# @return [Array<String>]
|
289
369
|
# The randomly selected non-repeating characters.
|
290
370
|
#
|
291
|
-
def random_distinct_chars(length)
|
292
|
-
random_distinct_bytes(length).map { |byte| @chars[byte] }
|
371
|
+
def random_distinct_chars(length,**kwargs)
|
372
|
+
random_distinct_bytes(length,**kwargs).map { |byte| @chars[byte] }
|
293
373
|
end
|
294
374
|
|
295
375
|
#
|
296
376
|
# Creates a String containing randomly selected non-repeating
|
297
|
-
# characters from the
|
377
|
+
# characters from the {CharSet}.
|
298
378
|
#
|
299
379
|
# @param [Integer, Array, Range] length
|
300
380
|
# The length of the String of random non-repeating characters.
|
301
381
|
#
|
382
|
+
# @param [Hash{Symbol => Object}] kwargs
|
383
|
+
# Additional keyword arguments.
|
384
|
+
#
|
385
|
+
# @option kwargs [Random, SecureRandom] :random
|
386
|
+
# The random number generator to use.
|
387
|
+
#
|
302
388
|
# @return [String]
|
303
389
|
# The String of randomly selected non-repeating characters.
|
304
390
|
#
|
305
391
|
# @see random_distinct_chars
|
306
392
|
#
|
307
|
-
def random_distinct_string(length)
|
308
|
-
random_distinct_chars(length).join
|
393
|
+
def random_distinct_string(length,**kwargs)
|
394
|
+
random_distinct_chars(length,**kwargs).join
|
395
|
+
end
|
396
|
+
|
397
|
+
#
|
398
|
+
# Enumerates over all substrings and their indices within the given string,
|
399
|
+
# of minimum length and that are made up of characters from the {CharSet}.
|
400
|
+
#
|
401
|
+
# @param [String] data
|
402
|
+
# The data to find sub-strings within.
|
403
|
+
#
|
404
|
+
# @param [Integer] min_length
|
405
|
+
# The minimum length of sub-strings found within the given data.
|
406
|
+
#
|
407
|
+
# @yield [match, index]
|
408
|
+
# The given block will be passed every matched sub-string and its index.
|
409
|
+
#
|
410
|
+
# @yieldparam [String] match
|
411
|
+
# A sub-string containing the characters from the {CharSet}.
|
412
|
+
#
|
413
|
+
# @yieldparam [Integer] index
|
414
|
+
# The index the sub-string was found at.
|
415
|
+
#
|
416
|
+
# @return [Enumerator]
|
417
|
+
# If no block is given, an Enumerator object will be returned.
|
418
|
+
#
|
419
|
+
# @since 0.3.0
|
420
|
+
#
|
421
|
+
def each_substring_with_index(data, min_length: 4)
|
422
|
+
unless block_given?
|
423
|
+
return enum_for(__method__,data, min_length: min_length)
|
424
|
+
end
|
425
|
+
|
426
|
+
return if data.size < min_length
|
427
|
+
|
428
|
+
index = 0
|
429
|
+
|
430
|
+
match_start = nil
|
431
|
+
match_end = nil
|
432
|
+
|
433
|
+
while index < data.size
|
434
|
+
unless match_start
|
435
|
+
if self.include_char?(data[index])
|
436
|
+
match_start = index
|
437
|
+
end
|
438
|
+
else
|
439
|
+
unless self.include_char?(data[index])
|
440
|
+
match_end = index
|
441
|
+
match_length = (match_end - match_start)
|
442
|
+
|
443
|
+
if match_length >= min_length
|
444
|
+
match = data[match_start,match_length]
|
445
|
+
|
446
|
+
yield match, match_start
|
447
|
+
end
|
448
|
+
|
449
|
+
match_start = match_end = nil
|
450
|
+
end
|
451
|
+
end
|
452
|
+
|
453
|
+
index += 1
|
454
|
+
end
|
455
|
+
|
456
|
+
# yield the remaining match
|
457
|
+
if match_start
|
458
|
+
yield data[match_start, data.size - match_start], match_start
|
459
|
+
end
|
460
|
+
end
|
461
|
+
|
462
|
+
#
|
463
|
+
# Returns an Array of all substrings and their indices within the given
|
464
|
+
# string, of minimum length and that are made up of characters from the
|
465
|
+
# {CharSet}.
|
466
|
+
#
|
467
|
+
# @param [String] data
|
468
|
+
# The data to find sub-strings within.
|
469
|
+
#
|
470
|
+
# @param [Hash{Symbol => Object}] kwargs
|
471
|
+
# Keyword arguments for {#each_substring_with_index}.
|
472
|
+
#
|
473
|
+
# @option kwargs [Integer] :min_length
|
474
|
+
# The minimum length of sub-strings found within the given data.
|
475
|
+
#
|
476
|
+
# @return [Array<(String, Integer)>]
|
477
|
+
# The array of substrings and their indices within the given `data`.
|
478
|
+
#
|
479
|
+
# @see #each_substring_with_index
|
480
|
+
#
|
481
|
+
# @since 0.3.0
|
482
|
+
#
|
483
|
+
def substrings_with_indexes(data,**kwargs)
|
484
|
+
each_substring_with_index(data,**kwargs).to_a
|
485
|
+
end
|
486
|
+
|
487
|
+
#
|
488
|
+
# Enumerates over all substrings within the given string, of minimum length
|
489
|
+
# and that are made up of characters from the {CharSet}.
|
490
|
+
#
|
491
|
+
# @param [String] data
|
492
|
+
# The data to find sub-strings within.
|
493
|
+
#
|
494
|
+
# @param [Hash{Symbol => Object}] kwargs
|
495
|
+
# Keyword arguments for {#each_substring_with_index}.
|
496
|
+
#
|
497
|
+
# @option kwargs [Integer] :min_length
|
498
|
+
# The minimum length of sub-strings found within the given data.
|
499
|
+
#
|
500
|
+
# @return [Enumerator]
|
501
|
+
# If no block is given, an Enumerator object will be returned.
|
502
|
+
#
|
503
|
+
# @see #each_substring_with_index
|
504
|
+
#
|
505
|
+
# @since 0.3.0
|
506
|
+
#
|
507
|
+
def each_substring(data,**kwargs)
|
508
|
+
return enum_for(__method__,data,**kwargs) unless block_given?
|
509
|
+
|
510
|
+
each_substring_with_index(data,**kwargs) do |substring,index|
|
511
|
+
yield substring
|
512
|
+
end
|
513
|
+
end
|
514
|
+
|
515
|
+
#
|
516
|
+
# Returns an Array of all substrings within the given string,
|
517
|
+
# of minimum length and that are made up of characters from the {CharSet}.
|
518
|
+
#
|
519
|
+
# @param [String] data
|
520
|
+
# The data to find sub-strings within.
|
521
|
+
#
|
522
|
+
# @param [Hash{Symbol => Object}] kwargs
|
523
|
+
# Keyword arguments for {#each_substring_with_index}.
|
524
|
+
#
|
525
|
+
# @option kwargs [Integer] :min_length
|
526
|
+
# The minimum length of sub-strings found within the given data.
|
527
|
+
#
|
528
|
+
# @see #each_substring
|
529
|
+
#
|
530
|
+
# @return [Array<String>]
|
531
|
+
# The array of substrings within the given `data`.
|
532
|
+
#
|
533
|
+
# @since 0.3.0
|
534
|
+
#
|
535
|
+
def substrings(data,**kwargs)
|
536
|
+
each_substring(data,**kwargs).to_a
|
309
537
|
end
|
310
538
|
|
311
539
|
#
|
312
540
|
# Finds sub-strings within given data that are made of characters within
|
313
|
-
# the
|
541
|
+
# the {CharSet}.
|
314
542
|
#
|
315
543
|
# @param [String] data
|
316
544
|
# The data to find sub-strings within.
|
@@ -326,52 +554,97 @@ module Chars
|
|
326
554
|
# sub-strings within the data, or to just return the matched
|
327
555
|
# sub-strings themselves.
|
328
556
|
#
|
329
|
-
|
330
|
-
|
557
|
+
# @yield [match,(index)]
|
558
|
+
# The given block will be passed every matched sub-string, and the
|
559
|
+
# optional index.
|
560
|
+
#
|
561
|
+
# @yieldparam [String] match
|
562
|
+
# A sub-string containing the characters from the {CharSet}.
|
563
|
+
#
|
564
|
+
# @yieldparam [Integer] index
|
565
|
+
# The index the sub-string was found at.
|
566
|
+
#
|
567
|
+
# @return [Array, Hash]
|
568
|
+
# If no block is given, an Array or Hash of sub-strings is returned.
|
569
|
+
#
|
570
|
+
# @deprecated
|
571
|
+
# Use {#each_substring_with_index}, {#substrings_with_indexes},
|
572
|
+
# {#each_substring}, or {#substrings} instead.
|
573
|
+
#
|
574
|
+
def strings_in(data,options={},&block)
|
575
|
+
kwargs = {min_length: options.fetch(:length,4)}
|
331
576
|
|
332
|
-
|
577
|
+
unless block
|
578
|
+
if options[:offsets]
|
579
|
+
return Hash[substrings_with_indexes(data,**kwargs)]
|
580
|
+
else
|
581
|
+
return substrings(data,**kwargs)
|
582
|
+
end
|
583
|
+
end
|
333
584
|
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
found[offset] = substring
|
338
|
-
}
|
585
|
+
case block.arity
|
586
|
+
when 2
|
587
|
+
each_substring_with_index(data,**kwargs,&block)
|
339
588
|
else
|
340
|
-
|
341
|
-
found_substring = lambda { |offset,substring|
|
342
|
-
found << substring
|
343
|
-
}
|
589
|
+
each_substring(data,**kwargs,&block)
|
344
590
|
end
|
591
|
+
end
|
345
592
|
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
593
|
+
#
|
594
|
+
# Enumerates through every possible string belonging to the {CharSet} and
|
595
|
+
# of the given length.
|
596
|
+
#
|
597
|
+
# @param [Range, Array, Integer] length
|
598
|
+
# The desired length(s) of each string.
|
599
|
+
#
|
600
|
+
# @yield [string]
|
601
|
+
# The given block will be passed each sequential string.
|
602
|
+
#
|
603
|
+
# @yieldparam [String] string
|
604
|
+
# A string belonging to the {CharSet} and `length` long.
|
605
|
+
#
|
606
|
+
# @return [Enumerator]
|
607
|
+
# If no block is given, an Enumerator will be returned.
|
608
|
+
#
|
609
|
+
# @since 0.3.0
|
610
|
+
#
|
611
|
+
def each_string_of_length(length,&block)
|
612
|
+
return enum_for(__method__,length) unless block
|
355
613
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
614
|
+
case length
|
615
|
+
when Range, Array
|
616
|
+
length.each do |len|
|
617
|
+
StringEnumerator.new(self,len).each(&block)
|
360
618
|
end
|
619
|
+
else
|
620
|
+
StringEnumerator.new(self,length).each(&block)
|
361
621
|
end
|
622
|
+
end
|
362
623
|
|
363
|
-
|
624
|
+
#
|
625
|
+
# Returns an Enumerator that enumerates through every possible string
|
626
|
+
# belonging to the {CharSet} and of the given length.
|
627
|
+
#
|
628
|
+
# @param [Range, Array, Integer] length
|
629
|
+
# The desired length(s) of each string.
|
630
|
+
#
|
631
|
+
# @return [Enumerator]
|
632
|
+
#
|
633
|
+
# @see #each_string_of_length
|
634
|
+
#
|
635
|
+
def strings_of_length(length)
|
636
|
+
each_string_of_length(length)
|
364
637
|
end
|
365
638
|
|
366
639
|
#
|
367
|
-
# Creates a new CharSet object by unioning the
|
368
|
-
#
|
640
|
+
# Creates a new CharSet object by unioning the {CharSet} with another
|
641
|
+
# {CharSet}.
|
369
642
|
#
|
370
643
|
# @param [CharSet, Array, Range] set
|
371
|
-
# The other
|
644
|
+
# The other {CharSet} to union with.
|
372
645
|
#
|
373
646
|
# @return [CharSet]
|
374
|
-
# The unioned
|
647
|
+
# The unioned {CharSet}.
|
375
648
|
#
|
376
649
|
def |(set)
|
377
650
|
set = CharSet.new(set) unless set.kind_of?(CharSet)
|
@@ -383,14 +656,14 @@ module Chars
|
|
383
656
|
|
384
657
|
#
|
385
658
|
# Compares the bytes within a given string with the bytes of the
|
386
|
-
#
|
659
|
+
# {CharSet}.
|
387
660
|
#
|
388
|
-
# @param [String, Enumerable]
|
389
|
-
# The string to compare with the
|
661
|
+
# @param [String, Enumerable] other
|
662
|
+
# The string to compare with the {CharSet}.
|
390
663
|
#
|
391
664
|
# @return [Boolean]
|
392
665
|
# Specifies whether all of the bytes within the given string are
|
393
|
-
# included in the
|
666
|
+
# included in the {CharSet}.
|
394
667
|
#
|
395
668
|
# @example
|
396
669
|
# Chars.alpha === "hello"
|
@@ -417,10 +690,10 @@ module Chars
|
|
417
690
|
alias =~ ===
|
418
691
|
|
419
692
|
#
|
420
|
-
# Inspects the
|
693
|
+
# Inspects the {CharSet}.
|
421
694
|
#
|
422
695
|
# @return [String]
|
423
|
-
# The inspected
|
696
|
+
# The inspected {CharSet}.
|
424
697
|
#
|
425
698
|
def inspect
|
426
699
|
"#<#{self.class.name}: {" + map { |byte|
|
@@ -432,74 +705,10 @@ module Chars
|
|
432
705
|
# to us C programmers
|
433
706
|
'"\0"'
|
434
707
|
else
|
435
|
-
"0x%02x"
|
708
|
+
sprintf("0x%02x",byte)
|
436
709
|
end
|
437
710
|
}.join(', ') + "}>"
|
438
711
|
end
|
439
712
|
|
440
|
-
protected
|
441
|
-
|
442
|
-
if RUBY_VERSION > '1.9.'
|
443
|
-
#
|
444
|
-
# Converts a byte to a character.
|
445
|
-
#
|
446
|
-
# @param [Integer] byte
|
447
|
-
# The byte to convert.
|
448
|
-
#
|
449
|
-
# @return [String]
|
450
|
-
# The character.
|
451
|
-
#
|
452
|
-
# @since 0.2.1
|
453
|
-
#
|
454
|
-
def byte_to_char(byte)
|
455
|
-
byte.chr(Encoding::UTF_8)
|
456
|
-
end
|
457
|
-
|
458
|
-
#
|
459
|
-
# Converts a character to a byte.
|
460
|
-
#
|
461
|
-
# @param [String] char
|
462
|
-
# The character to convert.
|
463
|
-
#
|
464
|
-
# @return [Integer]
|
465
|
-
# The byte.
|
466
|
-
#
|
467
|
-
# @since 0.2.1
|
468
|
-
#
|
469
|
-
def char_to_byte(char)
|
470
|
-
char.ord
|
471
|
-
end
|
472
|
-
else
|
473
|
-
#
|
474
|
-
# Converts a byte to a character.
|
475
|
-
#
|
476
|
-
# @param [Integer] byte
|
477
|
-
# The byte to convert.
|
478
|
-
#
|
479
|
-
# @return [String]
|
480
|
-
# The character.
|
481
|
-
#
|
482
|
-
# @since 0.2.1
|
483
|
-
#
|
484
|
-
def byte_to_char(byte)
|
485
|
-
byte.chr
|
486
|
-
end
|
487
|
-
|
488
|
-
#
|
489
|
-
# Converts a character to a byte.
|
490
|
-
#
|
491
|
-
# @param [String] char
|
492
|
-
# The character to convert.
|
493
|
-
#
|
494
|
-
# @return [Integer]
|
495
|
-
# The byte.
|
496
|
-
#
|
497
|
-
# @since 0.2.1
|
498
|
-
#
|
499
|
-
def char_to_byte(char)
|
500
|
-
char[0]
|
501
|
-
end
|
502
|
-
end
|
503
|
-
|
504
713
|
end
|
505
714
|
end
|