chars 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +1 -1
- data/.github/workflows/ruby.yml +28 -0
- data/.gitignore +8 -0
- data/.yardopts +1 -1
- data/ChangeLog.md +33 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +1 -1
- data/README.md +130 -50
- data/Rakefile +6 -30
- data/benchmarks/compare.rb +16 -0
- data/benchmarks/substrings.rb +25 -0
- data/chars.gemspec +39 -105
- data/gemspec.yml +9 -5
- data/lib/chars/char_set.rb +371 -162
- data/lib/chars/chars.rb +86 -21
- data/lib/chars/extensions/integer.rb +34 -17
- data/lib/chars/extensions/string.rb +34 -17
- data/lib/chars/string_enumerator.rb +98 -0
- data/lib/chars/version.rb +2 -2
- data/spec/char_set_spec.rb +623 -110
- data/spec/chars_spec.rb +183 -27
- data/spec/extensions/integer_spec.rb +18 -18
- data/spec/extensions/string_spec.rb +20 -18
- data/spec/spec_helper.rb +0 -2
- data/spec/string_enumerator_spec.rb +99 -0
- metadata +57 -77
data/lib/chars/char_set.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
+
require 'chars/string_enumerator'
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
|
3
5
|
module Chars
|
4
|
-
class CharSet <
|
6
|
+
class CharSet < Set
|
5
7
|
|
6
8
|
#
|
7
9
|
# Creates a new CharSet object.
|
@@ -9,10 +11,13 @@ module Chars
|
|
9
11
|
# @param [Array<String, Integer, Enumerable>] arguments
|
10
12
|
# The chars for the CharSet.
|
11
13
|
#
|
14
|
+
# @raise [TypeError]
|
15
|
+
# One of the arguments was not a {String}, {Integer} or `Enumerable`.
|
16
|
+
#
|
12
17
|
def initialize(*arguments)
|
13
18
|
super()
|
14
19
|
|
15
|
-
@chars = Hash.new { |hash,key| hash[key] =
|
20
|
+
@chars = Hash.new { |hash,key| hash[key] = key.chr(Encoding::UTF_8) }
|
16
21
|
|
17
22
|
arguments.each do |subset|
|
18
23
|
case subset
|
@@ -21,13 +26,23 @@ module Chars
|
|
21
26
|
when Enumerable
|
22
27
|
subset.each { |char| self << char }
|
23
28
|
else
|
24
|
-
raise(
|
29
|
+
raise(TypeError,"arguments must be a String, Integer or Enumerable")
|
25
30
|
end
|
26
31
|
end
|
27
32
|
end
|
28
|
-
|
33
|
+
|
34
|
+
#
|
35
|
+
# Initializes the copy of another {CharSet} object.
|
36
|
+
#
|
37
|
+
# @param [CharSet] other
|
38
|
+
# The other {CharSet} object.
|
39
|
+
#
|
40
|
+
def initialize_copy(other)
|
41
|
+
@chars = other.instance_variable_get('@chars').dup
|
42
|
+
end
|
43
|
+
|
29
44
|
#
|
30
|
-
# Creates a new
|
45
|
+
# Creates a new {CharSet}.
|
31
46
|
#
|
32
47
|
# @see #initialize
|
33
48
|
#
|
@@ -46,7 +61,7 @@ module Chars
|
|
46
61
|
# @return [CharSet]
|
47
62
|
# The modified character set.
|
48
63
|
#
|
49
|
-
# @raise [
|
64
|
+
# @raise [TypeError]
|
50
65
|
# The argument was not a {String} or {Integer}.
|
51
66
|
#
|
52
67
|
# @since 0.2.1
|
@@ -55,7 +70,7 @@ module Chars
|
|
55
70
|
case other
|
56
71
|
when String
|
57
72
|
other.each_char do |char|
|
58
|
-
byte =
|
73
|
+
byte = char.ord
|
59
74
|
|
60
75
|
@chars[byte] = char
|
61
76
|
super(byte)
|
@@ -65,7 +80,7 @@ module Chars
|
|
65
80
|
when Integer
|
66
81
|
super(other)
|
67
82
|
else
|
68
|
-
raise(
|
83
|
+
raise(TypeError,"can only append Strings and Integers")
|
69
84
|
end
|
70
85
|
end
|
71
86
|
|
@@ -76,100 +91,113 @@ module Chars
|
|
76
91
|
alias map_bytes map
|
77
92
|
|
78
93
|
#
|
79
|
-
# Determines if a character is contained within the
|
94
|
+
# Determines if a character is contained within the {CharSet}.
|
80
95
|
#
|
81
96
|
# @param [String] char
|
82
97
|
# The character to search for.
|
83
98
|
#
|
84
99
|
# @return [Boolean]
|
85
100
|
# Specifies whether the character is contained within the
|
86
|
-
#
|
101
|
+
# {CharSet}.
|
87
102
|
#
|
88
103
|
def include_char?(char)
|
89
104
|
unless char.empty?
|
90
|
-
@chars.has_value?(char) || include_byte?(
|
105
|
+
@chars.has_value?(char) || include_byte?(char.ord)
|
91
106
|
else
|
92
107
|
false
|
93
108
|
end
|
94
109
|
end
|
95
110
|
|
96
111
|
#
|
97
|
-
# The characters within the
|
112
|
+
# The characters within the {CharSet}.
|
98
113
|
#
|
99
114
|
# @return [Array<String>]
|
100
|
-
# All the characters within the
|
115
|
+
# All the characters within the {CharSet}.
|
101
116
|
#
|
102
117
|
def chars
|
103
118
|
map { |byte| @chars[byte] }
|
104
119
|
end
|
105
120
|
|
106
121
|
#
|
107
|
-
# Iterates over every character within the
|
122
|
+
# Iterates over every character within the {CharSet}.
|
108
123
|
#
|
109
124
|
# @yield [char]
|
110
125
|
# If a block is given, it will be passed each character in the
|
111
|
-
#
|
126
|
+
# {CharSet}.
|
112
127
|
#
|
113
128
|
# @yieldparam [String] char
|
114
|
-
# Each character in the
|
129
|
+
# Each character in the {CharSet}.
|
115
130
|
#
|
116
131
|
# @return [Enumerator]
|
117
132
|
# If no block is given, an enumerator object will be returned.
|
118
133
|
#
|
119
134
|
def each_char
|
120
|
-
return enum_for(
|
135
|
+
return enum_for(__method__) unless block_given?
|
121
136
|
|
122
137
|
each { |byte| yield @chars[byte] }
|
123
138
|
end
|
124
139
|
|
125
140
|
#
|
126
|
-
# Selects characters from the
|
141
|
+
# Selects characters from the {CharSet}.
|
127
142
|
#
|
128
143
|
# @yield [char]
|
129
144
|
# If a block is given, it will be used to select the characters
|
130
|
-
# from the
|
145
|
+
# from the {CharSet}.
|
131
146
|
#
|
132
147
|
# @yieldparam [String] char
|
133
148
|
# The character to select or reject.
|
134
149
|
#
|
135
150
|
# @return [Array<String>]
|
136
|
-
# The selected characters from the
|
151
|
+
# The selected characters from the {CharSet}.
|
137
152
|
#
|
138
153
|
def select_chars(&block)
|
139
154
|
each_char.select(&block)
|
140
155
|
end
|
141
156
|
|
142
157
|
#
|
143
|
-
# Maps the characters of the
|
158
|
+
# Maps the characters of the {CharSet}.
|
144
159
|
#
|
145
160
|
# @yield [char]
|
146
161
|
# The given block will be used to transform the characters within
|
147
|
-
# the
|
162
|
+
# the {CharSet}.
|
148
163
|
#
|
149
164
|
# @yieldparam [String] char
|
150
|
-
# Each character in the
|
165
|
+
# Each character in the {CharSet}.
|
151
166
|
#
|
152
167
|
# @return [Array<String>]
|
153
|
-
# The mapped characters of the
|
168
|
+
# The mapped characters of the {CharSet}.
|
154
169
|
#
|
155
170
|
def map_chars(&block)
|
156
171
|
each_char.map(&block)
|
157
172
|
end
|
158
173
|
|
174
|
+
#
|
175
|
+
# Returns a random byte from the {CharSet}.
|
176
|
+
#
|
177
|
+
# @param [Random, SecureRandom] random
|
178
|
+
# The random number generator to use.
|
159
179
|
#
|
160
180
|
# @return [Integer]
|
161
|
-
# A random byte
|
181
|
+
# A random byte value.
|
162
182
|
#
|
163
|
-
def random_byte
|
164
|
-
self.entries[rand(self.length)]
|
183
|
+
def random_byte(random: Random)
|
184
|
+
self.entries[random.rand(self.length)]
|
165
185
|
end
|
166
186
|
|
187
|
+
#
|
188
|
+
# Returns a random character from the {CharSet}.
|
189
|
+
#
|
190
|
+
# @param [Hash{Symbol => Object}] kwargs
|
191
|
+
# Additional keyword arguments.
|
192
|
+
#
|
193
|
+
# @option kwargs [Random, SecureRandom] :random
|
194
|
+
# The random number generator to use.
|
167
195
|
#
|
168
196
|
# @return [String]
|
169
|
-
# A random char
|
197
|
+
# A random char value.
|
170
198
|
#
|
171
|
-
def random_char
|
172
|
-
@chars[random_byte]
|
199
|
+
def random_char(**kwargs)
|
200
|
+
@chars[random_byte(**kwargs)]
|
173
201
|
end
|
174
202
|
|
175
203
|
#
|
@@ -178,19 +206,27 @@ module Chars
|
|
178
206
|
# @param [Integer] n
|
179
207
|
# Specifies how many times to pass a random byte to the block.
|
180
208
|
#
|
209
|
+
# @param [Hash{Symbol => Object}] kwargs
|
210
|
+
# Additional keyword arguments.
|
211
|
+
#
|
212
|
+
# @option kwargs [Random, SecureRandom] :random
|
213
|
+
# The random number generator to use.
|
214
|
+
#
|
181
215
|
# @yield [byte]
|
182
216
|
# The block will receive the random bytes.
|
183
217
|
#
|
184
218
|
# @yieldparam [Integer] byte
|
185
|
-
# The random byte from the
|
219
|
+
# The random byte from the {CharSet}.
|
186
220
|
#
|
187
221
|
# @return [Enumerator]
|
188
222
|
# If no block is given, an enumerator object will be returned.
|
189
223
|
#
|
190
|
-
def each_random_byte(n,&block)
|
191
|
-
return enum_for(
|
224
|
+
def each_random_byte(n,**kwargs,&block)
|
225
|
+
return enum_for(__method__,n,**kwargs) unless block_given?
|
192
226
|
|
193
|
-
n.times
|
227
|
+
n.times do
|
228
|
+
yield random_byte(**kwargs)
|
229
|
+
end
|
194
230
|
return nil
|
195
231
|
end
|
196
232
|
|
@@ -200,117 +236,309 @@ module Chars
|
|
200
236
|
# @param [Integer] n
|
201
237
|
# Specifies how many times to pass a random character to the block.
|
202
238
|
#
|
239
|
+
# @param [Hash{Symbol => Object}] kwargs
|
240
|
+
# Additional keyword arguments.
|
241
|
+
#
|
242
|
+
# @option kwargs [Random, SecureRandom] :random
|
243
|
+
# The random number generator to use.
|
244
|
+
#
|
203
245
|
# @yield [char]
|
204
246
|
# The block will receive the random characters.
|
205
247
|
#
|
206
248
|
# @yieldparam [String] char
|
207
|
-
# The random character from the
|
249
|
+
# The random character from the {CharSet}.
|
208
250
|
#
|
209
251
|
# @return [Enumerator]
|
210
252
|
# If no block is given, an enumerator object will be returned.
|
211
253
|
#
|
212
|
-
def each_random_char(n,&block)
|
213
|
-
return enum_for(
|
254
|
+
def each_random_char(n,**kwargs,&block)
|
255
|
+
return enum_for(__method__,n,**kwargs) unless block_given?
|
214
256
|
|
215
|
-
each_random_byte(n)
|
257
|
+
each_random_byte(n,**kwargs) do |byte|
|
258
|
+
yield @chars[byte]
|
259
|
+
end
|
216
260
|
end
|
217
261
|
|
218
262
|
#
|
219
|
-
# Creates an Array of random bytes from the
|
263
|
+
# Creates an Array of random bytes from the {CharSet}.
|
220
264
|
#
|
221
265
|
# @param [Integer, Array, Range] length
|
222
266
|
# The length of the Array of random bytes.
|
223
267
|
#
|
268
|
+
# @param [Random, SecureRandom] random
|
269
|
+
# The random number generator to use.
|
270
|
+
#
|
224
271
|
# @return [Array<Integer>]
|
225
272
|
# The randomly selected bytes.
|
226
273
|
#
|
227
|
-
def random_bytes(length)
|
228
|
-
|
229
|
-
|
274
|
+
def random_bytes(length, random: Random)
|
275
|
+
case length
|
276
|
+
when Array
|
277
|
+
Array.new(length.sample(random: random)) do
|
278
|
+
random_byte(random: random)
|
279
|
+
end
|
280
|
+
when Range
|
281
|
+
Array.new(random.rand(length)) do
|
282
|
+
random_byte(random: random)
|
283
|
+
end
|
230
284
|
else
|
231
|
-
Array.new(length) { random_byte }
|
285
|
+
Array.new(length) { random_byte(random: random) }
|
232
286
|
end
|
233
287
|
end
|
234
288
|
|
235
289
|
#
|
236
|
-
# Creates an Array of random non-repeating bytes from the
|
290
|
+
# Creates an Array of random non-repeating bytes from the {CharSet}.
|
237
291
|
#
|
238
292
|
# @param [Integer, Array, Range] length
|
239
293
|
# The length of the Array of random non-repeating bytes.
|
240
294
|
#
|
295
|
+
# @param [Random, SecureRandom] random
|
296
|
+
# The random number generator to use.
|
297
|
+
#
|
241
298
|
# @return [Array<Integer>]
|
242
299
|
# The randomly selected non-repeating bytes.
|
243
300
|
#
|
244
|
-
def random_distinct_bytes(length)
|
245
|
-
|
246
|
-
|
301
|
+
def random_distinct_bytes(length, random: Random)
|
302
|
+
shuffled_bytes = bytes.shuffle(random: random)
|
303
|
+
|
304
|
+
case length
|
305
|
+
when Array
|
306
|
+
shuffled_bytes[0,length.sample(random: random)]
|
307
|
+
when Range
|
308
|
+
shuffled_bytes[0,random.rand(length)]
|
247
309
|
else
|
248
|
-
|
310
|
+
shuffled_bytes[0,length]
|
249
311
|
end
|
250
312
|
end
|
251
313
|
|
252
314
|
#
|
253
|
-
# Creates an Array of random characters from the
|
315
|
+
# Creates an Array of random characters from the {CharSet}.
|
254
316
|
#
|
255
317
|
# @param [Integer, Array, Range] length
|
256
318
|
# The length of the Array of random characters.
|
257
319
|
#
|
320
|
+
# @param [Hash{Symbol => Object}] kwargs
|
321
|
+
# Additional keyword arguments.
|
322
|
+
#
|
323
|
+
# @option kwargs [Random, SecureRandom] :random
|
324
|
+
# The random number generator to use.
|
325
|
+
#
|
258
326
|
# @return [Array<String>]
|
259
327
|
# The randomly selected characters.
|
260
328
|
#
|
261
|
-
def random_chars(length)
|
262
|
-
random_bytes(length).map { |byte| @chars[byte] }
|
329
|
+
def random_chars(length,**kwargs)
|
330
|
+
random_bytes(length,**kwargs).map { |byte| @chars[byte] }
|
263
331
|
end
|
264
332
|
|
265
333
|
#
|
266
334
|
# Creates a String containing randomly selected characters from the
|
267
|
-
#
|
335
|
+
# {CharSet}.
|
268
336
|
#
|
269
337
|
# @param [Integer, Array, Range] length
|
270
338
|
# The length of the String of random characters.
|
271
339
|
#
|
340
|
+
# @param [Hash{Symbol => Object}] kwargs
|
341
|
+
# Additional keyword arguments.
|
342
|
+
#
|
343
|
+
# @option kwargs [Random, SecureRandom] :random
|
344
|
+
# The random number generator to use.
|
345
|
+
#
|
272
346
|
# @return [String]
|
273
347
|
# The String of randomly selected characters.
|
274
348
|
#
|
275
349
|
# @see random_chars
|
276
350
|
#
|
277
|
-
def random_string(length)
|
278
|
-
random_chars(length).join
|
351
|
+
def random_string(length,**kwargs)
|
352
|
+
random_chars(length,**kwargs).join
|
279
353
|
end
|
280
354
|
|
281
355
|
#
|
282
356
|
# Creates an Array of random non-repeating characters from the
|
283
|
-
#
|
357
|
+
# {CharSet}.
|
284
358
|
#
|
285
359
|
# @param [Integer, Array, Range] length
|
286
360
|
# The length of the Array of random non-repeating characters.
|
287
361
|
#
|
362
|
+
# @param [Hash{Symbol => Object}] kwargs
|
363
|
+
# Additional keyword arguments.
|
364
|
+
#
|
365
|
+
# @option kwargs [Random, SecureRandom] :random
|
366
|
+
# The random number generator to use.
|
367
|
+
#
|
288
368
|
# @return [Array<String>]
|
289
369
|
# The randomly selected non-repeating characters.
|
290
370
|
#
|
291
|
-
def random_distinct_chars(length)
|
292
|
-
random_distinct_bytes(length).map { |byte| @chars[byte] }
|
371
|
+
def random_distinct_chars(length,**kwargs)
|
372
|
+
random_distinct_bytes(length,**kwargs).map { |byte| @chars[byte] }
|
293
373
|
end
|
294
374
|
|
295
375
|
#
|
296
376
|
# Creates a String containing randomly selected non-repeating
|
297
|
-
# characters from the
|
377
|
+
# characters from the {CharSet}.
|
298
378
|
#
|
299
379
|
# @param [Integer, Array, Range] length
|
300
380
|
# The length of the String of random non-repeating characters.
|
301
381
|
#
|
382
|
+
# @param [Hash{Symbol => Object}] kwargs
|
383
|
+
# Additional keyword arguments.
|
384
|
+
#
|
385
|
+
# @option kwargs [Random, SecureRandom] :random
|
386
|
+
# The random number generator to use.
|
387
|
+
#
|
302
388
|
# @return [String]
|
303
389
|
# The String of randomly selected non-repeating characters.
|
304
390
|
#
|
305
391
|
# @see random_distinct_chars
|
306
392
|
#
|
307
|
-
def random_distinct_string(length)
|
308
|
-
random_distinct_chars(length).join
|
393
|
+
def random_distinct_string(length,**kwargs)
|
394
|
+
random_distinct_chars(length,**kwargs).join
|
395
|
+
end
|
396
|
+
|
397
|
+
#
|
398
|
+
# Enumerates over all substrings and their indices within the given string,
|
399
|
+
# of minimum length and that are made up of characters from the {CharSet}.
|
400
|
+
#
|
401
|
+
# @param [String] data
|
402
|
+
# The data to find sub-strings within.
|
403
|
+
#
|
404
|
+
# @param [Integer] min_length
|
405
|
+
# The minimum length of sub-strings found within the given data.
|
406
|
+
#
|
407
|
+
# @yield [match, index]
|
408
|
+
# The given block will be passed every matched sub-string and its index.
|
409
|
+
#
|
410
|
+
# @yieldparam [String] match
|
411
|
+
# A sub-string containing the characters from the {CharSet}.
|
412
|
+
#
|
413
|
+
# @yieldparam [Integer] index
|
414
|
+
# The index the sub-string was found at.
|
415
|
+
#
|
416
|
+
# @return [Enumerator]
|
417
|
+
# If no block is given, an Enumerator object will be returned.
|
418
|
+
#
|
419
|
+
# @since 0.3.0
|
420
|
+
#
|
421
|
+
def each_substring_with_index(data, min_length: 4)
|
422
|
+
unless block_given?
|
423
|
+
return enum_for(__method__,data, min_length: min_length)
|
424
|
+
end
|
425
|
+
|
426
|
+
return if data.size < min_length
|
427
|
+
|
428
|
+
index = 0
|
429
|
+
|
430
|
+
match_start = nil
|
431
|
+
match_end = nil
|
432
|
+
|
433
|
+
while index < data.size
|
434
|
+
unless match_start
|
435
|
+
if self.include_char?(data[index])
|
436
|
+
match_start = index
|
437
|
+
end
|
438
|
+
else
|
439
|
+
unless self.include_char?(data[index])
|
440
|
+
match_end = index
|
441
|
+
match_length = (match_end - match_start)
|
442
|
+
|
443
|
+
if match_length >= min_length
|
444
|
+
match = data[match_start,match_length]
|
445
|
+
|
446
|
+
yield match, match_start
|
447
|
+
end
|
448
|
+
|
449
|
+
match_start = match_end = nil
|
450
|
+
end
|
451
|
+
end
|
452
|
+
|
453
|
+
index += 1
|
454
|
+
end
|
455
|
+
|
456
|
+
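# yield the remaining match (NOTE(review): unlike matches found mid-string, this trailing match is not checked against min_length)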
|
457
|
+
if match_start
|
458
|
+
yield data[match_start, data.size - match_start], match_start
|
459
|
+
end
|
460
|
+
end
|
461
|
+
|
462
|
+
#
|
463
|
+
# Returns an Array of all substrings and their indices within the given
|
464
|
+
# string, of minimum length and that are made up of characters from the
|
465
|
+
# {CharSet}.
|
466
|
+
#
|
467
|
+
# @param [String] data
|
468
|
+
# The data to find sub-strings within.
|
469
|
+
#
|
470
|
+
# @param [Hash{Symbol => Object}] kwargs
|
471
|
+
# Keyword arguments for {#each_substring_with_index}.
|
472
|
+
#
|
473
|
+
# @option kwargs [Integer] :min_length
|
474
|
+
# The minimum length of sub-strings found within the given data.
|
475
|
+
#
|
476
|
+
# @return [Array<(String, Integer)>]
|
477
|
+
# The array of substrings and their indices within the given `data`.
|
478
|
+
#
|
479
|
+
# @see #each_substring_with_index
|
480
|
+
#
|
481
|
+
# @since 0.3.0
|
482
|
+
#
|
483
|
+
def substrings_with_indexes(data,**kwargs)
|
484
|
+
each_substring_with_index(data,**kwargs).to_a
|
485
|
+
end
|
486
|
+
|
487
|
+
#
|
488
|
+
# Enumerates over all substrings within the given string, of minimum length
|
489
|
+
# and that are made up of characters from the {CharSet}.
|
490
|
+
#
|
491
|
+
# @param [String] data
|
492
|
+
# The data to find sub-strings within.
|
493
|
+
#
|
494
|
+
# @param [Hash{Symbol => Object}] kwargs
|
495
|
+
# Keyword arguments for {#each_substring_with_index}.
|
496
|
+
#
|
497
|
+
# @option kwargs [Integer] :min_length
|
498
|
+
# The minimum length of sub-strings found within the given data.
|
499
|
+
#
|
500
|
+
# @return [Enumerator]
|
501
|
+
# If no block is given, an Enumerator object will be returned.
|
502
|
+
#
|
503
|
+
# @see #each_substring_with_index
|
504
|
+
#
|
505
|
+
# @since 0.3.0
|
506
|
+
#
|
507
|
+
def each_substring(data,**kwargs)
|
508
|
+
return enum_for(__method__,data,**kwargs) unless block_given?
|
509
|
+
|
510
|
+
each_substring_with_index(data,**kwargs) do |substring,index|
|
511
|
+
yield substring
|
512
|
+
end
|
513
|
+
end
|
514
|
+
|
515
|
+
#
|
516
|
+
# Returns an Array of all substrings within the given string,
|
517
|
+
# of minimum length and that are made up of characters from the {CharSet}.
|
518
|
+
#
|
519
|
+
# @param [String] data
|
520
|
+
# The data to find sub-strings within.
|
521
|
+
#
|
522
|
+
# @param [Hash{Symbol => Object}] kwargs
|
523
|
+
# Keyword arguments for {#each_substring_with_index}.
|
524
|
+
#
|
525
|
+
# @option kwargs [Integer] :min_length
|
526
|
+
# The minimum length of sub-strings found within the given data.
|
527
|
+
#
|
528
|
+
# @see #each_substring
|
529
|
+
#
|
530
|
+
# @return [Array<String>]
|
531
|
+
# The array of substrings within the given `data`.
|
532
|
+
#
|
533
|
+
# @since 0.3.0
|
534
|
+
#
|
535
|
+
def substrings(data,**kwargs)
|
536
|
+
each_substring(data,**kwargs).to_a
|
309
537
|
end
|
310
538
|
|
311
539
|
#
|
312
540
|
# Finds sub-strings within given data that are made of characters within
|
313
|
-
# the
|
541
|
+
# the {CharSet}.
|
314
542
|
#
|
315
543
|
# @param [String] data
|
316
544
|
# The data to find sub-strings within.
|
@@ -326,52 +554,97 @@ module Chars
|
|
326
554
|
# sub-strings within the data, or to just return the matched
|
327
555
|
# sub-strings themselves.
|
328
556
|
#
|
329
|
-
|
330
|
-
|
557
|
+
# @yield [match,(index)]
|
558
|
+
# The given block will be passed every matched sub-string, and the
|
559
|
+
# optional index.
|
560
|
+
#
|
561
|
+
# @yieldparam [String] match
|
562
|
+
# A sub-string containing the characters from the {CharSet}.
|
563
|
+
#
|
564
|
+
# @yieldparam [Integer] index
|
565
|
+
# The index the sub-string was found at.
|
566
|
+
#
|
567
|
+
# @return [Array, Hash]
|
568
|
+
# If no block is given, an Array or Hash of sub-strings is returned.
|
569
|
+
#
|
570
|
+
# @deprecated
|
571
|
+
# Use {#each_substring_with_index}, {#substrings_with_indexes},
|
572
|
+
# {#each_substring}, or {#substrings} instead.
|
573
|
+
#
|
574
|
+
def strings_in(data,options={},&block)
|
575
|
+
kwargs = {min_length: options.fetch(:length,4)}
|
331
576
|
|
332
|
-
|
577
|
+
unless block
|
578
|
+
if options[:offsets]
|
579
|
+
return Hash[substrings_with_indexes(data,**kwargs)]
|
580
|
+
else
|
581
|
+
return substrings(data,**kwargs)
|
582
|
+
end
|
583
|
+
end
|
333
584
|
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
found[offset] = substring
|
338
|
-
}
|
585
|
+
case block.arity
|
586
|
+
when 2
|
587
|
+
each_substring_with_index(data,**kwargs,&block)
|
339
588
|
else
|
340
|
-
|
341
|
-
found_substring = lambda { |offset,substring|
|
342
|
-
found << substring
|
343
|
-
}
|
589
|
+
each_substring(data,**kwargs,&block)
|
344
590
|
end
|
591
|
+
end
|
345
592
|
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
593
|
+
#
|
594
|
+
# Enumerates through every possible string belonging to the {CharSet} and
|
595
|
+
# of the given length.
|
596
|
+
#
|
597
|
+
# @param [Range, Array, Integer] length
|
598
|
+
# The desired length(s) of each string.
|
599
|
+
#
|
600
|
+
# @yield [string]
|
601
|
+
# The given block will be passed each sequential string.
|
602
|
+
#
|
603
|
+
# @yieldparam [String] string
|
604
|
+
# A string belonging to the {CharSet} and `length` long.
|
605
|
+
#
|
606
|
+
# @return [Enumerator]
|
607
|
+
# If no block is given, an Enumerator will be returned.
|
608
|
+
#
|
609
|
+
# @since 0.3.0
|
610
|
+
#
|
611
|
+
def each_string_of_length(length,&block)
|
612
|
+
return enum_for(__method__,length) unless block
|
355
613
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
614
|
+
case length
|
615
|
+
when Range, Array
|
616
|
+
length.each do |len|
|
617
|
+
StringEnumerator.new(self,len).each(&block)
|
360
618
|
end
|
619
|
+
else
|
620
|
+
StringEnumerator.new(self,length).each(&block)
|
361
621
|
end
|
622
|
+
end
|
362
623
|
|
363
|
-
|
624
|
+
#
|
625
|
+
# Returns an Enumerator that enumerates through every possible string
|
626
|
+
# belonging to the {CharSet} and of the given length.
|
627
|
+
#
|
628
|
+
# @param [Range, Array, Integer] length
|
629
|
+
# The desired length(s) of each string.
|
630
|
+
#
|
631
|
+
# @return [Enumerator]
|
632
|
+
#
|
633
|
+
# @see #each_string_of_length
|
634
|
+
#
|
635
|
+
def strings_of_length(length)
|
636
|
+
each_string_of_length(length)
|
364
637
|
end
|
365
638
|
|
366
639
|
#
|
367
|
-
# Creates a new CharSet object by unioning the
|
368
|
-
#
|
640
|
+
# Creates a new CharSet object by unioning the {CharSet} with another
|
641
|
+
# {CharSet}.
|
369
642
|
#
|
370
643
|
# @param [CharSet, Array, Range] set
|
371
|
-
# The other
|
644
|
+
# The other {CharSet} to union with.
|
372
645
|
#
|
373
646
|
# @return [CharSet]
|
374
|
-
# The unioned
|
647
|
+
# The unioned {CharSet}.
|
375
648
|
#
|
376
649
|
def |(set)
|
377
650
|
set = CharSet.new(set) unless set.kind_of?(CharSet)
|
@@ -383,14 +656,14 @@ module Chars
|
|
383
656
|
|
384
657
|
#
|
385
658
|
# Compares the bytes within a given string with the bytes of the
|
386
|
-
#
|
659
|
+
# {CharSet}.
|
387
660
|
#
|
388
|
-
# @param [String, Enumerable]
|
389
|
-
# The string to compare with the
|
661
|
+
# @param [String, Enumerable] other
|
662
|
+
# The string to compare with the {CharSet}.
|
390
663
|
#
|
391
664
|
# @return [Boolean]
|
392
665
|
# Specifies whether all of the bytes within the given string are
|
393
|
-
# included in the
|
666
|
+
# included in the {CharSet}.
|
394
667
|
#
|
395
668
|
# @example
|
396
669
|
# Chars.alpha === "hello"
|
@@ -417,10 +690,10 @@ module Chars
|
|
417
690
|
alias =~ ===
|
418
691
|
|
419
692
|
#
|
420
|
-
# Inspects the
|
693
|
+
# Inspects the {CharSet}.
|
421
694
|
#
|
422
695
|
# @return [String]
|
423
|
-
# The inspected
|
696
|
+
# The inspected {CharSet}.
|
424
697
|
#
|
425
698
|
def inspect
|
426
699
|
"#<#{self.class.name}: {" + map { |byte|
|
@@ -432,74 +705,10 @@ module Chars
|
|
432
705
|
# to us C programmers
|
433
706
|
'"\0"'
|
434
707
|
else
|
435
|
-
"0x%02x"
|
708
|
+
sprintf("0x%02x",byte)
|
436
709
|
end
|
437
710
|
}.join(', ') + "}>"
|
438
711
|
end
|
439
712
|
|
440
|
-
protected
|
441
|
-
|
442
|
-
if RUBY_VERSION > '1.9.'
|
443
|
-
#
|
444
|
-
# Converts a byte to a character.
|
445
|
-
#
|
446
|
-
# @param [Integer] byte
|
447
|
-
# The byte to convert.
|
448
|
-
#
|
449
|
-
# @return [String]
|
450
|
-
# The character.
|
451
|
-
#
|
452
|
-
# @since 0.2.1
|
453
|
-
#
|
454
|
-
def byte_to_char(byte)
|
455
|
-
byte.chr(Encoding::UTF_8)
|
456
|
-
end
|
457
|
-
|
458
|
-
#
|
459
|
-
# Converts a character to a byte.
|
460
|
-
#
|
461
|
-
# @param [String] char
|
462
|
-
# The character to convert.
|
463
|
-
#
|
464
|
-
# @return [Integer]
|
465
|
-
# The byte.
|
466
|
-
#
|
467
|
-
# @since 0.2.1
|
468
|
-
#
|
469
|
-
def char_to_byte(char)
|
470
|
-
char.ord
|
471
|
-
end
|
472
|
-
else
|
473
|
-
#
|
474
|
-
# Converts a byte to a character.
|
475
|
-
#
|
476
|
-
# @param [Integer] byte
|
477
|
-
# The byte to convert.
|
478
|
-
#
|
479
|
-
# @return [String]
|
480
|
-
# The character.
|
481
|
-
#
|
482
|
-
# @since 0.2.1
|
483
|
-
#
|
484
|
-
def byte_to_char(byte)
|
485
|
-
byte.chr
|
486
|
-
end
|
487
|
-
|
488
|
-
#
|
489
|
-
# Converts a character to a byte.
|
490
|
-
#
|
491
|
-
# @param [String] char
|
492
|
-
# The character to convert.
|
493
|
-
#
|
494
|
-
# @return [Integer]
|
495
|
-
# The byte.
|
496
|
-
#
|
497
|
-
# @since 0.2.1
|
498
|
-
#
|
499
|
-
def char_to_byte(char)
|
500
|
-
char[0]
|
501
|
-
end
|
502
|
-
end
|
503
|
-
|
504
713
|
end
|
505
714
|
end
|