chars 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,85 @@
1
1
  require 'set'
2
2
 
3
3
  module Chars
4
- class CharSet < SortedSet
4
+ class CharSet < Set
5
5
 
6
6
  #
7
7
  # Creates a new CharSet object.
8
8
  #
9
- # @param [Array<String, Integer, Range>] chars
9
+ # @param [Array<String, Integer, Enumerable>] arguments
10
10
  # The chars for the CharSet.
11
11
  #
12
- def initialize(*chars)
12
+ # @raise [TypeError]
13
+ # One of the arguments was not a {String}, {Integer} or `Enumerable`.
14
+ #
15
+ def initialize(*arguments)
13
16
  super()
14
17
 
15
- merge_chars = lambda { |element|
16
- if element.kind_of?(String)
17
- element.each_byte(&merge_chars)
18
- elsif element.kind_of?(Integer)
19
- self << element
20
- elsif element.kind_of?(Enumerable)
21
- element.each(&merge_chars)
18
+ @chars = Hash.new { |hash,key| hash[key] = key.chr(Encoding::UTF_8) }
19
+
20
+ arguments.each do |subset|
21
+ case subset
22
+ when String, Integer
23
+ self << subset
24
+ when Enumerable
25
+ subset.each { |char| self << char }
26
+ else
27
+ raise(TypeError,"arguments must be a String, Integer or Enumerable")
28
+ end
29
+ end
30
+ end
31
+
32
+ #
33
+ # Initializes the copy of another {CharSet} object.
34
+ #
35
+ # @param [CharSet] other
36
+ # The other {CharSet} object.
37
+ #
38
+ def initialize_copy(other)
39
+ @chars = other.instance_variable_get('@chars').dup
40
+ end
41
+
42
+ #
43
+ # Creates a new {CharSet}.
44
+ #
45
+ # @see #initialize
46
+ #
47
+ # @since 0.2.1
48
+ #
49
+ def self.[](*arguments)
50
+ new(*arguments)
51
+ end
52
+
53
+ #
54
+ # Adds a character to the set.
55
+ #
56
+ # @param [String, Integer] other
57
+ # The character(s) or byte to add.
58
+ #
59
+ # @return [CharSet]
60
+ # The modified character set.
61
+ #
62
+ # @raise [TypeError]
63
+ # The argument was not a {String} or {Integer}.
64
+ #
65
+ # @since 0.2.1
66
+ #
67
+ def <<(other)
68
+ case other
69
+ when String
70
+ other.each_char do |char|
71
+ byte = char.ord
72
+
73
+ @chars[byte] = char
74
+ super(byte)
22
75
  end
23
- }
24
76
 
25
- merge_chars.call(chars)
77
+ return self
78
+ when Integer
79
+ super(other)
80
+ else
81
+ raise(TypeError,"can only append Strings and Integers")
82
+ end
26
83
  end
27
84
 
28
85
  alias include_byte? include?
@@ -32,100 +89,104 @@ module Chars
32
89
  alias map_bytes map
33
90
 
34
91
  #
35
- # Determines if a character is contained within the character set.
92
+ # Determines if a character is contained within the {CharSet}.
36
93
  #
37
94
  # @param [String] char
38
95
  # The character to search for.
39
96
  #
40
97
  # @return [Boolean]
41
98
  # Specifies whether the character is contained within the
42
- # character set.
99
+ # {CharSet}.
43
100
  #
44
101
  def include_char?(char)
45
- if char.respond_to?(:each_byte)
46
- char.each_byte.any? { |b| include?(b) }
102
+ unless char.empty?
103
+ @chars.has_value?(char) || include_byte?(char.ord)
47
104
  else
48
105
  false
49
106
  end
50
107
  end
51
108
 
52
109
  #
53
- # The characters within the character set.
110
+ # The characters within the {CharSet}.
54
111
  #
55
112
  # @return [Array<String>]
56
- # All the characters within the character set.
113
+ # All the characters within the {CharSet}.
57
114
  #
58
115
  def chars
59
- map { |b| b.chr }
116
+ map { |byte| @chars[byte] }
60
117
  end
61
118
 
62
119
  #
63
- # Iterates over every character within the character set.
120
+ # Iterates over every character within the {CharSet}.
64
121
  #
65
122
  # @yield [char]
66
123
  # If a block is given, it will be passed each character in the
67
- # character set.
124
+ # {CharSet}.
68
125
  #
69
126
  # @yieldparam [String] char
70
- # Each character in the character set.
127
+ # Each character in the {CharSet}.
71
128
  #
72
129
  # @return [Enumerator]
73
130
  # If no block is given, an enumerator object will be returned.
74
131
  #
75
132
  def each_char
76
- return enum_for(:each_char) unless block_given?
133
+ return enum_for(__method__) unless block_given?
77
134
 
78
- each { |b| yield b.chr }
135
+ each { |byte| yield @chars[byte] }
79
136
  end
80
137
 
81
138
  #
82
- # Selects characters from the character set.
139
+ # Selects characters from the {CharSet}.
83
140
  #
84
141
  # @yield [char]
85
142
  # If a block is given, it will be used to select the characters
86
- # from the character set.
143
+ # from the {CharSet}.
87
144
  #
88
145
  # @yieldparam [String] char
89
146
  # The character to select or reject.
90
147
  #
91
148
  # @return [Array<String>]
92
- # The selected characters from the character set.
149
+ # The selected characters from the {CharSet}.
93
150
  #
94
151
  def select_chars(&block)
95
- chars.select(&block)
152
+ each_char.select(&block)
96
153
  end
97
154
 
98
155
  #
99
- # Maps the characters of the character set.
156
+ # Maps the characters of the {CharSet}.
100
157
  #
101
158
  # @yield [char]
102
159
  # The given block will be used to transform the characters within
103
- # the character set.
160
+ # the {CharSet}.
104
161
  #
105
162
  # @yieldparam [String] char
106
- # Each character in the character set.
163
+ # Each character in the {CharSet}.
107
164
  #
108
165
  # @return [Array<String>]
109
- # The mapped characters of the character set.
166
+ # The mapped characters of the {CharSet}.
110
167
  #
111
168
  def map_chars(&block)
112
- chars.map(&block)
169
+ each_char.map(&block)
113
170
  end
114
171
 
172
+ #
173
+ # Returns a random byte from the {CharSet}.
115
174
  #
116
175
  # @return [Integer]
117
- # A random byte from the character set.
176
+ # A random byte value.
118
177
  #
119
178
  def random_byte
120
179
  self.entries[rand(self.length)]
121
180
  end
122
181
 
182
+ #
183
+ # Returns a random character from the {CharSet}.
123
184
  #
124
185
  # @return [String]
125
- # A random char from the character set.
186
+ # A random char value.
126
187
  #
127
188
  def random_char
128
- random_byte.chr
189
+ @chars[random_byte]
129
190
  end
130
191
 
131
192
  #
@@ -138,13 +199,13 @@ module Chars
138
199
  # The block will receive the random bytes.
139
200
  #
140
201
  # @yieldparam [Integer] byte
141
- # The random byte from the character set.
202
+ # The random byte from the {CharSet}.
142
203
  #
143
204
  # @return [Enumerator]
144
205
  # If no block is given, an enumerator object will be returned.
145
206
  #
146
207
  def each_random_byte(n,&block)
147
- return enum_for(:each_random_byte,n) unless block_given?
208
+ return enum_for(__method__,n) unless block_given?
148
209
 
149
210
  n.times { yield random_byte }
150
211
  return nil
@@ -160,19 +221,19 @@ module Chars
160
221
  # The block will receive the random characters.
161
222
  #
162
223
  # @yieldparam [String] char
163
- # The random character from the character set.
224
+ # The random character from the {CharSet}.
164
225
  #
165
226
  # @return [Enumerator]
166
227
  # If no block is given, an enumerator object will be returned.
167
228
  #
168
229
  def each_random_char(n,&block)
169
- return enum_for(:each_random_char,n) unless block_given?
230
+ return enum_for(__method__,n) unless block_given?
170
231
 
171
- each_random_byte(n) { |b| yield b.chr }
232
+ each_random_byte(n) { |byte| yield @chars[byte] }
172
233
  end
173
234
 
174
235
  #
175
- # Creates an Array of random bytes from the character set.
236
+ # Creates an Array of random bytes from the {CharSet}.
176
237
  #
177
238
  # @param [Integer, Array, Range] length
178
239
  # The length of the Array of random bytes.
@@ -181,15 +242,18 @@ module Chars
181
242
  # The randomly selected bytes.
182
243
  #
183
244
  def random_bytes(length)
184
- if (length.kind_of?(Array) || length.kind_of?(Range))
185
- Array.new(length.sort_by { rand }.first) { random_byte }
245
+ case length
246
+ when Array
247
+ Array.new(length.sample) { random_byte }
248
+ when Range
249
+ Array.new(rand(length)) { random_byte }
186
250
  else
187
251
  Array.new(length) { random_byte }
188
252
  end
189
253
  end
190
254
 
191
255
  #
192
- # Creates an Array of random non-repeating bytes from the character set.
256
+ # Creates an Array of random non-repeating bytes from the {CharSet}.
193
257
  #
194
258
  # @param [Integer, Array, Range] length
195
259
  # The length of the Array of random non-repeating bytes.
@@ -198,15 +262,20 @@ module Chars
198
262
  # The randomly selected non-repeating bytes.
199
263
  #
200
264
  def random_distinct_bytes(length)
201
- if (length.kind_of?(Array) || length.kind_of?(Range))
202
- self.entries.sort_by { rand }.slice(0...(length.sort_by { rand }.first))
265
+ shuffled_bytes = bytes.shuffle
266
+
267
+ case length
268
+ when Array
269
+ shuffled_bytes[0,length.sample]
270
+ when Range
271
+ shuffled_bytes[0,rand(length)]
203
272
  else
204
- self.entries.sort_by { rand }.slice(0...length)
273
+ shuffled_bytes[0,length]
205
274
  end
206
275
  end
207
276
 
208
277
  #
209
- # Creates an Array of random characters from the character set.
278
+ # Creates an Array of random characters from the {CharSet}.
210
279
  #
211
280
  # @param [Integer, Array, Range] length
212
281
  # The length of the Array of random characters.
@@ -215,12 +284,12 @@ module Chars
215
284
  # The randomly selected characters.
216
285
  #
217
286
  def random_chars(length)
218
- random_bytes(length).map { |b| b.chr }
287
+ random_bytes(length).map { |byte| @chars[byte] }
219
288
  end
220
289
 
221
290
  #
222
291
  # Creates a String containing randomly selected characters from the
223
- # character set.
292
+ # {CharSet}.
224
293
  #
225
294
  # @param [Integer, Array, Range] length
226
295
  # The length of the String of random characters.
@@ -236,7 +305,7 @@ module Chars
236
305
 
237
306
  #
238
307
  # Creates an Array of random non-repeating characters from the
239
- # character set.
308
+ # {CharSet}.
240
309
  #
241
310
  # @param [Integer, Array, Range] length
242
311
  # The length of the Array of random non-repeating characters.
@@ -245,12 +314,12 @@ module Chars
245
314
  # The randomly selected non-repeating characters.
246
315
  #
247
316
  def random_distinct_chars(length)
248
- random_distinct_bytes(length).map { |b| b.chr }
317
+ random_distinct_bytes(length).map { |byte| @chars[byte] }
249
318
  end
250
319
 
251
320
  #
252
321
  # Creates a String containing randomly selected non-repeating
253
- # characters from the character set.
322
+ # characters from the {CharSet}.
254
323
  #
255
324
  # @param [Integer, Array, Range] length
256
325
  # The length of the String of random non-repeating characters.
@@ -264,10 +333,9 @@ module Chars
264
333
  random_distinct_chars(length).join
265
334
  end
266
335
 
267
-
268
336
  #
269
337
  # Finds sub-strings within given data that are made of characters within
270
- # the character set.
338
+ # the {CharSet}.
271
339
  #
272
340
  # @param [String] data
273
341
  # The data to find sub-strings within.
@@ -283,77 +351,108 @@ module Chars
283
351
  # sub-strings within the data, or to just return the matched
284
352
  # sub-strings themselves.
285
353
  #
286
- def strings_in(data,options={})
287
- min_length = (options[:length] || 4)
354
+ # @yield [match,(index)]
355
+ # The given block will be passed every matched sub-string, and the
356
+ # optional index.
357
+ #
358
+ # @yieldparam [String] match
359
+ # A sub-string containing the characters from the {CharSet}.
360
+ #
361
+ # @yieldparam [Integer] index
362
+ # The index the sub-string was found at.
363
+ #
364
+ # @return [Array, Hash]
365
+ # If no block is given, an Array or Hash of sub-strings is returned.
366
+ #
367
+ def strings_in(data,options={},&block)
368
+ unless block
369
+ if options[:offsets]
370
+ found = {}
371
+ block = lambda { |offset,substring| found[offset] = substring }
372
+ else
373
+ found = []
374
+ block = lambda { |substring| found << substring }
375
+ end
288
376
 
289
- if options[:offsets]
290
- found = {}
291
- found_substring = lambda { |offset,substring|
292
- found[offset] = substring
293
- }
294
- else
295
- found = []
296
- found_substring = lambda { |offset,substring|
297
- found << substring
298
- }
377
+ strings_in(data,options,&block)
378
+ return found
299
379
  end
300
380
 
301
- return found if data.length < min_length
381
+ min_length = options.fetch(:length,4)
382
+ return if data.length < min_length
302
383
 
303
384
  index = 0
304
385
 
305
386
  while index <= (data.length - min_length)
306
- if self === data[index...(index + min_length)]
387
+ if self === data[index,min_length]
307
388
  sub_index = (index + min_length)
308
389
 
309
- while self.include_char?(data[sub_index..sub_index])
390
+ while self.include_char?(data[sub_index,1])
310
391
  sub_index += 1
311
392
  end
312
393
 
313
- found_substring.call(index,data[index...sub_index])
394
+ match = data[index...sub_index]
395
+
396
+ case block.arity
397
+ when 2
398
+ yield match, index
399
+ else
400
+ yield match
401
+ end
402
+
314
403
  index = sub_index
315
404
  else
316
405
  index += 1
317
406
  end
318
407
  end
319
-
320
- return found
321
408
  end
322
409
 
323
410
  #
324
- # Creates a new CharSet object by unioning the character set with
325
- # another character set.
411
+ # Creates a new CharSet object by unioning the {CharSet} with another
412
+ # {CharSet}.
326
413
  #
327
- # @param [CharSet, Array, Range] other_set
328
- # The other character set to union with.
414
+ # @param [CharSet, Array, Range] set
415
+ # The other {CharSet} to union with.
329
416
  #
330
417
  # @return [CharSet]
331
- # The unioned character sets.
418
+ # The unioned {CharSet}.
332
419
  #
333
- def |(other_set)
334
- super(CharSet.new(other_set))
420
+ def |(set)
421
+ set = CharSet.new(set) unless set.kind_of?(CharSet)
422
+
423
+ return super(set)
335
424
  end
336
425
 
337
426
  alias + |
338
427
 
339
428
  #
340
429
  # Compares the bytes within a given string with the bytes of the
341
- # character set.
430
+ # {CharSet}.
342
431
  #
343
- # @param [String] string
344
- # The string to compare with the character set.
432
+ # @param [String, Enumerable] other
433
+ # The string, or Enumerable of characters and bytes, to compare with the {CharSet}.
345
434
  #
346
435
  # @return [Boolean]
347
436
  # Specifies whether all of the bytes within the given string are
348
- # included in the character set.
437
+ # included in the {CharSet}.
349
438
  #
350
439
  # @example
351
440
  # Chars.alpha === "hello"
352
441
  # # => true
353
442
  #
354
- def ===(string)
355
- if string.respond_to?(:each_byte)
356
- string.each_byte.all? { |b| include?(b) }
443
+ def ===(other)
444
+ case other
445
+ when String
446
+ other.each_char.all? { |char| include_char?(char) }
447
+ when Enumerable
448
+ other.all? do |element|
449
+ case element
450
+ when String
451
+ include_char?(element)
452
+ when Integer
453
+ include_byte?(element)
454
+ end
455
+ end
357
456
  else
358
457
  false
359
458
  end
@@ -362,22 +461,22 @@ module Chars
362
461
  alias =~ ===
363
462
 
364
463
  #
365
- # Inspects the character set.
464
+ # Inspects the {CharSet}.
366
465
  #
367
466
  # @return [String]
368
- # The inspected character set.
467
+ # The inspected {CharSet}.
369
468
  #
370
469
  def inspect
371
- "#<#{self.class.name}: {" + map { |b|
372
- case b
470
+ "#<#{self.class.name}: {" + map { |byte|
471
+ case byte
373
472
  when (0x07..0x0d), (0x20..0x7e)
374
- b.chr.dump
473
+ @chars[byte].dump
375
474
  when 0x00
376
475
  # sly hack to make char-sets more friendly
377
476
  # to us C programmers
378
477
  '"\0"'
379
478
  else
380
- "0x%02x" % b
479
+ sprintf("0x%02x",byte)
381
480
  end
382
481
  }.join(', ') + "}>"
383
482
  end