chars 0.1.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +4 -0
- data/.gemspec +0 -0
- data/.github/workflows/ruby.yml +28 -0
- data/.gitignore +8 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +68 -0
- data/Gemfile +11 -0
- data/LICENSE.txt +22 -0
- data/README.md +98 -0
- data/Rakefile +9 -11
- data/benchmarks/compare.rb +16 -0
- data/benchmarks/strings_in.rb +23 -0
- data/chars.gemspec +60 -0
- data/gemspec.yml +16 -0
- data/lib/chars/char_set.rb +390 -93
- data/lib/chars/chars.rb +98 -31
- data/lib/chars/extensions/integer.rb +168 -15
- data/lib/chars/extensions/string.rb +159 -0
- data/lib/chars/version.rb +2 -2
- data/spec/char_set_spec.rb +125 -97
- data/spec/chars_spec.rb +17 -68
- data/spec/{integer_spec.rb → extensions/integer_spec.rb} +25 -17
- data/spec/{string_spec.rb → extensions/string_spec.rb} +26 -16
- data/spec/spec_helper.rb +1 -4
- metadata +61 -58
- data/History.txt +0 -28
- data/Manifest.txt +0 -18
- data/README.txt +0 -104
- data/TODO.txt +0 -13
- data/tasks/spec.rb +0 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a22fd4d88e24c4f378880de5e324ac4acc4ff197c541baa1b840a8852d420afa
|
4
|
+
data.tar.gz: ad9c7a38b080490366aaf4142eb642224d551da165ddeedfb4fc1fd0e6bf0728
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5886283079f3bfcf060facf0b0520a51f75066b0301a87750d262e1e4326fad164e541c9580aad1a3f86810430732b2e1933b2ccee26eda6667d0da2aaf2bf00
|
7
|
+
data.tar.gz: 0313dcedc0872cc15f84804bdea985bf726040b48d48018084800c8e07f4224267b5802df4da9d68c5ce79ff0239e4af7138db522fa8523647251524d51e6ba1
|
data/.document
ADDED
data/.gemspec
ADDED
File without changes
|
@@ -0,0 +1,28 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [ push, pull_request ]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
tests:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
strategy:
|
9
|
+
fail-fast: false
|
10
|
+
matrix:
|
11
|
+
ruby:
|
12
|
+
- 2.4
|
13
|
+
- 2.5
|
14
|
+
- 2.6
|
15
|
+
- 2.7
|
16
|
+
- 3.0
|
17
|
+
- jruby
|
18
|
+
name: Ruby ${{ matrix.ruby }}
|
19
|
+
steps:
|
20
|
+
- uses: actions/checkout@v2
|
21
|
+
- name: Set up Ruby
|
22
|
+
uses: ruby/setup-ruby@v1
|
23
|
+
with:
|
24
|
+
ruby-version: ${{ matrix.ruby }}
|
25
|
+
- name: Install dependencies
|
26
|
+
run: bundle install --jobs 4 --retry 3
|
27
|
+
- name: Run tests
|
28
|
+
run: bundle exec rake test
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format documentation
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown --title 'Chars Documentation' --protected --quiet
|
data/ChangeLog.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
### 0.2.3 / 2020-12-25
|
2
|
+
|
3
|
+
* Change {Chars::CharSet} to inherit from `Set`, instead of `SortedSet`.
|
4
|
+
|
5
|
+
### 0.2.2 / 2012-05-28
|
6
|
+
|
7
|
+
* {Chars::CharSet#initialize} now raises a TypeError when given arguments
|
8
|
+
that were neither a `String`, `Integer` nor `Enumerable`.
|
9
|
+
* Allow {Chars::CharSet#strings_in} to yield results as they are found.
|
10
|
+
* Improved the performance of {Chars::CharSet#strings_in} when operating on
|
11
|
+
small Strings.
|
12
|
+
* Replaced ore-tasks with
|
13
|
+
[rubygems-tasks](https://github.com/postmodern/rubygems-tasks#readme).
|
14
|
+
|
15
|
+
### 0.2.1 / 2011-06-22
|
16
|
+
|
17
|
+
* Added {Chars::CharSet.[]}
|
18
|
+
* Added {Chars::CharSet#<<}.
|
19
|
+
* Added {Chars::CharSet#byte_to_char}.
|
20
|
+
* Added {Chars::CharSet#char_to_byte}.
|
21
|
+
* Added a cache of characters of the bytes within {Chars::CharSet}.
|
22
|
+
* Use `String#each_char` to distinguish Unicode from ASCII.
|
23
|
+
|
24
|
+
### 0.2.0 / 2010-10-27
|
25
|
+
|
26
|
+
* Make sure all enumerable methods in {Chars::CharSet} return an
|
27
|
+
`Enumerator` object if no block is given.
|
28
|
+
|
29
|
+
### 0.1.2 / 2009-09-21
|
30
|
+
|
31
|
+
* Require Hoe >= 2.3.3.
|
32
|
+
* Require YARD >= 0.2.3.5.
|
33
|
+
* Require RSpec >= 1.2.8.
|
34
|
+
* Added Chars.visible and Chars::VISIBLE (thanks flatline).
|
35
|
+
* Added CharSet#random_distinct_bytes, CharSet#random_distinct_chars,
|
36
|
+
and CharSet#random_distinct_string (thanks flatline).
|
37
|
+
* Use 'hoe/signing' for signed RubyGems.
|
38
|
+
* Moved to YARD based documentation.
|
39
|
+
* All specs now pass on JRuby 1.3.1.
|
40
|
+
|
41
|
+
### 0.1.1 / 2009-04-01
|
42
|
+
|
43
|
+
* Renamed CharSet#=~ to CharSet#===.
|
44
|
+
* Added an alias from CharSet#=~ to CharSet#===.
|
45
|
+
|
46
|
+
### 0.1.0 / 2009-03-16
|
47
|
+
|
48
|
+
* Initial release.
|
49
|
+
* Provides character sets for:
|
50
|
+
* Numeric
|
51
|
+
* Octal
|
52
|
+
* Uppercase Hexadecimal
|
53
|
+
* Lowercase Hexadecimal
|
54
|
+
* Hexadecimal
|
55
|
+
* Uppercase Alpha
|
56
|
+
* Lowercase Alpha
|
57
|
+
* Alpha
|
58
|
+
* Alpha-numeric
|
59
|
+
* Punctuation
|
60
|
+
* Symbols
|
61
|
+
* Space
|
62
|
+
* Printable
|
63
|
+
* Control
|
64
|
+
* ASCII
|
65
|
+
* Provides convenience methods for testing whether a String or Integer
|
66
|
+
belongs to a certain character set.
|
67
|
+
* Supports random text generation using specific character sets.
|
68
|
+
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009-2012 Hal Brodigan
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# Chars
|
2
|
+
|
3
|
+
* [Source](https://github.com/postmodern/chars#readme)
|
4
|
+
* [Issues](https://github.com/postmodern/chars/issues)
|
5
|
+
* [Documentation](http://rubydoc.info/gems/chars)
|
6
|
+
* [Email](mailto:postmodern.mod3@gmail.com)
|
7
|
+
|
8
|
+
## Description
|
9
|
+
|
10
|
+
Chars is a Ruby library for working with various character sets,
|
11
|
+
recognizing text and generating random text from specific character sets.
|
12
|
+
|
13
|
+
## Features
|
14
|
+
|
15
|
+
* Provides character sets for:
|
16
|
+
* Numeric ('0' - '9')
|
17
|
+
* Octal ('0' - '7')
|
18
|
+
* Uppercase Hexadecimal ('0' - '9', 'A' - 'F')
|
19
|
+
* Lowercase Hexadecimal ('0' - '9', 'a' - 'f')
|
20
|
+
* Hexadecimal ('0' - '9', 'a' - 'f', 'A' - 'F')
|
21
|
+
* Uppercase Alpha ('A' - 'Z')
|
22
|
+
* Lowercase Alpha ('a' - 'z')
|
23
|
+
* Alpha ('a' - 'z', 'A' - 'Z')
|
24
|
+
* Alpha-numeric ('0' - '9', 'a' - 'z', 'A' - 'Z')
|
25
|
+
* Punctuation (' ', '\'', '"', '`', ',', ';', ':', '~', '-', '(', ')',
|
26
|
+
'[', ']', '{', '}', '.', '?', '!')
|
27
|
+
* Symbols (' ', '\'', '"', '`', ',', ';', ':', '~', '-', '(', ')',
|
28
|
+
'[', ']', '{', '}', '.', '?', '!', '@', '#', '$', '%', '^', '&', '*',
|
29
|
+
'_', '+', '=', '|', '\\', '<', '>', '/')
|
30
|
+
* Space (' ', '\f', '\n', '\r', '\t', '\v')
|
31
|
+
* Visible ('0' - '9', 'a' - 'z', 'A' - 'Z', '\'', '"', '`', ',',
|
32
|
+
';', ':', '~', '-', '(', ')', '[', ']', '{', '}', '.', '?', '!', '@',
|
33
|
+
'#', '$', '%', '^', '&', '*', '_', '+', '=', '|', '\\', '<', '>', '/')
|
34
|
+
* Printable ('0' - '9', 'a' - 'z', 'A' - 'Z', ' ', '\'', '"', '`', ',',
|
35
|
+
';', ':', '~', '-', '(', ')', '[', ']', '{', '}', '.', '?', '!', '@',
|
36
|
+
'#', '$', '%', '^', '&', '*', '_', '+', '=', '|', '\\', '<', '>', '/',
|
37
|
+
'\f', '\n', '\r', '\t', '\v')
|
38
|
+
* Control ('\x00' - '\x1f', '\x7f')
|
39
|
+
* Signed ASCII ('\x00' - '\x7f')
|
40
|
+
* ASCII ('\x00' - '\xff')
|
41
|
+
|
42
|
+
## Examples
|
43
|
+
|
44
|
+
Determine whether a byte belongs to a character set:
|
45
|
+
|
46
|
+
0x41.alpha?
|
47
|
+
# => true
|
48
|
+
|
49
|
+
Determine whether a String belongs to a character set:
|
50
|
+
|
51
|
+
"22e1c0".hex?
|
52
|
+
# => true
|
53
|
+
|
54
|
+
Find all sub-strings that belong to a character set within a String:
|
55
|
+
|
56
|
+
ls = File.read('/bin/ls')
|
57
|
+
Chars.printable.strings_in(ls)
|
58
|
+
# => ["/lib64/ld-linux-x86-64.so.2", "KIq/", "5J~!", "%L~!", ...]
|
59
|
+
|
60
|
+
Return a random character from the set of all characters:
|
61
|
+
|
62
|
+
Chars.all.random_char
|
63
|
+
# => "\x94"
|
64
|
+
|
65
|
+
Return a random Array of characters from the alpha-numeric character set:
|
66
|
+
|
67
|
+
Chars.alpha_numeric.random_chars(10)
|
68
|
+
# => ["Q", "N", "S", "4", "x", "z", "3", "M", "F", "F"]
|
69
|
+
|
70
|
+
Return a random Array of a random length of unique characters from the
|
71
|
+
visible character set:
|
72
|
+
|
73
|
+
Chars.visible.random_distinct_chars(1..10)
|
74
|
+
# => ["S", "l", "o", "8", "'", "q"]
|
75
|
+
|
76
|
+
Return a random String from the set of all characters:
|
77
|
+
|
78
|
+
Chars.all.random_string(10)
|
79
|
+
# => "\xc2h\xad\xccm7\x1e6J\x13"
|
80
|
+
|
81
|
+
Return a random String with a random length between 5 and 10, from the
|
82
|
+
set of space characters:
|
83
|
+
|
84
|
+
Chars.space.random_string(5..10)
|
85
|
+
# => "\r\v\n\t\n\f"
|
86
|
+
|
87
|
+
## Requirements
|
88
|
+
|
89
|
+
* [ruby](http://www.ruby-lang.org/) >= 1.8.7
|
90
|
+
|
91
|
+
## Install
|
92
|
+
|
93
|
+
$ sudo gem install chars
|
94
|
+
|
95
|
+
## License
|
96
|
+
|
97
|
+
See {file:LICENSE.txt} for license information.
|
98
|
+
|
data/Rakefile
CHANGED
@@ -1,14 +1,12 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
|
-
require 'hoe'
|
5
|
-
require './tasks/spec.rb'
|
6
|
-
require './lib/chars/version.rb'
|
7
2
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
3
|
+
require 'rubygems/tasks'
|
4
|
+
Gem::Tasks.new
|
5
|
+
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
RSpec::Core::RakeTask.new
|
8
|
+
task :test => :spec
|
9
|
+
task :default => :spec
|
13
10
|
|
14
|
-
|
11
|
+
require 'yard'
|
12
|
+
YARD::Rake::YardocTask.new
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << File.expand_path(File.join(File.dirname(__FILE__),'..','lib'))
|
4
|
+
|
5
|
+
require 'chars'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
CHARSET = Chars::ALPHA_NUMERIC
|
9
|
+
N = 1_000_000
|
10
|
+
STRING = ('A' * N) + '!'
|
11
|
+
ENUM = (['A', 0x42] * (N / 2)) << '!'
|
12
|
+
|
13
|
+
Benchmark.bm(12) do |b|
|
14
|
+
b.report('String') { CHARSET === STRING }
|
15
|
+
b.report('Enumerable') { CHARSET === ENUM }
|
16
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$LOAD_PATH << File.expand_path(File.join(File.dirname(__FILE__),'..','lib'))
|
4
|
+
|
5
|
+
require 'chars'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
CHARSET = Chars::ALPHA_NUMERIC
|
9
|
+
STRING = File.open('/usr/bin/openssl','rb') do |file|
|
10
|
+
file.read
|
11
|
+
end
|
12
|
+
|
13
|
+
Benchmark.bm(24) do |b|
|
14
|
+
b.report('strings_in') do
|
15
|
+
CHARSET.strings_in(STRING) { |offset,string| }
|
16
|
+
end
|
17
|
+
|
18
|
+
(5..20).step(5) do |n|
|
19
|
+
b.report("strings_in (length=#{n})") do
|
20
|
+
CHARSET.strings_in(STRING, :length => n) { |offset,string| }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/chars.gemspec
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
|
6
|
+
gemspec = YAML.load_file('gemspec.yml')
|
7
|
+
|
8
|
+
gem.name = gemspec.fetch('name')
|
9
|
+
gem.version = gemspec.fetch('version') do
|
10
|
+
lib_dir = File.join(File.dirname(__FILE__),'lib')
|
11
|
+
$LOAD_PATH << lib_dir unless $LOAD_PATH.include?(lib_dir)
|
12
|
+
|
13
|
+
require 'chars/version'
|
14
|
+
Chars::VERSION
|
15
|
+
end
|
16
|
+
|
17
|
+
gem.summary = gemspec['summary']
|
18
|
+
gem.description = gemspec['description']
|
19
|
+
gem.licenses = Array(gemspec['license'])
|
20
|
+
gem.authors = Array(gemspec['authors'])
|
21
|
+
gem.email = gemspec['email']
|
22
|
+
gem.homepage = gemspec['homepage']
|
23
|
+
|
24
|
+
glob = lambda { |patterns| gem.files & Dir[*patterns] }
|
25
|
+
|
26
|
+
gem.files = `git ls-files`.split($/)
|
27
|
+
gem.files = glob[gemspec['files']] if gemspec['files']
|
28
|
+
|
29
|
+
gem.executables = gemspec.fetch('executables') do
|
30
|
+
glob['bin/*'].map { |path| File.basename(path) }
|
31
|
+
end
|
32
|
+
gem.default_executable = gem.executables.first if Gem::VERSION < '1.7.'
|
33
|
+
|
34
|
+
gem.extensions = glob[gemspec['extensions'] || 'ext/**/extconf.rb']
|
35
|
+
gem.test_files = glob[gemspec['test_files'] || '{test/{**/}*_test.rb']
|
36
|
+
gem.extra_rdoc_files = glob[gemspec['extra_doc_files'] || '*.{txt,md}']
|
37
|
+
|
38
|
+
gem.require_paths = Array(gemspec.fetch('require_paths') {
|
39
|
+
%w[ext lib].select { |dir| File.directory?(dir) }
|
40
|
+
})
|
41
|
+
|
42
|
+
gem.requirements = gemspec['requirements']
|
43
|
+
gem.required_ruby_version = gemspec['required_ruby_version']
|
44
|
+
gem.required_rubygems_version = gemspec['required_rubygems_version']
|
45
|
+
gem.post_install_message = gemspec['post_install_message']
|
46
|
+
|
47
|
+
split = lambda { |string| string.split(/,\s*/) }
|
48
|
+
|
49
|
+
if gemspec['dependencies']
|
50
|
+
gemspec['dependencies'].each do |name,versions|
|
51
|
+
gem.add_dependency(name,split[versions])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
if gemspec['development_dependencies']
|
56
|
+
gemspec['development_dependencies'].each do |name,versions|
|
57
|
+
gem.add_development_dependency(name,split[versions])
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/gemspec.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
name: chars
|
2
|
+
summary: A Ruby library for working with various character sets
|
3
|
+
description:
|
4
|
+
Chars is a Ruby library for working with various character sets,
|
5
|
+
recognizing text and generating random text from specific character sets.
|
6
|
+
|
7
|
+
license: MIT
|
8
|
+
homepage: https://github.com/postmodern/chars#readme
|
9
|
+
authors: Postmodern
|
10
|
+
email: postmodern.mod3@gmail.com
|
11
|
+
has_yard: true
|
12
|
+
|
13
|
+
required_ruby_version: ">= 1.8.7"
|
14
|
+
|
15
|
+
development_dependencies:
|
16
|
+
bundler: ~> 2.0
|
data/lib/chars/char_set.rb
CHANGED
@@ -1,25 +1,75 @@
|
|
1
1
|
require 'set'
|
2
2
|
|
3
3
|
module Chars
|
4
|
-
class CharSet <
|
4
|
+
class CharSet < Set
|
5
5
|
|
6
6
|
#
|
7
|
-
# Creates a new CharSet object
|
7
|
+
# Creates a new CharSet object.
|
8
8
|
#
|
9
|
-
|
9
|
+
# @param [Array<String, Integer, Enumerable>] arguments
|
10
|
+
# The chars for the CharSet.
|
11
|
+
#
|
12
|
+
# @raise [TypeError]
|
13
|
+
# One of the arguments was not a {String}, {Integer} or `Enumerable`.
|
14
|
+
#
|
15
|
+
def initialize(*arguments)
|
10
16
|
super()
|
11
17
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
18
|
+
@chars = Hash.new { |hash,key| hash[key] = byte_to_char(key) }
|
19
|
+
|
20
|
+
arguments.each do |subset|
|
21
|
+
case subset
|
22
|
+
when String, Integer
|
23
|
+
self << subset
|
24
|
+
when Enumerable
|
25
|
+
subset.each { |char| self << char }
|
26
|
+
else
|
27
|
+
raise(TypeError,"arguments must be a String, Integer or Enumerable")
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
#
|
33
|
+
# Creates a new character set.
|
34
|
+
#
|
35
|
+
# @see #initialize
|
36
|
+
#
|
37
|
+
# @since 0.2.1
|
38
|
+
#
|
39
|
+
def self.[](*arguments)
|
40
|
+
new(*arguments)
|
41
|
+
end
|
42
|
+
|
43
|
+
#
|
44
|
+
# Adds a character to the set.
|
45
|
+
#
|
46
|
+
# @param [String, Integer] other
|
47
|
+
# The character(s) or byte to add.
|
48
|
+
#
|
49
|
+
# @return [CharSet]
|
50
|
+
# The modified character set.
|
51
|
+
#
|
52
|
+
# @raise [TypeError]
|
53
|
+
# The argument was not a {String} or {Integer}.
|
54
|
+
#
|
55
|
+
# @since 0.2.1
|
56
|
+
#
|
57
|
+
def <<(other)
|
58
|
+
case other
|
59
|
+
when String
|
60
|
+
other.each_char do |char|
|
61
|
+
byte = char_to_byte(char)
|
62
|
+
|
63
|
+
@chars[byte] = char
|
64
|
+
super(byte)
|
19
65
|
end
|
20
|
-
}
|
21
66
|
|
22
|
-
|
67
|
+
return self
|
68
|
+
when Integer
|
69
|
+
super(other)
|
70
|
+
else
|
71
|
+
raise(TypeError,"can only append Strings and Integers")
|
72
|
+
end
|
23
73
|
end
|
24
74
|
|
25
75
|
alias include_byte? include?
|
@@ -29,181 +79,361 @@ module Chars
|
|
29
79
|
alias map_bytes map
|
30
80
|
|
31
81
|
#
|
32
|
-
#
|
33
|
-
#
|
82
|
+
# Determines if a character is contained within the character set.
|
83
|
+
#
|
84
|
+
# @param [String] char
|
85
|
+
# The character to search for.
|
86
|
+
#
|
87
|
+
# @return [Boolean]
|
88
|
+
# Specifies whether the character is contained within the
|
89
|
+
# character set.
|
34
90
|
#
|
35
91
|
def include_char?(char)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
92
|
+
unless char.empty?
|
93
|
+
@chars.has_value?(char) || include_byte?(char_to_byte(char))
|
94
|
+
else
|
95
|
+
false
|
40
96
|
end
|
41
97
|
end
|
42
98
|
|
43
99
|
#
|
44
|
-
#
|
100
|
+
# The characters within the character set.
|
101
|
+
#
|
102
|
+
# @return [Array<String>]
|
103
|
+
# All the characters within the character set.
|
45
104
|
#
|
46
105
|
def chars
|
47
|
-
map { |
|
106
|
+
map { |byte| @chars[byte] }
|
48
107
|
end
|
49
108
|
|
50
109
|
#
|
51
|
-
# Iterates over every character within the character set
|
52
|
-
#
|
110
|
+
# Iterates over every character within the character set.
|
111
|
+
#
|
112
|
+
# @yield [char]
|
113
|
+
# If a block is given, it will be passed each character in the
|
114
|
+
# character set.
|
53
115
|
#
|
54
|
-
|
55
|
-
|
116
|
+
# @yieldparam [String] char
|
117
|
+
# Each character in the character set.
|
118
|
+
#
|
119
|
+
# @return [Enumerator]
|
120
|
+
# If no block is given, an enumerator object will be returned.
|
121
|
+
#
|
122
|
+
def each_char
|
123
|
+
return enum_for(__method__) unless block_given?
|
124
|
+
|
125
|
+
each { |byte| yield @chars[byte] }
|
56
126
|
end
|
57
127
|
|
58
128
|
#
|
59
|
-
# Selects
|
60
|
-
#
|
129
|
+
# Selects characters from the character set.
|
130
|
+
#
|
131
|
+
# @yield [char]
|
132
|
+
# If a block is given, it will be used to select the characters
|
133
|
+
# from the character set.
|
134
|
+
#
|
135
|
+
# @yieldparam [String] char
|
136
|
+
# The character to select or reject.
|
137
|
+
#
|
138
|
+
# @return [Array<String>]
|
139
|
+
# The selected characters from the character set.
|
61
140
|
#
|
62
141
|
def select_chars(&block)
|
63
|
-
|
142
|
+
each_char.select(&block)
|
64
143
|
end
|
65
144
|
|
66
145
|
#
|
67
|
-
# Maps the characters of the character set
|
146
|
+
# Maps the characters of the character set.
|
147
|
+
#
|
148
|
+
# @yield [char]
|
149
|
+
# The given block will be used to transform the characters within
|
150
|
+
# the character set.
|
151
|
+
#
|
152
|
+
# @yieldparam [String] char
|
153
|
+
# Each character in the character set.
|
154
|
+
#
|
155
|
+
# @return [Array<String>]
|
156
|
+
# The mapped characters of the character set.
|
68
157
|
#
|
69
158
|
def map_chars(&block)
|
70
|
-
|
159
|
+
each_char.map(&block)
|
71
160
|
end
|
72
161
|
|
73
162
|
#
|
74
|
-
#
|
163
|
+
# @return [Integer]
|
164
|
+
# A random byte from the character set.
|
75
165
|
#
|
76
166
|
def random_byte
|
77
167
|
self.entries[rand(self.length)]
|
78
168
|
end
|
79
169
|
|
80
170
|
#
|
81
|
-
#
|
171
|
+
# @return [String]
|
172
|
+
# A random char from the character set.
|
82
173
|
#
|
83
174
|
def random_char
|
84
|
-
random_byte
|
175
|
+
@chars[random_byte]
|
85
176
|
end
|
86
177
|
|
87
178
|
#
|
88
|
-
# Pass
|
179
|
+
# Pass random bytes to a given block.
|
180
|
+
#
|
181
|
+
# @param [Integer] n
|
182
|
+
# Specifies how many times to pass a random byte to the block.
|
183
|
+
#
|
184
|
+
# @yield [byte]
|
185
|
+
# The block will receive the random bytes.
|
186
|
+
#
|
187
|
+
# @yieldparam [Integer] byte
|
188
|
+
# The random byte from the character set.
|
189
|
+
#
|
190
|
+
# @return [Enumerator]
|
191
|
+
# If no block is given, an enumerator object will be returned.
|
89
192
|
#
|
90
193
|
def each_random_byte(n,&block)
|
91
|
-
|
194
|
+
return enum_for(__method__,n) unless block_given?
|
195
|
+
|
196
|
+
n.times { yield random_byte }
|
197
|
+
return nil
|
92
198
|
end
|
93
199
|
|
94
200
|
#
|
95
|
-
# Pass
|
201
|
+
# Pass random characters to a given block.
|
202
|
+
#
|
203
|
+
# @param [Integer] n
|
204
|
+
# Specifies how many times to pass a random character to the block.
|
205
|
+
#
|
206
|
+
# @yield [char]
|
207
|
+
# The block will receive the random characters.
|
208
|
+
#
|
209
|
+
# @yieldparam [String] char
|
210
|
+
# The random character from the character set.
|
211
|
+
#
|
212
|
+
# @return [Enumerator]
|
213
|
+
# If no block is given, an enumerator object will be returned.
|
96
214
|
#
|
97
215
|
def each_random_char(n,&block)
|
98
|
-
|
216
|
+
return enum_for(__method__,n) unless block_given?
|
217
|
+
|
218
|
+
each_random_byte(n) { |byte| yield @chars[byte] }
|
99
219
|
end
|
100
220
|
|
101
221
|
#
|
102
|
-
#
|
103
|
-
#
|
104
|
-
#
|
222
|
+
# Creates an Array of random bytes from the character set.
|
223
|
+
#
|
224
|
+
# @param [Integer, Array, Range] length
|
225
|
+
# The length of the Array of random bytes.
|
226
|
+
#
|
227
|
+
# @return [Array<Integer>]
|
228
|
+
# The randomly selected bytes.
|
105
229
|
#
|
106
230
|
def random_bytes(length)
|
107
231
|
if (length.kind_of?(Array) || length.kind_of?(Range))
|
108
|
-
|
232
|
+
Array.new(length.sort_by { rand }.first) { random_byte }
|
109
233
|
else
|
110
|
-
|
234
|
+
Array.new(length) { random_byte }
|
111
235
|
end
|
112
236
|
end
|
113
237
|
|
114
238
|
#
|
115
|
-
#
|
116
|
-
#
|
117
|
-
#
|
239
|
+
# Creates an Array of random non-repeating bytes from the character set.
|
240
|
+
#
|
241
|
+
# @param [Integer, Array, Range] length
|
242
|
+
# The length of the Array of random non-repeating bytes.
|
243
|
+
#
|
244
|
+
# @return [Array<Integer>]
|
245
|
+
# The randomly selected non-repeating bytes.
|
246
|
+
#
|
247
|
+
def random_distinct_bytes(length)
|
248
|
+
if (length.kind_of?(Array) || length.kind_of?(Range))
|
249
|
+
self.entries.sort_by { rand }.slice(0...(length.sort_by { rand }.first))
|
250
|
+
else
|
251
|
+
self.entries.sort_by { rand }.slice(0...length)
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
#
|
256
|
+
# Creates an Array of random characters from the character set.
|
257
|
+
#
|
258
|
+
# @param [Integer, Array, Range] length
|
259
|
+
# The length of the Array of random characters.
|
260
|
+
#
|
261
|
+
# @return [Array<String>]
|
262
|
+
# The randomly selected characters.
|
118
263
|
#
|
119
264
|
def random_chars(length)
|
120
|
-
random_bytes(length).map { |
|
265
|
+
random_bytes(length).map { |byte| @chars[byte] }
|
121
266
|
end
|
122
267
|
|
123
268
|
#
|
124
|
-
#
|
125
|
-
#
|
269
|
+
# Creates a String containing randomly selected characters from the
|
270
|
+
# character set.
|
271
|
+
#
|
272
|
+
# @param [Integer, Array, Range] length
|
273
|
+
# The length of the String of random characters.
|
274
|
+
#
|
275
|
+
# @return [String]
|
276
|
+
# The String of randomly selected characters.
|
277
|
+
#
|
278
|
+
# @see #random_chars
|
126
279
|
#
|
127
280
|
def random_string(length)
|
128
281
|
random_chars(length).join
|
129
282
|
end
|
130
283
|
|
131
284
|
#
|
132
|
-
#
|
133
|
-
# character set
|
285
|
+
# Creates an Array of random non-repeating characters from the
|
286
|
+
# character set.
|
287
|
+
#
|
288
|
+
# @param [Integer, Array, Range] length
|
289
|
+
# The length of the Array of random non-repeating characters.
|
134
290
|
#
|
135
|
-
#
|
136
|
-
#
|
137
|
-
# within the _data_. Defaults to 4, if not
|
138
|
-
# specified.
|
139
|
-
# <tt>:offsets</tt>:: Specifies wether to return a Hash of the
|
140
|
-
# offsets within the _data_ and the matched
|
141
|
-
# sub-strings. If not specified a simple
|
142
|
-
# Array will be returned of the matched
|
143
|
-
# sub-strings.
|
291
|
+
# @return [Array<String>]
|
292
|
+
# The randomly selected non-repeating characters.
|
144
293
|
#
|
145
|
-
def
|
146
|
-
|
294
|
+
def random_distinct_chars(length)
|
295
|
+
random_distinct_bytes(length).map { |byte| @chars[byte] }
|
296
|
+
end
|
147
297
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
298
|
+
#
|
299
|
+
# Creates a String containing randomly selected non-repeating
|
300
|
+
# characters from the character set.
|
301
|
+
#
|
302
|
+
# @param [Integer, Array, Range] length
|
303
|
+
# The length of the String of random non-repeating characters.
|
304
|
+
#
|
305
|
+
# @return [String]
|
306
|
+
# The String of randomly selected non-repeating characters.
|
307
|
+
#
|
308
|
+
# @see #random_distinct_chars
|
309
|
+
#
|
310
|
+
def random_distinct_string(length)
|
311
|
+
random_distinct_chars(length).join
|
312
|
+
end
|
313
|
+
|
314
|
+
#
|
315
|
+
# Finds sub-strings within given data that are made of characters within
|
316
|
+
# the character set.
|
317
|
+
#
|
318
|
+
# @param [String] data
|
319
|
+
# The data to find sub-strings within.
|
320
|
+
#
|
321
|
+
# @param [Hash] options
|
322
|
+
# Additional options.
|
323
|
+
#
|
324
|
+
# @option options [Integer] :length (4)
|
325
|
+
# The minimum length of sub-strings found within the given data.
|
326
|
+
#
|
327
|
+
# @option options [Boolean] :offsets (false)
|
328
|
+
# Specifies whether to return a Hash of offsets and matched
|
329
|
+
# sub-strings within the data, or to just return the matched
|
330
|
+
# sub-strings themselves.
|
331
|
+
#
|
332
|
+
# @yield [match,(index)]
|
333
|
+
# The given block will be passed every matched sub-string, and the
|
334
|
+
# optional index.
|
335
|
+
#
|
336
|
+
# @yieldparam [String] match
|
337
|
+
# A sub-string containing the characters from the character set.
|
338
|
+
#
|
339
|
+
# @yieldparam [Integer] index
|
340
|
+
# The index the sub-string was found at.
|
341
|
+
#
|
342
|
+
# @return [Array, Hash]
|
343
|
+
# If no block is given, an Array or Hash of sub-strings is returned.
|
344
|
+
#
|
345
|
+
def strings_in(data,options={},&block)
|
346
|
+
unless block
|
347
|
+
if options[:offsets]
|
348
|
+
found = {}
|
349
|
+
block = lambda { |offset,substring| found[offset] = substring }
|
350
|
+
else
|
351
|
+
found = []
|
352
|
+
block = lambda { |substring| found << substring }
|
353
|
+
end
|
354
|
+
|
355
|
+
strings_in(data,options,&block)
|
356
|
+
return found
|
158
357
|
end
|
159
358
|
|
160
|
-
|
359
|
+
min_length = options.fetch(:length,4)
|
360
|
+
return if data.length < min_length
|
161
361
|
|
162
362
|
index = 0
|
163
363
|
|
164
364
|
while index <= (data.length - min_length)
|
165
|
-
if self === data[index
|
365
|
+
if self === data[index,min_length]
|
166
366
|
sub_index = (index + min_length)
|
167
367
|
|
168
|
-
while self.include_char?(data[sub_index
|
368
|
+
while self.include_char?(data[sub_index,1])
|
169
369
|
sub_index += 1
|
170
370
|
end
|
171
371
|
|
172
|
-
|
372
|
+
match = data[index...sub_index]
|
373
|
+
|
374
|
+
case block.arity
|
375
|
+
when 2
|
376
|
+
yield match, index
|
377
|
+
else
|
378
|
+
yield match
|
379
|
+
end
|
380
|
+
|
173
381
|
index = sub_index
|
174
382
|
else
|
175
383
|
index += 1
|
176
384
|
end
|
177
385
|
end
|
178
|
-
|
179
|
-
return found
|
180
386
|
end
|
181
387
|
|
182
388
|
#
|
183
|
-
# Creates a new CharSet object
|
184
|
-
#
|
389
|
+
# Creates a new CharSet object by unioning the character set with
|
390
|
+
# another character set.
|
391
|
+
#
|
392
|
+
# @param [CharSet, Array, Range] set
|
393
|
+
# The other character set to union with.
|
185
394
|
#
|
186
|
-
|
187
|
-
|
395
|
+
# @return [CharSet]
|
396
|
+
# The unioned character sets.
|
397
|
+
#
|
398
|
+
def |(set)
|
399
|
+
set = CharSet.new(set) unless set.kind_of?(CharSet)
|
400
|
+
|
401
|
+
return super(set)
|
188
402
|
end
|
189
403
|
|
190
404
|
alias + |
|
191
405
|
|
192
406
|
#
|
193
|
-
#
|
194
|
-
#
|
407
|
+
# Compares the bytes within a given string with the bytes of the
|
408
|
+
# character set.
|
195
409
|
#
|
410
|
+
# @param [String, Enumerable] other
|
411
|
+
# The string to compare with the character set.
|
412
|
+
#
|
413
|
+
# @return [Boolean]
|
414
|
+
# Specifies whether all of the bytes within the given string are
|
415
|
+
# included in the character set.
|
416
|
+
#
|
417
|
+
# @example
|
196
418
|
# Chars.alpha === "hello"
|
197
419
|
# # => true
|
198
420
|
#
|
199
|
-
def ===(
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
421
|
+
def ===(other)
|
422
|
+
case other
|
423
|
+
when String
|
424
|
+
other.each_char.all? { |char| include_char?(char) }
|
425
|
+
when Enumerable
|
426
|
+
other.all? do |element|
|
427
|
+
case element
|
428
|
+
when String
|
429
|
+
include_char?(element)
|
430
|
+
when Integer
|
431
|
+
include_byte?(element)
|
432
|
+
end
|
433
|
+
end
|
434
|
+
else
|
435
|
+
false
|
204
436
|
end
|
205
|
-
|
206
|
-
return true
|
207
437
|
end
|
208
438
|
|
209
439
|
alias =~ ===
|
@@ -211,20 +441,87 @@ module Chars
|
|
211
441
|
#
|
212
442
|
# Inspects the character set.
|
213
443
|
#
|
444
|
+
# @return [String]
|
445
|
+
# The inspected character set.
|
446
|
+
#
|
214
447
|
def inspect
|
215
|
-
"#<#{self.class.name}: {" + map { |
|
216
|
-
case
|
448
|
+
"#<#{self.class.name}: {" + map { |byte|
|
449
|
+
case byte
|
217
450
|
when (0x07..0x0d), (0x20..0x7e)
|
218
|
-
|
451
|
+
@chars[byte].dump
|
219
452
|
when 0x00
|
220
453
|
# sly hack to make char-sets more friendly
|
221
454
|
# to us C programmers
|
222
455
|
'"\0"'
|
223
456
|
else
|
224
|
-
"0x%02x" %
|
457
|
+
"0x%02x" % byte
|
225
458
|
end
|
226
459
|
}.join(', ') + "}>"
|
227
460
|
end
|
228
461
|
|
462
|
+
protected
|
463
|
+
|
464
|
+
if RUBY_VERSION > '1.9.'
|
465
|
+
#
|
466
|
+
# Converts a byte to a character.
|
467
|
+
#
|
468
|
+
# @param [Integer] byte
|
469
|
+
# The byte to convert.
|
470
|
+
#
|
471
|
+
# @return [String]
|
472
|
+
# The character.
|
473
|
+
#
|
474
|
+
# @since 0.2.1
|
475
|
+
#
|
476
|
+
def byte_to_char(byte)
|
477
|
+
byte.chr(Encoding::UTF_8)
|
478
|
+
end
|
479
|
+
|
480
|
+
#
|
481
|
+
# Converts a character to a byte.
|
482
|
+
#
|
483
|
+
# @param [String] char
|
484
|
+
# The character to convert.
|
485
|
+
#
|
486
|
+
# @return [Integer]
|
487
|
+
# The byte.
|
488
|
+
#
|
489
|
+
# @since 0.2.1
|
490
|
+
#
|
491
|
+
def char_to_byte(char)
|
492
|
+
char.ord
|
493
|
+
end
|
494
|
+
else
|
495
|
+
#
|
496
|
+
# Converts a byte to a character.
|
497
|
+
#
|
498
|
+
# @param [Integer] byte
|
499
|
+
# The byte to convert.
|
500
|
+
#
|
501
|
+
# @return [String]
|
502
|
+
# The character.
|
503
|
+
#
|
504
|
+
# @since 0.2.1
|
505
|
+
#
|
506
|
+
def byte_to_char(byte)
|
507
|
+
byte.chr
|
508
|
+
end
|
509
|
+
|
510
|
+
#
|
511
|
+
# Converts a character to a byte.
|
512
|
+
#
|
513
|
+
# @param [String] char
|
514
|
+
# The character to convert.
|
515
|
+
#
|
516
|
+
# @return [Integer]
|
517
|
+
# The byte.
|
518
|
+
#
|
519
|
+
# @since 0.2.1
|
520
|
+
#
|
521
|
+
def char_to_byte(char)
|
522
|
+
char[0]
|
523
|
+
end
|
524
|
+
end
|
525
|
+
|
229
526
|
end
|
230
527
|
end
|