characteristics 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: be2da5691ab55cf6657a961f4ad7f4fc880a8b65
4
+ data.tar.gz: 71d2b668a4e2383aac937505ebaf8ec5450d3969
5
+ SHA512:
6
+ metadata.gz: da6dad0bb4c897c7145064894c483625f8fb68162cf0e3e8b812a8dfc3cddb5c4aec49f04a7a8452ff7166ba5c74ebdd0600f084e6333e010b11345335bb1ac0
7
+ data.tar.gz: 02b16977833dbaa784516e08b9bb65581dad9c8bf2e32207b5f95f3d6b65a83f2fd3557c5abe05e0a878730906a5a193342eba248706a28c00cf37d7741e0901
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ /pkg
data/.travis.yml ADDED
@@ -0,0 +1,22 @@
1
+ sudo: false
2
+ language: ruby
3
+
4
+ rvm:
5
+ - ruby-head
6
+ - 2.4.0
7
+ - 2.3.3
8
+ - 2.2
9
+ - 2.1
10
+ - 2.0
11
+ - jruby-head
12
+ - jruby-9.1.7.0
13
+
14
+ cache:
15
+ - bundler
16
+
17
+ matrix:
18
+ allow_failures:
19
+ - rvm: jruby-head
20
+ - rvm: ruby-head
21
+ - rvm: 2.0
22
+ # fast_finish: true
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## CHANGELOG
2
+
3
+ ### 0.1.0
4
+
5
+ * Initial release
6
+
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at opensource@janlelis.com. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [http://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: http://contributor-covenant.org
74
+ [version]: http://contributor-covenant.org/version/1/4/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem 'minitest'
data/MIT-LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2017 Jan Lelis, mail@janlelis.de
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # Characteristics [![[version]](https://badge.fury.io/rb/characteristics.svg)](http://badge.fury.io/rb/characteristics) [![[travis]](https://travis-ci.org/janlelis/characteristics.svg)](https://travis-ci.org/janlelis/characteristics)
2
+
3
+ A Ruby library which provides some basic information about how characters behave in different encodings:
4
+
5
+ - Is a character valid according to its encoding?
6
+ - Is a character assigned?
7
+ - Is a character a special control character?
8
+ - Could a character be invisible (blank)?
9
+
10
+ The [unibits](https://github.com/janlelis/unibits) gem makes use of this data to visualize it accordingly.
11
+
12
+ ## Setup
13
+
14
+ Add to your `Gemfile`:
15
+
16
+ ```ruby
17
+ gem 'characteristics'
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ```ruby
23
+ char_info = Characteristics.create(character)
24
+ char_info.valid? # => true / false
25
+ char_info.unicode? # => true / false
26
+ char_info.assigned? # => true / false
27
+ char_info.control? # => true / false
28
+ char_info.blank? # => true / false
29
+ ```
30
+
31
+ ## Types of Encodings
32
+
33
+ This library knows of four different kinds of encodings:
34
+
35
+ - **:unicode** Unicode family of multibyte encodings (*UTF-\**)
36
+ - **:ascii** 7-Bit ASCII (*US-ASCII*)
37
+ - **:binary** Arbitrary string (*ASCII-8BIT*)
38
+ - **:byte** Known byte encoding (*ISO-8859-\**, *Windows-\**)
39
+
40
+ Other encodings are not supported, yet.
41
+
42
+ ## Predicates
43
+
44
+ ### `valid?`
45
+
46
+ Validness is determined by Ruby's String#valid_encoding?
47
+
48
+ ### `unicode?`
49
+
50
+ `true` for Unicode encodings (`UTF-*`)
51
+
52
+ ### `control?`
53
+
54
+ Control characters are codepoints in the C0, delete, or C1 control character range.
55
+
56
+ ### `assigned?`
57
+
58
+ - All valid ASCII and BINARY characters are considered assigned
59
+ - For other byte based encodings, a character is considered assigned if it is not on the exception list included in this library. C0 control characters (and `\x7F`) are always considered assigned. C1 control characters are treated as assigned, if the encoding generally does not assign characters in the C1 region.
60
+ - For Unicode, the general category is considered
61
+
62
+ ### `blank?`
63
+
64
+ The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`), or special formatting characters (right-to-left marker, variation selectors, etc).
65
+
66
+ ## Todo
67
+
68
+ - Support all non-dummy encodings that Ruby supports
69
+ - Complete test matrix
70
+
71
+ ## MIT License
72
+
73
+ Copyright (C) 2017 Jan Lelis <http://janlelis.com>. Released under the MIT license.
data/Rakefile ADDED
@@ -0,0 +1,38 @@
1
# # #
# Get gemspec info
#
# The gemspec is evaluated in this file's binding so its metadata (name,
# version, dependencies, file list) can be reused by the tasks below.

gemspec_file = Dir['*.gemspec'].first
gemspec = eval File.read(gemspec_file), binding, gemspec_file
info = "#{gemspec.name} | #{gemspec.version} | " \
       "#{gemspec.runtime_dependencies.size} dependencies | " \
       "#{gemspec.files.size} files"

# # #
# Gem build and install task
#
# Builds the gem, moves the resulting .gem file into pkg/ and installs it
# locally without generating documentation.

desc info
task :gem do
  puts info + "\n\n"
  print " "; sh "gem build #{gemspec_file}"
  FileUtils.mkdir_p 'pkg'
  FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", 'pkg'
  puts; sh %{gem install --no-document pkg/#{gemspec.name}-#{gemspec.version}.gem}
end

# # #
# Start an IRB session with the gem loaded

desc "#{gemspec.name} | IRB"
task :irb do
  # Dashes in a gem name correspond to directory separators in its require path
  sh "irb -I ./lib -r #{gemspec.name.gsub '-','/'}"
end

# # #
# Run specs

desc "#{gemspec.name} | Spec"
task :spec do
  # Run every spec file and remember the ones that exit unsuccessfully.
  # A plain shell `for` loop would only report the exit status of the last
  # file, silently hiding failures in all earlier ones.
  failed = Dir["spec/*_spec.rb"].sort.reject { |file| system("ruby", file) }
  abort "Failing spec files: #{failed.join(', ')}" unless failed.empty?
end
task default: :spec
38
+
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.dirname(__FILE__) + "/lib/characteristics/version"
4
+
5
# Gem specification for characteristics. It is kept as the final expression
# of the file so that evaluating the file yields the specification object.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata
  spec.name        = "characteristics"
  spec.version     = Characteristics::VERSION
  spec.summary     = "Basic character properties."
  spec.description = "A Ruby library which provides some basic information about how characters behave in different encodings."
  spec.authors     = ["Jan Lelis"]
  spec.email       = ["mail@janlelis.de"]
  spec.homepage    = "https://github.com/janlelis/characteristics"
  spec.license     = "MIT"

  # Package every regular file (dotfiles included) whose path does not
  # start with "pkg"
  packaged = Dir["{**/}{.*,*}"].select { |path| File.file?(path) && path !~ /^pkg/ }
  spec.files         = packaged
  spec.executables   = spec.files.grep(%r{^bin/}).map { |file| File.basename(file) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime constraints
  spec.required_ruby_version = "~> 2.0"
  spec.add_dependency 'unicode-categories', '~> 1.1', '>= 1.1.2'
end
@@ -0,0 +1,42 @@
1
# Character properties for strings in the 7-bit US-ASCII encoding.
#
# A character that is not valid in its encoding answers false to every
# predicate defined here except #assigned?, which is unconditionally true.
class AsciiCharacteristics < Characteristics
  # Horizontal tab and space
  BLANKS = [0x09, 0x20].freeze

  # Line feed, vertical tab, form feed and carriage return
  SEPARATORS = [0x0A, 0x0B, 0x0C, 0x0D].freeze

  # Remembers the codepoint, but only for validly encoded characters
  # (String#ord raises for invalid byte sequences).
  def initialize(char)
    super
    @ord = char.ord if @is_valid
  end

  # Always true for this encoding type
  def assigned?
    true
  end

  # True for C0 control characters and for delete
  def control?
    c0? || delete?
  end

  # True if the codepoint lies below 0x20
  def c0?
    return false unless @is_valid

    @ord < 0x20
  end

  # True if the codepoint is 0x7F
  def delete?
    return false unless @is_valid

    @ord == 0x7F
  end

  # Never true: this class recognizes no C1 control characters
  def c1?
    false
  end

  # True if the codepoint is listed in BLANKS or SEPARATORS
  def blank?
    return false unless @is_valid

    (BLANKS + SEPARATORS).include?(@ord)
  end
end
@@ -0,0 +1,43 @@
1
# Character properties for arbitrary binary strings (ASCII-8BIT).
#
# Every byte is treated as valid and assigned; only the C0 range and
# delete (0x7F) count as control characters.
class BinaryCharacteristics < Characteristics
  # Horizontal tab and space
  BLANKS = [
    0x9,
    0x20,
  ].freeze

  # Line feed, vertical tab, form feed and carriage return
  SEPARATORS = [
    0xA,
    0xB,
    0xC,
    0xD,
  ].freeze

  # Does not call super: #valid? is overridden below, so the validity check
  # performed by the base initializer is not needed here.
  def initialize(char)
    @ord = char.ord
    @encoding = char.encoding
    @encoding_name = @encoding.name
  end

  # Always true for this encoding type
  def valid?
    true
  end

  # Always true for this encoding type
  def assigned?
    true
  end

  # True for C0 control characters and for delete
  def control?
    c0? || delete?
  end

  # True if the byte value lies below 0x20
  def c0?
    @ord < 0x20
  end

  # True if the byte value is 0x7F
  def delete?
    @ord == 0x7F
  end

  # Never true: #control? does not treat the 0x80..0x9F range as control
  # characters. Defined so that all characteristics classes respond to the
  # same set of predicates.
  def c1?
    false
  end

  # True if the byte value is listed in BLANKS or SEPARATORS
  def blank?
    BLANKS.include?(@ord) || SEPARATORS.include?(@ord)
  end
end
@@ -0,0 +1,103 @@
1
# Character properties for the single-byte encoding families handled by this
# class (ISO-8859-* and Windows-*).
#
# Encoding-specific behavior is driven by lookup tables that map a byte value
# to a regular expression matching the names of the affected encodings.
class ByteCharacteristics < Characteristics
  # Encodings whose 0x80..0x9F range consists of C1 control characters
  HAS_C1 = /^(ISO-8859-)/

  # Byte value => encodings in which that byte has no character assigned
  UNASSIGNED = {
    0x81 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
    0x83 => /^Windows-(1250|1257)/,
    0x88 => /^Windows-(1250|1253|1257)/,
    0x8A => /^Windows-(1253|1255|1257|1258)/,
    0x8C => /^Windows-(1253|1255|1257)/,
    0x8D => /^Windows-(1252|1253|1254|1255|1258)/,
    0x8E => /^Windows-(1253|1254|1255|1258)/,
    0x8F => /^Windows-(1252|1253|1254|1255|1258)/,

    0x90 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
    0x98 => /^Windows-(1250|1251|1253|1257)/,
    0x9A => /^Windows-(1253|1255|1257|1258)/,
    0x9C => /^Windows-(1253|1255|1257)/,
    # Windows-1252 leaves 0x9D unused; 0x9B is an assigned character there
    # (single right-pointing angle quotation mark) and is therefore not listed.
    0x9D => /^Windows-(1252|1253|1254|1255|1258)/,
    0x9E => /^Windows-(1253|1254|1255|1258)/,
    0x9F => /^Windows-(1253|1255|1257)/,

    0xA1 => /^Windows-(1257)/,
    0xA5 => /^Windows-(1257)/,
    0xAA => /^Windows-(1253)/,

    0xD2 => /^Windows-(1253)/,
    0xD9 => /^Windows-(1255)/,
    0xDA => /^Windows-(1255)/,
    0xDB => /^Windows-(1255)/,
    0xDC => /^Windows-(1255)/,
    0xDD => /^Windows-(1255)/,
    0xDE => /^Windows-(1255)/,
    0xDF => /^Windows-(1255)/,

    0xFB => /^Windows-(1255)/,
    0xFC => /^Windows-(1255)/,
    0xFF => /^Windows-(1253|1255)/,
  }.freeze

  # Horizontal tab and space
  BLANKS = [
    0x9,
    0x20,
  ].freeze

  # Line feed, vertical tab, form feed and carriage return
  SEPARATORS = [
    0xA,
    0xB,
    0xC,
    0xD,
  ].freeze

  # Byte value => encodings in which that byte is an additional blank
  EXTRA_BLANKS = {
    # No-break space
    0xA0 => /^(ISO-8859-|Windows-)/,
    # Windows-1256 places zero width non-joiner at 0x9D and zero width joiner
    # at 0x9E (0x9F is a visible Arabic letter).
    0x9D => /^Windows-(1256)/,
    0x9E => /^Windows-(1256)/,
  }.freeze

  # Stores the byte value in addition to the state set up by the base class
  def initialize(char)
    super
    @ord = char.ord
  end

  # Currently every supported encoding is treated as having C0 controls
  def encoding_has_c0?
    # !!(HAS_C0 =~ @encoding_name)
    true
  end

  # Currently every supported encoding is treated as having delete
  def encoding_has_delete?
    # !!(HAS_C0 =~ @encoding_name)
    true
  end

  # True if the encoding's name matches HAS_C1
  def encoding_has_c1?
    !!(HAS_C1 =~ @encoding_name)
  end

  # Control characters always count as assigned; any other byte is assigned
  # unless UNASSIGNED lists it for the current encoding. Bytes without a
  # table entry yield nil, and `nil !~ name` is true.
  def assigned?
    control? || UNASSIGNED[@ord] !~ @encoding_name
  end

  # True for C0, C1 and delete control characters
  def control?
    c0? || c1? || delete?
  end

  # True if the byte value lies below 0x20
  def c0?
    @ord < 0x20 && encoding_has_c0?
  end

  # True if the byte value lies in 0x80..0x9F and the encoding has C1 controls
  def c1?
    @ord >= 0x80 && @ord < 0xA0 && encoding_has_c1?
  end

  # True if the byte value is 0x7F
  def delete?
    @ord == 0x7F && encoding_has_delete?
  end

  # True if the byte is a generic blank or separator, or an extra blank of
  # the current encoding. The regexp match is coerced with `!!` so that the
  # predicate returns true/false rather than a match position or nil.
  def blank?
    BLANKS.include?(@ord) ||
      SEPARATORS.include?(@ord) ||
      !!(EXTRA_BLANKS[@ord] =~ @encoding_name)
  end
end
@@ -0,0 +1,103 @@
1
+ require "unicode/categories"
2
+
3
# Character properties for the Unicode family of encodings (UTF-*).
#
# All predicates except #unicode? answer false for characters that are not
# valid in their encoding.
class UnicodeCharacteristics < Characteristics
  # Codepoints that might not be rendered visually
  BLANKS = [
    0x0009,
    0x0020,
    0x00AD,
    0x115F,
    0x1160,
    0x1680,
    0x180E,
    *(0x2000..0x200D),
    0x202F,
    0x205F,
    *(0x2060..0x2064),
    *(0x206A..0x206F),
    0x3000,
    0x3164,
    0x2800,
    0xFEFF,
    *(0x1BCA0..0x1BCA3),
    0x1D159,
    *(0x1D173..0x1D17A),
  ].freeze

  # Line-breaking characters that are treated as blank as well
  SEPARATORS = [
    *(0x000A..0x000D),
    0x2028,
    0x2029,
  ].freeze

  # Looks up the general category and the codepoint, but only for validly
  # encoded characters.
  def initialize(char)
    super
    return unless @is_valid

    @ord = char.ord
    @category = Unicode::Categories.category(char)
  end

  # Always true for this encoding type
  def unicode?
    true
  end

  # True unless the general category is "Cn"
  def assigned?
    return false unless @is_valid

    @category != "Cn"
  end

  # True if the general category is "Cc"
  def control?
    return false unless @is_valid

    @category == "Cc"
  end

  # True if the codepoint lies below 0x20
  def c0?
    return false unless @is_valid

    @ord < 0x20
  end

  # True if the codepoint is 0x7F
  def delete?
    return false unless @is_valid

    @ord == 0x7F
  end

  # True if the codepoint lies in 0x80..0x9F
  def c1?
    return false unless @is_valid

    @ord >= 0x80 && @ord < 0xA0
  end

  # True if the codepoint is listed in BLANKS or SEPARATORS
  def blank?
    return false unless @is_valid

    (BLANKS + SEPARATORS).include?(@ord)
  end
end
@@ -0,0 +1,4 @@
1
class Characteristics
  # Version string of this gem release
  VERSION = '0.1.0'.freeze
end
4
+
@@ -0,0 +1,65 @@
1
+ require_relative "characteristics/version"
2
+
3
+ require_relative "characteristics/ascii"
4
+ require_relative "characteristics/binary"
5
+ require_relative "characteristics/byte"
6
+ require_relative "characteristics/unicode"
7
+
8
# Base class and factory for character characteristics.
#
# Use Characteristics.create to obtain the subclass instance matching a
# string's encoding. Instances of this base class itself only know about
# validity; their #assigned?, #control? and #blank? return nil.
class Characteristics
  # Maps an encoding name to the kind of encoding this library distinguishes.
  #
  # encoding_name - name of an encoding as a String, e.g. "UTF-8"
  #
  # Returns :ascii, :binary, :unicode or :byte.
  # Raises ArgumentError if the encoding is not supported.
  def self.type_from_encoding_name(encoding_name)
    case encoding_name
    when "US-ASCII"
      :ascii
    when "ASCII-8BIT"
      :binary
    when /^UTF-/
      :unicode
    # Only single-byte Windows code pages are byte encodings. A bare
    # /^Windows/ would also accept Windows-31J, which is a multibyte
    # encoding and cannot be handled by ByteCharacteristics.
    when /^ISO-8859-/, /^Windows-(125\d|874)$/
      :byte
    else
      raise ArgumentError, "encoding <#{encoding_name}> not supported"
    end
  end

  # Instantiates the characteristics class that belongs to the given type.
  #
  # char - the character to inspect
  # type - :unicode, :byte or :ascii; any other value selects the binary
  #        implementation
  def self.create_for_type(char, type)
    case type
    when :unicode
      UnicodeCharacteristics.new(char)
    when :byte
      ByteCharacteristics.new(char)
    when :ascii
      AsciiCharacteristics.new(char)
    else
      BinaryCharacteristics.new(char)
    end
  end

  # Builds the characteristics object for a character based on the name of
  # the string's encoding.
  #
  # Raises ArgumentError if the encoding is not supported.
  def self.create(char)
    create_for_type(char, type_from_encoding_name(char.encoding.name))
  end

  attr_reader :encoding

  # Records whether the character is valid in its encoding, along with the
  # encoding and its name.
  def initialize(char)
    @is_valid = char.valid_encoding?
    @encoding = char.encoding
    @encoding_name = @encoding.name
  end

  # True if the character is valid according to its encoding
  def valid?
    @is_valid
  end

  # False here; overridden by the Unicode implementation
  def unicode?
    false
  end

  # No-op in the base class (returns nil); implemented by subclasses
  def assigned?
  end

  # No-op in the base class (returns nil); implemented by subclasses
  def control?
  end

  # No-op in the base class (returns nil); implemented by subclasses
  def blank?
  end
end
@@ -0,0 +1,181 @@
1
+ require_relative "../lib/characteristics"
2
+ require "minitest/autorun"
3
+
4
describe Characteristics do
  # Builds the characteristics object for +char+ after tagging the string
  # with the encoding declared by the surrounding `let(:encoding)`.
  def characteristics_of(char)
    Characteristics.create(char.force_encoding(encoding))
  end

  def valid?(char)
    characteristics_of(char).valid?
  end

  def assigned?(char)
    characteristics_of(char).assigned?
  end

  def control?(char)
    characteristics_of(char).control?
  end

  def blank?(char)
    characteristics_of(char).blank?
  end

  describe UnicodeCharacteristics do
    describe "UTF-*" do
      let(:encoding) { "UTF-8" }

      it "is valid or not" do
        assert valid? "\x21"
        refute valid? "\x80"
      end

      it "is assigned or not" do
        assert assigned? "\x21"
        refute assigned? "\uFFEF"
      end

      it "is control or not" do
        assert control? "\x1E"
        assert control? "\x7F"
        assert control? "\u0080"
        refute control? "\x67"
      end

      it "is blank or not" do
        assert blank? "\x20"
        refute blank? "\x21"
      end
    end
  end

  describe BinaryCharacteristics do
    describe "ASCII-8BIT" do
      let(:encoding) { "ASCII-8BIT" }

      it "is always valid" do
        assert valid? "\x80"
      end

      it "is always assigned" do
        assert assigned? "\x7F"
      end

      it "is control or not" do
        assert control? "\x1E"
        assert control? "\x7F"
        refute control? "\x67"
      end

      it "is blank or not" do
        assert blank? "\x20"
        refute blank? "\x21"
      end
    end
  end

  describe AsciiCharacteristics do
    describe "US-ASCII" do
      let(:encoding) { "US-ASCII" }

      it "is valid or not" do
        assert valid? "\x21"
        refute valid? "\x80"
      end

      it "is always assigned" do
        assert assigned? "\x21"
      end

      it "is control or not" do
        assert control? "\x1E"
        assert control? "\x7F"
        refute control? "\x67"
      end

      it "is blank or not" do
        assert blank? "\x20"
        refute blank? "\x21"
      end
    end
  end

  describe ByteCharacteristics do
    describe "ISO-8859-*" do
      describe "ISO-8859-1" do
        let(:encoding) { "ISO-8859-1" }

        it "is always valid" do
          assert valid? "\x80"
        end

        it "is always assigned" do
          assert assigned? "\x21"
          assert assigned? "\x80"
        end

        it "is control or not" do
          assert control? "\x1E"
          assert control? "\x7F"
          assert control? "\x80"
          refute control? "\x67"
        end

        it "is blank or not" do
          assert blank? "\x20"
          refute blank? "\x21"
        end
      end

      # TODO: the remaining ISO-8859 encodings are not covered yet
      #
      # describe "ISO-8859-2" do
      # describe "ISO-8859-3" do
      # describe "ISO-8859-4" do
      # describe "ISO-8859-5" do
      # describe "ISO-8859-6" do
      # describe "ISO-8859-7" do
      # describe "ISO-8859-8" do
      # describe "ISO-8859-9" do
      # describe "ISO-8859-10" do
      # describe "ISO-8859-11" do
      # describe "ISO-8859-13" do
      # describe "ISO-8859-14" do
      # describe "ISO-8859-15" do
      # describe "ISO-8859-16" do
    end

    describe "Windows-*" do
      describe "Windows-1252" do
        let(:encoding) { "Windows-1252" }

        it "is always valid" do
          assert valid? "\x80"
        end

        it "is assigned or not" do
          assert assigned? "\x21"
          refute assigned? "\x81"
        end

        it "is control or not" do
          assert control? "\x1E"
          refute control? "\x67"
        end

        it "is blank or not" do
          assert blank? "\x20"
          refute blank? "\x21"
        end
      end

      # TODO: the remaining Windows code pages are not covered yet
      #
      # describe "Windows-1250" do
      # describe "Windows-1251" do
      # describe "Windows-1253" do
      # describe "Windows-1254" do
      # describe "Windows-1255" do
      # describe "Windows-1256" do
      # describe "Windows-1257" do
      # describe "Windows-1258" do
    end
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: characteristics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jan Lelis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: unicode-categories
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.1'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.2
33
+ description: A Ruby library which provides some basic information about how characters
34
+ behave in different encodings.
35
+ email:
36
+ - mail@janlelis.de
37
+ executables: []
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".gitignore"
42
+ - ".travis.yml"
43
+ - CHANGELOG.md
44
+ - CODE_OF_CONDUCT.md
45
+ - Gemfile
46
+ - MIT-LICENSE.txt
47
+ - README.md
48
+ - Rakefile
49
+ - characteristics.gemspec
50
+ - lib/characteristics.rb
51
+ - lib/characteristics/ascii.rb
52
+ - lib/characteristics/binary.rb
53
+ - lib/characteristics/byte.rb
54
+ - lib/characteristics/unicode.rb
55
+ - lib/characteristics/version.rb
56
+ - spec/characteristics_spec.rb
57
+ homepage: https://github.com/janlelis/characteristics
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '2.0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.6.8
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: Basic character properties.
81
+ test_files:
82
+ - spec/characteristics_spec.rb