characteristics 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +4 -4
- data/lib/characteristics.rb +1 -1
- data/lib/characteristics/byte.rb +97 -35
- data/lib/characteristics/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
|
4
|
+
data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
|
7
|
+
data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -32,10 +32,10 @@ char_info.blank? # => true / false
|
|
32
32
|
|
33
33
|
This library knows of four different kinds of encodings:
|
34
34
|
|
35
|
-
- **:unicode** Unicode familiy of multibyte encodings (*UTF
|
35
|
+
- **:unicode** Unicode family of multibyte encodings (*UTF-X*)
|
36
36
|
- **:ascii** 7-Bit ASCII (*US-ASCII*)
|
37
37
|
- **:binary** Arbitrary string (*ASCII-8BIT*)
|
38
|
-
- **:byte
|
38
|
+
- **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
|
39
39
|
|
40
40
|
Other encodings are not supported, yet.
|
41
41
|
|
@@ -47,11 +47,11 @@ Validness is determined by Ruby's String#valid_encoding?
|
|
47
47
|
|
48
48
|
### `unicode?`
|
49
49
|
|
50
|
-
`true` for Unicode encodings (`UTF
|
50
|
+
`true` for Unicode encodings (`UTF-X`)
|
51
51
|
|
52
52
|
### `control?`
|
53
53
|
|
54
|
-
Control characters are codepoints in the is C0, delete or C1 control character range.
|
54
|
+
Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
|
55
55
|
|
56
56
|
### `assigned?`
|
57
57
|
|
data/lib/characteristics.rb
CHANGED
data/lib/characteristics/byte.rb
CHANGED
@@ -2,40 +2,102 @@ class ByteCharacteristics < Characteristics
|
|
2
2
|
HAS_C1 = /^(ISO-8859-)/
|
3
3
|
|
4
4
|
UNASSIGNED = {
|
5
|
-
0x81 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
|
6
|
-
0x83 => /^Windows-(1250|1257)/,
|
7
|
-
0x88 => /^Windows-(1250|1253|1257)/,
|
8
|
-
0x8A => /^Windows-(1253|1255|1257|1258)/,
|
9
|
-
0x8C => /^Windows-(1253|1255|1257)/,
|
10
|
-
0x8D => /^Windows-(1252|1253|1254|1255|1258)/,
|
11
|
-
0x8E => /^Windows-(1253|1254|1255|1258)/,
|
12
|
-
0x8F => /^Windows-(1252|1253|1254|1255|1258)/,
|
13
|
-
|
14
|
-
0x90 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
|
15
|
-
0x98 => /^Windows-(1250|1251|1253|1257)/,
|
16
|
-
0x9A => /^Windows-(1253|1255|1257|1258)/,
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
5
|
+
0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
|
6
|
+
0x83 => /^(Windows-(1250|1257))/,
|
7
|
+
0x88 => /^(Windows-(1250|1253|1257))/,
|
8
|
+
0x8A => /^(Windows-(1253|1255|1257|1258))/,
|
9
|
+
0x8C => /^(Windows-(1253|1255|1257))/,
|
10
|
+
0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
|
11
|
+
0x8E => /^(Windows-(1253|1254|1255|1258))/,
|
12
|
+
0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
|
13
|
+
|
14
|
+
0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
|
15
|
+
0x98 => /^(Windows-(1250|1251|1253|1257))/,
|
16
|
+
0x9A => /^(Windows-(1253|1255|1257|1258))/,
|
17
|
+
0x9C => /^(Windows-(1253|1255|1257))/,
|
18
|
+
0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
|
19
|
+
0x9E => /^(Windows-(1253|1254|1255|1258))/,
|
20
|
+
0x9F => /^(Windows-(1253|1255|1257))/,
|
21
|
+
|
22
|
+
0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
|
23
|
+
0xA2 => /^(ISO-8859-(6))/,
|
24
|
+
0xA3 => /^(ISO-8859-(6))/,
|
25
|
+
0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
|
26
|
+
0xA6 => /^(ISO-8859-(6))/,
|
27
|
+
0xA7 => /^(ISO-8859-(6))/,
|
28
|
+
0xA8 => /^(ISO-8859-(6))/,
|
29
|
+
0xA9 => /^(ISO-8859-(6))/,
|
30
|
+
0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
|
31
|
+
0xAB => /^(ISO-8859-(6))/,
|
32
|
+
0xAE => /^(ISO-8859-(3|6|7))/,
|
33
|
+
0xAF => /^(ISO-8859-(6))/,
|
34
|
+
|
35
|
+
0xB0 => /^(ISO-8859-(6))/,
|
36
|
+
0xB1 => /^(ISO-8859-(6))/,
|
37
|
+
0xB2 => /^(ISO-8859-(6))/,
|
38
|
+
0xB3 => /^(ISO-8859-(6))/,
|
39
|
+
0xB4 => /^(ISO-8859-(6))/,
|
40
|
+
0xB5 => /^(ISO-8859-(6))/,
|
41
|
+
0xB6 => /^(ISO-8859-(6))/,
|
42
|
+
0xB7 => /^(ISO-8859-(6))/,
|
43
|
+
0xB8 => /^(ISO-8859-(6))/,
|
44
|
+
0xB9 => /^(ISO-8859-(6))/,
|
45
|
+
0xBA => /^(ISO-8859-(6))/,
|
46
|
+
0xBC => /^(ISO-8859-(6))/,
|
47
|
+
0xBD => /^(ISO-8859-(6))/,
|
48
|
+
0xBE => /^(ISO-8859-(3|6))/,
|
49
|
+
0xBF => /^(ISO-8859-(8))/,
|
50
|
+
|
51
|
+
0xC0 => /^(ISO-8859-(6|8))/,
|
52
|
+
0xC1 => /^(ISO-8859-(8))/,
|
53
|
+
0xC2 => /^(ISO-8859-(8))/,
|
54
|
+
0xC3 => /^(ISO-8859-(3|8))/,
|
55
|
+
0xC4 => /^(ISO-8859-(8))/,
|
56
|
+
0xC5 => /^(ISO-8859-(8))/,
|
57
|
+
0xC6 => /^(ISO-8859-(8))/,
|
58
|
+
0xC7 => /^(ISO-8859-(8))/,
|
59
|
+
0xC8 => /^(ISO-8859-(8))/,
|
60
|
+
0xC9 => /^(ISO-8859-(8))/,
|
61
|
+
0xCA => /^(ISO-8859-(8))/,
|
62
|
+
0xCB => /^(ISO-8859-(8))/,
|
63
|
+
0xCC => /^(ISO-8859-(8))/,
|
64
|
+
0xCD => /^(ISO-8859-(8))/,
|
65
|
+
0xCE => /^(ISO-8859-(8))/,
|
66
|
+
0xCF => /^(ISO-8859-(8))/,
|
67
|
+
|
68
|
+
0xD0 => /^(ISO-8859-(3|8))/,
|
69
|
+
0xD1 => /^(ISO-8859-(8))/,
|
70
|
+
0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
|
71
|
+
0xD3 => /^(ISO-8859-(8))/,
|
72
|
+
0xD4 => /^(ISO-8859-(8))/,
|
73
|
+
0xD5 => /^(ISO-8859-(8))/,
|
74
|
+
0xD6 => /^(ISO-8859-(8))/,
|
75
|
+
0xD7 => /^(ISO-8859-(8))/,
|
76
|
+
0xD8 => /^(ISO-8859-(8))/,
|
77
|
+
0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
|
78
|
+
0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
|
79
|
+
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
80
|
+
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
81
|
+
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
82
|
+
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
83
|
+
0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
|
84
|
+
|
85
|
+
0xE3 => /^(ISO-8859-(3))/,
|
86
|
+
|
87
|
+
0xF0 => /^(ISO-8859-(3))/,
|
88
|
+
0xF3 => /^(ISO-8859-(6))/,
|
89
|
+
0xF4 => /^(ISO-8859-(6))/,
|
90
|
+
0xF5 => /^(ISO-8859-(6))/,
|
91
|
+
0xF6 => /^(ISO-8859-(6))/,
|
92
|
+
0xF7 => /^(ISO-8859-(6))/,
|
93
|
+
0xF8 => /^(ISO-8859-(6))/,
|
94
|
+
0xF9 => /^(ISO-8859-(6))/,
|
95
|
+
0xFA => /^(ISO-8859-(6))/,
|
96
|
+
0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
|
97
|
+
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
98
|
+
0xFD => /^(ISO-8859-(6|11))/,
|
99
|
+
0xFE => /^(ISO-8859-(6|11))/,
|
100
|
+
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
|
39
101
|
}.freeze
|
40
102
|
|
41
103
|
BLANKS = [
|
@@ -51,7 +113,7 @@ class ByteCharacteristics < Characteristics
|
|
51
113
|
].freeze
|
52
114
|
|
53
115
|
EXTRA_BLANKS = {
|
54
|
-
0xA0 => /^(ISO-8859-|Windows-)/,
|
116
|
+
0xA0 => /^(ISO-8859-|Windows-125)/,
|
55
117
|
0x9D => /^Windows-(1256)/,
|
56
118
|
0x9F => /^Windows-(1256)/,
|
57
119
|
}.freeze
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: characteristics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-categories
|