characteristics 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +4 -4
- data/lib/characteristics.rb +1 -1
- data/lib/characteristics/byte.rb +97 -35
- data/lib/characteristics/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
|
4
|
+
data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
|
7
|
+
data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -32,10 +32,10 @@ char_info.blank? # => true / false
|
|
32
32
|
|
33
33
|
This library knows of four different kinds of encodings:
|
34
34
|
|
35
|
-
- **:unicode** Unicode familiy of multibyte encodings (*UTF
|
35
|
+
- **:unicode** Unicode family of multibyte encodings (*UTF-X*)
|
36
36
|
- **:ascii** 7-Bit ASCII (*US-ASCII*)
|
37
37
|
- **:binary** Arbitrary string (*ASCII-8BIT*)
|
38
|
-
- **:byte
|
38
|
+
- **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
|
39
39
|
|
40
40
|
Other encodings are not supported, yet.
|
41
41
|
|
@@ -47,11 +47,11 @@ Validness is determined by Ruby's String#valid_encoding?
|
|
47
47
|
|
48
48
|
### `unicode?`
|
49
49
|
|
50
|
-
`true` for Unicode encodings (`UTF
|
50
|
+
`true` for Unicode encodings (`UTF-X`)
|
51
51
|
|
52
52
|
### `control?`
|
53
53
|
|
54
|
-
Control characters are codepoints in the is C0, delete or C1 control character range.
|
54
|
+
Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
|
55
55
|
|
56
56
|
### `assigned?`
|
57
57
|
|
data/lib/characteristics.rb
CHANGED
data/lib/characteristics/byte.rb
CHANGED
@@ -2,40 +2,102 @@ class ByteCharacteristics < Characteristics
|
|
2
2
|
HAS_C1 = /^(ISO-8859-)/
|
3
3
|
|
4
4
|
UNASSIGNED = {
|
5
|
-
0x81 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
|
6
|
-
0x83 => /^Windows-(1250|1257)/,
|
7
|
-
0x88 => /^Windows-(1250|1253|1257)/,
|
8
|
-
0x8A => /^Windows-(1253|1255|1257|1258)/,
|
9
|
-
0x8C => /^Windows-(1253|1255|1257)/,
|
10
|
-
0x8D => /^Windows-(1252|1253|1254|1255|1258)/,
|
11
|
-
0x8E => /^Windows-(1253|1254|1255|1258)/,
|
12
|
-
0x8F => /^Windows-(1252|1253|1254|1255|1258)/,
|
13
|
-
|
14
|
-
0x90 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
|
15
|
-
0x98 => /^Windows-(1250|1251|1253|1257)/,
|
16
|
-
0x9A => /^Windows-(1253|1255|1257|1258)/,
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
5
|
+
0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
|
6
|
+
0x83 => /^(Windows-(1250|1257))/,
|
7
|
+
0x88 => /^(Windows-(1250|1253|1257))/,
|
8
|
+
0x8A => /^(Windows-(1253|1255|1257|1258))/,
|
9
|
+
0x8C => /^(Windows-(1253|1255|1257))/,
|
10
|
+
0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
|
11
|
+
0x8E => /^(Windows-(1253|1254|1255|1258))/,
|
12
|
+
0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
|
13
|
+
|
14
|
+
0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
|
15
|
+
0x98 => /^(Windows-(1250|1251|1253|1257))/,
|
16
|
+
0x9A => /^(Windows-(1253|1255|1257|1258))/,
|
17
|
+
0x9C => /^(Windows-(1253|1255|1257))/,
|
18
|
+
0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
|
19
|
+
0x9E => /^(Windows-(1253|1254|1255|1258))/,
|
20
|
+
0x9F => /^(Windows-(1253|1255|1257))/,
|
21
|
+
|
22
|
+
0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
|
23
|
+
0xA2 => /^(ISO-8859-(6))/,
|
24
|
+
0xA3 => /^(ISO-8859-(6))/,
|
25
|
+
0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
|
26
|
+
0xA6 => /^(ISO-8859-(6))/,
|
27
|
+
0xA7 => /^(ISO-8859-(6))/,
|
28
|
+
0xA8 => /^(ISO-8859-(6))/,
|
29
|
+
0xA9 => /^(ISO-8859-(6))/,
|
30
|
+
0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
|
31
|
+
0xAB => /^(ISO-8859-(6))/,
|
32
|
+
0xAE => /^(ISO-8859-(3|6|7))/,
|
33
|
+
0xAF => /^(ISO-8859-(6))/,
|
34
|
+
|
35
|
+
0xB0 => /^(ISO-8859-(6))/,
|
36
|
+
0xB1 => /^(ISO-8859-(6))/,
|
37
|
+
0xB2 => /^(ISO-8859-(6))/,
|
38
|
+
0xB3 => /^(ISO-8859-(6))/,
|
39
|
+
0xB4 => /^(ISO-8859-(6))/,
|
40
|
+
0xB5 => /^(ISO-8859-(6))/,
|
41
|
+
0xB6 => /^(ISO-8859-(6))/,
|
42
|
+
0xB7 => /^(ISO-8859-(6))/,
|
43
|
+
0xB8 => /^(ISO-8859-(6))/,
|
44
|
+
0xB9 => /^(ISO-8859-(6))/,
|
45
|
+
0xBA => /^(ISO-8859-(6))/,
|
46
|
+
0xBC => /^(ISO-8859-(6))/,
|
47
|
+
0xBD => /^(ISO-8859-(6))/,
|
48
|
+
0xBE => /^(ISO-8859-(3|6))/,
|
49
|
+
0xBF => /^(ISO-8859-(8))/,
|
50
|
+
|
51
|
+
0xC0 => /^(ISO-8859-(6|8))/,
|
52
|
+
0xC1 => /^(ISO-8859-(8))/,
|
53
|
+
0xC2 => /^(ISO-8859-(8))/,
|
54
|
+
0xC3 => /^(ISO-8859-(3|8))/,
|
55
|
+
0xC4 => /^(ISO-8859-(8))/,
|
56
|
+
0xC5 => /^(ISO-8859-(8))/,
|
57
|
+
0xC6 => /^(ISO-8859-(8))/,
|
58
|
+
0xC7 => /^(ISO-8859-(8))/,
|
59
|
+
0xC8 => /^(ISO-8859-(8))/,
|
60
|
+
0xC9 => /^(ISO-8859-(8))/,
|
61
|
+
0xCA => /^(ISO-8859-(8))/,
|
62
|
+
0xCB => /^(ISO-8859-(8))/,
|
63
|
+
0xCC => /^(ISO-8859-(8))/,
|
64
|
+
0xCD => /^(ISO-8859-(8))/,
|
65
|
+
0xCE => /^(ISO-8859-(8))/,
|
66
|
+
0xCF => /^(ISO-8859-(8))/,
|
67
|
+
|
68
|
+
0xD0 => /^(ISO-8859-(3|8))/,
|
69
|
+
0xD1 => /^(ISO-8859-(8))/,
|
70
|
+
0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
|
71
|
+
0xD3 => /^(ISO-8859-(8))/,
|
72
|
+
0xD4 => /^(ISO-8859-(8))/,
|
73
|
+
0xD5 => /^(ISO-8859-(8))/,
|
74
|
+
0xD6 => /^(ISO-8859-(8))/,
|
75
|
+
0xD7 => /^(ISO-8859-(8))/,
|
76
|
+
0xD8 => /^(ISO-8859-(8))/,
|
77
|
+
0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
|
78
|
+
0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
|
79
|
+
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
80
|
+
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
81
|
+
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
82
|
+
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
83
|
+
0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
|
84
|
+
|
85
|
+
0xE3 => /^(ISO-8859-(3))/,
|
86
|
+
|
87
|
+
0xF0 => /^(ISO-8859-(3))/,
|
88
|
+
0xF3 => /^(ISO-8859-(6))/,
|
89
|
+
0xF4 => /^(ISO-8859-(6))/,
|
90
|
+
0xF5 => /^(ISO-8859-(6))/,
|
91
|
+
0xF6 => /^(ISO-8859-(6))/,
|
92
|
+
0xF7 => /^(ISO-8859-(6))/,
|
93
|
+
0xF8 => /^(ISO-8859-(6))/,
|
94
|
+
0xF9 => /^(ISO-8859-(6))/,
|
95
|
+
0xFA => /^(ISO-8859-(6))/,
|
96
|
+
0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
|
97
|
+
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
98
|
+
0xFD => /^(ISO-8859-(6|11))/,
|
99
|
+
0xFE => /^(ISO-8859-(6|11))/,
|
100
|
+
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
|
39
101
|
}.freeze
|
40
102
|
|
41
103
|
BLANKS = [
|
@@ -51,7 +113,7 @@ class ByteCharacteristics < Characteristics
|
|
51
113
|
].freeze
|
52
114
|
|
53
115
|
EXTRA_BLANKS = {
|
54
|
-
0xA0 => /^(ISO-8859-|Windows-)/,
|
116
|
+
0xA0 => /^(ISO-8859-|Windows-125)/,
|
55
117
|
0x9D => /^Windows-(1256)/,
|
56
118
|
0x9F => /^Windows-(1256)/,
|
57
119
|
}.freeze
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: characteristics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-categories
|