ansel 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +4 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +22 -4
- data/Gemfile.travis +0 -4
- data/Gemfile.travis19 +7 -0
- data/{MIT-LICENSE → LICENSE} +1 -1
- data/README.md +13 -19
- data/ansel.gemspec +1 -1
- data/lib/ansel/character_map.rb +564 -562
- data/lib/ansel/converter.rb +10 -13
- data/lib/ansel/version.rb +1 -1
- metadata +6 -6
data/lib/ansel/converter.rb
CHANGED
@@ -6,8 +6,7 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
-
@encoding_converter = Encoding::Converter.new(
|
10
|
-
@ansi_to_utf16_map = @@non_combining.merge(@@combining)
|
9
|
+
@encoding_converter = Encoding::Converter.new('UTF-16BE', 'UTF-8')
|
11
10
|
end
|
12
11
|
|
13
12
|
def utf16_to_utf8(string)
|
@@ -17,7 +16,7 @@ module ANSEL
|
|
17
16
|
def convert(string)
|
18
17
|
output = ''
|
19
18
|
scanner = StringScanner.new(string)
|
20
|
-
until scanner.eos?
|
19
|
+
until scanner.eos?
|
21
20
|
byte = scanner.get_byte
|
22
21
|
char = byte.unpack('C')[0]
|
23
22
|
char_hex = char.to_s(16).upcase
|
@@ -26,27 +25,25 @@ module ANSEL
|
|
26
25
|
when 0x00..0x7F
|
27
26
|
output << byte
|
28
27
|
when 0x88..0xC8
|
29
|
-
output << utf16_to_utf8(
|
30
|
-
scanner.get_byte
|
28
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP[char_hex] || ANSI_TO_UTF16_MAP['ERR'])
|
29
|
+
last_byte = scanner.get_byte if scanner.check(/\00/)
|
31
30
|
when 0xE0..0xFB
|
32
31
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
33
32
|
bytes = [char_hex]
|
34
|
-
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
33
|
+
scanner.peek(n).each_byte { |b| bytes << b.to_s(16).upcase }
|
35
34
|
hex_key = bytes.join('+')
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
35
|
+
next unless ANSI_TO_UTF16_MAP.key?(hex_key)
|
36
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP[hex_key])
|
37
|
+
n.times { scanner.get_byte }
|
38
|
+
break
|
41
39
|
end
|
42
40
|
else
|
43
|
-
output << utf16_to_utf8(
|
41
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP['ERR'])
|
44
42
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
48
46
|
output
|
49
47
|
end
|
50
|
-
|
51
48
|
end
|
52
49
|
end
|
data/lib/ansel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keith Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Convert ANSEL encoded text to UTF-8
|
14
14
|
email: keithm@infused.org
|
@@ -17,13 +17,14 @@ extensions: []
|
|
17
17
|
extra_rdoc_files:
|
18
18
|
- README.md
|
19
19
|
- CHANGELOG.md
|
20
|
-
- MIT-LICENSE
|
20
|
+
- LICENSE
|
21
21
|
files:
|
22
22
|
- CHANGELOG.md
|
23
23
|
- Gemfile
|
24
24
|
- Gemfile.lock
|
25
25
|
- Gemfile.travis
|
26
|
-
- MIT-LICENSE
|
26
|
+
- Gemfile.travis19
|
27
|
+
- LICENSE
|
27
28
|
- README.md
|
28
29
|
- Rakefile
|
29
30
|
- ansel.gemspec
|
@@ -50,8 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
51
|
- !ruby/object:Gem::Version
|
51
52
|
version: 1.3.0
|
52
53
|
requirements: []
|
53
|
-
|
54
|
-
rubygems_version: 2.5.1
|
54
|
+
rubygems_version: 3.0.1
|
55
55
|
signing_key:
|
56
56
|
specification_version: 4
|
57
57
|
summary: Convert ANSEL encoded text to UTF-8
|