ansel 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +4 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +22 -4
- data/Gemfile.travis +0 -4
- data/Gemfile.travis19 +7 -0
- data/{MIT-LICENSE → LICENSE} +1 -1
- data/README.md +13 -19
- data/ansel.gemspec +1 -1
- data/lib/ansel/character_map.rb +564 -562
- data/lib/ansel/converter.rb +10 -13
- data/lib/ansel/version.rb +1 -1
- metadata +6 -6
data/lib/ansel/converter.rb
CHANGED
@@ -6,8 +6,7 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
-
@encoding_converter = Encoding::Converter.new(
|
10
|
-
@ansi_to_utf16_map = @@non_combining.merge(@@combining)
|
9
|
+
@encoding_converter = Encoding::Converter.new('UTF-16BE', 'UTF-8')
|
11
10
|
end
|
12
11
|
|
13
12
|
def utf16_to_utf8(string)
|
@@ -17,7 +16,7 @@ module ANSEL
|
|
17
16
|
def convert(string)
|
18
17
|
output = ''
|
19
18
|
scanner = StringScanner.new(string)
|
20
|
-
until scanner.eos?
|
19
|
+
until scanner.eos?
|
21
20
|
byte = scanner.get_byte
|
22
21
|
char = byte.unpack('C')[0]
|
23
22
|
char_hex = char.to_s(16).upcase
|
@@ -26,27 +25,25 @@ module ANSEL
|
|
26
25
|
when 0x00..0x7F
|
27
26
|
output << byte
|
28
27
|
when 0x88..0xC8
|
29
|
-
output << utf16_to_utf8(
|
30
|
-
scanner.get_byte
|
28
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP[char_hex] || ANSI_TO_UTF16_MAP['ERR'])
|
29
|
+
last_byte = scanner.get_byte if scanner.check(/\00/)
|
31
30
|
when 0xE0..0xFB
|
32
31
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
33
32
|
bytes = [char_hex]
|
34
|
-
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
33
|
+
scanner.peek(n).each_byte { |b| bytes << b.to_s(16).upcase }
|
35
34
|
hex_key = bytes.join('+')
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
end
|
35
|
+
next unless ANSI_TO_UTF16_MAP.key?(hex_key)
|
36
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP[hex_key])
|
37
|
+
n.times { scanner.get_byte }
|
38
|
+
break
|
41
39
|
end
|
42
40
|
else
|
43
|
-
output << utf16_to_utf8(
|
41
|
+
output << utf16_to_utf8(ANSI_TO_UTF16_MAP['ERR'])
|
44
42
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
48
46
|
output
|
49
47
|
end
|
50
|
-
|
51
48
|
end
|
52
49
|
end
|
data/lib/ansel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keith Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Convert ANSEL encoded text to UTF-8
|
14
14
|
email: keithm@infused.org
|
@@ -17,13 +17,14 @@ extensions: []
|
|
17
17
|
extra_rdoc_files:
|
18
18
|
- README.md
|
19
19
|
- CHANGELOG.md
|
20
|
-
- MIT-LICENSE
|
20
|
+
- LICENSE
|
21
21
|
files:
|
22
22
|
- CHANGELOG.md
|
23
23
|
- Gemfile
|
24
24
|
- Gemfile.lock
|
25
25
|
- Gemfile.travis
|
26
|
-
- MIT-LICENSE
|
26
|
+
- Gemfile.travis19
|
27
|
+
- LICENSE
|
27
28
|
- README.md
|
28
29
|
- Rakefile
|
29
30
|
- ansel.gemspec
|
@@ -50,8 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
51
|
- !ruby/object:Gem::Version
|
51
52
|
version: 1.3.0
|
52
53
|
requirements: []
|
53
|
-
|
54
|
-
rubygems_version: 2.5.1
|
54
|
+
rubygems_version: 3.0.1
|
55
55
|
signing_key:
|
56
56
|
specification_version: 4
|
57
57
|
summary: Convert ANSEL encoded text to UTF-8
|