ansel_iconv 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -1
- data/README.markdown +4 -0
- data/Rakefile +1 -1
- data/VERSION.yml +2 -2
- data/ansel_iconv.gemspec +2 -2
- data/lib/ansel_iconv.rb +8 -6
- data/test/ansel_iconv_test.rb +52 -50
- data/test/test_helper.rb +11 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.markdown
CHANGED
@@ -9,6 +9,10 @@ Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.
|
|
9
9
|
* Report bugs: <http://github.com/infused/ansel_iconv/issues>
|
10
10
|
* Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
|
11
11
|
with ANSEL::Iconv in the subject line
|
12
|
+
|
13
|
+
## Compatibility
|
14
|
+
|
15
|
+
ANSEL::Iconv is compatible with Ruby 1.8.6, 1.8.7 and 1.9.1
|
12
16
|
|
13
17
|
## Installation
|
14
18
|
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ require 'jeweler'
|
|
8
8
|
|
9
9
|
Jeweler::Tasks.new do |p|
|
10
10
|
p.name = 'ansel_iconv'
|
11
|
-
p.description = 'Convert ANSEL encoded text to any other encoding'
|
11
|
+
p.description = 'Convert ANSEL encoded text to any other encoding available to Iconv'
|
12
12
|
p.summary = 'Convert ANSEL encoded text'
|
13
13
|
p.platform = Gem::Platform::RUBY
|
14
14
|
p.authors = ['Keith Morrison']
|
data/VERSION.yml
CHANGED
data/ansel_iconv.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{ansel_iconv}
|
8
|
-
s.version = "1.0.5"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Keith Morrison"]
|
12
12
|
s.date = %q{2010-04-07}
|
13
|
-
s.description = %q{Convert ANSEL encoded text to any other encoding}
|
13
|
+
s.description = %q{Convert ANSEL encoded text to any other encoding available to Iconv}
|
14
14
|
s.email = %q{keithm@infused.org}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"README.markdown"
|
data/lib/ansel_iconv.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
gem 'activesupport', '=2.3.5'
|
2
4
|
require 'active_support'
|
3
5
|
require 'iconv'
|
@@ -588,15 +590,15 @@ module ANSEL
|
|
588
590
|
until scanner.eos? do
|
589
591
|
byte = scanner.get_byte
|
590
592
|
|
591
|
-
if byte[0] <= 0x7F
|
593
|
+
if byte.unpack('C')[0] <= 0x7F
|
592
594
|
output << byte
|
593
|
-
elsif byte[0] >= 0x88 && byte[0] <= 0xC8
|
594
|
-
hex_key = byte[0].to_s(16).upcase
|
595
|
+
elsif byte.unpack('C')[0] >= 0x88 && byte.unpack('C')[0] <= 0xC8
|
596
|
+
hex_key = byte.unpack('C')[0].to_s(16).upcase
|
595
597
|
output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR'])
|
596
598
|
scanner.get_byte # ignore the next byte
|
597
|
-
elsif byte[0] >= 0xE0 && byte[0] <= 0xFB
|
599
|
+
elsif byte.unpack('C')[0] >= 0xE0 && byte.unpack('C')[0] <= 0xFB
|
598
600
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
599
|
-
bytes = [byte[0].to_s(16).upcase]
|
601
|
+
bytes = [byte.unpack('C')[0].to_s(16).upcase]
|
600
602
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
601
603
|
hex_key = bytes.join("+")
|
602
604
|
if @ansi_to_utf8.has_key?(hex_key)
|
@@ -607,7 +609,7 @@ module ANSEL
|
|
607
609
|
end
|
608
610
|
else
|
609
611
|
output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8['ERR'])
|
610
|
-
scanner.get_byte if scanner.get_byte[0] >= 0xE0 # ignore the next byte
|
612
|
+
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
611
613
|
end
|
612
614
|
end
|
613
615
|
|
data/test/ansel_iconv_test.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
require 'test_helper'
|
2
4
|
|
3
5
|
class ANSEL::IconvTest < Test::Unit::TestCase
|
@@ -13,8 +15,8 @@ class ANSEL::IconvTest < Test::Unit::TestCase
|
|
13
15
|
end
|
14
16
|
|
15
17
|
should "return the unicode replacement character for invalid characters" do
|
16
|
-
assert_equal "�", @ansel.iconv("\xBE\x00")
|
17
|
-
assert_equal "�", @ansel.iconv("\xD1\x00")
|
18
|
+
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xBE\x00")
|
19
|
+
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xD1\x00")
|
18
20
|
end
|
19
21
|
|
20
22
|
should "return UTF-8 characters for valid ANSEL characters" do
|
@@ -23,60 +25,60 @@ class ANSEL::IconvTest < Test::Unit::TestCase
|
|
23
25
|
assert_equal "", @ansel.iconv("\x89\x00")
|
24
26
|
assert_equal "", @ansel.iconv("\x8D\x00")
|
25
27
|
assert_equal "", @ansel.iconv("\x8E\x00")
|
26
|
-
assert_equal "Ł", @ansel.iconv("\xA1\x00")
|
27
|
-
assert_equal "Ø", @ansel.iconv("\xA2\x00")
|
28
|
-
assert_equal "Đ", @ansel.iconv("\xA3\x00")
|
29
|
-
assert_equal "Þ", @ansel.iconv("\xA4\x00")
|
30
|
-
assert_equal "Æ", @ansel.iconv("\xA5\x00")
|
31
|
-
assert_equal "Œ", @ansel.iconv("\xA6\x00")
|
32
|
-
assert_equal "ʹ", @ansel.iconv("\xA7\x00")
|
33
|
-
assert_equal "·", @ansel.iconv("\xA8\x00")
|
34
|
-
assert_equal "♭", @ansel.iconv("\xA9\x00")
|
35
|
-
assert_equal "®", @ansel.iconv("\xAA\x00")
|
36
|
-
assert_equal "±", @ansel.iconv("\xAB\x00")
|
37
|
-
assert_equal "±", @ansel.iconv("\xAB\x00")
|
38
|
-
assert_equal "Ơ", @ansel.iconv("\xAC\x00")
|
39
|
-
assert_equal "Ư", @ansel.iconv("\xAD\x00")
|
40
|
-
assert_equal "ʼ", @ansel.iconv("\xAE\x00")
|
41
|
-
assert_equal "ʻ", @ansel.iconv("\xB0\x00")
|
42
|
-
assert_equal "ł", @ansel.iconv("\xB1\x00")
|
43
|
-
assert_equal "ø", @ansel.iconv("\xB2\x00")
|
44
|
-
assert_equal "đ", @ansel.iconv("\xB3\x00")
|
45
|
-
assert_equal "þ", @ansel.iconv("\xB4\x00")
|
46
|
-
assert_equal "æ", @ansel.iconv("\xB5\x00")
|
47
|
-
assert_equal "œ", @ansel.iconv("\xB6\x00")
|
48
|
-
assert_equal "ʺ", @ansel.iconv("\xB7\x00")
|
49
|
-
assert_equal "ı", @ansel.iconv("\xB8\x00")
|
50
|
-
assert_equal "£", @ansel.iconv("\xB9\x00")
|
51
|
-
assert_equal "ð", @ansel.iconv("\xBA\x00")
|
52
|
-
assert_equal "ơ", @ansel.iconv("\xBC\x00")
|
53
|
-
assert_equal "ư", @ansel.iconv("\xBD\x00")
|
54
|
-
assert_equal "°", @ansel.iconv("\xC0\x00")
|
55
|
-
assert_equal "ℓ", @ansel.iconv("\xC1\x00")
|
56
|
-
assert_equal "℗", @ansel.iconv("\xC2\x00")
|
57
|
-
assert_equal "©", @ansel.iconv("\xC3\x00")
|
58
|
-
assert_equal "♯", @ansel.iconv("\xC4\x00")
|
59
|
-
assert_equal "¿", @ansel.iconv("\xC5\x00")
|
60
|
-
assert_equal "¡", @ansel.iconv("\xC6\x00")
|
61
|
-
assert_equal "ß", @ansel.iconv("\xC7\x00")
|
62
|
-
assert_equal "€", @ansel.iconv("\xC8\x00")
|
28
|
+
assert_equal "Ł".force_encoding('utf-8'), @ansel.iconv("\xA1\x00")
|
29
|
+
assert_equal "Ø".force_encoding('utf-8'), @ansel.iconv("\xA2\x00")
|
30
|
+
assert_equal "Đ".force_encoding('utf-8'), @ansel.iconv("\xA3\x00")
|
31
|
+
assert_equal "Þ".force_encoding('utf-8'), @ansel.iconv("\xA4\x00")
|
32
|
+
assert_equal "Æ".force_encoding('utf-8'), @ansel.iconv("\xA5\x00")
|
33
|
+
assert_equal "Œ".force_encoding('utf-8'), @ansel.iconv("\xA6\x00")
|
34
|
+
assert_equal "ʹ".force_encoding('utf-8'), @ansel.iconv("\xA7\x00")
|
35
|
+
assert_equal "·".force_encoding('utf-8'), @ansel.iconv("\xA8\x00")
|
36
|
+
assert_equal "♭".force_encoding('utf-8'), @ansel.iconv("\xA9\x00")
|
37
|
+
assert_equal "®".force_encoding('utf-8'), @ansel.iconv("\xAA\x00")
|
38
|
+
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
39
|
+
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
40
|
+
assert_equal "Ơ".force_encoding('utf-8'), @ansel.iconv("\xAC\x00")
|
41
|
+
assert_equal "Ư".force_encoding('utf-8'), @ansel.iconv("\xAD\x00")
|
42
|
+
assert_equal "ʼ".force_encoding('utf-8'), @ansel.iconv("\xAE\x00")
|
43
|
+
assert_equal "ʻ".force_encoding('utf-8'), @ansel.iconv("\xB0\x00")
|
44
|
+
assert_equal "ł".force_encoding('utf-8'), @ansel.iconv("\xB1\x00")
|
45
|
+
assert_equal "ø".force_encoding('utf-8'), @ansel.iconv("\xB2\x00")
|
46
|
+
assert_equal "đ".force_encoding('utf-8'), @ansel.iconv("\xB3\x00")
|
47
|
+
assert_equal "þ".force_encoding('utf-8'), @ansel.iconv("\xB4\x00")
|
48
|
+
assert_equal "æ".force_encoding('utf-8'), @ansel.iconv("\xB5\x00")
|
49
|
+
assert_equal "œ".force_encoding('utf-8'), @ansel.iconv("\xB6\x00")
|
50
|
+
assert_equal "ʺ".force_encoding('utf-8'), @ansel.iconv("\xB7\x00")
|
51
|
+
assert_equal "ı".force_encoding('utf-8'), @ansel.iconv("\xB8\x00")
|
52
|
+
assert_equal "£".force_encoding('utf-8'), @ansel.iconv("\xB9\x00")
|
53
|
+
assert_equal "ð".force_encoding('utf-8'), @ansel.iconv("\xBA\x00")
|
54
|
+
assert_equal "ơ".force_encoding('utf-8'), @ansel.iconv("\xBC\x00")
|
55
|
+
assert_equal "ư".force_encoding('utf-8'), @ansel.iconv("\xBD\x00")
|
56
|
+
assert_equal "°".force_encoding('utf-8'), @ansel.iconv("\xC0\x00")
|
57
|
+
assert_equal "ℓ".force_encoding('utf-8'), @ansel.iconv("\xC1\x00")
|
58
|
+
assert_equal "℗".force_encoding('utf-8'), @ansel.iconv("\xC2\x00")
|
59
|
+
assert_equal "©".force_encoding('utf-8'), @ansel.iconv("\xC3\x00")
|
60
|
+
assert_equal "♯".force_encoding('utf-8'), @ansel.iconv("\xC4\x00")
|
61
|
+
assert_equal "¿".force_encoding('utf-8'), @ansel.iconv("\xC5\x00")
|
62
|
+
assert_equal "¡".force_encoding('utf-8'), @ansel.iconv("\xC6\x00")
|
63
|
+
assert_equal "ß".force_encoding('utf-8'), @ansel.iconv("\xC7\x00")
|
64
|
+
assert_equal "€".force_encoding('utf-8'), @ansel.iconv("\xC8\x00")
|
63
65
|
|
64
66
|
# ANSEL combining characters
|
65
|
-
assert_equal "Ả", @ansel.iconv("\xE0\x41")
|
66
|
-
assert_equal "Ḻ", @ansel.iconv("\xF6\x4C")
|
67
|
-
assert_equal "̲", @ansel.iconv("\xF6")
|
68
|
-
assert_equal "̮", @ansel.iconv("\xF9")
|
69
|
-
assert_equal "Ḫ", @ansel.iconv("\xF9\x48")
|
70
|
-
assert_equal "Ậ", @ansel.iconv("\xF2\xE3\x41")
|
71
|
-
assert_equal "ỵ", @ansel.iconv("\xF2\x79")
|
72
|
-
assert_equal "̣", @ansel.iconv("\xF2")
|
67
|
+
assert_equal "Ả".force_encoding('utf-8'), @ansel.iconv("\xE0\x41")
|
68
|
+
assert_equal "Ḻ".force_encoding('utf-8'), @ansel.iconv("\xF6\x4C")
|
69
|
+
assert_equal "̲".force_encoding('utf-8'), @ansel.iconv("\xF6")
|
70
|
+
assert_equal "̮".force_encoding('utf-8'), @ansel.iconv("\xF9")
|
71
|
+
assert_equal "Ḫ".force_encoding('utf-8'), @ansel.iconv("\xF9\x48")
|
72
|
+
assert_equal "Ậ".force_encoding('utf-8'), @ansel.iconv("\xF2\xE3\x41")
|
73
|
+
assert_equal "ỵ".force_encoding('utf-8'), @ansel.iconv("\xF2\x79")
|
74
|
+
assert_equal "̣".force_encoding('utf-8'), @ansel.iconv("\xF2")
|
73
75
|
end
|
74
76
|
|
75
77
|
should "convert full text correctly" do
|
76
78
|
assert_equal "What is the question?", @ansel.iconv("What is the question?")
|
77
|
-
assert_equal "¿What is the question?", @ansel.iconv("\xC5\x00What is the question?")
|
78
|
-
assert_equal "© 1994", @ansel.iconv("\xC3\x00 1994")
|
79
|
-
assert_equal "£4.59", @ansel.iconv("\xB9\x004.59")
|
79
|
+
assert_equal "¿What is the question?".force_encoding('utf-8'), @ansel.iconv("\xC5\x00What is the question?")
|
80
|
+
assert_equal "© 1994".force_encoding('utf-8'), @ansel.iconv("\xC3\x00 1994")
|
81
|
+
assert_equal "£4.59".force_encoding('utf-8'), @ansel.iconv("\xB9\x004.59")
|
80
82
|
end
|
81
83
|
|
82
84
|
should "convert ANSEL to UTF-16" do
|
data/test/test_helper.rb
CHANGED
@@ -1,5 +1,15 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
$:.unshift(File.dirname(__FILE__) + "/../lib/")
|
2
4
|
require 'rubygems'
|
3
5
|
require 'test/unit'
|
4
6
|
require 'shoulda'
|
5
|
-
require 'ansel_iconv'
|
7
|
+
require 'ansel_iconv'
|
8
|
+
|
9
|
+
if RUBY_VERSION < '1.9'
|
10
|
+
class String
|
11
|
+
def force_encoding(e)
|
12
|
+
self
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
- 5
|
9
|
-
version: 1.0.5
|
9
|
+
version: 1.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Keith Morrison
|
@@ -31,7 +31,7 @@ dependencies:
|
|
31
31
|
version: 2.3.5
|
32
32
|
type: :runtime
|
33
33
|
version_requirements: *id001
|
34
|
-
description: Convert ANSEL encoded text to any other encoding
|
34
|
+
description: Convert ANSEL encoded text to any other encoding available to Iconv
|
35
35
|
email: keithm@infused.org
|
36
36
|
executables: []
|
37
37
|
|