ansel_iconv 1.0.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -1
- data/README.markdown +4 -0
- data/Rakefile +1 -1
- data/VERSION.yml +2 -2
- data/ansel_iconv.gemspec +2 -2
- data/lib/ansel_iconv.rb +8 -6
- data/test/ansel_iconv_test.rb +52 -50
- data/test/test_helper.rb +11 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.markdown
CHANGED
@@ -9,6 +9,10 @@ Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.
|
|
9
9
|
* Report bugs: <http://github.com/infused/ansel_iconv/issues>
|
10
10
|
* Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
|
11
11
|
with ANSEL::Iconv in the subject line
|
12
|
+
|
13
|
+
## Compatibility
|
14
|
+
|
15
|
+
ANSEL::Iconv is compatible with Ruby 1.8.6, 1.8.7 and 1.9.1
|
12
16
|
|
13
17
|
## Installation
|
14
18
|
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ require 'jeweler'
|
|
8
8
|
|
9
9
|
Jeweler::Tasks.new do |p|
|
10
10
|
p.name = 'ansel_iconv'
|
11
|
-
p.description = 'Convert ANSEL encoded text to any other encoding'
|
11
|
+
p.description = 'Convert ANSEL encoded text to any other encoding available to Iconv'
|
12
12
|
p.summary = 'Convert ANSEL encoded text'
|
13
13
|
p.platform = Gem::Platform::RUBY
|
14
14
|
p.authors = ['Keith Morrison']
|
data/VERSION.yml
CHANGED
data/ansel_iconv.gemspec
CHANGED
@@ -5,12 +5,12 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{ansel_iconv}
|
8
|
-
s.version = "1.0.5"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Keith Morrison"]
|
12
12
|
s.date = %q{2010-04-07}
|
13
|
-
s.description = %q{Convert ANSEL encoded text to any other encoding}
|
13
|
+
s.description = %q{Convert ANSEL encoded text to any other encoding available to Iconv}
|
14
14
|
s.email = %q{keithm@infused.org}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"README.markdown"
|
data/lib/ansel_iconv.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
gem 'activesupport', '=2.3.5'
|
2
4
|
require 'active_support'
|
3
5
|
require 'iconv'
|
@@ -588,15 +590,15 @@ module ANSEL
|
|
588
590
|
until scanner.eos? do
|
589
591
|
byte = scanner.get_byte
|
590
592
|
|
591
|
-
if byte[0] <= 0x7F
|
593
|
+
if byte.unpack('C')[0] <= 0x7F
|
592
594
|
output << byte
|
593
|
-
elsif byte[0] >= 0x88 && byte[0] <= 0xC8
|
594
|
-
hex_key = byte[0].to_s(16).upcase
|
595
|
+
elsif byte.unpack('C')[0] >= 0x88 && byte.unpack('C')[0] <= 0xC8
|
596
|
+
hex_key = byte.unpack('C')[0].to_s(16).upcase
|
595
597
|
output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8.has_key?(hex_key) ? @ansi_to_utf8[hex_key] : @ansi_to_utf8['ERR'])
|
596
598
|
scanner.get_byte # ignore the next byte
|
597
|
-
elsif byte[0] >= 0xE0 && byte[0] <= 0xFB
|
599
|
+
elsif byte.unpack('C')[0] >= 0xE0 && byte.unpack('C')[0] <= 0xFB
|
598
600
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
599
|
-
bytes = [byte[0].to_s(16).upcase]
|
601
|
+
bytes = [byte.unpack('C')[0].to_s(16).upcase]
|
600
602
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
601
603
|
hex_key = bytes.join("+")
|
602
604
|
if @ansi_to_utf8.has_key?(hex_key)
|
@@ -607,7 +609,7 @@ module ANSEL
|
|
607
609
|
end
|
608
610
|
else
|
609
611
|
output << ::Iconv.conv(@to_charset, 'UTF-16', @ansi_to_utf8['ERR'])
|
610
|
-
scanner.get_byte if scanner.get_byte[0] >= 0xE0 # ignore the next byte
|
612
|
+
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
611
613
|
end
|
612
614
|
end
|
613
615
|
|
data/test/ansel_iconv_test.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
require 'test_helper'
|
2
4
|
|
3
5
|
class ANSEL::IconvTest < Test::Unit::TestCase
|
@@ -13,8 +15,8 @@ class ANSEL::IconvTest < Test::Unit::TestCase
|
|
13
15
|
end
|
14
16
|
|
15
17
|
should "return the unicode replacement character for invalid characters" do
|
16
|
-
assert_equal "�", @ansel.iconv("\xBE\x00")
|
17
|
-
assert_equal "�", @ansel.iconv("\xD1\x00")
|
18
|
+
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xBE\x00")
|
19
|
+
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xD1\x00")
|
18
20
|
end
|
19
21
|
|
20
22
|
should "return UTF-8 characters for valid ANSEL characters" do
|
@@ -23,60 +25,60 @@ class ANSEL::IconvTest < Test::Unit::TestCase
|
|
23
25
|
assert_equal "", @ansel.iconv("\x89\x00")
|
24
26
|
assert_equal "", @ansel.iconv("\x8D\x00")
|
25
27
|
assert_equal "", @ansel.iconv("\x8E\x00")
|
26
|
-
assert_equal "Ł", @ansel.iconv("\xA1\x00")
|
27
|
-
assert_equal "Ø", @ansel.iconv("\xA2\x00")
|
28
|
-
assert_equal "Đ", @ansel.iconv("\xA3\x00")
|
29
|
-
assert_equal "Þ", @ansel.iconv("\xA4\x00")
|
30
|
-
assert_equal "Æ", @ansel.iconv("\xA5\x00")
|
31
|
-
assert_equal "Œ", @ansel.iconv("\xA6\x00")
|
32
|
-
assert_equal "ʹ", @ansel.iconv("\xA7\x00")
|
33
|
-
assert_equal "·", @ansel.iconv("\xA8\x00")
|
34
|
-
assert_equal "♭", @ansel.iconv("\xA9\x00")
|
35
|
-
assert_equal "®", @ansel.iconv("\xAA\x00")
|
36
|
-
assert_equal "±", @ansel.iconv("\xAB\x00")
|
37
|
-
assert_equal "±", @ansel.iconv("\xAB\x00")
|
38
|
-
assert_equal "Ơ", @ansel.iconv("\xAC\x00")
|
39
|
-
assert_equal "Ư", @ansel.iconv("\xAD\x00")
|
40
|
-
assert_equal "ʼ", @ansel.iconv("\xAE\x00")
|
41
|
-
assert_equal "ʻ", @ansel.iconv("\xB0\x00")
|
42
|
-
assert_equal "ł", @ansel.iconv("\xB1\x00")
|
43
|
-
assert_equal "ø", @ansel.iconv("\xB2\x00")
|
44
|
-
assert_equal "đ", @ansel.iconv("\xB3\x00")
|
45
|
-
assert_equal "þ", @ansel.iconv("\xB4\x00")
|
46
|
-
assert_equal "æ", @ansel.iconv("\xB5\x00")
|
47
|
-
assert_equal "œ", @ansel.iconv("\xB6\x00")
|
48
|
-
assert_equal "ʺ", @ansel.iconv("\xB7\x00")
|
49
|
-
assert_equal "ı", @ansel.iconv("\xB8\x00")
|
50
|
-
assert_equal "£", @ansel.iconv("\xB9\x00")
|
51
|
-
assert_equal "ð", @ansel.iconv("\xBA\x00")
|
52
|
-
assert_equal "ơ", @ansel.iconv("\xBC\x00")
|
53
|
-
assert_equal "ư", @ansel.iconv("\xBD\x00")
|
54
|
-
assert_equal "°", @ansel.iconv("\xC0\x00")
|
55
|
-
assert_equal "ℓ", @ansel.iconv("\xC1\x00")
|
56
|
-
assert_equal "℗", @ansel.iconv("\xC2\x00")
|
57
|
-
assert_equal "©", @ansel.iconv("\xC3\x00")
|
58
|
-
assert_equal "♯", @ansel.iconv("\xC4\x00")
|
59
|
-
assert_equal "¿", @ansel.iconv("\xC5\x00")
|
60
|
-
assert_equal "¡", @ansel.iconv("\xC6\x00")
|
61
|
-
assert_equal "ß", @ansel.iconv("\xC7\x00")
|
62
|
-
assert_equal "€", @ansel.iconv("\xC8\x00")
|
28
|
+
assert_equal "Ł".force_encoding('utf-8'), @ansel.iconv("\xA1\x00")
|
29
|
+
assert_equal "Ø".force_encoding('utf-8'), @ansel.iconv("\xA2\x00")
|
30
|
+
assert_equal "Đ".force_encoding('utf-8'), @ansel.iconv("\xA3\x00")
|
31
|
+
assert_equal "Þ".force_encoding('utf-8'), @ansel.iconv("\xA4\x00")
|
32
|
+
assert_equal "Æ".force_encoding('utf-8'), @ansel.iconv("\xA5\x00")
|
33
|
+
assert_equal "Œ".force_encoding('utf-8'), @ansel.iconv("\xA6\x00")
|
34
|
+
assert_equal "ʹ".force_encoding('utf-8'), @ansel.iconv("\xA7\x00")
|
35
|
+
assert_equal "·".force_encoding('utf-8'), @ansel.iconv("\xA8\x00")
|
36
|
+
assert_equal "♭".force_encoding('utf-8'), @ansel.iconv("\xA9\x00")
|
37
|
+
assert_equal "®".force_encoding('utf-8'), @ansel.iconv("\xAA\x00")
|
38
|
+
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
39
|
+
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
40
|
+
assert_equal "Ơ".force_encoding('utf-8'), @ansel.iconv("\xAC\x00")
|
41
|
+
assert_equal "Ư".force_encoding('utf-8'), @ansel.iconv("\xAD\x00")
|
42
|
+
assert_equal "ʼ".force_encoding('utf-8'), @ansel.iconv("\xAE\x00")
|
43
|
+
assert_equal "ʻ".force_encoding('utf-8'), @ansel.iconv("\xB0\x00")
|
44
|
+
assert_equal "ł".force_encoding('utf-8'), @ansel.iconv("\xB1\x00")
|
45
|
+
assert_equal "ø".force_encoding('utf-8'), @ansel.iconv("\xB2\x00")
|
46
|
+
assert_equal "đ".force_encoding('utf-8'), @ansel.iconv("\xB3\x00")
|
47
|
+
assert_equal "þ".force_encoding('utf-8'), @ansel.iconv("\xB4\x00")
|
48
|
+
assert_equal "æ".force_encoding('utf-8'), @ansel.iconv("\xB5\x00")
|
49
|
+
assert_equal "œ".force_encoding('utf-8'), @ansel.iconv("\xB6\x00")
|
50
|
+
assert_equal "ʺ".force_encoding('utf-8'), @ansel.iconv("\xB7\x00")
|
51
|
+
assert_equal "ı".force_encoding('utf-8'), @ansel.iconv("\xB8\x00")
|
52
|
+
assert_equal "£".force_encoding('utf-8'), @ansel.iconv("\xB9\x00")
|
53
|
+
assert_equal "ð".force_encoding('utf-8'), @ansel.iconv("\xBA\x00")
|
54
|
+
assert_equal "ơ".force_encoding('utf-8'), @ansel.iconv("\xBC\x00")
|
55
|
+
assert_equal "ư".force_encoding('utf-8'), @ansel.iconv("\xBD\x00")
|
56
|
+
assert_equal "°".force_encoding('utf-8'), @ansel.iconv("\xC0\x00")
|
57
|
+
assert_equal "ℓ".force_encoding('utf-8'), @ansel.iconv("\xC1\x00")
|
58
|
+
assert_equal "℗".force_encoding('utf-8'), @ansel.iconv("\xC2\x00")
|
59
|
+
assert_equal "©".force_encoding('utf-8'), @ansel.iconv("\xC3\x00")
|
60
|
+
assert_equal "♯".force_encoding('utf-8'), @ansel.iconv("\xC4\x00")
|
61
|
+
assert_equal "¿".force_encoding('utf-8'), @ansel.iconv("\xC5\x00")
|
62
|
+
assert_equal "¡".force_encoding('utf-8'), @ansel.iconv("\xC6\x00")
|
63
|
+
assert_equal "ß".force_encoding('utf-8'), @ansel.iconv("\xC7\x00")
|
64
|
+
assert_equal "€".force_encoding('utf-8'), @ansel.iconv("\xC8\x00")
|
63
65
|
|
64
66
|
# ANSEL combining characters
|
65
|
-
assert_equal "Ả", @ansel.iconv("\xE0\x41")
|
66
|
-
assert_equal "Ḻ", @ansel.iconv("\xF6\x4C")
|
67
|
-
assert_equal "̲", @ansel.iconv("\xF6")
|
68
|
-
assert_equal "̮", @ansel.iconv("\xF9")
|
69
|
-
assert_equal "Ḫ", @ansel.iconv("\xF9\x48")
|
70
|
-
assert_equal "Ậ", @ansel.iconv("\xF2\xE3\x41")
|
71
|
-
assert_equal "ỵ", @ansel.iconv("\xF2\x79")
|
72
|
-
assert_equal "̣", @ansel.iconv("\xF2")
|
67
|
+
assert_equal "Ả".force_encoding('utf-8'), @ansel.iconv("\xE0\x41")
|
68
|
+
assert_equal "Ḻ".force_encoding('utf-8'), @ansel.iconv("\xF6\x4C")
|
69
|
+
assert_equal "̲".force_encoding('utf-8'), @ansel.iconv("\xF6")
|
70
|
+
assert_equal "̮".force_encoding('utf-8'), @ansel.iconv("\xF9")
|
71
|
+
assert_equal "Ḫ".force_encoding('utf-8'), @ansel.iconv("\xF9\x48")
|
72
|
+
assert_equal "Ậ".force_encoding('utf-8'), @ansel.iconv("\xF2\xE3\x41")
|
73
|
+
assert_equal "ỵ".force_encoding('utf-8'), @ansel.iconv("\xF2\x79")
|
74
|
+
assert_equal "̣".force_encoding('utf-8'), @ansel.iconv("\xF2")
|
73
75
|
end
|
74
76
|
|
75
77
|
should "convert full text correctly" do
|
76
78
|
assert_equal "What is the question?", @ansel.iconv("What is the question?")
|
77
|
-
assert_equal "¿What is the question?", @ansel.iconv("\xC5\x00What is the question?")
|
78
|
-
assert_equal "© 1994", @ansel.iconv("\xC3\x00 1994")
|
79
|
-
assert_equal "£4.59", @ansel.iconv("\xB9\x004.59")
|
79
|
+
assert_equal "¿What is the question?".force_encoding('utf-8'), @ansel.iconv("\xC5\x00What is the question?")
|
80
|
+
assert_equal "© 1994".force_encoding('utf-8'), @ansel.iconv("\xC3\x00 1994")
|
81
|
+
assert_equal "£4.59".force_encoding('utf-8'), @ansel.iconv("\xB9\x004.59")
|
80
82
|
end
|
81
83
|
|
82
84
|
should "convert ANSEL to UTF-16" do
|
data/test/test_helper.rb
CHANGED
@@ -1,5 +1,15 @@
|
|
1
|
+
# encoding: ascii-8bit
|
2
|
+
|
1
3
|
$:.unshift(File.dirname(__FILE__) + "/../lib/")
|
2
4
|
require 'rubygems'
|
3
5
|
require 'test/unit'
|
4
6
|
require 'shoulda'
|
5
|
-
require 'ansel_iconv'
|
7
|
+
require 'ansel_iconv'
|
8
|
+
|
9
|
+
if RUBY_VERSION < '1.9'
|
10
|
+
class String
|
11
|
+
def force_encoding(e)
|
12
|
+
self
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
- 5
|
9
|
-
version: 1.0.5
|
9
|
+
version: 1.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Keith Morrison
|
@@ -31,7 +31,7 @@ dependencies:
|
|
31
31
|
version: 2.3.5
|
32
32
|
type: :runtime
|
33
33
|
version_requirements: *id001
|
34
|
-
description: Convert ANSEL encoded text to any other encoding
|
34
|
+
description: Convert ANSEL encoded text to any other encoding available to Iconv
|
35
35
|
email: keithm@infused.org
|
36
36
|
executables: []
|
37
37
|
|