ansel_iconv 1.1.5 → 1.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +6 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +24 -0
- data/README.md +1 -1
- data/Rakefile +8 -38
- data/lib/ansel_iconv.rb +0 -2
- data/lib/ansel_iconv/character_map.rb +2 -0
- data/lib/ansel_iconv/converter.rb +9 -8
- data/lib/ansel_iconv/iconv.rb +5 -3
- data/lib/ansel_iconv/version.rb +5 -1
- metadata +13 -20
- data/test/ansel_iconv_test.rb +0 -103
- data/test/test_helper.rb +0 -15
data/CHANGELOG.md
CHANGED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
ansel_iconv (1.1.6)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.2)
|
10
|
+
rspec (2.3.0)
|
11
|
+
rspec-core (~> 2.3.0)
|
12
|
+
rspec-expectations (~> 2.3.0)
|
13
|
+
rspec-mocks (~> 2.3.0)
|
14
|
+
rspec-core (2.3.1)
|
15
|
+
rspec-expectations (2.3.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.3.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
ansel_iconv!
|
24
|
+
rspec (~> 2.3.0)
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@ ANSEL::Iconv is a wrapper for Iconv that adds ANSEL character set conversion.
|
|
5
5
|
Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
|
6
6
|
|
7
7
|
- Project page: <http://github.com/infused/ansel_iconv>
|
8
|
-
- API Documentation: <http://
|
8
|
+
- API Documentation: <http://rubydoc.info/github/infused/ansel_iconv/frames>
|
9
9
|
- Report bugs: <http://github.com/infused/ansel_iconv/issues>
|
10
10
|
- Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
|
11
11
|
with ANSEL::Iconv in the subject line
|
data/Rakefile
CHANGED
@@ -1,45 +1,15 @@
|
|
1
|
-
# encoding:
|
1
|
+
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require 'rubygems/specification'
|
5
|
-
require 'rake/testtask'
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
Rake::TestTask.new(:test) do |t|
|
12
|
-
t.pattern = 'test/**/*_test.rb'
|
13
|
-
t.verbose = true
|
14
|
-
t.libs << 'test'
|
15
|
-
end
|
16
|
-
|
17
|
-
def gemspec
|
18
|
-
@gemspec ||= begin
|
19
|
-
file = File.expand_path('../ansel_iconv.gemspec', __FILE__)
|
20
|
-
eval(File.read(file), binding, file)
|
21
|
-
end
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
RSpec::Core::RakeTask.new :spec do |t|
|
7
|
+
t.rspec_opts = %w(-fs --color)
|
22
8
|
end
|
23
9
|
|
24
|
-
|
25
|
-
require 'rake/gempackagetask'
|
26
|
-
rescue LoadError
|
27
|
-
task(:gem) { $stderr.puts '`gem install rake` to package gems' }
|
28
|
-
else
|
29
|
-
Rake::GemPackageTask.new(gemspec) do |pkg|
|
30
|
-
pkg.gem_spec = gemspec
|
31
|
-
end
|
32
|
-
task :gem => :gemspec
|
33
|
-
end
|
34
|
-
|
35
|
-
desc "install the gem locally"
|
36
|
-
task :install => :package do
|
37
|
-
sh %{gem install pkg/#{gemspec.name}-#{gemspec.version}}
|
38
|
-
end
|
10
|
+
task :default => :spec
|
39
11
|
|
40
|
-
desc "
|
41
|
-
task :
|
42
|
-
|
12
|
+
desc "Open an irb session preloaded with this library"
|
13
|
+
task :console do
|
14
|
+
sh "irb -rubygems -I lib -r dbf.rb"
|
43
15
|
end
|
44
|
-
|
45
|
-
task :package => :gemspec
|
data/lib/ansel_iconv.rb
CHANGED
@@ -6,9 +6,10 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
end
|
10
|
+
|
11
|
+
def ansi_to_utf8
|
12
|
+
@ansi_to_utf8 ||= @@non_combining.merge(@@combining)
|
12
13
|
end
|
13
14
|
|
14
15
|
def iconv(string)
|
@@ -22,21 +23,21 @@ module ANSEL
|
|
22
23
|
output << byte
|
23
24
|
elsif char >= 0x88 && char <= 0xC8
|
24
25
|
hex_key = char.to_s(16).upcase
|
25
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
26
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8[hex_key] || ansi_to_utf8['ERR'])
|
26
27
|
scanner.get_byte # ignore the next byte
|
27
28
|
elsif char >= 0xE0 && char <= 0xFB
|
28
29
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
29
30
|
bytes = [char.to_s(16).upcase]
|
30
31
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
31
|
-
hex_key = bytes.join(
|
32
|
-
if
|
33
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
32
|
+
hex_key = bytes.join('+')
|
33
|
+
if ansi_to_utf8.has_key?(hex_key)
|
34
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8[hex_key])
|
34
35
|
n.times {scanner.get_byte}
|
35
36
|
break
|
36
37
|
end
|
37
38
|
end
|
38
39
|
else
|
39
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
40
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8['ERR'])
|
40
41
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
41
42
|
end
|
42
43
|
end
|
data/lib/ansel_iconv/iconv.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
module ANSEL
|
4
|
-
class Iconv
|
5
|
-
delegate :iconv, :to => :@converter
|
6
|
-
|
4
|
+
class Iconv
|
7
5
|
def initialize(to, from = 'ANSEL')
|
8
6
|
@converter = (from == 'ANSEL') ? Convert.new(to) : ::Iconv.new(to, from)
|
9
7
|
end
|
8
|
+
|
9
|
+
def iconv(*args)
|
10
|
+
@converter.iconv(*args)
|
11
|
+
end
|
10
12
|
end
|
11
13
|
end
|
data/lib/ansel_iconv/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel_iconv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 5
|
10
|
-
version: 1.1.5
|
9
|
+
- 6
|
10
|
+
version: 1.1.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Keith Morrison
|
@@ -15,32 +15,24 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-12-30 00:00:00 -08:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
22
|
+
name: rspec
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 3
|
30
30
|
segments:
|
31
31
|
- 2
|
32
32
|
- 3
|
33
|
-
-
|
34
|
-
version: 2.3.
|
35
|
-
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
hash: 19
|
38
|
-
segments:
|
39
|
-
- 2
|
40
|
-
- 3
|
41
|
-
- 8
|
42
|
-
version: 2.3.8
|
43
|
-
type: :runtime
|
33
|
+
- 0
|
34
|
+
version: 2.3.0
|
35
|
+
type: :development
|
44
36
|
version_requirements: *id001
|
45
37
|
description: Convert ANSEL encoded text to any other encoding available to Iconv
|
46
38
|
email: keithm@infused.org
|
@@ -51,8 +43,11 @@ extensions: []
|
|
51
43
|
extra_rdoc_files:
|
52
44
|
- README.md
|
53
45
|
- CHANGELOG.md
|
46
|
+
- MIT-LICENSE
|
54
47
|
files:
|
55
48
|
- CHANGELOG.md
|
49
|
+
- Gemfile
|
50
|
+
- Gemfile.lock
|
56
51
|
- MIT-LICENSE
|
57
52
|
- Rakefile
|
58
53
|
- README.md
|
@@ -61,8 +56,6 @@ files:
|
|
61
56
|
- lib/ansel_iconv/iconv.rb
|
62
57
|
- lib/ansel_iconv/version.rb
|
63
58
|
- lib/ansel_iconv.rb
|
64
|
-
- test/ansel_iconv_test.rb
|
65
|
-
- test/test_helper.rb
|
66
59
|
has_rdoc: true
|
67
60
|
homepage: http://github.com/infused/ansel_iconv
|
68
61
|
licenses: []
|
data/test/ansel_iconv_test.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
# encoding: ascii-8bit
|
2
|
-
|
3
|
-
require 'test_helper'
|
4
|
-
|
5
|
-
class ANSEL::IconvTest < Test::Unit::TestCase
|
6
|
-
FIXTURE_PATH = File.dirname(__FILE__) + "/../../../fixtures/gedcom"
|
7
|
-
|
8
|
-
def setup
|
9
|
-
@ansel = ANSEL::Iconv.new 'UTF-8'
|
10
|
-
end
|
11
|
-
|
12
|
-
should "return ASCII values without conversion" do
|
13
|
-
assert_equal " ", @ansel.iconv("\x20")
|
14
|
-
assert_equal "x", @ansel.iconv("\x78")
|
15
|
-
end
|
16
|
-
|
17
|
-
should "return the unicode replacement character for invalid characters" do
|
18
|
-
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xBE\x00")
|
19
|
-
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xD1\x00")
|
20
|
-
end
|
21
|
-
|
22
|
-
should "return UTF-8 characters for valid ANSEL characters" do
|
23
|
-
# ANSEL non-combining mappings
|
24
|
-
assert_equal "", @ansel.iconv("\x88\x00")
|
25
|
-
assert_equal "", @ansel.iconv("\x89\x00")
|
26
|
-
assert_equal "", @ansel.iconv("\x8D\x00")
|
27
|
-
assert_equal "", @ansel.iconv("\x8E\x00")
|
28
|
-
assert_equal "Ł".force_encoding('utf-8'), @ansel.iconv("\xA1\x00")
|
29
|
-
assert_equal "Ø".force_encoding('utf-8'), @ansel.iconv("\xA2\x00")
|
30
|
-
assert_equal "Đ".force_encoding('utf-8'), @ansel.iconv("\xA3\x00")
|
31
|
-
assert_equal "Þ".force_encoding('utf-8'), @ansel.iconv("\xA4\x00")
|
32
|
-
assert_equal "Æ".force_encoding('utf-8'), @ansel.iconv("\xA5\x00")
|
33
|
-
assert_equal "Œ".force_encoding('utf-8'), @ansel.iconv("\xA6\x00")
|
34
|
-
assert_equal "ʹ".force_encoding('utf-8'), @ansel.iconv("\xA7\x00")
|
35
|
-
assert_equal "·".force_encoding('utf-8'), @ansel.iconv("\xA8\x00")
|
36
|
-
assert_equal "♭".force_encoding('utf-8'), @ansel.iconv("\xA9\x00")
|
37
|
-
assert_equal "®".force_encoding('utf-8'), @ansel.iconv("\xAA\x00")
|
38
|
-
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
39
|
-
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
40
|
-
assert_equal "Ơ".force_encoding('utf-8'), @ansel.iconv("\xAC\x00")
|
41
|
-
assert_equal "Ư".force_encoding('utf-8'), @ansel.iconv("\xAD\x00")
|
42
|
-
assert_equal "ʼ".force_encoding('utf-8'), @ansel.iconv("\xAE\x00")
|
43
|
-
assert_equal "ʻ".force_encoding('utf-8'), @ansel.iconv("\xB0\x00")
|
44
|
-
assert_equal "ł".force_encoding('utf-8'), @ansel.iconv("\xB1\x00")
|
45
|
-
assert_equal "ø".force_encoding('utf-8'), @ansel.iconv("\xB2\x00")
|
46
|
-
assert_equal "đ".force_encoding('utf-8'), @ansel.iconv("\xB3\x00")
|
47
|
-
assert_equal "þ".force_encoding('utf-8'), @ansel.iconv("\xB4\x00")
|
48
|
-
assert_equal "æ".force_encoding('utf-8'), @ansel.iconv("\xB5\x00")
|
49
|
-
assert_equal "œ".force_encoding('utf-8'), @ansel.iconv("\xB6\x00")
|
50
|
-
assert_equal "ʺ".force_encoding('utf-8'), @ansel.iconv("\xB7\x00")
|
51
|
-
assert_equal "ı".force_encoding('utf-8'), @ansel.iconv("\xB8\x00")
|
52
|
-
assert_equal "£".force_encoding('utf-8'), @ansel.iconv("\xB9\x00")
|
53
|
-
assert_equal "ð".force_encoding('utf-8'), @ansel.iconv("\xBA\x00")
|
54
|
-
assert_equal "ơ".force_encoding('utf-8'), @ansel.iconv("\xBC\x00")
|
55
|
-
assert_equal "ư".force_encoding('utf-8'), @ansel.iconv("\xBD\x00")
|
56
|
-
assert_equal "°".force_encoding('utf-8'), @ansel.iconv("\xC0\x00")
|
57
|
-
assert_equal "ℓ".force_encoding('utf-8'), @ansel.iconv("\xC1\x00")
|
58
|
-
assert_equal "℗".force_encoding('utf-8'), @ansel.iconv("\xC2\x00")
|
59
|
-
assert_equal "©".force_encoding('utf-8'), @ansel.iconv("\xC3\x00")
|
60
|
-
assert_equal "♯".force_encoding('utf-8'), @ansel.iconv("\xC4\x00")
|
61
|
-
assert_equal "¿".force_encoding('utf-8'), @ansel.iconv("\xC5\x00")
|
62
|
-
assert_equal "¡".force_encoding('utf-8'), @ansel.iconv("\xC6\x00")
|
63
|
-
assert_equal "ß".force_encoding('utf-8'), @ansel.iconv("\xC7\x00")
|
64
|
-
assert_equal "€".force_encoding('utf-8'), @ansel.iconv("\xC8\x00")
|
65
|
-
|
66
|
-
# ANSEL combining characters
|
67
|
-
assert_equal "Ả".force_encoding('utf-8'), @ansel.iconv("\xE0\x41")
|
68
|
-
assert_equal "Ḻ".force_encoding('utf-8'), @ansel.iconv("\xF6\x4C")
|
69
|
-
assert_equal "̲".force_encoding('utf-8'), @ansel.iconv("\xF6")
|
70
|
-
assert_equal "̮".force_encoding('utf-8'), @ansel.iconv("\xF9")
|
71
|
-
assert_equal "Ḫ".force_encoding('utf-8'), @ansel.iconv("\xF9\x48")
|
72
|
-
assert_equal "Ậ".force_encoding('utf-8'), @ansel.iconv("\xF2\xE3\x41")
|
73
|
-
assert_equal "ỵ".force_encoding('utf-8'), @ansel.iconv("\xF2\x79")
|
74
|
-
assert_equal "̣".force_encoding('utf-8'), @ansel.iconv("\xF2")
|
75
|
-
end
|
76
|
-
|
77
|
-
should "convert full text correctly" do
|
78
|
-
assert_equal "What is the question?", @ansel.iconv("What is the question?")
|
79
|
-
assert_equal "¿What is the question?".force_encoding('utf-8'), @ansel.iconv("\xC5\x00What is the question?")
|
80
|
-
assert_equal "© 1994".force_encoding('utf-8'), @ansel.iconv("\xC3\x00 1994")
|
81
|
-
assert_equal "£4.59".force_encoding('utf-8'), @ansel.iconv("\xB9\x004.59")
|
82
|
-
end
|
83
|
-
|
84
|
-
should "convert ANSEL to UTF-16" do
|
85
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'ANSEL'
|
86
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
87
|
-
end
|
88
|
-
|
89
|
-
should "convert ASCII to UTF-16" do
|
90
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'ASCII'
|
91
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
92
|
-
end
|
93
|
-
|
94
|
-
should "convert UTF-8 to UTF-16" do
|
95
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'UTF-8'
|
96
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
97
|
-
end
|
98
|
-
|
99
|
-
should "convert UTF-16 to UTF-16" do
|
100
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'UTF-16'
|
101
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv("\376\377\000a\000b\000c")
|
102
|
-
end
|
103
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# encoding: ascii-8bit
|
2
|
-
|
3
|
-
$:.unshift(File.dirname(__FILE__) + "/../lib/")
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
require 'ansel_iconv'
|
8
|
-
|
9
|
-
if RUBY_VERSION < '1.9'
|
10
|
-
class String
|
11
|
-
def force_encoding(e)
|
12
|
-
self
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|