ansel_iconv 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +6 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +24 -0
- data/README.md +1 -1
- data/Rakefile +8 -38
- data/lib/ansel_iconv.rb +0 -2
- data/lib/ansel_iconv/character_map.rb +2 -0
- data/lib/ansel_iconv/converter.rb +9 -8
- data/lib/ansel_iconv/iconv.rb +5 -3
- data/lib/ansel_iconv/version.rb +5 -1
- metadata +13 -20
- data/test/ansel_iconv_test.rb +0 -103
- data/test/test_helper.rb +0 -15
data/CHANGELOG.md
CHANGED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
ansel_iconv (1.1.6)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.2)
|
10
|
+
rspec (2.3.0)
|
11
|
+
rspec-core (~> 2.3.0)
|
12
|
+
rspec-expectations (~> 2.3.0)
|
13
|
+
rspec-mocks (~> 2.3.0)
|
14
|
+
rspec-core (2.3.1)
|
15
|
+
rspec-expectations (2.3.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.3.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
ansel_iconv!
|
24
|
+
rspec (~> 2.3.0)
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@ ANSEL::Iconv is a wrapper for Iconv that adds ANSEL character set conversion.
|
|
5
5
|
Copyright (c) 2006-2010 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
|
6
6
|
|
7
7
|
- Project page: <http://github.com/infused/ansel_iconv>
|
8
|
-
- API Documentation: <http://
|
8
|
+
- API Documentation: <http://rubydoc.info/github/infused/ansel_iconv/frames>
|
9
9
|
- Report bugs: <http://github.com/infused/ansel_iconv/issues>
|
10
10
|
- Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL::Iconv)
|
11
11
|
with ANSEL::Iconv in the subject line
|
data/Rakefile
CHANGED
@@ -1,45 +1,15 @@
|
|
1
|
-
# encoding:
|
1
|
+
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require 'rubygems/specification'
|
5
|
-
require 'rake/testtask'
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
Rake::TestTask.new(:test) do |t|
|
12
|
-
t.pattern = 'test/**/*_test.rb'
|
13
|
-
t.verbose = true
|
14
|
-
t.libs << 'test'
|
15
|
-
end
|
16
|
-
|
17
|
-
def gemspec
|
18
|
-
@gemspec ||= begin
|
19
|
-
file = File.expand_path('../ansel_iconv.gemspec', __FILE__)
|
20
|
-
eval(File.read(file), binding, file)
|
21
|
-
end
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
RSpec::Core::RakeTask.new :spec do |t|
|
7
|
+
t.rspec_opts = %w(-fs --color)
|
22
8
|
end
|
23
9
|
|
24
|
-
|
25
|
-
require 'rake/gempackagetask'
|
26
|
-
rescue LoadError
|
27
|
-
task(:gem) { $stderr.puts '`gem install rake` to package gems' }
|
28
|
-
else
|
29
|
-
Rake::GemPackageTask.new(gemspec) do |pkg|
|
30
|
-
pkg.gem_spec = gemspec
|
31
|
-
end
|
32
|
-
task :gem => :gemspec
|
33
|
-
end
|
34
|
-
|
35
|
-
desc "install the gem locally"
|
36
|
-
task :install => :package do
|
37
|
-
sh %{gem install pkg/#{gemspec.name}-#{gemspec.version}}
|
38
|
-
end
|
10
|
+
task :default => :spec
|
39
11
|
|
40
|
-
desc "
|
41
|
-
task :
|
42
|
-
|
12
|
+
desc "Open an irb session preloaded with this library"
|
13
|
+
task :console do
|
14
|
+
sh "irb -rubygems -I lib -r dbf.rb"
|
43
15
|
end
|
44
|
-
|
45
|
-
task :package => :gemspec
|
data/lib/ansel_iconv.rb
CHANGED
data/lib/ansel_iconv/character_map.rb
CHANGED
data/lib/ansel_iconv/converter.rb
CHANGED
@@ -6,9 +6,10 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
end
|
10
|
+
|
11
|
+
def ansi_to_utf8
|
12
|
+
@ansi_to_utf8 ||= @@non_combining.merge(@@combining)
|
12
13
|
end
|
13
14
|
|
14
15
|
def iconv(string)
|
@@ -22,21 +23,21 @@ module ANSEL
|
|
22
23
|
output << byte
|
23
24
|
elsif char >= 0x88 && char <= 0xC8
|
24
25
|
hex_key = char.to_s(16).upcase
|
25
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
26
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8[hex_key] || ansi_to_utf8['ERR'])
|
26
27
|
scanner.get_byte # ignore the next byte
|
27
28
|
elsif char >= 0xE0 && char <= 0xFB
|
28
29
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
29
30
|
bytes = [char.to_s(16).upcase]
|
30
31
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
31
|
-
hex_key = bytes.join(
|
32
|
-
if
|
33
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
32
|
+
hex_key = bytes.join('+')
|
33
|
+
if ansi_to_utf8.has_key?(hex_key)
|
34
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8[hex_key])
|
34
35
|
n.times {scanner.get_byte}
|
35
36
|
break
|
36
37
|
end
|
37
38
|
end
|
38
39
|
else
|
39
|
-
output << ::Iconv.conv(@to_charset, 'UTF-16',
|
40
|
+
output << ::Iconv.conv(@to_charset, 'UTF-16', ansi_to_utf8['ERR'])
|
40
41
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
41
42
|
end
|
42
43
|
end
|
data/lib/ansel_iconv/iconv.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# encoding: ascii-8bit
|
2
2
|
|
3
3
|
module ANSEL
|
4
|
-
class Iconv
|
5
|
-
delegate :iconv, :to => :@converter
|
6
|
-
|
4
|
+
class Iconv
|
7
5
|
def initialize(to, from = 'ANSEL')
|
8
6
|
@converter = (from == 'ANSEL') ? Convert.new(to) : ::Iconv.new(to, from)
|
9
7
|
end
|
8
|
+
|
9
|
+
def iconv(*args)
|
10
|
+
@converter.iconv(*args)
|
11
|
+
end
|
10
12
|
end
|
11
13
|
end
|
data/lib/ansel_iconv/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel_iconv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 1
|
9
|
-
- 5
|
10
|
-
version: 1.1.5
|
9
|
+
- 6
|
10
|
+
version: 1.1.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Keith Morrison
|
@@ -15,32 +15,24 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-12-30 00:00:00 -08:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
22
|
+
name: rspec
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 3
|
30
30
|
segments:
|
31
31
|
- 2
|
32
32
|
- 3
|
33
|
-
-
|
34
|
-
version: 2.3.
|
35
|
-
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
hash: 19
|
38
|
-
segments:
|
39
|
-
- 2
|
40
|
-
- 3
|
41
|
-
- 8
|
42
|
-
version: 2.3.8
|
43
|
-
type: :runtime
|
33
|
+
- 0
|
34
|
+
version: 2.3.0
|
35
|
+
type: :development
|
44
36
|
version_requirements: *id001
|
45
37
|
description: Convert ANSEL encoded text to any other encoding available to Iconv
|
46
38
|
email: keithm@infused.org
|
@@ -51,8 +43,11 @@ extensions: []
|
|
51
43
|
extra_rdoc_files:
|
52
44
|
- README.md
|
53
45
|
- CHANGELOG.md
|
46
|
+
- MIT-LICENSE
|
54
47
|
files:
|
55
48
|
- CHANGELOG.md
|
49
|
+
- Gemfile
|
50
|
+
- Gemfile.lock
|
56
51
|
- MIT-LICENSE
|
57
52
|
- Rakefile
|
58
53
|
- README.md
|
@@ -61,8 +56,6 @@ files:
|
|
61
56
|
- lib/ansel_iconv/iconv.rb
|
62
57
|
- lib/ansel_iconv/version.rb
|
63
58
|
- lib/ansel_iconv.rb
|
64
|
-
- test/ansel_iconv_test.rb
|
65
|
-
- test/test_helper.rb
|
66
59
|
has_rdoc: true
|
67
60
|
homepage: http://github.com/infused/ansel_iconv
|
68
61
|
licenses: []
|
data/test/ansel_iconv_test.rb
DELETED
@@ -1,103 +0,0 @@
|
|
1
|
-
# encoding: ascii-8bit
|
2
|
-
|
3
|
-
require 'test_helper'
|
4
|
-
|
5
|
-
class ANSEL::IconvTest < Test::Unit::TestCase
|
6
|
-
FIXTURE_PATH = File.dirname(__FILE__) + "/../../../fixtures/gedcom"
|
7
|
-
|
8
|
-
def setup
|
9
|
-
@ansel = ANSEL::Iconv.new 'UTF-8'
|
10
|
-
end
|
11
|
-
|
12
|
-
should "return ASCII values without conversion" do
|
13
|
-
assert_equal " ", @ansel.iconv("\x20")
|
14
|
-
assert_equal "x", @ansel.iconv("\x78")
|
15
|
-
end
|
16
|
-
|
17
|
-
should "return the unicode replacement character for invalid characters" do
|
18
|
-
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xBE\x00")
|
19
|
-
assert_equal "�".force_encoding('utf-8'), @ansel.iconv("\xD1\x00")
|
20
|
-
end
|
21
|
-
|
22
|
-
should "return UTF-8 characters for valid ANSEL characters" do
|
23
|
-
# ANSEL non-combining mappings
|
24
|
-
assert_equal "", @ansel.iconv("\x88\x00")
|
25
|
-
assert_equal "", @ansel.iconv("\x89\x00")
|
26
|
-
assert_equal "", @ansel.iconv("\x8D\x00")
|
27
|
-
assert_equal "", @ansel.iconv("\x8E\x00")
|
28
|
-
assert_equal "Ł".force_encoding('utf-8'), @ansel.iconv("\xA1\x00")
|
29
|
-
assert_equal "Ø".force_encoding('utf-8'), @ansel.iconv("\xA2\x00")
|
30
|
-
assert_equal "Đ".force_encoding('utf-8'), @ansel.iconv("\xA3\x00")
|
31
|
-
assert_equal "Þ".force_encoding('utf-8'), @ansel.iconv("\xA4\x00")
|
32
|
-
assert_equal "Æ".force_encoding('utf-8'), @ansel.iconv("\xA5\x00")
|
33
|
-
assert_equal "Œ".force_encoding('utf-8'), @ansel.iconv("\xA6\x00")
|
34
|
-
assert_equal "ʹ".force_encoding('utf-8'), @ansel.iconv("\xA7\x00")
|
35
|
-
assert_equal "·".force_encoding('utf-8'), @ansel.iconv("\xA8\x00")
|
36
|
-
assert_equal "♭".force_encoding('utf-8'), @ansel.iconv("\xA9\x00")
|
37
|
-
assert_equal "®".force_encoding('utf-8'), @ansel.iconv("\xAA\x00")
|
38
|
-
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
39
|
-
assert_equal "±".force_encoding('utf-8'), @ansel.iconv("\xAB\x00")
|
40
|
-
assert_equal "Ơ".force_encoding('utf-8'), @ansel.iconv("\xAC\x00")
|
41
|
-
assert_equal "Ư".force_encoding('utf-8'), @ansel.iconv("\xAD\x00")
|
42
|
-
assert_equal "ʼ".force_encoding('utf-8'), @ansel.iconv("\xAE\x00")
|
43
|
-
assert_equal "ʻ".force_encoding('utf-8'), @ansel.iconv("\xB0\x00")
|
44
|
-
assert_equal "ł".force_encoding('utf-8'), @ansel.iconv("\xB1\x00")
|
45
|
-
assert_equal "ø".force_encoding('utf-8'), @ansel.iconv("\xB2\x00")
|
46
|
-
assert_equal "đ".force_encoding('utf-8'), @ansel.iconv("\xB3\x00")
|
47
|
-
assert_equal "þ".force_encoding('utf-8'), @ansel.iconv("\xB4\x00")
|
48
|
-
assert_equal "æ".force_encoding('utf-8'), @ansel.iconv("\xB5\x00")
|
49
|
-
assert_equal "œ".force_encoding('utf-8'), @ansel.iconv("\xB6\x00")
|
50
|
-
assert_equal "ʺ".force_encoding('utf-8'), @ansel.iconv("\xB7\x00")
|
51
|
-
assert_equal "ı".force_encoding('utf-8'), @ansel.iconv("\xB8\x00")
|
52
|
-
assert_equal "£".force_encoding('utf-8'), @ansel.iconv("\xB9\x00")
|
53
|
-
assert_equal "ð".force_encoding('utf-8'), @ansel.iconv("\xBA\x00")
|
54
|
-
assert_equal "ơ".force_encoding('utf-8'), @ansel.iconv("\xBC\x00")
|
55
|
-
assert_equal "ư".force_encoding('utf-8'), @ansel.iconv("\xBD\x00")
|
56
|
-
assert_equal "°".force_encoding('utf-8'), @ansel.iconv("\xC0\x00")
|
57
|
-
assert_equal "ℓ".force_encoding('utf-8'), @ansel.iconv("\xC1\x00")
|
58
|
-
assert_equal "℗".force_encoding('utf-8'), @ansel.iconv("\xC2\x00")
|
59
|
-
assert_equal "©".force_encoding('utf-8'), @ansel.iconv("\xC3\x00")
|
60
|
-
assert_equal "♯".force_encoding('utf-8'), @ansel.iconv("\xC4\x00")
|
61
|
-
assert_equal "¿".force_encoding('utf-8'), @ansel.iconv("\xC5\x00")
|
62
|
-
assert_equal "¡".force_encoding('utf-8'), @ansel.iconv("\xC6\x00")
|
63
|
-
assert_equal "ß".force_encoding('utf-8'), @ansel.iconv("\xC7\x00")
|
64
|
-
assert_equal "€".force_encoding('utf-8'), @ansel.iconv("\xC8\x00")
|
65
|
-
|
66
|
-
# ANSEL combining characters
|
67
|
-
assert_equal "Ả".force_encoding('utf-8'), @ansel.iconv("\xE0\x41")
|
68
|
-
assert_equal "Ḻ".force_encoding('utf-8'), @ansel.iconv("\xF6\x4C")
|
69
|
-
assert_equal "̲".force_encoding('utf-8'), @ansel.iconv("\xF6")
|
70
|
-
assert_equal "̮".force_encoding('utf-8'), @ansel.iconv("\xF9")
|
71
|
-
assert_equal "Ḫ".force_encoding('utf-8'), @ansel.iconv("\xF9\x48")
|
72
|
-
assert_equal "Ậ".force_encoding('utf-8'), @ansel.iconv("\xF2\xE3\x41")
|
73
|
-
assert_equal "ỵ".force_encoding('utf-8'), @ansel.iconv("\xF2\x79")
|
74
|
-
assert_equal "̣".force_encoding('utf-8'), @ansel.iconv("\xF2")
|
75
|
-
end
|
76
|
-
|
77
|
-
should "convert full text correctly" do
|
78
|
-
assert_equal "What is the question?", @ansel.iconv("What is the question?")
|
79
|
-
assert_equal "¿What is the question?".force_encoding('utf-8'), @ansel.iconv("\xC5\x00What is the question?")
|
80
|
-
assert_equal "© 1994".force_encoding('utf-8'), @ansel.iconv("\xC3\x00 1994")
|
81
|
-
assert_equal "£4.59".force_encoding('utf-8'), @ansel.iconv("\xB9\x004.59")
|
82
|
-
end
|
83
|
-
|
84
|
-
should "convert ANSEL to UTF-16" do
|
85
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'ANSEL'
|
86
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
87
|
-
end
|
88
|
-
|
89
|
-
should "convert ASCII to UTF-16" do
|
90
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'ASCII'
|
91
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
92
|
-
end
|
93
|
-
|
94
|
-
should "convert UTF-8 to UTF-16" do
|
95
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'UTF-8'
|
96
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv('abc')
|
97
|
-
end
|
98
|
-
|
99
|
-
should "convert UTF-16 to UTF-16" do
|
100
|
-
converter = ANSEL::Iconv.new 'UTF-16', 'UTF-16'
|
101
|
-
assert_equal "\376\377\000a\000b\000c", converter.iconv("\376\377\000a\000b\000c")
|
102
|
-
end
|
103
|
-
end
|
data/test/test_helper.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# encoding: ascii-8bit
|
2
|
-
|
3
|
-
$:.unshift(File.dirname(__FILE__) + "/../lib/")
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
require 'ansel_iconv'
|
8
|
-
|
9
|
-
if RUBY_VERSION < '1.9'
|
10
|
-
class String
|
11
|
-
def force_encoding(e)
|
12
|
-
self
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|