ansel 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +5 -0
- data/Gemfile.travis +11 -0
- data/MIT-LICENSE +1 -1
- data/README.md +5 -2
- data/lib/ansel.rb +1 -0
- data/lib/ansel/converter.rb +12 -10
- data/lib/ansel/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815490e0ac24a83d73654a2b6168d3da23609f54
|
4
|
+
data.tar.gz: 2d85ffe9513ee2619d55c7ed8802ce98d8d3040b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d2cac810103dcb72232feaecbf845d67d340fecffeeb5c91d6ec12e2a54a7cab226c1b0170cc5b176908177278a4f30104136d132bf495f5bdc08d2610fb38c
|
7
|
+
data.tar.gz: 6fa8eb2906486ceb71840b76715b351bffcae3eb7b820d15cea480020ae810d59393d9058db04d18e84d53b19bd41bc45b519f24200b096bf975acd8b2049911
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,6 +7,7 @@ GEM
|
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
diff-lcs (1.2.5)
|
10
|
+
rake (11.1.0)
|
10
11
|
rspec (3.1.0)
|
11
12
|
rspec-core (~> 3.1.0)
|
12
13
|
rspec-expectations (~> 3.1.0)
|
@@ -25,4 +26,8 @@ PLATFORMS
|
|
25
26
|
|
26
27
|
DEPENDENCIES
|
27
28
|
ansel!
|
29
|
+
rake
|
28
30
|
rspec
|
31
|
+
|
32
|
+
BUNDLED WITH
|
33
|
+
1.11.2
|
data/Gemfile.travis
ADDED
data/MIT-LICENSE
CHANGED
data/README.md
CHANGED
@@ -3,13 +3,16 @@
|
|
3
3
|
[![Version](http://img.shields.io/gem/v/ansel.svg?style=flat)](https://rubygems.org/gems/ansel)
|
4
4
|
[![Build Status](http://img.shields.io/travis/infused/ansel/master.svg?style=flat)](http://travis-ci.org/infused/ansel)
|
5
5
|
[![Code Quality](http://img.shields.io/codeclimate/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
|
6
|
+
[![Test Coverage](http://img.shields.io/codeclimate/coverage/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
|
7
|
+
[![Dependency Status](http://img.shields.io/gemnasium/infused/ansel.svg?style=flat)](https://gemnasium.com/infused/ansel)
|
8
|
+
|
6
9
|
|
7
10
|
ANSEL provides character set conversion from ANSEL to UTF-8
|
8
11
|
|
9
12
|
Copyright (c) 2006-2015 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
|
10
13
|
|
11
14
|
- Project page: <http://github.com/infused/ansel>
|
12
|
-
- API Documentation: <http://rubydoc.info/github/infused/ansel
|
15
|
+
- API Documentation: <http://rubydoc.info/github/infused/ansel/>
|
13
16
|
- Report bugs: <http://github.com/infused/ansel/issues>
|
14
17
|
- Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSE)
|
15
18
|
with ANSEL in the subject line
|
@@ -52,7 +55,7 @@ Conversion from ANSEL to UTF-8 is fully supported.
|
|
52
55
|
[ANSI/NISO
|
53
56
|
Z39.47](http://www.niso.org/kst/reports/standards?step=2&gid%3Austring%3Aiso-8859-1=&project_key%3Austring%3Aiso-8859-1=0b5d2bd7b690b60fcc75cde9256ed9f9e526e531),
|
54
57
|
also known as ANSEL, is a character set encoding used primarily for
|
55
|
-
bibliographic and genealogical data. It is one of the official character
|
58
|
+
bibliographic and genealogical data. It is used in library systems worldwide and is one of the official character
|
56
59
|
encodings supported by the [Gedcom
|
57
60
|
5.5](http://homepages.rootsweb.ancestry.com/~pmcbride/gedcom/55gctoc.htm)
|
58
61
|
standard.
|
data/lib/ansel.rb
CHANGED
data/lib/ansel/converter.rb
CHANGED
@@ -6,10 +6,12 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
+
@encoding_converter = Encoding::Converter.new("UTF-16BE", "UTF-8")
|
10
|
+
@ansi_to_utf16_map = @@non_combining.merge(@@combining)
|
9
11
|
end
|
10
12
|
|
11
|
-
def
|
12
|
-
@
|
13
|
+
def utf16_to_utf8(string)
|
14
|
+
@encoding_converter.convert(string)
|
13
15
|
end
|
14
16
|
|
15
17
|
def convert(string)
|
@@ -18,32 +20,32 @@ module ANSEL
|
|
18
20
|
until scanner.eos? do
|
19
21
|
byte = scanner.get_byte
|
20
22
|
char = byte.unpack('C')[0]
|
23
|
+
char_hex = char.to_s(16).upcase
|
21
24
|
|
22
25
|
case char
|
23
26
|
when 0x00..0x7F
|
24
|
-
output << byte
|
27
|
+
output << byte
|
25
28
|
when 0x88..0xC8
|
26
|
-
|
27
|
-
output << (ansi_to_utf16[hex_key] || ansi_to_utf16['ERR']).force_encoding('UTF-16BE').encode('UTF-8')
|
29
|
+
output << utf16_to_utf8(@ansi_to_utf16_map[char_hex] || @ansi_to_utf16_map['ERR'])
|
28
30
|
scanner.get_byte # ignore the next byte
|
29
31
|
when 0xE0..0xFB
|
30
32
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
31
|
-
bytes = [
|
33
|
+
bytes = [char_hex]
|
32
34
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
33
35
|
hex_key = bytes.join('+')
|
34
|
-
if
|
35
|
-
output <<
|
36
|
+
if @ansi_to_utf16_map.has_key?(hex_key)
|
37
|
+
output << utf16_to_utf8(@ansi_to_utf16_map[hex_key])
|
36
38
|
n.times {scanner.get_byte}
|
37
39
|
break
|
38
40
|
end
|
39
41
|
end
|
40
42
|
else
|
41
|
-
output <<
|
43
|
+
output << utf16_to_utf8(@ansi_to_utf16_map['ERR'])
|
42
44
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
43
45
|
end
|
44
46
|
end
|
45
47
|
|
46
|
-
output
|
48
|
+
output
|
47
49
|
end
|
48
50
|
|
49
51
|
end
|
data/lib/ansel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keith Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Convert ANSEL encoded text to UTF-8
|
14
14
|
email: keithm@infused.org
|
@@ -22,6 +22,7 @@ files:
|
|
22
22
|
- CHANGELOG.md
|
23
23
|
- Gemfile
|
24
24
|
- Gemfile.lock
|
25
|
+
- Gemfile.travis
|
25
26
|
- MIT-LICENSE
|
26
27
|
- README.md
|
27
28
|
- Rakefile
|
@@ -50,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
51
|
version: 1.3.0
|
51
52
|
requirements: []
|
52
53
|
rubyforge_project:
|
53
|
-
rubygems_version: 2.
|
54
|
+
rubygems_version: 2.5.1
|
54
55
|
signing_key:
|
55
56
|
specification_version: 4
|
56
57
|
summary: Convert ANSEL encoded text to UTF-8
|