ansel 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +5 -0
- data/Gemfile.travis +11 -0
- data/MIT-LICENSE +1 -1
- data/README.md +5 -2
- data/lib/ansel.rb +1 -0
- data/lib/ansel/converter.rb +12 -10
- data/lib/ansel/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815490e0ac24a83d73654a2b6168d3da23609f54
|
4
|
+
data.tar.gz: 2d85ffe9513ee2619d55c7ed8802ce98d8d3040b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d2cac810103dcb72232feaecbf845d67d340fecffeeb5c91d6ec12e2a54a7cab226c1b0170cc5b176908177278a4f30104136d132bf495f5bdc08d2610fb38c
|
7
|
+
data.tar.gz: 6fa8eb2906486ceb71840b76715b351bffcae3eb7b820d15cea480020ae810d59393d9058db04d18e84d53b19bd41bc45b519f24200b096bf975acd8b2049911
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,6 +7,7 @@ GEM
|
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
diff-lcs (1.2.5)
|
10
|
+
rake (11.1.0)
|
10
11
|
rspec (3.1.0)
|
11
12
|
rspec-core (~> 3.1.0)
|
12
13
|
rspec-expectations (~> 3.1.0)
|
@@ -25,4 +26,8 @@ PLATFORMS
|
|
25
26
|
|
26
27
|
DEPENDENCIES
|
27
28
|
ansel!
|
29
|
+
rake
|
28
30
|
rspec
|
31
|
+
|
32
|
+
BUNDLED WITH
|
33
|
+
1.11.2
|
data/Gemfile.travis
ADDED
data/MIT-LICENSE
CHANGED
data/README.md
CHANGED
@@ -3,13 +3,16 @@
|
|
3
3
|
[](https://rubygems.org/gems/ansel)
|
4
4
|
[](http://travis-ci.org/infused/ansel)
|
5
5
|
[](https://codeclimate.com/github/infused/ansel)
|
6
|
+
[](https://codeclimate.com/github/infused/ansel)
|
7
|
+
[](https://gemnasium.com/infused/ansel)
|
8
|
+
|
6
9
|
|
7
10
|
ANSEL provides character set conversion from ANSEL to UTF-8
|
8
11
|
|
9
12
|
Copyright (c) 2006-2015 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
|
10
13
|
|
11
14
|
- Project page: <http://github.com/infused/ansel>
|
12
|
-
- API Documentation: <http://rubydoc.info/github/infused/ansel
|
15
|
+
- API Documentation: <http://rubydoc.info/github/infused/ansel/>
|
13
16
|
- Report bugs: <http://github.com/infused/ansel/issues>
|
14
17
|
- Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSE)
|
15
18
|
with ANSEL in the subject line
|
@@ -52,7 +55,7 @@ Conversion from ANSEL to UTF-8 is fully supported.
|
|
52
55
|
[ANSI/NISO
|
53
56
|
Z39.47](http://www.niso.org/kst/reports/standards?step=2&gid%3Austring%3Aiso-8859-1=&project_key%3Austring%3Aiso-8859-1=0b5d2bd7b690b60fcc75cde9256ed9f9e526e531),
|
54
57
|
also known as ANSEL, is a character set encoding used primarily for
|
55
|
-
bibliographic and genealogical data. It is one of the official character
|
58
|
+
bibliographic and genealogical data. It is used in library systems worldwide and is one of the official character
|
56
59
|
encodings supported by the [Gedcom
|
57
60
|
5.5](http://homepages.rootsweb.ancestry.com/~pmcbride/gedcom/55gctoc.htm)
|
58
61
|
standard.
|
data/lib/ansel.rb
CHANGED
data/lib/ansel/converter.rb
CHANGED
@@ -6,10 +6,12 @@ module ANSEL
|
|
6
6
|
|
7
7
|
def initialize(to_charset = 'UTF-8')
|
8
8
|
@to_charset = to_charset
|
9
|
+
@encoding_converter = Encoding::Converter.new("UTF-16BE", "UTF-8")
|
10
|
+
@ansi_to_utf16_map = @@non_combining.merge(@@combining)
|
9
11
|
end
|
10
12
|
|
11
|
-
def
|
12
|
-
@
|
13
|
+
def utf16_to_utf8(string)
|
14
|
+
@encoding_converter.convert(string)
|
13
15
|
end
|
14
16
|
|
15
17
|
def convert(string)
|
@@ -18,32 +20,32 @@ module ANSEL
|
|
18
20
|
until scanner.eos? do
|
19
21
|
byte = scanner.get_byte
|
20
22
|
char = byte.unpack('C')[0]
|
23
|
+
char_hex = char.to_s(16).upcase
|
21
24
|
|
22
25
|
case char
|
23
26
|
when 0x00..0x7F
|
24
|
-
output << byte
|
27
|
+
output << byte
|
25
28
|
when 0x88..0xC8
|
26
|
-
|
27
|
-
output << (ansi_to_utf16[hex_key] || ansi_to_utf16['ERR']).force_encoding('UTF-16BE').encode('UTF-8')
|
29
|
+
output << utf16_to_utf8(@ansi_to_utf16_map[char_hex] || @ansi_to_utf16_map['ERR'])
|
28
30
|
scanner.get_byte # ignore the next byte
|
29
31
|
when 0xE0..0xFB
|
30
32
|
[2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
|
31
|
-
bytes = [
|
33
|
+
bytes = [char_hex]
|
32
34
|
scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
|
33
35
|
hex_key = bytes.join('+')
|
34
|
-
if
|
35
|
-
output <<
|
36
|
+
if @ansi_to_utf16_map.has_key?(hex_key)
|
37
|
+
output << utf16_to_utf8(@ansi_to_utf16_map[hex_key])
|
36
38
|
n.times {scanner.get_byte}
|
37
39
|
break
|
38
40
|
end
|
39
41
|
end
|
40
42
|
else
|
41
|
-
output <<
|
43
|
+
output << utf16_to_utf8(@ansi_to_utf16_map['ERR'])
|
42
44
|
scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
|
43
45
|
end
|
44
46
|
end
|
45
47
|
|
46
|
-
output
|
48
|
+
output
|
47
49
|
end
|
48
50
|
|
49
51
|
end
|
data/lib/ansel/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ansel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Keith Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Convert ANSEL encoded text to UTF-8
|
14
14
|
email: keithm@infused.org
|
@@ -22,6 +22,7 @@ files:
|
|
22
22
|
- CHANGELOG.md
|
23
23
|
- Gemfile
|
24
24
|
- Gemfile.lock
|
25
|
+
- Gemfile.travis
|
25
26
|
- MIT-LICENSE
|
26
27
|
- README.md
|
27
28
|
- Rakefile
|
@@ -50,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
51
|
version: 1.3.0
|
51
52
|
requirements: []
|
52
53
|
rubyforge_project:
|
53
|
-
rubygems_version: 2.
|
54
|
+
rubygems_version: 2.5.1
|
54
55
|
signing_key:
|
55
56
|
specification_version: 4
|
56
57
|
summary: Convert ANSEL encoded text to UTF-8
|