ansel 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec23fbc485516d81eaf02b0439e5ca9178417035
4
- data.tar.gz: 569ffd81d040ba067f667f2c7d944f80af9cc71c
3
+ metadata.gz: 815490e0ac24a83d73654a2b6168d3da23609f54
4
+ data.tar.gz: 2d85ffe9513ee2619d55c7ed8802ce98d8d3040b
5
5
  SHA512:
6
- metadata.gz: 4c81372c81cda650f3a65ee15924b791fb9deb907651693e18a2e46196da6e3c5754f74711fd23c08f363619b28f0099157d20448ba821f9e76488b0a5f173f2
7
- data.tar.gz: 15642df832bcaa46f24d6de3cf9ef0866b7d5fddb3a0d6461f70f2bf5980e20db059eb516115ad83879cee3d35d5c8786d72e9dd509c3b665d921b0e6bc90177
6
+ metadata.gz: 0d2cac810103dcb72232feaecbf845d67d340fecffeeb5c91d6ec12e2a54a7cab226c1b0170cc5b176908177278a4f30104136d132bf495f5bdc08d2610fb38c
7
+ data.tar.gz: 6fa8eb2906486ceb71840b76715b351bffcae3eb7b820d15cea480020ae810d59393d9058db04d18e84d53b19bd41bc45b519f24200b096bf975acd8b2049911
@@ -1,3 +1,7 @@
1
+ ## 2.0.1
2
+
3
+ - Explicitly require Ruby's StringScanner class
4
+
1
5
  ## 2.0.0
2
6
 
3
7
  - Remove Iconv dependency (requires Ruby 1.9+)
@@ -7,6 +7,7 @@ GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  diff-lcs (1.2.5)
10
+ rake (11.1.0)
10
11
  rspec (3.1.0)
11
12
  rspec-core (~> 3.1.0)
12
13
  rspec-expectations (~> 3.1.0)
@@ -25,4 +26,8 @@ PLATFORMS
25
26
 
26
27
  DEPENDENCIES
27
28
  ansel!
29
+ rake
28
30
  rspec
31
+
32
+ BUNDLED WITH
33
+ 1.11.2
@@ -0,0 +1,11 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'rspec'
7
+ end
8
+
9
+ group :test do
10
+ gem 'codeclimate-test-reporter', require: false
11
+ end
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009-2012 Keith Morrison <keithm@infused.org>
1
+ Copyright (c) 2009-2015 Keith Morrison <keithm@infused.org>
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -3,13 +3,16 @@
3
3
  [![Version](http://img.shields.io/gem/v/ansel.svg?style=flat)](https://rubygems.org/gems/ansel)
4
4
  [![Build Status](http://img.shields.io/travis/infused/ansel/master.svg?style=flat)](http://travis-ci.org/infused/ansel)
5
5
  [![Code Quality](http://img.shields.io/codeclimate/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
6
+ [![Test Coverage](http://img.shields.io/codeclimate/coverage/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
7
+ [![Dependency Status](http://img.shields.io/gemnasium/infused/ansel.svg?style=flat)](https://gemnasium.com/infused/ansel)
8
+
6
9
 
7
10
  ANSEL provides character set conversion from ANSEL to UTF-8
8
11
 
9
12
  Copyright (c) 2006-2015 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
10
13
 
11
14
  - Project page: <http://github.com/infused/ansel>
12
- - API Documentation: <http://rubydoc.info/github/infused/ansel/frames>
15
+ - API Documentation: <http://rubydoc.info/github/infused/ansel/>
13
16
  - Report bugs: <http://github.com/infused/ansel/issues>
14
17
  - Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSE)
15
18
  with ANSEL in the subject line
@@ -52,7 +55,7 @@ Conversion from ANSEL to UTF-8 is fully supported.
52
55
  [ANSI/NISO
53
56
  Z39.47](http://www.niso.org/kst/reports/standards?step=2&gid%3Austring%3Aiso-8859-1=&project_key%3Austring%3Aiso-8859-1=0b5d2bd7b690b60fcc75cde9256ed9f9e526e531),
54
57
  also known as ANSEL, is a character set encoding used primarily for
55
- bibliographic and genealogical data. It is one of the official character
58
+ bibliographic and genealogical data. It is used in library systems worldwide and is one of the official character
56
59
  encodings supported by the [Gedcom
57
60
  5.5](http://homepages.rootsweb.ancestry.com/~pmcbride/gedcom/55gctoc.htm)
58
61
  standard.
@@ -1,4 +1,5 @@
1
1
  # encoding: ascii-8bit
2
2
 
3
+ require 'strscan'
3
4
  require 'ansel/character_map'
4
5
  require 'ansel/converter'
@@ -6,10 +6,12 @@ module ANSEL
6
6
 
7
7
  def initialize(to_charset = 'UTF-8')
8
8
  @to_charset = to_charset
9
+ @encoding_converter = Encoding::Converter.new("UTF-16BE", "UTF-8")
10
+ @ansi_to_utf16_map = @@non_combining.merge(@@combining)
9
11
  end
10
12
 
11
- def ansi_to_utf16
12
- @ansi_to_utf16 ||= @@non_combining.merge(@@combining)
13
+ def utf16_to_utf8(string)
14
+ @encoding_converter.convert(string)
13
15
  end
14
16
 
15
17
  def convert(string)
@@ -18,32 +20,32 @@ module ANSEL
18
20
  until scanner.eos? do
19
21
  byte = scanner.get_byte
20
22
  char = byte.unpack('C')[0]
23
+ char_hex = char.to_s(16).upcase
21
24
 
22
25
  case char
23
26
  when 0x00..0x7F
24
- output << byte.force_encoding('UTF-8')
27
+ output << byte
25
28
  when 0x88..0xC8
26
- hex_key = char.to_s(16).upcase
27
- output << (ansi_to_utf16[hex_key] || ansi_to_utf16['ERR']).force_encoding('UTF-16BE').encode('UTF-8')
29
+ output << utf16_to_utf8(@ansi_to_utf16_map[char_hex] || @ansi_to_utf16_map['ERR'])
28
30
  scanner.get_byte # ignore the next byte
29
31
  when 0xE0..0xFB
30
32
  [2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
31
- bytes = [char.to_s(16).upcase]
33
+ bytes = [char_hex]
32
34
  scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
33
35
  hex_key = bytes.join('+')
34
- if ansi_to_utf16.has_key?(hex_key)
35
- output << ansi_to_utf16[hex_key].force_encoding('UTF-16BE').encode('UTF-8')
36
+ if @ansi_to_utf16_map.has_key?(hex_key)
37
+ output << utf16_to_utf8(@ansi_to_utf16_map[hex_key])
36
38
  n.times {scanner.get_byte}
37
39
  break
38
40
  end
39
41
  end
40
42
  else
41
- output << ansi_to_utf16['ERR'].force_encoding('UTF-16BE').encode('UTF-8')
43
+ output << utf16_to_utf8(@ansi_to_utf16_map['ERR'])
42
44
  scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
43
45
  end
44
46
  end
45
47
 
46
- output.force_encoding('UTF-8')
48
+ output
47
49
  end
48
50
 
49
51
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: ascii-8bit
2
2
 
3
3
  module ANSEL
4
- VERSION = '2.0.0'
4
+ VERSION = '2.0.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ansel
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keith Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-31 00:00:00.000000000 Z
11
+ date: 2016-03-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Convert ANSEL encoded text to UTF-8
14
14
  email: keithm@infused.org
@@ -22,6 +22,7 @@ files:
22
22
  - CHANGELOG.md
23
23
  - Gemfile
24
24
  - Gemfile.lock
25
+ - Gemfile.travis
25
26
  - MIT-LICENSE
26
27
  - README.md
27
28
  - Rakefile
@@ -50,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
51
  version: 1.3.0
51
52
  requirements: []
52
53
  rubyforge_project:
53
- rubygems_version: 2.4.3
54
+ rubygems_version: 2.5.1
54
55
  signing_key:
55
56
  specification_version: 4
56
57
  summary: Convert ANSEL encoded text to UTF-8