ansel 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec23fbc485516d81eaf02b0439e5ca9178417035
4
- data.tar.gz: 569ffd81d040ba067f667f2c7d944f80af9cc71c
3
+ metadata.gz: 815490e0ac24a83d73654a2b6168d3da23609f54
4
+ data.tar.gz: 2d85ffe9513ee2619d55c7ed8802ce98d8d3040b
5
5
  SHA512:
6
- metadata.gz: 4c81372c81cda650f3a65ee15924b791fb9deb907651693e18a2e46196da6e3c5754f74711fd23c08f363619b28f0099157d20448ba821f9e76488b0a5f173f2
7
- data.tar.gz: 15642df832bcaa46f24d6de3cf9ef0866b7d5fddb3a0d6461f70f2bf5980e20db059eb516115ad83879cee3d35d5c8786d72e9dd509c3b665d921b0e6bc90177
6
+ metadata.gz: 0d2cac810103dcb72232feaecbf845d67d340fecffeeb5c91d6ec12e2a54a7cab226c1b0170cc5b176908177278a4f30104136d132bf495f5bdc08d2610fb38c
7
+ data.tar.gz: 6fa8eb2906486ceb71840b76715b351bffcae3eb7b820d15cea480020ae810d59393d9058db04d18e84d53b19bd41bc45b519f24200b096bf975acd8b2049911
@@ -1,3 +1,7 @@
1
+ ## 2.0.1
2
+
3
+ - Explicitly require Ruby's StringScanner class
4
+
1
5
  ## 2.0.0
2
6
 
3
7
  - Remove Iconv dependency (requires Ruby 1.9+)
@@ -7,6 +7,7 @@ GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  diff-lcs (1.2.5)
10
+ rake (11.1.0)
10
11
  rspec (3.1.0)
11
12
  rspec-core (~> 3.1.0)
12
13
  rspec-expectations (~> 3.1.0)
@@ -25,4 +26,8 @@ PLATFORMS
25
26
 
26
27
  DEPENDENCIES
27
28
  ansel!
29
+ rake
28
30
  rspec
31
+
32
+ BUNDLED WITH
33
+ 1.11.2
@@ -0,0 +1,11 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'rspec'
7
+ end
8
+
9
+ group :test do
10
+ gem 'codeclimate-test-reporter', require: false
11
+ end
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009-2012 Keith Morrison <keithm@infused.org>
1
+ Copyright (c) 2009-2015 Keith Morrison <keithm@infused.org>
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -3,13 +3,16 @@
3
3
  [![Version](http://img.shields.io/gem/v/ansel.svg?style=flat)](https://rubygems.org/gems/ansel)
4
4
  [![Build Status](http://img.shields.io/travis/infused/ansel/master.svg?style=flat)](http://travis-ci.org/infused/ansel)
5
5
  [![Code Quality](http://img.shields.io/codeclimate/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
6
+ [![Test Coverage](http://img.shields.io/codeclimate/coverage/github/infused/ansel.svg?style=flat)](https://codeclimate.com/github/infused/ansel)
7
+ [![Dependency Status](http://img.shields.io/gemnasium/infused/ansel.svg?style=flat)](https://gemnasium.com/infused/ansel)
8
+
6
9
 
7
10
  ANSEL provides character set conversion from ANSEL to UTF-8
8
11
 
9
12
  Copyright (c) 2006-2015 Keith Morrison <mailto:keithm@infused.org>, <http://www.infused.org>
10
13
 
11
14
  - Project page: <http://github.com/infused/ansel>
12
- - API Documentation: <http://rubydoc.info/github/infused/ansel/frames>
15
+ - API Documentation: <http://rubydoc.info/github/infused/ansel/>
13
16
  - Report bugs: <http://github.com/infused/ansel/issues>
14
17
  - Questions? Email [keithm@infused.org](mailto:keithm@infused.org?subject=ANSEL)
15
18
  with ANSEL in the subject line
@@ -52,7 +55,7 @@ Conversion from ANSEL to UTF-8 is fully supported.
52
55
  [ANSI/NISO
53
56
  Z39.47](http://www.niso.org/kst/reports/standards?step=2&gid%3Austring%3Aiso-8859-1=&project_key%3Austring%3Aiso-8859-1=0b5d2bd7b690b60fcc75cde9256ed9f9e526e531),
54
57
  also known as ANSEL, is a character set encoding used primarily for
55
- bibliographic and genealogical data. It is one of the official character
58
+ bibliographic and genealogical data. It is used in library systems worldwide and is one of the official character
56
59
  encodings supported by the [Gedcom
57
60
  5.5](http://homepages.rootsweb.ancestry.com/~pmcbride/gedcom/55gctoc.htm)
58
61
  standard.
@@ -1,4 +1,5 @@
1
1
  # encoding: ascii-8bit
2
2
 
3
+ require 'strscan'
3
4
  require 'ansel/character_map'
4
5
  require 'ansel/converter'
@@ -6,10 +6,12 @@ module ANSEL
6
6
 
7
7
  def initialize(to_charset = 'UTF-8')
8
8
  @to_charset = to_charset
9
+ @encoding_converter = Encoding::Converter.new("UTF-16BE", "UTF-8")
10
+ @ansi_to_utf16_map = @@non_combining.merge(@@combining)
9
11
  end
10
12
 
11
- def ansi_to_utf16
12
- @ansi_to_utf16 ||= @@non_combining.merge(@@combining)
13
+ def utf16_to_utf8(string)
14
+ @encoding_converter.convert(string)
13
15
  end
14
16
 
15
17
  def convert(string)
@@ -18,32 +20,32 @@ module ANSEL
18
20
  until scanner.eos? do
19
21
  byte = scanner.get_byte
20
22
  char = byte.unpack('C')[0]
23
+ char_hex = char.to_s(16).upcase
21
24
 
22
25
  case char
23
26
  when 0x00..0x7F
24
- output << byte.force_encoding('UTF-8')
27
+ output << byte
25
28
  when 0x88..0xC8
26
- hex_key = char.to_s(16).upcase
27
- output << (ansi_to_utf16[hex_key] || ansi_to_utf16['ERR']).force_encoding('UTF-16BE').encode('UTF-8')
29
+ output << utf16_to_utf8(@ansi_to_utf16_map[char_hex] || @ansi_to_utf16_map['ERR'])
28
30
  scanner.get_byte # ignore the next byte
29
31
  when 0xE0..0xFB
30
32
  [2, 1, 0].each do |n| # try 3 bytes, then 2 bytes, then 1 byte
31
- bytes = [char.to_s(16).upcase]
33
+ bytes = [char_hex]
32
34
  scanner.peek(n).each_byte {|b| bytes << b.to_s(16).upcase}
33
35
  hex_key = bytes.join('+')
34
- if ansi_to_utf16.has_key?(hex_key)
35
- output << ansi_to_utf16[hex_key].force_encoding('UTF-16BE').encode('UTF-8')
36
+ if @ansi_to_utf16_map.has_key?(hex_key)
37
+ output << utf16_to_utf8(@ansi_to_utf16_map[hex_key])
36
38
  n.times {scanner.get_byte}
37
39
  break
38
40
  end
39
41
  end
40
42
  else
41
- output << ansi_to_utf16['ERR'].force_encoding('UTF-16BE').encode('UTF-8')
43
+ output << utf16_to_utf8(@ansi_to_utf16_map['ERR'])
42
44
  scanner.get_byte if scanner.get_byte.unpack('C')[0] >= 0xE0 # ignore the next byte
43
45
  end
44
46
  end
45
47
 
46
- output.force_encoding('UTF-8')
48
+ output
47
49
  end
48
50
 
49
51
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: ascii-8bit
2
2
 
3
3
  module ANSEL
4
- VERSION = '2.0.0'
4
+ VERSION = '2.0.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ansel
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Keith Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-31 00:00:00.000000000 Z
11
+ date: 2016-03-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Convert ANSEL encoded text to UTF-8
14
14
  email: keithm@infused.org
@@ -22,6 +22,7 @@ files:
22
22
  - CHANGELOG.md
23
23
  - Gemfile
24
24
  - Gemfile.lock
25
+ - Gemfile.travis
25
26
  - MIT-LICENSE
26
27
  - README.md
27
28
  - Rakefile
@@ -50,7 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
51
  version: 1.3.0
51
52
  requirements: []
52
53
  rubyforge_project:
53
- rubygems_version: 2.4.3
54
+ rubygems_version: 2.5.1
54
55
  signing_key:
55
56
  specification_version: 4
56
57
  summary: Convert ANSEL encoded text to UTF-8