code-pages 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a666cbacc1bdb0ef5d453d63598d65db0d76ef5d
4
- data.tar.gz: 0bd3052e4ef28c7c51374c7459a63cc6331ec7e6
3
+ metadata.gz: f0ca7a2a70aacc32f367e85f350b57a0316e3a9a
4
+ data.tar.gz: 8c545eecf87b693e25d4c9b10e4717e824f48144
5
5
  SHA512:
6
- metadata.gz: 0734705ccfef3a5fa24064cc6c00622f5a62807955fe70fe61313494c272e267ca1f2d2a8ef821dba44e373224e66c0c6204dd82ad824a33c6f0967e1d654a3c
7
- data.tar.gz: 3e07d38166ae28d53bf84903d65fb3cfb594ba914addd10009ba76790931a6616baa8fe3f34c5c53bd2558721c3dd57954a343f01ecc0e46f98504ed939ec44b
6
+ metadata.gz: e6e1c93238a27bbb973517994b16d7641dca938f1fb1f6276dcb65b3c837ede72d5cc29dab6ba33fbff1122ed34b3a37f4f9d489eefbab550c64664ab70e7269
7
+ data.tar.gz: 86808b405f77afe5d6ac369922a7a6db4723b43fe1f0885759f83193915acf500148c71a43eb7ec086d4ef1fb19138b168a74ff9b369251ee9349f7f2a80453d
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ ## code-pages
2
+ A database of legacy Microsoft and ISO 8859 code pages for Ruby.
3
+
4
+ ## Installation
5
+
6
+ `gem install code-pages`
7
+
8
+ or put it in your Gemfile:
9
+
10
+ ```ruby
11
+ gem 'code-pages'
12
+ ```
13
+
14
+ ### What is this Thing?
15
+
16
+ Before the advent of the Unicode standard, Microsoft and other companies developed their own encoding systems to accommodate the world's many languages and writing systems. Like Unicode, each of these systems maps a series of integers to characters. A group of these characters is called a code page, and a number of legacy systems still use them to represent text. Fortunately, the Unicode Consortium maintains a set of data files mapping the characters used in each code page to their Unicode equivalents, making it possible to convert text from the various code page encodings to UTF-8.
17
+
18
+ ### Usage
19
+
20
+ Individual code pages can be accessed directly via the `CodePages` constant:
21
+
22
+ ```ruby
23
+ CodePages[37] # => #<CodePages::CodePage:0x00007fcd1c374dd0 @id=37 ... >
24
+ ```
25
+
26
+ Once you have a code page, a mapping of code page characters to Unicode characters can be obtained via the `#unicode_mapping` method:
27
+
28
+ ```ruby
29
+ CodePages[37].unicode_mapping # => { 0=>0, 1=>1, 2=>2, 3=>3, 4=>156, 5=>9, ... }
30
+ ```
31
+
32
+ You can also convert text encoded with the code page to UTF-8 and vice versa:
33
+
34
+ ```ruby
35
+ CodePages[37].to_utf8('abc') # => '/ÂÄ'
36
+ CodePages[37].from_utf8('/ÂÄ') # => 'abc'
37
+ ```
38
+
39
+ ## Updating Code Pages
40
+
41
+ The library comes with a rake task for downloading and updating the code pages from the Unicode Consortium's website. Run `bundle exec rake import` to update. To add more code pages, edit `resources/code_page_manifest.yml` and run the rake task again.
42
+
43
+ ## License
44
+
45
+ Licensed under the MIT license. See LICENSE for details.
46
+
47
+ ## Authors
48
+
49
+ * Cameron C. Dutro: http://github.com/camertron
@@ -10,11 +10,25 @@ module CodePages
10
10
  end
11
11
 
12
12
  def to_utf8(str)
13
- str.each_byte.map { |byte| unicode_mapping[byte] }.pack('U*')
13
+ str
14
+ .each_byte
15
+ .map { |byte| unicode_mapping[byte] }
16
+ .pack('U*')
17
+ end
18
+
19
+ def from_utf8(str)
20
+ str
21
+ .unpack('U*')
22
+ .map { |code_point| code_page_mapping[code_point] }
23
+ .pack('C*')
14
24
  end
15
25
 
16
26
  def unicode_mapping
17
27
  @unicode_mapping ||= YAML.load_file(resource_file)
18
28
  end
29
+
30
+ def code_page_mapping
31
+ @code_page_mapping ||= unicode_mapping.invert
32
+ end
19
33
  end
20
34
  end
@@ -1,3 +1,3 @@
1
1
  module CodePages
2
- VERSION = '1.0.0'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -14,9 +14,26 @@ describe CodePages::CodePage do
14
14
  end
15
15
  end
16
16
 
17
+ describe '#code_page_mapping' do
18
+ subject { code_page.code_page_mapping }
19
+
20
+ it 'maps unicode code points to code page characters' do
21
+ expect(subject[156]).to eq(4)
22
+ expect(subject[204]).to eq(120)
23
+ expect(subject[50]).to eq(242)
24
+ expect(subject[159]).to eq(255)
25
+ end
26
+ end
27
+
17
28
  describe '#to_utf8' do
18
29
  it 'converts the given string from the code page to utf8' do
19
30
  expect(code_page.to_utf8('abc')).to eq('/ÂÄ')
20
31
  end
21
32
  end
33
+
34
+ describe '#from_utf8' do
35
+ it 'converts the given string from the code page to utf8' do
36
+ expect(code_page.from_utf8('/ÂÄ')).to eq('abc')
37
+ end
38
+ end
22
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: code-pages
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cameron Dutro
@@ -18,6 +18,7 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - Gemfile
21
+ - README.md
21
22
  - Rakefile
22
23
  - code-pages.gemspec
23
24
  - lib/code-pages.rb