code-pages 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +49 -0
- data/lib/code-pages/code_page.rb +15 -1
- data/lib/code-pages/version.rb +1 -1
- data/spec/code_page_spec.rb +17 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0ca7a2a70aacc32f367e85f350b57a0316e3a9a
|
4
|
+
data.tar.gz: 8c545eecf87b693e25d4c9b10e4717e824f48144
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6e1c93238a27bbb973517994b16d7641dca938f1fb1f6276dcb65b3c837ede72d5cc29dab6ba33fbff1122ed34b3a37f4f9d489eefbab550c64664ab70e7269
|
7
|
+
data.tar.gz: 86808b405f77afe5d6ac369922a7a6db4723b43fe1f0885759f83193915acf500148c71a43eb7ec086d4ef1fb19138b168a74ff9b369251ee9349f7f2a80453d
|
data/README.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
## code-pages
|
2
|
+
A database of legacy Microsoft and ISO 8859 code pages for Ruby.
|
3
|
+
|
4
|
+
## Installation
|
5
|
+
|
6
|
+
`gem install code-pages`
|
7
|
+
|
8
|
+
or put it in your Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'code-pages'
|
12
|
+
```
|
13
|
+
|
14
|
+
### What is this Thing?
|
15
|
+
|
16
|
+
Before the advent of the Unicode standard, Microsoft and other companies developed their own encoding systems to accommodate the world's many languages and writing systems. Like Unicode, each of these systems maps a series of integers to characters. A group of these characters is called a code page, and a number of legacy systems still use them to represent text. Fortunately, the Unicode Consortium maintains a set of data files mapping the characters used in each code page to their Unicode equivalents, making it possible to convert text from the various code page encodings to UTF-8.
|
17
|
+
|
18
|
+
### Usage
|
19
|
+
|
20
|
+
Individual code pages can be accessed directly via the `CodePages` constant:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
CodePages[37] # => #<CodePages::CodePage:0x00007fcd1c374dd0 @id=37 ... >
|
24
|
+
```
|
25
|
+
|
26
|
+
Once you have a code page, a mapping of code page characters to Unicode characters can be obtained via the `#unicode_mapping` method:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
CodePages[37].unicode_mapping # => { 0=>0, 1=>1, 2=>2, 3=>3, 4=>156, 5=>9, ... }
|
30
|
+
```
|
31
|
+
|
32
|
+
You can also convert text encoded with the code page to UTF-8 and vice versa:
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
CodePages[37].to_utf8('abc') # => '/ÂÄ'
|
36
|
+
CodePages[37].from_utf8('/ÂÄ') # => 'abc'
|
37
|
+
```
|
38
|
+
|
39
|
+
## Updating Code Pages
|
40
|
+
|
41
|
+
The library comes with a rake task for downloading and updating the code pages from the Unicode Consortium's website. Run `bundle exec rake import` to update. To add more code pages, edit `resources/code_page_manifest.yml` and run the rake task again.
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
Licensed under the MIT license. See LICENSE for details.
|
46
|
+
|
47
|
+
## Authors
|
48
|
+
|
49
|
+
* Cameron C. Dutro: http://github.com/camertron
|
data/lib/code-pages/code_page.rb
CHANGED
@@ -10,11 +10,25 @@ module CodePages
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def to_utf8(str)
|
13
|
-
str
|
13
|
+
str
|
14
|
+
.each_byte
|
15
|
+
.map { |byte| unicode_mapping[byte] }
|
16
|
+
.pack('U*')
|
17
|
+
end
|
18
|
+
|
19
|
+
def from_utf8(str)
|
20
|
+
str
|
21
|
+
.unpack('U*')
|
22
|
+
.map { |code_point| code_page_mapping[code_point] }
|
23
|
+
.pack('C*')
|
14
24
|
end
|
15
25
|
|
16
26
|
def unicode_mapping
|
17
27
|
@unicode_mapping ||= YAML.load_file(resource_file)
|
18
28
|
end
|
29
|
+
|
30
|
+
def code_page_mapping
|
31
|
+
@code_page_mapping ||= unicode_mapping.invert
|
32
|
+
end
|
19
33
|
end
|
20
34
|
end
|
data/lib/code-pages/version.rb
CHANGED
data/spec/code_page_spec.rb
CHANGED
@@ -14,9 +14,26 @@ describe CodePages::CodePage do
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
describe '#code_page_mapping' do
|
18
|
+
subject { code_page.code_page_mapping }
|
19
|
+
|
20
|
+
it 'maps unicode code points to code page characters' do
|
21
|
+
expect(subject[156]).to eq(4)
|
22
|
+
expect(subject[204]).to eq(120)
|
23
|
+
expect(subject[50]).to eq(242)
|
24
|
+
expect(subject[159]).to eq(255)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
17
28
|
describe '#to_utf8' do
|
18
29
|
it 'converts the given string from the code page to utf8' do
|
19
30
|
expect(code_page.to_utf8('abc')).to eq('/ÂÄ')
|
20
31
|
end
|
21
32
|
end
|
33
|
+
|
34
|
+
describe '#from_utf8' do
|
35
|
+
it 'converts the given string from the code page to utf8' do
|
36
|
+
expect(code_page.from_utf8('/ÂÄ')).to eq('abc')
|
37
|
+
end
|
38
|
+
end
|
22
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: code-pages
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cameron Dutro
|
@@ -18,6 +18,7 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- Gemfile
|
21
|
+
- README.md
|
21
22
|
- Rakefile
|
22
23
|
- code-pages.gemspec
|
23
24
|
- lib/code-pages.rb
|