mac_japanese 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Namespace of the mac_japanese gem.
module MacJapanese
  # Version of the gem. Frozen so the shared constant cannot be mutated
  # in place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/mac_japanese/version', __FILE__)
3
+
4
# Gem specification for mac_japanese. The file list is taken from git, so
# this must be evaluated inside a git checkout of the project.
Gem::Specification.new do |gem|
  gem.authors       = ["labocho"]
  gem.email         = ["labocho@penguinlab.jp"]
  gem.description   = %q{Convert MacJapanese string to UTF-8 and vice versa.}
  gem.summary       = %q{Convert MacJapanese string to UTF-8 and vice versa.}
  gem.homepage      = "https://github.com/labocho/mac_japanese"

  # Split on newlines. The previous `split($\)` used the *output* record
  # separator, which is nil by default, so the listing was split on any
  # whitespace and paths containing spaces were broken apart.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "mac_japanese"
  gem.require_paths = ["lib"]
  gem.version       = MacJapanese::VERSION

  gem.add_development_dependency "rspec", "~>2.11.0"
  gem.add_development_dependency "guard-rspec", "~>0.7.0"
  gem.add_development_dependency "ruby-debug19"
end
@@ -0,0 +1,134 @@
1
+ require "spec_helper"
2
+ require "ruby-debug"
3
+
4
# Specs for MacJapanese.to_utf8 and MacJapanese.to_mac_japanese.
# Each example stores its input in @src; `subject` is evaluated lazily, so
# it sees the value assigned inside the example body.
describe MacJapanese do
  describe ".to_utf8" do
    # These conversions are expected to give the same result with and
    # without PUA output, so they run once per use_pua value.
    [true, false].each do |use_pua|
      context "use_pua: #{use_pua}" do
        let(:options) { {} }
        subject { MacJapanese.to_utf8(@src, options.merge(use_pua: use_pua)) }
        it "should convert us-ascii chars to utf8" do
          @src = "foo\n".force_encoding("macjapan")
          should == "foo\n"
        end
        it "should convert additional backslash to utf8" do
          @src = "\x80".force_encoding("macjapan")
          should == "\\"
        end
        it "should convert halfwidth katakana to utf8" do
          @src = "\xA7".force_encoding("macjapan")
          should == "\u{FF67}"
        end
        it "should convert hiragana to utf8" do
          @src = "\x82\x9F".force_encoding("macjapan")
          should == "\u{3041}"
        end
        it "should convert apple additions to utf8" do
          @src = "\x85\x5E".force_encoding("macjapan")
          should == "\u{2474}"
        end
      end
    end
    context "default" do
      subject { MacJapanese.to_utf8(@src) }
      it "should expand composed char with pua" do
        @src = "\x85\xAB".force_encoding("macjapan")
        should == "\u{F862}\u{0058}\u{0049}\u{0049}\u{0049}"
      end
    end
    context "use_pua: true" do
      subject { MacJapanese.to_utf8(@src, use_pua: true) }
      it "should expand composed char with pua" do
        @src = "\x85\xAB".force_encoding("macjapan")
        should == "\u{F862}\u{0058}\u{0049}\u{0049}\u{0049}"
      end
    end
    context "use_pua: false" do
      subject { MacJapanese.to_utf8(@src, use_pua: false) }
      it "should expand composed char without pua" do
        @src = "\x85\xAB".force_encoding("macjapan")
        should == "\u{0058}\u{0049}\u{0049}\u{0049}"
      end
    end
    context "pass another encoding string to .to_utf8" do
      it "should encode just like passing mac japanese string" do
        # Force the binary encoding explicitly: a "\x.." literal is only
        # ASCII-8BIT when the source encoding is US-ASCII, and is tagged
        # UTF-8 under a UTF-8 source encoding.
        @src = "\x82\x9F".force_encoding(Encoding::ASCII_8BIT)
        @src.encoding.should == Encoding::ASCII_8BIT
        MacJapanese.to_utf8(@src).should == "\u{3041}"
      end
    end
  end

  describe ".to_mac_japanese" do
    let(:options) { {} }
    subject { MacJapanese.to_mac_japanese(@src, options) }
    it "should convert us-ascii chars to mac_japanese" do
      @src = "foo\n"
      should == "foo\n".force_encoding("macjapan")
    end
    it "should convert additional backslash to mac_japanese" do
      @src = "\\"
      should == "\x80".force_encoding("macjapan")
    end
    it "should convert halfwidth katakana to mac_japanese" do
      @src = "\u{FF67}"
      should == "\xA7".force_encoding("macjapan")
    end
    it "should convert hiragana to mac_japanese" do
      @src = "\u{3041}"
      should == "\x82\x9F".force_encoding("macjapan")
    end
    it "should convert hiragana followed by composed characters to mac_japanese" do
      @src = "\u{3041}\u{F862}\u{0058}\u{0049}\u{0049}\u{0049}"
      should == "\x82\x9F\x85\xAB".force_encoding("macjapan")
    end
    it "should convert apple additions to mac_japanese" do
      @src = "\u{2474}"
      should == "\x85\x5E".force_encoding("macjapan")
    end
    it "should compose characters with pua" do
      @src = "\u{F862}\u{0058}\u{0049}\u{0049}\u{0049}"
      should == "\x85\xAB".force_encoding("macjapan")
    end
    it "should not compose characters without pua" do
      @src = "\u{0058}\u{0049}\u{0049}\u{0049}"
      should == "XIII".force_encoding("macjapan")
    end
    context "pass another encoding string to .to_mac_japanese" do
      it "should encode to mac japanese string (via utf8)" do
        @src = "\u{3041}".encode("euc-jp")
        MacJapanese.to_mac_japanese(@src).should == "\x82\x9F".force_encoding("macjapan")
      end
    end
  end

  # Behaviour for characters that have no mapping, with substitution enabled.
  context "undef: :replace" do
    it "should replace undefined mac japanese char" do
      @src = "foo\xFC\xFCbar".force_encoding("macjapan")
      MacJapanese.to_utf8(@src, undef: :replace).should == "foo\u{fffd}bar"
    end
    it "should replace undefined utf-8 char" do
      @src = "foo\u{FA11}bar"
      MacJapanese.to_mac_japanese(@src, undef: :replace).should == "foo?bar"
    end
    it "should replace with replace option" do
      @src = "foo\xFC\xFCbar".force_encoding("macjapan")
      MacJapanese.to_utf8(@src, undef: :replace, replace: "*").should == "foo*bar"
    end
  end

  # Behaviour for characters that have no mapping when no :undef option is given.
  context "undef: (none)" do
    it "should raise Encoding::UndefinedConversionError for undefined mac japanese char" do
      @src = "foo\xFC\xFCbar".force_encoding("macjapan")
      expect{
        MacJapanese.to_utf8(@src)
      }.to raise_error(Encoding::UndefinedConversionError)
    end
    it "should raise Encoding::UndefinedConversionError for undefined utf-8 japanese char" do
      @src = "foo\u{FA11}bar"
      expect{
        MacJapanese.to_mac_japanese(@src)
      }.to raise_error(Encoding::UndefinedConversionError)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ $LOAD_PATH.unshift "#{File.dirname(__FILE__)}/../lib"
2
+ require "mac_japanese"
3
+
4
+ # This file was generated by the `rspec --init` command. Conventionally, all
5
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
6
+ # Require this file using `require "spec_helper"` to ensure that it is only
7
+ # loaded once.
8
+ #
9
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
10
RSpec.configure do |c|
  # Randomise example order so hidden order dependencies show up. The seed
  # is printed after each run; pass it back to reproduce a given order:
  #     --seed 1234
  c.order = 'random'

  # Focus filtering and metadata options.
  c.filter_run :focus
  c.run_all_when_everything_filtered = true
  c.treat_symbols_as_metadata_keys_with_true_values = true
end
@@ -0,0 +1,133 @@
1
+ require "csv"
2
+ require "open-uri"
3
+
4
# Absolute path of the directory one level above the one containing this script.
ROOT_DIR = File.expand_path("..", File.dirname(__FILE__))
5
+
6
# Spell out every byte of +string+ as a backslash-x escape with two
# uppercase hex digits, e.g. the bytes 0x81 0x40 give the text \x81\x40.
# Returns an empty string for empty input.
def hex_literal(string)
  string.each_byte.map { |byte| format('\\x%02X', byte) }.join
end
12
+
13
# Spell out every code point of +string+ as a \u{XXXX} escape, in uppercase
# hex padded to at least four digits, e.g. U+3041 gives the text \u{3041}.
# Returns an empty string for empty input.
def unicode_literal(string)
  string.each_codepoint.map { |codepoint| format('\\u{%04X}', codepoint) }.join
end
19
+
20
# Tell whether the argument is one of the six private-use characters
# U+F860, U+F861, U+F862, U+F87A, U+F87E or U+F87F.
#
# NOTE(review): despite the parameter name, the value is compared against
# one-character strings, not against "0x...." hex text.
def pua?(four_hex_with_0x)
  private_use_chars = ["\u{F860}", "\u{F861}", "\u{F862}", "\u{F87A}", "\u{F87E}", "\u{F87F}"]
  private_use_chars.include?(four_hex_with_0x)
end
28
+
29
# Build the conversion pairs [[macjapanese, utf8], ...] as real strings
# (not yet rendered as literals).
#
# The first 32 pairs map the control bytes 0x00-0x1F to themselves. The
# remaining pairs are read from "#{ROOT_DIR}/src/JAPANESE.txt": every line
# of the form "0x<bytes>\t0x<cp>[+0x<cp>...]\t..." yields one pair, while
# comment lines and lines that do not match are skipped.
#
# use_pua - when false, code points for which pua? returns true are left
#           out of the UTF-8 side.
#
# Returns an Array of [String, String] pairs.
def make_pairs(use_pua = true)
  # Control characters
  pairs = (0x00..0x1f).map do |i|
    c = [i].pack("C*")
    [c, c]
  end

  # File.foreach replaces open + IO#lines (IO#lines was removed in Ruby 3.0)
  # and closes the file once iteration finishes.
  File.foreach("#{ROOT_DIR}/src/JAPANESE.txt") do |line|
    next if line =~ /^#/ # ignore comment
    mapping = /^(0x.+)\t(0x.+)\t/.match(line)
    next unless mapping
    macjp_hex, unicode_hex = mapping.captures

    # Drop the leading "0x", then turn each pair of hex digits into one byte.
    macjp = macjp_hex[2..-1].chars.each_slice(2).map { |h| h.join.to_i(16) }.pack("C*")

    # The Unicode side may be several code points joined by "+".
    unicode = unicode_hex.split("+").map { |hex|
      c = hex.to_i(16).chr("utf-8")
      next "" if !use_pua && pua?(c) # skip pua
      c
    }.join

    pairs.push [macjp, unicode]
  end
  pairs
end
57
+
58
# Fetch the mapping table once and cache it at src/JAPANESE.txt.
unless File.exist?("#{ROOT_DIR}/src/JAPANESE.txt")
  mapping_url = "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/JAPANESE.TXT"

  # Kernel#open no longer accepts URLs on Ruby 3.0+, so use URI.open where
  # open-uri exposes it publicly, and fall back to Kernel#open otherwise.
  remote = URI.respond_to?(:open) ? URI.open(mapping_url) : open(mapping_url)
  begin
    mapping_text = remote.read
  ensure
    remote.close
  end

  # Write only after the download succeeded. Otherwise a failed download
  # would leave an empty cache file and the next run would not retry.
  File.open("#{ROOT_DIR}/src/JAPANESE.txt", "w") do |f|
    f.print mapping_text
  end
end
65
+
66
# Generate lib/mac_japanese/mac_japanese_to_utf8_with_pua.rb, which defines
# the MacJapanese -> UTF-8 lookup table with PUA characters kept.
path = "#{ROOT_DIR}/lib/mac_japanese/mac_japanese_to_utf8_with_pua.rb"
puts path
literal_pairs = make_pairs.map { |m, u| [hex_literal(m), unicode_literal(u)] }
# One `["<macjapanese>", "<utf8>"]` row per pair, comma-separated.
rows = literal_pairs.map { |m, u| %{ ["#{m}", "#{u}"]} }.join(",\n")
source = <<-EOS
# This file was automatically generated by `rake tables`.
# Cannot modify directly.
module MacJapanese
MAC_JAPANESE_TO_UTF8_WITH_PUA = Hash[
[
#{rows}
].each{|m, u| m.force_encoding(Encoding::MacJapanese)}
]
end
EOS
File.open(path, "w") { |f| f.puts source }
83
+
84
# Generate lib/mac_japanese/mac_japanese_to_utf8_without_pua.rb, which
# defines the MacJapanese -> UTF-8 lookup table with PUA characters dropped.
path = "#{ROOT_DIR}/lib/mac_japanese/mac_japanese_to_utf8_without_pua.rb"
puts path
literal_pairs = make_pairs(false).map { |m, u| [hex_literal(m), unicode_literal(u)] }
# One `["<macjapanese>", "<utf8>"]` row per pair, comma-separated.
rows = literal_pairs.map { |m, u| %{ ["#{m}", "#{u}"]} }.join(",\n")
source = <<-EOS
# This file was automatically generated by `rake tables`.
# Cannot modify directly.
module MacJapanese
MAC_JAPANESE_TO_UTF8_WITHOUT_PUA = Hash[
[
#{rows}
].each{|m, u| m.force_encoding(Encoding::MacJapanese)}
]
end
EOS
File.open(path, "w") { |f| f.puts source }
101
+
102
# Generate lib/mac_japanese/utf8_to_mac_japanese.rb, which defines the
# UTF-8 -> MacJapanese lookup table (built from the pairs with PUA kept).
path = "#{ROOT_DIR}/lib/mac_japanese/utf8_to_mac_japanese.rb"
puts path
literal_pairs = make_pairs.map { |m, u| [hex_literal(m), unicode_literal(u)] }
# One `["<utf8>", "<macjapanese>"]` row per pair, comma-separated.
rows = literal_pairs.map { |m, u| %{ ["#{u}", "#{m}"]} }.join(",\n")
source = <<-EOS
# This file was automatically generated by `rake tables`.
# Cannot modify directly.
module MacJapanese
UTF8_TO_MAC_JAPANESE = Hash[
[
#{rows}
].each{|u, m| m.force_encoding(Encoding::MacJapanese)}
]
end
EOS
File.open(path, "w") { |f| f.puts source }
119
+
120
# Generate lib/mac_japanese/decomposed_or_normal_character_regexp.rb. The
# regexp lists every multi-character UTF-8 sequence from the table as an
# alternative, followed by a single-character fallback.
path = "#{ROOT_DIR}/lib/mac_japanese/decomposed_or_normal_character_regexp.rb"
puts path
multi_char_sequences = make_pairs.map { |_, u| u }.select { |u| u.size > 1 }
alternatives = multi_char_sequences.map { |u| unicode_literal(u) }.join("|")
decomposed_or_single_character_regexp = "/(#{alternatives}|.)/m"
source = <<-EOS
# This file was automatically generated by `rake tables`.
# Cannot modify directly.
module MacJapanese
DECOMPOSED_OR_NORMAL_CHARACTER_REGEXP = #{decomposed_or_single_character_regexp}
end
EOS
File.open(path, "w") { |f| f.puts source }
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mac_japanese
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - labocho
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.11.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 2.11.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: guard-rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.7.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.7.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: ruby-debug19
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Convert MacJapanese string to UTF-8 and vice versa.
63
+ email:
64
+ - labocho@penguinlab.jp
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - .rspec
71
+ - Gemfile
72
+ - Guardfile
73
+ - LICENSE
74
+ - README.md
75
+ - Rakefile
76
+ - lib/mac_japanese.rb
77
+ - lib/mac_japanese/decomposed_or_normal_character_regexp.rb
78
+ - lib/mac_japanese/mac_japanese_to_utf8_with_pua.rb
79
+ - lib/mac_japanese/mac_japanese_to_utf8_without_pua.rb
80
+ - lib/mac_japanese/utf8_to_mac_japanese.rb
81
+ - lib/mac_japanese/version.rb
82
+ - mac_japanese.gemspec
83
+ - spec/mac_japanese_spec.rb
84
+ - spec/spec_helper.rb
85
+ - src/generate_conversion_tables.rb
86
+ homepage: https://github.com/labocho/mac_japanese
87
+ licenses: []
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.23
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: Convert MacJapanese string to UTF-8 and vice versa.
110
+ test_files:
111
+ - spec/mac_japanese_spec.rb
112
+ - spec/spec_helper.rb