jis2euc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in jis2euc.gemspec
4
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/jis2euc.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "jis2euc/version"
4
+
5
# Packaging metadata for the jis2euc gem. File lists are read from git, so the
# gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Runs `git ls-files` with the given argument suffix and returns one path per entry.
  tracked = lambda { |suffix| `git ls-files#{suffix}`.split("\n") }

  spec.name        = "jis2euc"
  spec.version     = Jis2euc::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["pierr.chen"]
  spec.email       = ["pierr.chen@gmail.com"]
  spec.homepage    = ""
  spec.summary     = "convert jis encoding to EUC-JP"
  spec.description = "ARIB standard combines serveral JIS encoding for texts ,this gem convert them to EUC-JP "

  spec.rubyforge_project = "jis2euc"

  spec.files         = tracked.call("")
  spec.test_files    = tracked.call(" -- {test,spec,features}/*")
  spec.executables   = tracked.call(" -- bin/*").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ =begin
4
+
5
+ Two area : GL , GR
6
+ Each area holds exactly ONE of four values: G0, G1, G2 or G3.
7
+ GL was init to G0
8
+ GR was init to G2
9
+ For G0..G3 ,it could be assigned to different character set.
10
+ G0 was init to KANJI_1
11
+ G1 was init to EISU
12
+ G2 was init to HIRAGANA
13
+ G3 was init to KATAKANA
14
+
15
+ How to decide the character set we are using is :
16
+ 1. which area we are using?
17
+ 2. What is the value of that area? Say Gx
18
+ 3. Which character set was assigned to Gx?
19
+
20
+ Q:what is single shift?
21
+ A: A single shift applies only to the next character; after processing it we have to restore the previous code set.
22
+
23
+ Q:How to determine current area?
24
+ A: if current_char > 0x80
25
+ "GR"
26
+ else
27
+ "GL"
28
+ =end
29
+
30
+ #To understand how ctl code works
31
+
32
# Tells whether byte +b+ is handled as a control code instead of character data.
#
# Control bytes are: 0x00-0x20 (C0 area plus SP), 0x7f (DEL),
# 0x80-0x9f (C1 area) and 0xff.
#
# b:: Integer byte value
# Returns true for a control byte, false otherwise.
def is_ctl_code?(b)
  return true if b <= 0x20 || b == 0x7f || b == 0xff

  # The C1 area starts at 0x80 itself; the former `b > 0x80` test let 0x80
  # through, so it was decoded as a printable character.
  b >= 0x80 && b < 0xa0
end
37
+
38
+ =begin
39
+ Table 7-1 Invocation of code elements
40
+ {{0x0f,}, CODE_SET_G0, CODE_AREA_GL, LOCKING_SHIFT}, //LS0 set GL to G0
41
+ {{0x0e,}, CODE_SET_G1, CODE_AREA_GL, LOCKING_SHIFT}, //LS1 set GL to G1
42
+ {{0x1b,0x6e,}, CODE_SET_G2, CODE_AREA_GL, LOCKING_SHIFT}, //LS2 set GL to G2
43
+ {{0x1b,0x6f,}, CODE_SET_G3, CODE_AREA_GL, LOCKING_SHIFT}, //LS3 set GL to G3
44
+ {{0x1b,0x7e,}, CODE_SET_G1, CODE_AREA_GR, LOCKING_SHIFT}, //LS1R set GR to G1
45
+ {{0x1b,0x7d,}, CODE_SET_G2, CODE_AREA_GR, LOCKING_SHIFT}, //LS2R set GR to G2
46
+ {{0x1b,0x7c,}, CODE_SET_G3, CODE_AREA_GR, LOCKING_SHIFT}, //LS3R set GR to G3
47
+ {{0x19,}, CODE_SET_G2, CODE_AREA_GL, SINGLE_SHIFT }, //SS2 set GL to G2 ,single shift
48
+ {{0x1d,}, CODE_SET_G3, CODE_AREA_GL, SINGLE_SHIFT } //SS3 set GL to G3 ,single shift
49
+ =end
50
+
51
# Invocations that arrive as ESC followed by one byte, keyed by that byte.
# The text is not only a label: Jis8charset#do extracts the "area=>set" pair
# and the word "single" from it.
$esc_invok = {
  0x6e => "LS2 : GL=>G2 , locking shift",
  0x6f => "LS3 : GL=>G3 , locking shift",
  0x7e => "LS1R : GR=>G1 , locking shift",
  0x7d => "LS2R : GR=>G2 , locking shift",
  0x7c => "LS3R : GR=>G3 , locking shift",
  0x7b => "Confused... "
}

# Invocations that are a single control byte (no ESC prefix), keyed by that byte.
# Not listed on purpose, they are handled directly by the decoder loop:
#   0x20 "SP"  - space " "
#   0x0d "APR" - "\n"
#   0x89 "MSZ" - character size is Middle
$other_invok = {
  0x0f => "LS0 : GL=>G0, locking shift",
  0x0e => "LS1 : GL=>G1, locking shift",
  0x19 => "SS2 : GL=>G2, single shift",
  0x1d => "SS3 : GL=>G3, single shift"
}

# Table 7-2 of B24: bytes that may follow ESC (0x1B) to open a designation sequence.
$set_designation = [0x24, 0x28, 0x29, 0x2a, 0x2b]

# Designation prefix (comma-joined "0x.." text) => what the sequence does.
# Jis8charset#do_b reads the target set from the text after "to ".
$designation_action = {
  "0x24"      => "set 2 bytes Graphic set to G0",
  "0x24,0x29" => "set 2 bytes Graphic set to G1",
  "0x24,0x2a" => "set 2 bytes Graphic set to G2",
  "0x24,0x2b" => "set 2 bytes Graphic set to G3",
  "0x28"      => "set 1 bytes Graphic set to G0",
  "0x29"      => "set 1 bytes Graphic set to G1",
  "0x2a"      => "set 1 bytes Graphic set to G2",
  "0x2b"      => "set 1 bytes Graphic set to G3"
}

# The escape control byte.
ESC = 0x1b

# Final byte of a designation sequence (as "0x.." text) => character-set name.
$encoding_mapping = {
  "0x39" => "kanji1",
  "0x3a" => "kanji2",
  "0x4a" => "eisu",
  "0x30" => "hira",
  "0x31" => "kata",
  "0x3b" => "tusika"
}
93
+
94
+ #
95
+ #
96
+ #
97
+ #
98
+
99
+
100
+
101
# Decoder state for ARIB 8-bit text.
#
# Two tables are tracked:
# * @areas - which graphic set ("G0".."G3") each code area ("GL"/"GR") invokes
# * @sets  - which character set each graphic set is designated to
# plus the bookkeeping needed to undo a single shift after one character.
class Jis8charset

  #TODO : make it singleton
  def initialize
    reset
  end

  # Puts the state back to its defaults: GL=>G0, GR=>G2,
  # G0=kanji1, G1=eisu, G2=hira, G3=kata, no single shift pending.
  def reset
    @areas = {"GL" => "G0", "GR" => "G2"}
    @sets = {"G0" => "kanji1", "G1" => "eisu", "G2" => "hira", "G3" => "kata"}
    @last_gl = "G0"
    @single_shift = false
  end

  # Debug hook; output is disabled.
  def verbose(s)
    #puts s
  end

  # Looks up what is currently invoked into +area+ ("GL" or "GR").
  #
  # Returns three values: the character-set name, the number of bytes per
  # character (2 for "kanji1" and "tusika", otherwise 1) and whether a single
  # shift is pending.
  def get_current_charset(area)
    charset = @sets[@areas[area]]
    bytes = (charset.eql?("kanji1") || charset.eql?("tusika")) ? 2 : 1

    return charset, bytes, @single_shift
  end

  # Invokes +graphic_set+ ("G0".."G3") into +area+.
  #
  # With +single_shift+ the GL value in force before the shift is remembered so
  # that #restore can bring it back.
  #
  # Raises ArgumentError for an unknown area or graphic set, or when a single
  # shift targets anything but "GL".
  def set_area(area, graphic_set, single_shift = false)
    raise ArgumentError, "unknow #{area} , illeale key" unless @areas.key?(area)
    # Reject unknown sets here; otherwise the next lookup would yield a nil charset.
    raise ArgumentError, "unknow #{graphic_set} , illeale key" unless @sets.key?(graphic_set)

    if single_shift
      raise ArgumentError unless area.eql?("GL")
      # Keep the value saved by the first shift when shifts come back to back;
      # saving again would store the temporary set and make #restore a no-op.
      @last_gl = @areas["GL"] unless @single_shift
      @single_shift = true
    end
    @areas[area] = graphic_set
  end

  # Designates +character_set+ to +graphic_set+ ("G0".."G3").
  # Raises ArgumentError for an unknown graphic set.
  def set_graphicset(graphic_set, character_set)
    raise ArgumentError, "unknow #{graphic_set} , illeale key" unless @sets.key?(graphic_set)
    verbose "set #{graphic_set} to #{character_set}"
    @sets[graphic_set] = character_set
  end

  # Dumps both tables with Kernel#p.
  def show
    p @areas
    p @sets
  end

  # Ends a single shift: GL goes back to the remembered graphic set.
  def restore
    #only to restore @areas["GL"]
    @areas["GL"] = @last_gl
    @single_shift = false
  end

  # Applies an invocation described by text such as "SS2 : GL=>G2, single shift".
  # The "area=>set" pair is taken from the text; the word "single" selects a
  # single shift.
  #
  # Raises ArgumentError when the text holds no "area=>set" pair (previously
  # this surfaced as a NoMethodError on nil).
  def do(action)
    matches = /.*(\w{2})=>(\w{2}).*/.match(action.to_s)
    raise ArgumentError, "unknow invocation #{action.inspect}" if matches.nil?

    set_area(matches[1], matches[2], action.include?("single"))
  end

  # Applies a designation described by text such as
  # "set 2 bytes Graphic set to G0".
  #
  # action:: description whose tail after "to " names the graphic set
  # para::   key into $encoding_mapping, e.g. "0x3b"
  #
  # Raises ArgumentError when +para+ is unknown or +action+ names no known
  # graphic set (previously the mapping could be stored under a nil key).
  def do_b(action, para)
    verbose "action #{action}"
    target = /to\s(.*)/.match(action.to_s)
    set = target.nil? ? nil : target[1]
    raise ArgumentError, "unknow encoding format #{para}" unless $encoding_mapping.key?(para)
    raise ArgumentError, "unknow designation #{action.inspect}" unless @sets.key?(set)

    log = "#{action} using [#{$encoding_mapping[para]}]"
    verbose "-> Action : #{log}"
    @sets[set] = $encoding_mapping[para]
  end

end
177
+
178
# Manual smoke test for Jis8charset: prints the initial state, changes an
# invocation and two designations, then applies the ESC 0x6e action and prints
# what GL resolves to afterwards.
def test_jis8charset
  state = Jis8charset.new

  %w[GL GR].each { |area| puts state.get_current_charset(area) }

  state.show
  state.set_area("GL", "G1")
  [["G0", "eisu"], ["G1", "kanji"]].each do |graphic_set, charset|
    state.set_graphicset(graphic_set, charset)
  end
  state.show

  #matches = /.*(\w{2})=>(\w{2}).*/.match("xxxxxx GL=>G0 yyyyy ")
  action = $esc_invok[0x6e]
  puts action
  state.do(action)

  charset, width, shifted = state.get_current_charset("GL")
  puts charset, width, shifted
end
204
+
205
+ #test_jis8charset
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'iconv'
5
+ #require 'jis8ctl.rb'
6
+
7
+ require File.join(File.dirname(__FILE__),'jis8ctl')
8
+
9
+ =begin
10
+ Dir.foreach("sample") do |f|
11
+ puts "->#{f}";
12
+ f.each_byte {|b| puts b}
13
+
14
+ end
15
+ =end
16
+
17
+
18
+
19
+
20
+ #puts "中文"
21
+ #puts "Résumés"
22
+ #puts "講座は4つの分野で"
23
+
24
+ if RUBY_VERSION < "1.9"
25
+ #no use in ruby 1.9
26
+ $KCODE = "UTF8"
27
+ end
28
+
29
+ #An internal test case
30
+ def test_1
31
+ ch = "中文"
32
+ fr = "Résumés"
33
+ jp = "講座は4つの分野で"
34
+ eng = "Hello"
35
+ a = [eng,ch, fr, jp].each do |s|
36
+
37
+ puts "->#{s}"
38
+ if RUBY_VERSION > "1.9"
39
+ puts RUBY_VERSION
40
+ puts s.encoding.name
41
+ #s.each_char {|c| puts c}
42
+
43
+ else
44
+ puts RUBY_VERSION
45
+ puts %w|->scan(/./u):|
46
+ p s.scan(/./u)
47
+ puts "->size = #{s.size}"
48
+ puts "->length = #{s.length}"
49
+ puts "->each_char :"
50
+ s.each_char {|c| print c , ","}
51
+ print "\n"
52
+ puts "->each_byte :"
53
+ s.each_byte {|b| print b , ","}
54
+ print "\n"
55
+ end
56
+ #puts s.encode("UTF-8")
57
+ end
58
+
59
+
60
+ #convert EUC-JP to UTF-8 so that Terminal can display it
61
+
62
+ utf8_jp = "講座は4つの分野で" #as the source coding is UTF8 , it will be UTF8
63
+ puts utf8_jp.size
64
+ utf8_jp.each_byte {|b| print b , ","} ;print "\n"
65
+ euc_jp = Iconv.conv("EUCJP","UTF8",utf8_jp)
66
+ puts "ouput euc_jp coding :"
67
+ puts euc_jp #Terminal can not understand EUC-JP ,so only garbage ouput
68
+ puts euc_jp.size
69
+ euc_jp.each_byte {|b| print b , ","} ; print "\n"
70
+ puts "convert back to utf-8"
71
+ utf8_jp_again = Iconv.conv("UTF8","EUCJP",euc_jp)
72
+ puts utf8_jp_again
73
+
74
+ end
75
+
76
# Two-byte EUC-JP code for every byte of the one-byte alphanumeric ("eisu") set,
# indexed by the 7-bit byte value. 0x0000 marks a position with no mapping.
$eisu_to_euc =
  # 0x00-0x1f: control range, unmapped
  Array.new(0x20, 0x0000) +
  # 0x20-0x2f:   !       "       #       $       %       &       '
  [0xA1A0, 0xA1AA, 0xA1ED, 0xA1F4, 0xA1F0, 0xA1F3, 0xA1F5, 0xA1EC,
   # (       )       *       +       ,       -       .       /
   0xA1CA, 0xA1CB, 0xA1F6, 0xA1DC, 0xA1A4, 0xA1BD, 0xA1A5, 0xA1BF] +
  # 0x30-0x39: digits 0-9
  (0xA3B0..0xA3B9).to_a +
  # 0x3a-0x3f: :    ;       <       =       >       ?
  [0xA1A7, 0xA1A8, 0xA1E3, 0xA1E1, 0xA1E4, 0xA1A9] +
  # 0x40: @
  [0xA1F7] +
  # 0x41-0x5a: A-Z
  (0xA3C1..0xA3DA).to_a +
  # 0x5b-0x5f: [    \       ]       ^       _
  [0xA1CE, 0xA1EF, 0xA1CF, 0xA1B0, 0xA1A1] +
  # 0x60: `
  [0xA1AE] +
  # 0x61-0x7a: a-z
  (0xA3E1..0xA3FA).to_a +
  # 0x7b-0x7f: {    |       }       ~       (unmapped)
  [0xA1D0, 0xA1C3, 0xA1D1, 0xA1B1, 0x0000]
92
+
93
+
94
+
95
# Two-byte EUC-JP code for every byte of the one-byte hiragana set, indexed by
# the 7-bit byte value. 0x0000 marks a position with no mapping.
$hiragana_to_euc =
  # 0x00-0x1f: control range, unmapped
  Array.new(0x20, 0x0000) +
  # 0x20
  [0xA1A0] +
  # 0x21-0x73: one contiguous run of 0xA4xx codes
  (0xA4A1..0xA4F3).to_a +
  # 0x74-0x76: unmapped
  Array.new(3, 0x0000) +
  # 0x77-0x7e: symbol codes taken from the 0xA1xx row
  [0xA1B5, 0xA1B6, 0xA1BC, 0xA1A3, 0xA1D6, 0xA1D7, 0xA1A2, 0xA1A6] +
  # 0x7f: unmapped
  [0x0000]
106
+
107
# Two-byte EUC-JP code for every byte of the one-byte katakana set, indexed by
# the 7-bit byte value. 0x0000 marks a position with no mapping.
$katakana_to_euc =
  # 0x00-0x1f: control range, unmapped
  Array.new(0x20, 0x0000) +
  # 0x20
  [0xA1A0] +
  # 0x21-0x76: one contiguous run of 0xA5xx codes
  (0xA5A1..0xA5F6).to_a +
  # 0x77-0x7e: symbol codes taken from the 0xA1xx row
  [0xA1B3, 0xA1B4, 0xA1BC, 0xA1A3, 0xA1D6, 0xA1D7, 0xA1A2, 0xA1A6] +
  # 0x7f: unmapped
  [0x0000]
118
+
119
+
120
# Global switch for diagnostic output; off by default.
$debug = false

# Prints +s+ with puts, but only while $debug is on.
def verbose(s)
  return unless $debug

  puts s
end
124
+
125
+ $eucjp_to_utf8 = Iconv.new("UTF8//TRANSLIT//IGNORE","EUCJP")
126
+
127
+
128
# Encodes one decoded character as two EUC-JP bytes.
#
# from:: character-set name: "eisu", "hira", "kata", "kanji1" or "tusika"
# a::    first (or only) byte of the character; the high bit may be set
# b::    second byte, used by the two-byte sets only
#
# The one-byte sets are looked up in their table by the low 7 bits of +a+.
# The two-byte sets are produced by setting the high bit of both bytes.
#
# Returns a two-byte String.
# Raises ArgumentError for any other character-set name.
def to_euc(from, a, b = 0x00)
  low = a & 0x7F
  v = case from
      when "eisu" then $eisu_to_euc[low]
      when "hira" then $hiragana_to_euc[low]
      when "kata" then $katakana_to_euc[low]
      when "kanji1"
        # Hack: Iconv cannot handle lead bytes such as 0x2D (legal in
        # JIS X 0213), so those are replaced by 0xA1A1. The check uses the
        # masked byte so that it also applies when the lead byte arrives with
        # its high bit set; the unmasked comparison missed that case.
        if [0x2d, 0x7a, 0x7b, 0x7c, 0x7e, 0x7f].include?(low)
          0xa1a1
        else
          ((a | 0x80) << 8) + (b | 0x80)
        end
      # Additional symbols; a desktop may not be able to display these correctly.
      when "tusika" then ((a | 0x80) << 8) + (b | 0x80)
      else raise ArgumentError, "unknow encoding #{from}"
      end

  ((v & 0xFF00) >> 8).chr + (v & 0x00FF).chr
end
142
+
143
+
144
+
145
+ # Displays the eisu, hiragana and katakana tables (plus one kanji) for visual inspection
146
+
147
# Manual check: encodes bytes 0x23..0x7e of the eisu, hiragana and katakana
# sets plus one kanji to EUC-JP and prints the result converted to UTF-8.
def test_display_eisu
  printable = 0x23..0x7e
  s_euc = ""

  %w[eisu hira kata].each do |charset|
    printable.each { |byte| s_euc << to_euc(charset, byte) }
  end

  #Kanji1
  s_euc << to_euc("kanji1", 0x5f, 0x37)
  puts eucjp_to_utf8(s_euc)
end
162
+
163
+ $jis8 = Jis8charset.new
164
# Converts a string of ARIB 8-bit JIS bytes into EUC-JP bytes.
#
# The shared decoder state in $jis8 is reset first, so every message is decoded
# independently. Bytes are then consumed from left to right:
# * control bytes (see is_ctl_code?) change the GL/GR invocation, change a
#   G0..G3 designation, produce a space (0x20) or newline (0x0d), or are ignored;
# * any other byte is decoded as a one- or two-byte character of the set that
#   is invoked into its area (GR when the byte is above 0x80, otherwise GL).
#
# s:: String holding the raw bytes
#
# Returns a String of EUC-JP bytes. Characters of the "tusika" set are emitted
# as "~~"; a two-byte character cut off by the end of input is emitted as "??".
# Raises ArgumentError when a designation sequence is truncated or names an
# unknown character set.
def arib_jis_to_euc(s)
  # Work on an array of byte values so that we can shift and peek freely.
  src = []
  s.each_byte { |byte| src << byte }
  out = ""
  # Each message is independent of the previous one.
  $jis8.reset

  if $debug
    puts "convert following string to euc :"
    src.each { |byte| print "#{byte.to_s(16)} " }
    puts "\n"
  end

  until src.empty?
    c1 = src.shift
    if $debug
      print "c1 : #{c1.to_s(16)} "
      if is_ctl_code?(c1) then puts "->Is CTL" else print "->Char" end
    end

    if is_ctl_code?(c1)
      if c1 == ESC
        verbose "ESC .."
        c2 = src.shift
        if $esc_invok.key?(c2)
          action = $esc_invok[c2]
          verbose action
          $jis8.do(action)
        elsif $set_designation.include?(c2)
          # A designation needs at least a final byte after the designator.
          raise ArgumentError, "truncated designation sequence" if src.empty?

          # Look at the designator and the next two bytes as comma-joined hex
          # text; the three-byte forms must be tried before the two-byte ones.
          c3 = src[1].nil? ? nil : src[1].to_s(16)
          seq = [c2.to_s(16), src[0].to_s(16), c3].join(',')
          if seq =~ /(24,29,|24,2a,|24,2b,)([0-9a-f]{1,})/
            cmd, para = $1, $2
          elsif seq =~ /(24,|28,|29,|2a,|2b,)([0-9a-f]{1,})/
            cmd, para = $1, $2
          else
            raise ArgumentError, "unknow ctrl command"
          end
          parts = cmd.split(",")
          designator = parts.map { |t| "0x" + t }.join(",")
          verbose "-> CTL #{designator} Type #{para}"
          action = $designation_action[designator]
          $jis8.do_b(action, "0x" + para)
          # c2 is already consumed, so dropping parts.size bytes removes the
          # rest of the designator together with the final byte.
          parts.size.times { src.shift }
        end
      elsif $other_invok.key?(c1)
        action = $other_invok[c1]
        verbose action
        $jis8.do(action)
      elsif c1 == 0x20
        out << " "
      elsif c1 == 0x0d
        out << "\n"
      elsif c1 == 0x89
        verbose "ingor"
      else
        verbose "ignor"
      end
    else
      # The current character set decides whether one or two bytes are needed.
      area = (c1 > 0x80) ? "GR" : "GL"
      charset, bytes, single_shift = $jis8.get_current_charset(area)
      verbose "-> #{charset.ljust(10)} , #{bytes}"
      if charset.eql?("tusika")
        # tusika characters are not converted; skip the second byte.
        src.shift
        out << "~~"
      elsif bytes == 1
        out << to_euc(charset, c1)
      elsif bytes == 2
        c2 = src.shift
        if c2.nil?
          #It is an error. Just ignor it
          out << "??"
          puts "Error: No more bytes for 2 bytes character."
        else
          out << to_euc(charset, c1, c2)
        end
        # Helps to locate the byte that breaks iconv. Only run while debugging:
        # converting the whole output after every character is quadratic.
        verbose eucjp_to_utf8(out) if $debug
      end
      $jis8.restore if single_shift
    end
  end

  out
end
264
+
265
# Converts +euc+ by handing it to the shared $eucjp_to_utf8 converter
# and returns whatever its #iconv call returns.
def eucjp_to_utf8(euc)
  converter = $eucjp_to_utf8
  converter.iconv(euc)
end
268
+
269
+
270
+ #test_display_eisu
271
+
@@ -0,0 +1,3 @@
1
# Gem namespace; this file only carries the release number.
module Jis2euc
  # Version string read by the gemspec.
  VERSION = '0.0.1'
end
data/lib/jis2euc.rb ADDED
@@ -0,0 +1,15 @@
1
+
2
+ require File.join(File.dirname(__FILE__),'jis2euc/libjis2euc')
3
+
4
+
5
# Public entry points of the gem; both forward to the top-level converters.
module Jis2euc
  class << self
    # Forwards +s+ to arib_jis_to_euc and returns its result.
    def jis2euc(s)
      arib_jis_to_euc(s)
    end

    # Forwards +s+ to eucjp_to_utf8 and returns its result.
    def euc2utf8(s)
      eucjp_to_utf8(s)
    end
  end
end
data/lib/test.rb ADDED
@@ -0,0 +1,55 @@
1
+ #include this to test local latest change before publishing the gem
2
+ require './lib/jis2euc'
3
+
4
+ #include this to use the installed gem.
5
+ #require 'jis2euc'
6
+ require 'test/unit'
7
+
8
+ #THIS IS MUST
9
+ if RUBY_VERSION < "1.9"
10
+ #no use in ruby 1.9
11
+ $KCODE = "UTF8"
12
+ end
13
+
14
# Smoke tests for the gem: each sample is decoded and printed as UTF-8 for
# visual inspection; nothing is asserted.
class TC_JIS2EUC < Test::Unit::TestCase

  # Samples are kept as whitespace-separated hex byte values.
  def setup
    @raw1 = "32 2d 46 6c cb fb 1b 7c b5 d0 cb fc 1b 7d c8 a4 a6 45 41 45 7d ce 3e 2e 3d 2e c7 e 33 37 f 2d 52 e2 ce 35 77 4e 25 f2 41 66 a4 c7 3f 4a e0 32 61 39 73 ca 1b 7c ec f9 b9 1b 7d ac a2 eb fa b3 ce 1b 7c ec f9 b9 1b 7d cb 3b 32 32 43 b9 eb e 35 30 f 42 65 ce 43 4b bf c1 ac 4d 27 3e 70 f2 49 70 34 6f cb 42 4e 4e 4f ce 4a 49 f2 3e 68 ea 31 5b a8 eb 3b 51 f2 44 49 c3 bf fa "
    @raw2 = "1b 24 3b 7a 56 1b 24 39 34 6f 21 21 4c 34 39 29 4b 3c fb 35 35 4e 76 ac 3f 25 ea ca b9 37 4a 3f 27 fc "
    @raw3 = "1b 24 2a 3b fa d7 1b 7c ab f3 cb f3 b0 1b 2a 30 1b 7d ce e 44 41 49 f 30 42 35 48 46 7c 21 2a "
    @raw4 = "48 56 41 48 46 62 4d 46 e 32 89 2d 8a 1b 24 3b "
    # puts_jis parses hex text. This sample used to be stored as raw bytes, so
    # parsing it produced only zero bytes and the sample was never decoded.
    @raw5 = [0xe, 0x51, 0x56, 0x43, 0xf, 0x21, 0x21, 0x5f, 0x37, 0x3a, 0x6a, 0x89, 0x20, 0x8a,
             0x4e, 0x34, 0x3f, 0x4d, 0xce, 0x1b, 0x7c, 0xc9].map { |byte| byte.to_s(16) }.join(" ")
  end

  def test_arib_jis_to_euc_2
    [@raw1, @raw2, @raw3, @raw4, @raw5].each { |raw| puts_jis(raw) }
  end

  # Decodes +raw+ (whitespace-separated hex byte values) and prints it as UTF-8.
  def puts_jis(raw)
    jis = raw.split.map { |token| token.hex }.pack("C*")
    euc_s = Jis2euc.jis2euc(jis)
    puts Jis2euc.euc2utf8(euc_s)
  end

  def test_euc_to_utf8
    #Refer to : http://www.rikai.com/library/kanjitables/kanji_codes.euc.shtml
    #s = [0xa3,0xb0,0xa5,0xc0,0x8f,0xe6,0xc0,0x8f,0xe6,0xd0].pack("C*")
    euc = [0xa1, 0xa1, 0xdf, 0xb7, 0xba, 0xea].pack("C*")
    puts Jis2euc.euc2utf8(euc)
  end

end
52
+
53
+ #test_iconv_euc_to_utf8
54
+ #test_display_eisu
55
+ #test_arib_jis_to_euc_2
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jis2euc
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - pierr.chen
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-07-05 00:00:00 +08:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: "ARIB standard combines serveral JIS encoding for texts ,this gem convert them to EUC-JP "
23
+ email:
24
+ - pierr.chen@gmail.com
25
+ executables: []
26
+
27
+ extensions: []
28
+
29
+ extra_rdoc_files: []
30
+
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - Rakefile
35
+ - jis2euc.gemspec
36
+ - lib/jis2euc.rb
37
+ - lib/jis2euc/jis8ctl.rb
38
+ - lib/jis2euc/libjis2euc.rb
39
+ - lib/jis2euc/version.rb
40
+ - lib/test.rb
41
+ has_rdoc: true
42
+ homepage: ""
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options: []
47
+
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ requirements: []
69
+
70
+ rubyforge_project: jis2euc
71
+ rubygems_version: 1.3.7
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: convert jis encoding to EUC-JP
75
+ test_files: []
76
+