jis2euc 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +37 -0
- data/lib/jis2euc.rb +5 -3
- data/lib/jis2euc/jis8ctl.rb +4 -1
- data/lib/jis2euc/libjis2euc.rb +7 -3
- data/lib/jis2euc/version.rb +1 -1
- data/lib/test.rb +27 -0
- metadata +5 -4
data/README
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
ARIB uses several different encodings when transmitting text information such as EPG, CC and BML text. Control codes are used to switch between these encodings. This gem converts that text information to the standard EUC-JP encoding.
|
2
|
+
|
3
|
+
|
4
|
+
Examples :
|
5
|
+
|
6
|
+
require 'jis2euc'
|
7
|
+
|
8
|
+
raw = "48 56 41 48 46 62 4d 46 e 32 89 2d 8a 1b 24 3b "
|
9
|
+
s = raw.split.map {|t| ("0x"+t).to_i(16)}.inject("") {|p,c| p << c.chr}
|
10
|
+
|
11
|
+
euc_s = Jis2euc.jis2euc(s,true) # setting the 2nd parameter to true enables debug information
|
12
|
+
|
13
|
+
|
14
|
+
#output debug information
|
15
|
+
convert following string to euc :
|
16
|
+
48 56 41 48 46 62 4d 46 e 32 89 2d 8a 1b 24 3b
|
17
|
+
c1 : 48 ->Char-> kanji1 , 2
|
18
|
+
番
|
19
|
+
c1 : 41 ->Char-> kanji1 , 2
|
20
|
+
番組
|
21
|
+
c1 : 46 ->Char-> kanji1 , 2
|
22
|
+
番組内
|
23
|
+
c1 : 4d ->Char-> kanji1 , 2
|
24
|
+
番組内容
|
25
|
+
c1 : e ->Is CTL
|
26
|
+
LS1 : GL=>G1, locking shift
|
27
|
+
c1 : 32 ->Char-> eisu , 1
|
28
|
+
c1 : 89 ->Is CTL
|
29
|
+
ingor
|
30
|
+
c1 : 2d ->Char-> eisu , 1
|
31
|
+
c1 : 8a ->Is CTL
|
32
|
+
ignor
|
33
|
+
c1 : 1b ->Is CTL
|
34
|
+
ESC ..
|
35
|
+
-> CTL 0x24 Type 3b
|
36
|
+
番組内容2―
|
37
|
+
|
data/lib/jis2euc.rb
CHANGED
@@ -3,9 +3,11 @@ require File.join(File.dirname(__FILE__),'jis2euc/libjis2euc')
|
|
3
3
|
|
4
4
|
|
5
5
|
module Jis2euc
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
#
|
7
|
+
# Setting verbose to true prints out the detailed parsing and conversion information.
|
8
|
+
#
|
9
|
+
def self.jis2euc s ,verbose = false
|
10
|
+
arib_jis_to_euc s , verbose
|
9
11
|
end
|
10
12
|
|
11
13
|
def self.euc2utf8 s
|
data/lib/jis2euc/jis8ctl.rb
CHANGED
@@ -100,7 +100,9 @@ $encoding_mapping = {
|
|
100
100
|
|
101
101
|
class Jis8charset
|
102
102
|
|
103
|
-
|
103
|
+
|
104
|
+
include Singleton
|
105
|
+
|
104
106
|
def initialize
|
105
107
|
|
106
108
|
@areas= {"GL"=> "G0" , "GR" => "G2"}
|
@@ -108,6 +110,7 @@ def initialize
|
|
108
110
|
@last_gl = "G0"
|
109
111
|
@single_shift = false
|
110
112
|
end
|
113
|
+
|
111
114
|
def reset
|
112
115
|
@areas= {"GL"=> "G0" , "GR" => "G2"}
|
113
116
|
@sets = {"G0"=> "kanji1" , "G1" => "eisu", "G2"=>"hira","G3"=>"kata"}
|
data/lib/jis2euc/libjis2euc.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# encoding: utf-8
|
3
3
|
|
4
4
|
require 'iconv'
|
5
|
+
require 'singleton'
|
5
6
|
#require 'jis8ctl.rb'
|
6
7
|
|
7
8
|
require File.join(File.dirname(__FILE__),'jis8ctl')
|
@@ -117,7 +118,7 @@ $katakana_to_euc = [
|
|
117
118
|
]
|
118
119
|
|
119
120
|
|
120
|
-
$debug =
|
121
|
+
$debug = true
|
121
122
|
def verbose s
|
122
123
|
puts s if $debug
|
123
124
|
end
|
@@ -160,9 +161,12 @@ def test_display_eisu
|
|
160
161
|
puts eucjp_to_utf8(s_euc)
|
161
162
|
end
|
162
163
|
|
163
|
-
$jis8 = Jis8charset.
|
164
|
-
def arib_jis_to_euc s
|
164
|
+
$jis8 = Jis8charset.instance
|
165
165
|
|
166
|
+
|
167
|
+
def arib_jis_to_euc s , enable_debug = false
|
168
|
+
|
169
|
+
$debug = enable_debug
|
166
170
|
#convert string to array , so that we can index it freely
|
167
171
|
src = []
|
168
172
|
out = ""
|
data/lib/jis2euc/version.rb
CHANGED
data/lib/test.rb
CHANGED
@@ -48,8 +48,35 @@ def test_euc_to_utf8
|
|
48
48
|
end
|
49
49
|
|
50
50
|
|
51
|
+
def test_verbose_output
|
52
|
+
|
53
|
+
raw = @raw1
|
54
|
+
s = raw.split.map {|t| ("0x"+t).to_i(16)}.inject("") {|p,c| p << c.chr}
|
55
|
+
euc_s = Jis2euc.jis2euc(s,false)
|
56
|
+
print_out_euc euc_s
|
51
57
|
end
|
52
58
|
|
59
|
+
|
60
|
+
def test_print_convert_eucjp
|
61
|
+
|
62
|
+
raw = @raw3
|
63
|
+
s = raw.split.map {|t| ("0x"+t).to_i(16)}.inject("") {|p,c| p << c.chr}
|
64
|
+
euc_s = Jis2euc.jis2euc(s,false)
|
65
|
+
|
66
|
+
print_out_euc euc_s
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
def print_out_euc euc_s
|
71
|
+
puts "---------------------------------------------"
|
72
|
+
puts "view the characters in UTF-8 encoding console"
|
73
|
+
puts Jis2euc.euc2utf8(euc_s)
|
74
|
+
puts "The EUC_JP encoding bytes :"
|
75
|
+
euc_s.each_byte {|c| print c.to_s(16) ; print ","}
|
76
|
+
puts "\n---------------------------------------------"
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
53
80
|
#test_iconv_euc_to_utf8
|
54
81
|
#test_display_eisu
|
55
82
|
#test_arib_jis_to_euc_2
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jis2euc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- pierr.chen
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-08-26 00:00:00 +08:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -31,6 +31,7 @@ extra_rdoc_files: []
|
|
31
31
|
files:
|
32
32
|
- .gitignore
|
33
33
|
- Gemfile
|
34
|
+
- README
|
34
35
|
- Rakefile
|
35
36
|
- jis2euc.gemspec
|
36
37
|
- lib/jis2euc.rb
|