charesc 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +57 -0
- data/lib/charesc.rb +39 -0
- data/test/test_euc.rb +32 -0
- data/test/test_fail.rb +42 -0
- data/test/test_mixed.rb +27 -0
- data/test/test_sjis.rb +32 -0
- data/test/test_utf8.rb +126 -0
- metadata +56 -0
data/README
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
== charesc version 0.1.0
|
2
|
+
|
3
|
+
=== Overview
|
4
|
+
Many programming languages and data formats provide character
|
5
|
+
escapes based on Unicode. This gem, 'charesc', does so for Ruby.
|
6
|
+
|
7
|
+
=== Syntax
|
8
|
+
Character Escapes are defined as constants starting with the
|
9
|
+
letter 'U', followed by at least four hexadecimal digits.
|
10
|
+
Four hexadecimal digits represent characters in the basic
|
11
|
+
multilingual plane (BMP) of Unicode/ISO 10646.
|
12
|
+
Five hexadecimal digits represent characters in planes 1 to 15.
|
13
|
+
Six hexadecimal digits, starting with '10', represent characters
|
14
|
+
in plane 16.
|
15
|
+
Up to and including four digits, leading zeros are mandatory,
|
16
|
+
but otherwise, they are forbidden. In this respect, the
|
17
|
+
syntax is the same as for the U+ notation from the Unicode book.
|
18
|
+
|
19
|
+
=== Usage
|
20
|
+
Character escapes can be used inside strings, with the interpolation
|
21
|
+
syntax, e.g., "abcd#{U6789u789A}". They can also be used on their
|
22
|
+
own, as free-standing constants, e.g., "abcd" + U6789u789A.
|
23
|
+
|
24
|
+
=== Returned Values
|
25
|
+
All codepoints including non-characters (e.g. U+FFFF) are available,
|
26
|
+
but surrogates (U+D800-U+DFFF) are not available, guaranteeing
|
27
|
+
that no ill-formed UTF-8 sequences are produced.
|
28
|
+
Character escapes can either be used as individual characters
|
29
|
+
(e.g., U6789) or in strings (e.g., U6789U789A). Starting from the
|
30
|
+
second 'U', it is possible to use 'u' instead for easier visual
|
31
|
+
parsing (e.g., U6789u789A). The hexadecimal characters A-F can
|
32
|
+
always also be written lower-case. The value of a character
|
33
|
+
escape is never a character (e.g., ?a), always a string.
|
34
|
+
|
35
|
+
|
36
|
+
=== Character Escapes and Character Encodings
|
37
|
+
The charesc gem takes the value of $KCODE into account automatically.
|
38
|
+
If $KCODE is set to Shift_JIS or EUC-JP, the character escapes are
|
39
|
+
converted to the respective encoding (as far as allowed by these
|
40
|
+
encodings). If $KCODE indicates UTF-8 or 'none', character escapes
|
41
|
+
return their values in UTF-8.
|
42
|
+
By redefining the method charesc_non_utf8_conversion_hook,
|
43
|
+
it is possible to change this behavior if necessary.
|
44
|
+
|
45
|
+
=== Future Work
|
46
|
+
- Adapt syntax if there is community consensus for something
|
47
|
+
different (warning: discussing syntactic details can become
|
48
|
+
a rathole).
|
49
|
+
- Make this part of the standard Ruby distribution, or even
|
50
|
+
better, integrate it into Ruby itself. In the latter case,
|
51
|
+
the syntax can be reconsidered, because we can then e.g.
|
52
|
+
use \u.... or so.
|
53
|
+
|
54
|
+
=== Copyright
|
55
|
+
Copyright (c) 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp)
|
56
|
+
Licensed under the same terms as Ruby. Absolutely no warranty.
|
57
|
+
(see http://www.ruby-lang.org/en/LICENSE.txt)
|
data/lib/charesc.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# :include: ../README
|
2
|
+
|
3
|
+
class Module
|
4
|
+
alias charesc_old_const_missing const_missing
|
5
|
+
|
6
|
+
# pretend that constants of the form Uhhhh, with h a hexadecimal digit,
|
7
|
+
# are defined and their value corresponds to the value of the Unicode
|
8
|
+
# character U+hhhh. For details, see the README.
|
9
|
+
def const_missing (const)
|
10
|
+
# Everything but the first 'U' is case-insensitive, and
|
11
|
+
# the first 'U' is guaranteed to be upper-case anyway,
|
12
|
+
# otherwise, we never get here anyway.
|
13
|
+
if const.to_s =~ /^((U( [0-9ABCEF][0-9A-F]{3} # general BMP
|
14
|
+
| D[0-7][0-9A-F]{2} # excluding surrogates
|
15
|
+
| [1-9A-F][0-9A-F]{4} # planes 1-15
|
16
|
+
| 10 [0-9A-F]{4} # plane 16
|
17
|
+
)
|
18
|
+
)*
|
19
|
+
)
|
20
|
+
$/ix
|
21
|
+
unescaped = $1.split(/[Uu]/)[1..-1].collect do |hex| hex.to_i(16) end.pack('U*')
|
22
|
+
|
23
|
+
# make it work with other built-in encodings
|
24
|
+
return charesc_non_utf8_conversion_hook(unescaped)
|
25
|
+
else
|
26
|
+
charesc_old_const_missing(const)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# redefine this hook method if you need to handle non-UTF-8
|
31
|
+
# encodings differently
|
32
|
+
def charesc_non_utf8_conversion_hook (unescaped)
|
33
|
+
if nkf_options = {'SJIS'=>'-WsIm0', 'EUC'=>'-WeIm0'}[$KCODE]
|
34
|
+
require 'nkf' # avoid that for a pure UTF-8 application
|
35
|
+
unescaped = NKF.nkf(nkf_options, unescaped)
|
36
|
+
end
|
37
|
+
return unescaped
|
38
|
+
end
|
39
|
+
end
|
data/test/test_euc.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# testing charesc gem with EUC-JP
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'charesc'
|
9
|
+
|
10
|
+
$KCODE = 'EUC-JP'
|
11
|
+
|
12
|
+
require 'test/unit'
|
13
|
+
class TestEUC < Test::Unit::TestCase
|
14
|
+
def test_euc
|
15
|
+
assert_equal('Yukihiro Matsumoto - ���ܹԹ�',
|
16
|
+
"Yukihiro Matsumoto - #{U677Eu672Cu884Cu5F18}")
|
17
|
+
assert_equal('Matz - �ޤĤ�� �椭�Ҥ�',
|
18
|
+
"Matz - #{U307Eu3064u3082u3068} #{U3086u304Du3072u308D}")
|
19
|
+
assert_equal("Aoyama Gakuin University - \xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8",
|
20
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
21
|
+
assert_equal('Aoyama Gakuin University - �Ļ��ر����',
|
22
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_mime
|
26
|
+
# make sure MIME is not decoded
|
27
|
+
# MIME header encoding of �Ļ�: =?ISO-2022-JP?B?GyRCQEQ7MxsoQg==?=
|
28
|
+
assert_equal('=?ISO-2022-JP?B?GyRCQEQ7MxsoQg==?=',
|
29
|
+
U003Du003Fu0049u0053u004Fu002Du0032u0030u0032u0032u002Du004Au0050u003Fu0042u003Fu0047u0079u0052u0043u0051u0045u0051u0037u004Du0078u0073u006Fu0051u0067u003Du003Du003Fu003D)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
data/test/test_fail.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# testing failure cases for charesc gem
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'charesc'
|
9
|
+
|
10
|
+
require 'test/unit'
|
11
|
+
# Tests to make sure that arbitrary nonexisting constants
|
12
|
+
# and disallowed cases are handled correctly
|
13
|
+
class TestFail < Test::Unit::TestCase
|
14
|
+
def test_foo
|
15
|
+
assert_raise(NameError) { FOO } # arbitrary constant
|
16
|
+
assert_raise(NameError) { string = "#{BAR}" } # interpolation
|
17
|
+
assert_raise(NameError) { uABCD } # lower-case 'u'
|
18
|
+
assert_raise(NameError) { UD800 } # surrogate block
|
19
|
+
assert_raise(NameError) { UDCBA } # surrogate block
|
20
|
+
assert_raise(NameError) { UDFFF } # surrogate block
|
21
|
+
assert_raise(NameError) { UD847uDD9A } # surrogate pair
|
22
|
+
assert_raise(NameError) { U0ABCD } # leading zero
|
23
|
+
assert_raise(NameError) { U00ABCD } # leading zeros
|
24
|
+
assert_raise(NameError) { Uabc } # too short
|
25
|
+
assert_raise(NameError) { Uabcdef } # too long
|
26
|
+
assert_raise(NameError) { U110000 } # too high
|
27
|
+
assert_not_equal(?a, U0061) # we return strings, not chararcters
|
28
|
+
|
29
|
+
# with leading correct escape
|
30
|
+
assert_raise(NameError) { UABCDFOO } # arbitrary constant
|
31
|
+
assert_raise(NameError) { string = "#{UABCDBAR}" } # interpolation
|
32
|
+
assert_raise(NameError) { UABCDuD800 } # surrogate block
|
33
|
+
assert_raise(NameError) { UABCDuDCBA } # surrogate block
|
34
|
+
assert_raise(NameError) { UABCDuDFFF } # surrogate block
|
35
|
+
assert_raise(NameError) { UABCDUD847uDD9A } # surrogate pair
|
36
|
+
assert_raise(NameError) { UABCDu0ABCD } # leading zero
|
37
|
+
assert_raise(NameError) { UABCDu00ABCD } # leading zeros
|
38
|
+
assert_raise(NameError) { UABCDuabc } # too short
|
39
|
+
assert_raise(NameError) { UABCDuabcdef } # too long
|
40
|
+
assert_raise(NameError) { UABCDu110000 } # too high
|
41
|
+
end
|
42
|
+
end
|
data/test/test_mixed.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# testing charesc gem with mixed encodings
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'charesc'
|
9
|
+
|
10
|
+
require 'test/unit'
|
11
|
+
class TestMixed < Test::Unit::TestCase
|
12
|
+
def test_mixed
|
13
|
+
assert_equal("Aoyama Gakuin University - \xE9\x9D\x92\xE5\xB1\xB1\xE5\xAD\xA6\xE9\x99\xA2\xE5\xA4\xA7\xE5\xAD\xA6",
|
14
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
15
|
+
assert_equal("Martin D\xC3\xBCrst", "Martin D#{U00FC}rst")
|
16
|
+
$KCODE = 'Shift_JIS'
|
17
|
+
assert_equal("Aoyama Gakuin University - \x90\xc2\x8e\x52\x8a\x77\x89\x40\x91\xe5\x8a\x77",
|
18
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
19
|
+
$KCODE = 'EUC-JP'
|
20
|
+
assert_equal("Aoyama Gakuin University - \xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8",
|
21
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
22
|
+
$KCODE = 'UTF-8'
|
23
|
+
assert_equal("Aoyama Gakuin University - \xE9\x9D\x92\xE5\xB1\xB1\xE5\xAD\xA6\xE9\x99\xA2\xE5\xA4\xA7\xE5\xAD\xA6",
|
24
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
25
|
+
assert_equal("Martin D\xC3\xBCrst", "Martin D#{U00FC}rst")
|
26
|
+
end
|
27
|
+
end
|
data/test/test_sjis.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# testing charesc gem with Shift_JIS
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'charesc'
|
9
|
+
|
10
|
+
$KCODE = 'Shift_JIS'
|
11
|
+
|
12
|
+
require 'test/unit'
|
13
|
+
class TestSJIS < Test::Unit::TestCase
|
14
|
+
def test_sjis
|
15
|
+
assert_equal('Yukihiro Matsumoto - ���{�s�O',
|
16
|
+
"Yukihiro Matsumoto - #{U677Eu672Cu884Cu5F18}")
|
17
|
+
assert_equal('Matz - �܂��� �䂫�Ђ�',
|
18
|
+
"Matz - #{U307Eu3064u3082u3068} #{U3086u304Du3072u308D}")
|
19
|
+
assert_equal("Aoyama Gakuin University - \x90\xc2\x8e\x52\x8a\x77\x89\x40\x91\xe5\x8a\x77",
|
20
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
21
|
+
assert_equal('Aoyama Gakuin University - �R�w�@��w',
|
22
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_mime
|
26
|
+
# make sure MIME is not decoded
|
27
|
+
# MIME header encoding of �R: =?ISO-2022-JP?B?GyRCQEQ7MxsoQg==?=
|
28
|
+
assert_equal('=?ISO-2022-JP?B?GyRCQEQ7MxsoQg==?=',
|
29
|
+
U003Du003Fu0049u0053u004Fu002Du0032u0030u0032u0032u002Du004Au0050u003Fu0042u003Fu0047u0079u0052u0043u0051u0045u0051u0037u004Du0078u0073u006Fu0051u0067u003Du003Du003Fu003D)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
data/test/test_utf8.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
# testing charesc gem with UTF-8
|
2
|
+
|
3
|
+
# Copyright 2007 Martin J. Du"rst (duerst@it.aoyama.ac.jp);
|
4
|
+
# available under the same licence as Ruby itself
|
5
|
+
# (see http://www.ruby-lang.org/en/LICENSE.txt)
|
6
|
+
|
7
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
8
|
+
require 'charesc'
|
9
|
+
|
10
|
+
require 'test/unit'
|
11
|
+
class TestUTF8 < Test::Unit::TestCase
|
12
|
+
def test_utf8
|
13
|
+
assert_equal('Yukihiro Matsumoto - 松本行弘',
|
14
|
+
"Yukihiro Matsumoto - #{U677Eu672Cu884Cu5F18}")
|
15
|
+
assert_equal('Matz - まつもと ゆきひろ',
|
16
|
+
"Matz - #{U307Eu3064u3082u3068} #{U3086u304Du3072u308D}")
|
17
|
+
assert_equal("Aoyama Gakuin University - \xE9\x9D\x92\xE5\xB1\xB1\xE5\xAD\xA6\xE9\x99\xA2\xE5\xA4\xA7\xE5\xAD\xA6",
|
18
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
19
|
+
assert_equal('Aoyama Gakuin University - 青山学院大学',
|
20
|
+
"Aoyama Gakuin University - #{U9752u5C71u5B66u9662u5927u5B66}")
|
21
|
+
assert_equal('青山学院大学', U9752u5C71u5B66u9662u5927u5B66)
|
22
|
+
assert_equal("Martin D\xC3\xBCrst", "Martin D#{U00FC}rst")
|
23
|
+
assert_equal('Martin Dürst', "Martin D#{U00FC}rst")
|
24
|
+
assert_equal('ü', U00FC)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_syntax_variants
|
28
|
+
# upper/lower case variants
|
29
|
+
assert_equal('松本行弘', U677Eu672Cu884Cu5F18)
|
30
|
+
assert_equal('松本行弘', U677EU672CU884CU5F18)
|
31
|
+
assert_equal('松本行弘', U677eu672cu884cu5f18)
|
32
|
+
assert_equal('松本行弘', U677eU672cU884cU5f18)
|
33
|
+
# all hex digits
|
34
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123u4567u89ABuCDEF)
|
35
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123U4567U89ABUCDEF)
|
36
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123u4567u89abucdef)
|
37
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123U4567U89abUcdef)
|
38
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123u4567u89aBuCdEf)
|
39
|
+
assert_equal("\xC4\xA3\xE4\x95\xA7\xE8\xA6\xAB\xEC\xB7\xAF", U0123u4567u89aBUcDEF)
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_fulton
|
43
|
+
# examples from Hal Fulton's book (second edition), chapter 4
|
44
|
+
# precomposed e'pe'e
|
45
|
+
assert_equal('épée', U00E9u0070u00E9u0065)
|
46
|
+
assert_equal('épée', "#{U00E9u0070u00E9u0065}")
|
47
|
+
assert_equal('épée', "#{U00E9}p#{U00E9}e")
|
48
|
+
assert_equal("\xC3\xA9\x70\xC3\xA9\x65", U00E9u0070u00E9u0065)
|
49
|
+
assert_equal("\xC3\xA9\x70\xC3\xA9\x65", "#{U00E9u0070u00E9u0065}")
|
50
|
+
assert_equal("\xC3\xA9\x70\xC3\xA9\x65", "#{U00E9}p#{U00E9}e")
|
51
|
+
# decomposed e'pe'e
|
52
|
+
assert_equal('épée', U0065u0301u0070u0065u0301u0065)
|
53
|
+
assert_equal('épée', "#{U0065u0301u0070u0065u0301u0065}")
|
54
|
+
assert_equal('épée', "e#{U0301}pe#{U0301}e")
|
55
|
+
assert_equal("\x65\xCC\x81\x70\x65\xCC\x81\x65", U0065u0301u0070u0065u0301u0065)
|
56
|
+
assert_equal("\x65\xCC\x81\x70\x65\xCC\x81\x65", "#{U0065u0301u0070u0065u0301u0065}")
|
57
|
+
assert_equal("\x65\xCC\x81\x70\x65\xCC\x81\x65", "e#{U0301}pe#{U0301}e")
|
58
|
+
# combinations of NFC/D, NFKC/D
|
59
|
+
assert_equal('öffnen', U00F6u0066u0066u006Eu0065u006E)
|
60
|
+
assert_equal("\xC3\xB6ffnen", U00F6u0066u0066u006Eu0065u006E)
|
61
|
+
assert_equal('öffnen', "#{U00F6}ffnen")
|
62
|
+
assert_equal("\xC3\xB6ffnen", "#{U00F6}ffnen")
|
63
|
+
assert_equal('öffnen', U006Fu0308u0066u0066u006Eu0065u006E)
|
64
|
+
assert_equal("\x6F\xCC\x88ffnen", U006Fu0308u0066u0066u006Eu0065u006E)
|
65
|
+
assert_equal('öffnen', "o#{U0308}ffnen")
|
66
|
+
assert_equal("\x6F\xCC\x88ffnen", "o#{U0308}ffnen")
|
67
|
+
assert_equal('öffnen', U00F6uFB00u006Eu0065u006E)
|
68
|
+
assert_equal("\xC3\xB6\xEF\xAC\x80nen", U00F6uFB00u006Eu0065u006E)
|
69
|
+
assert_equal('öffnen', "#{U00F6uFB00}nen")
|
70
|
+
assert_equal("\xC3\xB6\xEF\xAC\x80nen", "#{U00F6uFB00}nen")
|
71
|
+
assert_equal('öffnen', U006Fu0308uFB00u006Eu0065u006E)
|
72
|
+
assert_equal("\x6F\xCC\x88\xEF\xAC\x80nen", U006Fu0308uFB00u006Eu0065u006E)
|
73
|
+
assert_equal('öffnen', "o#{U0308uFB00}nen")
|
74
|
+
assert_equal("\x6F\xCC\x88\xEF\xAC\x80nen", "o#{U0308uFB00}nen")
|
75
|
+
# German sharp s (sz)
|
76
|
+
assert_equal('Straße', U0053u0074u0072u0061u00DFu0065)
|
77
|
+
assert_equal("\x53\x74\x72\x61\xC3\x9F\x65", U0053u0074u0072u0061u00DFu0065)
|
78
|
+
assert_equal('Straße', "Stra#{U00DF}e")
|
79
|
+
assert_equal("\x53\x74\x72\x61\xC3\x9F\x65", "Stra#{U00DF}e")
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_edge_cases
|
83
|
+
# start and end of each outer plane
|
84
|
+
assert_equal("\xF4\x8F\xBF\xBF", U10FFFF)
|
85
|
+
assert_equal("\xF4\x80\x80\x80", U100000)
|
86
|
+
assert_equal("\xF3\xBF\xBF\xBF", UFFFFF)
|
87
|
+
assert_equal("\xF3\xB0\x80\x80", UF0000)
|
88
|
+
assert_equal("\xF3\xAF\xBF\xBF", UEFFFF)
|
89
|
+
assert_equal("\xF3\xA0\x80\x80", UE0000)
|
90
|
+
assert_equal("\xF3\x9F\xBF\xBF", UDFFFF)
|
91
|
+
assert_equal("\xF3\x90\x80\x80", UD0000)
|
92
|
+
assert_equal("\xF3\x8F\xBF\xBF", UCFFFF)
|
93
|
+
assert_equal("\xF3\x80\x80\x80", UC0000)
|
94
|
+
assert_equal("\xF2\xBF\xBF\xBF", UBFFFF)
|
95
|
+
assert_equal("\xF2\xB0\x80\x80", UB0000)
|
96
|
+
assert_equal("\xF2\xAF\xBF\xBF", UAFFFF)
|
97
|
+
assert_equal("\xF2\xA0\x80\x80", UA0000)
|
98
|
+
assert_equal("\xF2\x9F\xBF\xBF", U9FFFF)
|
99
|
+
assert_equal("\xF2\x90\x80\x80", U90000)
|
100
|
+
assert_equal("\xF2\x8F\xBF\xBF", U8FFFF)
|
101
|
+
assert_equal("\xF2\x80\x80\x80", U80000)
|
102
|
+
assert_equal("\xF1\xBF\xBF\xBF", U7FFFF)
|
103
|
+
assert_equal("\xF1\xB0\x80\x80", U70000)
|
104
|
+
assert_equal("\xF1\xAF\xBF\xBF", U6FFFF)
|
105
|
+
assert_equal("\xF1\xA0\x80\x80", U60000)
|
106
|
+
assert_equal("\xF1\x9F\xBF\xBF", U5FFFF)
|
107
|
+
assert_equal("\xF1\x90\x80\x80", U50000)
|
108
|
+
assert_equal("\xF1\x8F\xBF\xBF", U4FFFF)
|
109
|
+
assert_equal("\xF1\x80\x80\x80", U40000)
|
110
|
+
assert_equal("\xF0\xBF\xBF\xBF", U3FFFF)
|
111
|
+
assert_equal("\xF0\xB0\x80\x80", U30000)
|
112
|
+
assert_equal("\xF0\xAF\xBF\xBF", U2FFFF)
|
113
|
+
assert_equal("\xF0\xA0\x80\x80", U20000)
|
114
|
+
assert_equal("\xF0\x9F\xBF\xBF", U1FFFF)
|
115
|
+
assert_equal("\xF0\x90\x80\x80", U10000)
|
116
|
+
# BMP
|
117
|
+
assert_equal("\xEF\xBF\xBF", UFFFF)
|
118
|
+
assert_equal("\xEE\x80\x80", UE000)
|
119
|
+
assert_equal("\xED\x9F\xBF", UD7FF)
|
120
|
+
assert_equal("\xE0\xA0\x80", U0800)
|
121
|
+
assert_equal("\xDF\xBF", U07FF)
|
122
|
+
assert_equal("\xC2\x80", U0080)
|
123
|
+
assert_equal("\x7F", U007F)
|
124
|
+
assert_equal("\x00", U0000)
|
125
|
+
end
|
126
|
+
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: charesc
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2007-06-05 00:00:00 +09:00
|
8
|
+
summary: Unicode based character escapes for Ruby; works with UTF-8 as well as Shift_JIS and EUC-JP (within the limits of these encodings)
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: duerst@it.aoyama.ac.jp
|
12
|
+
homepage:
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: charesc
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Martin J. Du"rst
|
31
|
+
files:
|
32
|
+
- lib/charesc.rb
|
33
|
+
- test/test_euc.rb
|
34
|
+
- test/test_fail.rb
|
35
|
+
- test/test_mixed.rb
|
36
|
+
- test/test_sjis.rb
|
37
|
+
- test/test_utf8.rb
|
38
|
+
- README
|
39
|
+
test_files:
|
40
|
+
- test/test_utf8.rb
|
41
|
+
- test/test_sjis.rb
|
42
|
+
- test/test_euc.rb
|
43
|
+
- test/test_fail.rb
|
44
|
+
- test/test_mixed.rb
|
45
|
+
rdoc_options: []
|
46
|
+
|
47
|
+
extra_rdoc_files:
|
48
|
+
- README
|
49
|
+
executables: []
|
50
|
+
|
51
|
+
extensions: []
|
52
|
+
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
dependencies: []
|
56
|
+
|