unicode 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +107 -0
- data/bin/mkunidata.rb +128 -0
- data/ext/extconf.rb +3 -0
- data/ext/unicode.c +666 -0
- data/ext/unidata.map +10642 -0
- data/ext/ustring.c +206 -0
- data/ext/ustring.h +48 -0
- data/ext/wstring.c +185 -0
- data/ext/wstring.h +41 -0
- data/test/test.rb +68 -0
- metadata +47 -0
data/test/test.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#! /usr/local/bin/ruby -KU
|
2
|
+
|
3
|
+
require 'unicode'
|
4
|
+
|
5
|
+
## dump Unicode string
|
6
|
+
class String
  ## Dump this string as a list of Unicode code point labels.
  ##
  ## The receiver's bytes are decoded as UTF-8 via unpack("U*") and every
  ## code point is rendered as "U+XXXX": uppercase hexadecimal, zero-padded
  ## to at least four digits (code points above U+FFFF use more digits).
  ##
  ## Returns an Array of Strings, one per code point; an empty string
  ## yields an empty Array.
  ## Malformed UTF-8 makes unpack raise ArgumentError.
  def udump
    # unpack("U*") only ever yields Integers, so no per-element type
    # check (and no pass-through branch) is needed.
    unpack("U*").map { |code_point| "U+%04X" % code_point }
  end
end
|
20
|
+
|
21
|
+
|
22
|
+
print "Canonical decomposition vs compatibility decomposition\n"
|
23
|
+
p Unicode::decompose("⑽ o\xef\xac\x83ce").udump
|
24
|
+
p Unicode::decompose_compat("⑽ o\xef\xac\x83ce")
|
25
|
+
|
26
|
+
print "Canonical equivalent vs Compatibility equivalent\n"
|
27
|
+
p Unicode::strcmp("ガ", "ガ")
|
28
|
+
p Unicode::strcmp("ガ", "ガ")
|
29
|
+
p Unicode::strcmp_compat("ガ", "ガ")
|
30
|
+
|
31
|
+
print "Decomposition/composition\n"
|
32
|
+
p Unicode::normalize_D([?c, 0x301, 0x327].pack("U*")).udump
|
33
|
+
p Unicode::normalize_D([?c, 0x327, 0x301].pack("U*")).udump
|
34
|
+
p Unicode::normalize_D([0x107, 0x327].pack("U*")).udump
|
35
|
+
p Unicode::normalize_D([0xe7, 0x301].pack("U*")).udump
|
36
|
+
p Unicode::normalize_C([?c, 0x301, 0x327].pack("U*")).udump
|
37
|
+
p Unicode::normalize_C([?c, 0x327, 0x301].pack("U*")).udump
|
38
|
+
p Unicode::normalize_C([0x107, 0x327].pack("U*")).udump
|
39
|
+
p Unicode::normalize_C([0xe7, 0x301].pack("U*")).udump
|
40
|
+
|
41
|
+
print "Kana Normalization\n"
|
42
|
+
p Unicode::normalize_D("ガガ").udump
|
43
|
+
p Unicode::normalize_C("ガガ").udump
|
44
|
+
p Unicode::normalize_KD("ガガ").udump
|
45
|
+
p Unicode::normalize_KC("ガガ").udump
|
46
|
+
|
47
|
+
print "Hangul\n"
|
48
|
+
p "요시담".udump
|
49
|
+
p Unicode::normalize_D("요시담").udump
|
50
|
+
p Unicode::normalize_C("요시담").udump
|
51
|
+
|
52
|
+
print "Composition Exclusion\n"
|
53
|
+
## The label must name the code point exercised below (0x212b, ANGSTROM SIGN).
print " ANGSTROM SIGN [U+212B]\n"
|
54
|
+
p Unicode::normalize_D([0x212b].pack("U")).udump
|
55
|
+
p Unicode::normalize_C([0x212b].pack("U")).udump
|
56
|
+
print " LATIN CAPITAL LETTER A WITH RING ABOVE [U+00C5]\n"
|
57
|
+
p Unicode::normalize_D([0x00c5].pack("U")).udump
|
58
|
+
p Unicode::normalize_C([0x00c5].pack("U")).udump
|
59
|
+
|
60
|
+
print "Case conversion\n"
|
61
|
+
p Unicode::normalize_C(Unicode::upcase([?c, 0x301, 0x327, 0xff41].pack("U*"))).udump
|
62
|
+
p Unicode::normalize_C(Unicode::downcase([?C, 0x301, 0x327, 0xff21].pack("U*"))).udump
|
63
|
+
p Unicode::capitalize([0x1f1, ?A, ?a, 0xff21].pack("U*")).udump
|
64
|
+
|
65
|
+
|
66
|
+
## Local variables:
|
67
|
+
## coding: utf-8
|
68
|
+
## End:
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.10
|
3
|
+
specification_version: 1
|
4
|
+
name: unicode
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.1"
|
7
|
+
date: 2005-08-13
|
8
|
+
summary: Unicode normalization library.
|
9
|
+
require_paths:
|
10
|
+
- "."
|
11
|
+
email: yoshidam@yoshidam.net
|
12
|
+
homepage: http://www.yoshidam.net/Ruby.html#unicode
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
-
|
22
|
+
- ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
platform: ruby
|
27
|
+
authors: []
|
28
|
+
files:
|
29
|
+
- bin/mkunidata.rb
|
30
|
+
- ext/ustring.c
|
31
|
+
- ext/ustring.h
|
32
|
+
- ext/unicode.c
|
33
|
+
- ext/wstring.c
|
34
|
+
- ext/wstring.h
|
35
|
+
- ext/extconf.rb
|
36
|
+
- ext/unidata.map
|
37
|
+
- test/test.rb
|
38
|
+
- README
|
39
|
+
test_files: []
|
40
|
+
rdoc_options: []
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README
|
43
|
+
executables: []
|
44
|
+
extensions:
|
45
|
+
- ext/extconf.rb
|
46
|
+
requirements: []
|
47
|
+
dependencies: []
|