unidecoder 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (189) hide show
  1. data/Changelog.md +5 -0
  2. data/README.md +31 -0
  3. data/Rakefile +28 -0
  4. data/lib/unidecoder.rb +98 -0
  5. data/lib/unidecoder/data/x00.yml +257 -0
  6. data/lib/unidecoder/data/x01.yml +257 -0
  7. data/lib/unidecoder/data/x02.yml +256 -0
  8. data/lib/unidecoder/data/x03.yml +256 -0
  9. data/lib/unidecoder/data/x04.yml +256 -0
  10. data/lib/unidecoder/data/x05.yml +256 -0
  11. data/lib/unidecoder/data/x06.yml +256 -0
  12. data/lib/unidecoder/data/x07.yml +256 -0
  13. data/lib/unidecoder/data/x09.yml +256 -0
  14. data/lib/unidecoder/data/x0a.yml +256 -0
  15. data/lib/unidecoder/data/x0b.yml +256 -0
  16. data/lib/unidecoder/data/x0c.yml +256 -0
  17. data/lib/unidecoder/data/x0d.yml +256 -0
  18. data/lib/unidecoder/data/x0e.yml +256 -0
  19. data/lib/unidecoder/data/x0f.yml +256 -0
  20. data/lib/unidecoder/data/x10.yml +256 -0
  21. data/lib/unidecoder/data/x11.yml +256 -0
  22. data/lib/unidecoder/data/x12.yml +257 -0
  23. data/lib/unidecoder/data/x13.yml +256 -0
  24. data/lib/unidecoder/data/x14.yml +257 -0
  25. data/lib/unidecoder/data/x15.yml +257 -0
  26. data/lib/unidecoder/data/x16.yml +256 -0
  27. data/lib/unidecoder/data/x17.yml +256 -0
  28. data/lib/unidecoder/data/x18.yml +256 -0
  29. data/lib/unidecoder/data/x1e.yml +256 -0
  30. data/lib/unidecoder/data/x1f.yml +256 -0
  31. data/lib/unidecoder/data/x20.yml +256 -0
  32. data/lib/unidecoder/data/x21.yml +256 -0
  33. data/lib/unidecoder/data/x22.yml +256 -0
  34. data/lib/unidecoder/data/x23.yml +256 -0
  35. data/lib/unidecoder/data/x24.yml +256 -0
  36. data/lib/unidecoder/data/x25.yml +256 -0
  37. data/lib/unidecoder/data/x26.yml +256 -0
  38. data/lib/unidecoder/data/x27.yml +256 -0
  39. data/lib/unidecoder/data/x28.yml +257 -0
  40. data/lib/unidecoder/data/x2e.yml +256 -0
  41. data/lib/unidecoder/data/x2f.yml +256 -0
  42. data/lib/unidecoder/data/x30.yml +256 -0
  43. data/lib/unidecoder/data/x31.yml +256 -0
  44. data/lib/unidecoder/data/x32.yml +256 -0
  45. data/lib/unidecoder/data/x33.yml +256 -0
  46. data/lib/unidecoder/data/x4d.yml +256 -0
  47. data/lib/unidecoder/data/x4e.yml +257 -0
  48. data/lib/unidecoder/data/x4f.yml +257 -0
  49. data/lib/unidecoder/data/x50.yml +257 -0
  50. data/lib/unidecoder/data/x51.yml +257 -0
  51. data/lib/unidecoder/data/x52.yml +257 -0
  52. data/lib/unidecoder/data/x53.yml +257 -0
  53. data/lib/unidecoder/data/x54.yml +257 -0
  54. data/lib/unidecoder/data/x55.yml +257 -0
  55. data/lib/unidecoder/data/x56.yml +257 -0
  56. data/lib/unidecoder/data/x57.yml +257 -0
  57. data/lib/unidecoder/data/x58.yml +257 -0
  58. data/lib/unidecoder/data/x59.yml +257 -0
  59. data/lib/unidecoder/data/x5a.yml +257 -0
  60. data/lib/unidecoder/data/x5b.yml +257 -0
  61. data/lib/unidecoder/data/x5c.yml +257 -0
  62. data/lib/unidecoder/data/x5d.yml +257 -0
  63. data/lib/unidecoder/data/x5e.yml +257 -0
  64. data/lib/unidecoder/data/x5f.yml +257 -0
  65. data/lib/unidecoder/data/x60.yml +257 -0
  66. data/lib/unidecoder/data/x61.yml +257 -0
  67. data/lib/unidecoder/data/x62.yml +257 -0
  68. data/lib/unidecoder/data/x63.yml +257 -0
  69. data/lib/unidecoder/data/x64.yml +257 -0
  70. data/lib/unidecoder/data/x65.yml +257 -0
  71. data/lib/unidecoder/data/x66.yml +257 -0
  72. data/lib/unidecoder/data/x67.yml +257 -0
  73. data/lib/unidecoder/data/x68.yml +257 -0
  74. data/lib/unidecoder/data/x69.yml +257 -0
  75. data/lib/unidecoder/data/x6a.yml +257 -0
  76. data/lib/unidecoder/data/x6b.yml +257 -0
  77. data/lib/unidecoder/data/x6c.yml +257 -0
  78. data/lib/unidecoder/data/x6d.yml +257 -0
  79. data/lib/unidecoder/data/x6e.yml +257 -0
  80. data/lib/unidecoder/data/x6f.yml +257 -0
  81. data/lib/unidecoder/data/x70.yml +257 -0
  82. data/lib/unidecoder/data/x71.yml +257 -0
  83. data/lib/unidecoder/data/x72.yml +257 -0
  84. data/lib/unidecoder/data/x73.yml +257 -0
  85. data/lib/unidecoder/data/x74.yml +257 -0
  86. data/lib/unidecoder/data/x75.yml +257 -0
  87. data/lib/unidecoder/data/x76.yml +257 -0
  88. data/lib/unidecoder/data/x77.yml +257 -0
  89. data/lib/unidecoder/data/x78.yml +257 -0
  90. data/lib/unidecoder/data/x79.yml +257 -0
  91. data/lib/unidecoder/data/x7a.yml +257 -0
  92. data/lib/unidecoder/data/x7b.yml +257 -0
  93. data/lib/unidecoder/data/x7c.yml +257 -0
  94. data/lib/unidecoder/data/x7d.yml +257 -0
  95. data/lib/unidecoder/data/x7e.yml +257 -0
  96. data/lib/unidecoder/data/x7f.yml +257 -0
  97. data/lib/unidecoder/data/x80.yml +257 -0
  98. data/lib/unidecoder/data/x81.yml +257 -0
  99. data/lib/unidecoder/data/x82.yml +257 -0
  100. data/lib/unidecoder/data/x83.yml +257 -0
  101. data/lib/unidecoder/data/x84.yml +257 -0
  102. data/lib/unidecoder/data/x85.yml +257 -0
  103. data/lib/unidecoder/data/x86.yml +257 -0
  104. data/lib/unidecoder/data/x87.yml +257 -0
  105. data/lib/unidecoder/data/x88.yml +257 -0
  106. data/lib/unidecoder/data/x89.yml +257 -0
  107. data/lib/unidecoder/data/x8a.yml +257 -0
  108. data/lib/unidecoder/data/x8b.yml +257 -0
  109. data/lib/unidecoder/data/x8c.yml +257 -0
  110. data/lib/unidecoder/data/x8d.yml +257 -0
  111. data/lib/unidecoder/data/x8e.yml +257 -0
  112. data/lib/unidecoder/data/x8f.yml +257 -0
  113. data/lib/unidecoder/data/x90.yml +257 -0
  114. data/lib/unidecoder/data/x91.yml +257 -0
  115. data/lib/unidecoder/data/x92.yml +257 -0
  116. data/lib/unidecoder/data/x93.yml +257 -0
  117. data/lib/unidecoder/data/x94.yml +257 -0
  118. data/lib/unidecoder/data/x95.yml +257 -0
  119. data/lib/unidecoder/data/x96.yml +257 -0
  120. data/lib/unidecoder/data/x97.yml +257 -0
  121. data/lib/unidecoder/data/x98.yml +257 -0
  122. data/lib/unidecoder/data/x99.yml +257 -0
  123. data/lib/unidecoder/data/x9a.yml +257 -0
  124. data/lib/unidecoder/data/x9b.yml +257 -0
  125. data/lib/unidecoder/data/x9c.yml +257 -0
  126. data/lib/unidecoder/data/x9d.yml +257 -0
  127. data/lib/unidecoder/data/x9e.yml +257 -0
  128. data/lib/unidecoder/data/x9f.yml +256 -0
  129. data/lib/unidecoder/data/xa0.yml +257 -0
  130. data/lib/unidecoder/data/xa1.yml +257 -0
  131. data/lib/unidecoder/data/xa2.yml +257 -0
  132. data/lib/unidecoder/data/xa3.yml +257 -0
  133. data/lib/unidecoder/data/xa4.yml +256 -0
  134. data/lib/unidecoder/data/xac.yml +257 -0
  135. data/lib/unidecoder/data/xad.yml +257 -0
  136. data/lib/unidecoder/data/xae.yml +257 -0
  137. data/lib/unidecoder/data/xaf.yml +257 -0
  138. data/lib/unidecoder/data/xb0.yml +257 -0
  139. data/lib/unidecoder/data/xb1.yml +257 -0
  140. data/lib/unidecoder/data/xb2.yml +257 -0
  141. data/lib/unidecoder/data/xb3.yml +257 -0
  142. data/lib/unidecoder/data/xb4.yml +257 -0
  143. data/lib/unidecoder/data/xb5.yml +257 -0
  144. data/lib/unidecoder/data/xb6.yml +257 -0
  145. data/lib/unidecoder/data/xb7.yml +257 -0
  146. data/lib/unidecoder/data/xb8.yml +257 -0
  147. data/lib/unidecoder/data/xb9.yml +257 -0
  148. data/lib/unidecoder/data/xba.yml +257 -0
  149. data/lib/unidecoder/data/xbb.yml +257 -0
  150. data/lib/unidecoder/data/xbc.yml +257 -0
  151. data/lib/unidecoder/data/xbd.yml +257 -0
  152. data/lib/unidecoder/data/xbe.yml +257 -0
  153. data/lib/unidecoder/data/xbf.yml +257 -0
  154. data/lib/unidecoder/data/xc0.yml +257 -0
  155. data/lib/unidecoder/data/xc1.yml +257 -0
  156. data/lib/unidecoder/data/xc2.yml +257 -0
  157. data/lib/unidecoder/data/xc3.yml +257 -0
  158. data/lib/unidecoder/data/xc4.yml +257 -0
  159. data/lib/unidecoder/data/xc5.yml +257 -0
  160. data/lib/unidecoder/data/xc6.yml +257 -0
  161. data/lib/unidecoder/data/xc7.yml +257 -0
  162. data/lib/unidecoder/data/xc8.yml +257 -0
  163. data/lib/unidecoder/data/xc9.yml +257 -0
  164. data/lib/unidecoder/data/xca.yml +257 -0
  165. data/lib/unidecoder/data/xcb.yml +257 -0
  166. data/lib/unidecoder/data/xcc.yml +257 -0
  167. data/lib/unidecoder/data/xcd.yml +257 -0
  168. data/lib/unidecoder/data/xce.yml +257 -0
  169. data/lib/unidecoder/data/xcf.yml +257 -0
  170. data/lib/unidecoder/data/xd0.yml +257 -0
  171. data/lib/unidecoder/data/xd1.yml +257 -0
  172. data/lib/unidecoder/data/xd2.yml +257 -0
  173. data/lib/unidecoder/data/xd3.yml +257 -0
  174. data/lib/unidecoder/data/xd4.yml +257 -0
  175. data/lib/unidecoder/data/xd5.yml +257 -0
  176. data/lib/unidecoder/data/xd6.yml +257 -0
  177. data/lib/unidecoder/data/xd7.yml +256 -0
  178. data/lib/unidecoder/data/xf9.yml +257 -0
  179. data/lib/unidecoder/data/xfa.yml +256 -0
  180. data/lib/unidecoder/data/xfb.yml +257 -0
  181. data/lib/unidecoder/data/xfc.yml +257 -0
  182. data/lib/unidecoder/data/xfd.yml +256 -0
  183. data/lib/unidecoder/data/xfe.yml +257 -0
  184. data/lib/unidecoder/data/xff.yml +257 -0
  185. data/lib/unidecoder/version.rb +9 -0
  186. data/test/unicode_point_suite/basic_latin_test.rb +144 -0
  187. data/test/unicode_point_suite/codepoint_test_helper.rb +28 -0
  188. data/test/unidecoder_test.rb +114 -0
  189. metadata +255 -0
@@ -0,0 +1,9 @@
1
+ module Unidecoder
2
+ module Version
3
+ MAJOR = 1
4
+ MINOR = 1
5
+ TINY = 0
6
+ BUILD = nil
7
+ STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.')
8
+ end
9
+ end
@@ -0,0 +1,144 @@
1
+ $:.unshift File.expand_path("../../lib", File.dirname(__FILE__))
2
+ $:.unshift File.expand_path(File.dirname(__FILE__))
3
+ $:.uniq!
4
+ require "test/unit"
5
+ require "unidecoder"
6
+ require "codepoint_test_helper"
7
+
8
+ include CodepointTestHelper
9
+
10
+ class BasicLatinTest < Test::Unit::TestCase
11
+ # This test suite is just for regression testing and debugging,
12
+ # to better transliterate the Basic Latin Unicode codepoints
13
+ #
14
+ # http://unicode.org/charts/
15
+ # http://unicode.org/charts/PDF/U0000.pdf
16
+
17
+ # NOTE: I can't figure out how to test control characters.
18
+ # I get weird results when trying to pack them to Unicode.
19
+
20
+ def test_spaces
21
+ assert_equal_encoded " ", %w{0020 00a0}
22
+ assert_equal_encoded "", %w{200b 2060}
23
+ end
24
+
25
+ def test_exclamation_marks
26
+ assert_equal_encoded "!", %w{0021 2762}
27
+ assert_equal_encoded "!!", "203c"
28
+ assert_equal_encoded "", "00a1"
29
+ assert_equal_encoded "?!", "203d"
30
+ end
31
+
32
+ def test_quotation_marks
33
+ assert_equal_encoded "\"", %w{0022 02ba 2033 3003}
34
+ end
35
+
36
+ def test_apostrophes
37
+ assert_equal_encoded "'", %w{0027 02b9 02bc 02c8 2032}
38
+ end
39
+
40
+ def test_asterisks
41
+ assert_equal_encoded "*", %w{002a 066d 204e 2217 26b9 2731}
42
+ end
43
+
44
+ def test_commas
45
+ assert_equal_encoded ",", %w{002c 060c}
46
+ end
47
+
48
+ def test_periods
49
+ assert_equal_encoded ".", %w{002e 06d4}
50
+ end
51
+
52
+ def test_hyphens
53
+ assert_equal_encoded "-", %w{002d 2010 2011 2012 2212}
54
+ end
55
+
56
+ def test_endash
57
+ assert_equal_encoded "--", %w{2013 2015}
58
+ end
59
+
60
+ def test_emdash
61
+ assert_equal_encoded "---", %w{2014}
62
+ end
63
+
64
+ def test_dotleader
65
+ assert_equal_encoded "..", %w{2025}
66
+ end
67
+
68
+ def test_ellipsis
69
+ assert_equal_encoded "...", %w{2026}
70
+ end
71
+
72
+ def test_slashes
73
+ assert_equal_encoded "/", %w{002f 2044 2215}
74
+ assert_equal_encoded "\\", %w{005c 2216}
75
+ end
76
+
77
+ def test_colons
78
+ assert_equal_encoded ":", %w{003a 2236}
79
+ end
80
+
81
+ def test_semicolons
82
+ assert_equal_encoded ";", %w{003b 061b}
83
+ end
84
+
85
+ def test_less_thans
86
+ assert_equal_encoded "<", %w{003c 2039 2329 27e8 3008}
87
+ end
88
+
89
+ def test_equals
90
+ assert_equal_encoded "=", "003d"
91
+ end
92
+
93
+ def test_greater_thans
94
+ assert_equal_encoded ">", %w{003e 203a 232a 27e9 3009}
95
+ end
96
+
97
+ def test_question_marks
98
+ assert_equal_encoded "?", %w{003f 061f}
99
+ assert_equal_encoded "", "00bf"
100
+ assert_equal_encoded "?!", %w{203d 2048}
101
+ assert_equal_encoded "!?", "2049"
102
+ end
103
+
104
+ def test_circumflexes
105
+ assert_equal_encoded "^", %w{005e 2038 2303}
106
+ end
107
+
108
+ def test_underscores
109
+ assert_equal_encoded "_", %w{005f 02cd 2017}
110
+ end
111
+
112
+ def test_grave_accents
113
+ assert_equal_encoded "`", %w{0060 02cb 2035}
114
+ end
115
+
116
+ def test_bars
117
+ assert_equal_encoded "|", %w{007c 2223 2758}
118
+ end
119
+
120
+ def test_tildes
121
+ assert_equal_encoded "~", %w{007e 02dc 2053 223c ff5e}
122
+ end
123
+
124
+ def test_related_letters
125
+ {
126
+ "B" => "212c",
127
+ "C" => %w{2102 212d},
128
+ "E" => %w{2107 2130},
129
+ "F" => "2131",
130
+ "H" => %w{210b 210c 210d},
131
+ "I" => %w{0130 0406 04c0 2110 2111 2160},
132
+ "K" => "212a",
133
+ "L" => "2112",
134
+ "M" => "2133",
135
+ "N" => "2115",
136
+ "P" => "2119",
137
+ "Q" => "211a",
138
+ "R" => %w{211b 211c 211d},
139
+ "Z" => %w{2124 2128}
140
+ }.each do |expected, encode_mes|
141
+ assert_equal_encoded expected, encode_mes
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,28 @@
1
+ # Shorthand assertion helpers shared by the codepoint test suites
2
+ module CodepointTestHelper
3
+ def assert_equal_encoded(expected, encode_mes)
4
+ # Explicit type check instead of duck typing, because Ruby 1.9 doesn't mix Enumerable into String
5
+ encode_mes = [encode_mes] if encode_mes.is_a?(String)
6
+ encode_mes.each do |encode_me|
7
+ encoded = encode(encode_me)
8
+ actual = encoded.to_ascii
9
+ if expected == actual
10
+ # Already compared above; just record a passing assertion instead of comparing again
11
+ assert true
12
+ else
13
+ message = "<#{expected.inspect}> expected but was\n<#{actual.inspect}>\n"
14
+ message << " defined in #{Unidecoder.in_yaml_file(encoded)}"
15
+ fail message
16
+ end
17
+ end
18
+ end
19
+
20
+ private
21
+ def encode(codepoint)
22
+ Unidecoder.encode(codepoint)
23
+ end
24
+
25
+ def which_yaml(codepoint)
26
+ Unidecoder.in_yaml_file(encode(codepoint))
27
+ end
28
+ end
@@ -0,0 +1,114 @@
1
+ # encoding: utf-8
2
+ $:.unshift File.expand_path("../lib", File.dirname(__FILE__))
3
+ $:.unshift File.expand_path(File.dirname(__FILE__))
4
+ $:.uniq!
5
+ require "test/unit"
6
+ require "unidecoder"
7
+
8
+
9
+ class UnidecoderTest < Test::Unit::TestCase
10
+ # Silly phrases courtesy of Frank da Cruz (http://www.columbia.edu/kermit/utf8.html).
11
+
12
+ DONT_CONVERT = [
13
+ "Vitrum edere possum; mihi non nocet.", # Latin
14
+ "Je puis mangier del voirre. Ne me nuit.", # Old French
15
+ "Kristala jan dezaket, ez dit minik ematen.", # Basque
16
+ "Kaya kong kumain nang bubog at hindi ako masaktan.", # Tagalog
17
+ "Ich kann Glas essen, ohne mir weh zu tun.", # German
18
+ "I can eat glass and it doesn't hurt me.", # English
19
+ ]
20
+
21
+ CONVERT_PAIRS = {
22
+ # French
23
+ "Je peux manger du verre, ça ne me fait pas de mal." => "Je peux manger du verre, ca ne me fait pas de mal.",
24
+ # Romanian
25
+ "Pot să mănânc sticlă și ea nu mă rănește." => "Pot sa mananc sticla si ea nu ma raneste.",
26
+ # Icelandic
27
+ "Ég get etið gler án þess að meiða mig." => "Eg get etid gler an thess ad meida mig.",
28
+ # Albanian
29
+ "Unë mund të ha qelq dhe nuk më gjen gjë." => "Une mund te ha qelq dhe nuk me gjen gje.",
30
+ # Polish
31
+ "Mogę jeść szkło i mi nie szkodzi." => "Moge jesc szklo i mi nie szkodzi.",
32
+ # Russian
33
+ "Я могу есть стекло, оно мне не вредит." => "Ia moghu iest' stieklo, ono mnie nie vriedit.",
34
+ # Bulgarian
35
+ "Мога да ям стъкло, то не ми вреди." => "Mogha da iam stklo, to nie mi vriedi.",
36
+ # Anglo-Saxon
37
+ "ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬" => "ic.mag.glas.eotacn.ond.hit.ne.heacrmiacth.me:",
38
+ # Classical Greek
39
+ "ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει" => "ualon phagein dunamai; touto ou me blaptei",
40
+ # Hindi
41
+ "मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती" => "maiN kaaNc khaa sktaa huuN aur mujhe usse koii cott nhiiN phuNctii",
42
+ # Thai
43
+ "ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ" => "chankinkracchkaid aetmanaimthamaihchanecchb",
44
+ # Chinese
45
+ "我能吞下玻璃而不伤身体。" => "Wo Neng Tun Xia Bo Li Er Bu Shang Shen Ti . ",
46
+ # Japanese
47
+ "私はガラスを食べられます。それは私を傷つけません。" => "Si hagarasuwoShi beraremasu. sorehaSi woShang tukemasen. ",
48
+ "من می توانم بدونِ احساس درد شيشه بخورم" => # Persian
49
+ "mn my twnm bdwni Hss drd shyshh bkhwrm",
50
+ "أنا قادر على أكل الزجاج و هذا لا يؤلمن" => # Arabic
51
+ "'n qdr `l~ 'kl lzjj w hdh l yw'lmn",
52
+ "אני יכול לאכול זכוכית וזה לא מזיק לי" => # Hebrew
53
+ "ny ykvl lkvl zkvkyt vzh l mzyq ly",
54
+ }
55
+
56
+ def test_should_raise_error_with_invalid_utf8
57
+ [
58
+ "\x80", # Continuation byte, low (cp1252)
59
+ "\x94", # Continuation byte, mid (cp1252)
60
+ "\x9F", # Continuation byte, high (cp1252)
61
+ "\xC0", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
62
+ "\xC1", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
63
+ "\xC2", # Start of 2-byte sequence, low
64
+ "\xC8", # Start of 2-byte sequence, mid
65
+ "\xDF", # Start of 2-byte sequence, high
66
+ "\xE0", # Start of 3-byte sequence, low
67
+ "\xE8", # Start of 3-byte sequence, mid
68
+ "\xEF", # Start of 3-byte sequence, high
69
+ "\xF0", # Start of 4-byte sequence
70
+ "\xF1", # Start of 4-byte sequence (0xF1 is a valid lead byte; only 0xF5-0xFF are unused)
71
+ "\xFF", # Restricted byte
72
+ ].map do |byte|
73
+ assert_raise ArgumentError, "#{byte.inspect} did not raise error" do
74
+ Unidecoder.decode("a#{byte}a")
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ def test_unidecoder_decode
81
+ DONT_CONVERT.each do |ascii|
82
+ assert_equal ascii, Unidecoder.decode(ascii)
83
+ end
84
+ CONVERT_PAIRS.each do |unicode, ascii|
85
+ assert_equal ascii, Unidecoder.decode(unicode)
86
+ end
87
+ end
88
+
89
+ def test_unidecoder_encode
90
+ {
91
+ # Strings
92
+ "0041" => "A",
93
+ "00e6" => "æ",
94
+ "042f" => "Я"
95
+ }.each do |codepoint, unicode|
96
+ assert_equal unicode, Unidecoder.encode(codepoint)
97
+ end
98
+ end
99
+
100
+ def test_unidecoder_in_yaml_file
101
+ {
102
+ "A" => "x00.yml (line 67)",
103
+ "π" => "x03.yml (line 194)",
104
+ "Я" => "x04.yml (line 49)"
105
+ }.each do |character, output|
106
+ assert_equal output, Unidecoder.in_yaml_file(character)
107
+ end
108
+ end
109
+
110
+ def test_override
111
+ assert_equal "Juergen", Unidecoder.decode("Jürgen", "ü" => "ue")
112
+ end
113
+
114
+ end
metadata ADDED
@@ -0,0 +1,255 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unidecoder
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 0
9
+ version: 1.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Russell Norris
13
+ - Norman Clarke
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-06-23 00:00:00 -03:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: A port of Perl's Unidecoder to Ruby. Transliterates Unicode strings to an ASCII approximation.
23
+ email:
24
+ - rsl@luckysneaks.com
25
+ - norman@njclarke.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - lib/unidecoder/version.rb
34
+ - lib/unidecoder.rb
35
+ - Changelog.md
36
+ - README.md
37
+ - Rakefile
38
+ - test/unicode_point_suite/basic_latin_test.rb
39
+ - test/unicode_point_suite/codepoint_test_helper.rb
40
+ - test/unidecoder_test.rb
41
+ - lib/unidecoder/data/x00.yml
42
+ - lib/unidecoder/data/x01.yml
43
+ - lib/unidecoder/data/x02.yml
44
+ - lib/unidecoder/data/x03.yml
45
+ - lib/unidecoder/data/x04.yml
46
+ - lib/unidecoder/data/x05.yml
47
+ - lib/unidecoder/data/x06.yml
48
+ - lib/unidecoder/data/x07.yml
49
+ - lib/unidecoder/data/x09.yml
50
+ - lib/unidecoder/data/x0a.yml
51
+ - lib/unidecoder/data/x0b.yml
52
+ - lib/unidecoder/data/x0c.yml
53
+ - lib/unidecoder/data/x0d.yml
54
+ - lib/unidecoder/data/x0e.yml
55
+ - lib/unidecoder/data/x0f.yml
56
+ - lib/unidecoder/data/x10.yml
57
+ - lib/unidecoder/data/x11.yml
58
+ - lib/unidecoder/data/x12.yml
59
+ - lib/unidecoder/data/x13.yml
60
+ - lib/unidecoder/data/x14.yml
61
+ - lib/unidecoder/data/x15.yml
62
+ - lib/unidecoder/data/x16.yml
63
+ - lib/unidecoder/data/x17.yml
64
+ - lib/unidecoder/data/x18.yml
65
+ - lib/unidecoder/data/x1e.yml
66
+ - lib/unidecoder/data/x1f.yml
67
+ - lib/unidecoder/data/x20.yml
68
+ - lib/unidecoder/data/x21.yml
69
+ - lib/unidecoder/data/x22.yml
70
+ - lib/unidecoder/data/x23.yml
71
+ - lib/unidecoder/data/x24.yml
72
+ - lib/unidecoder/data/x25.yml
73
+ - lib/unidecoder/data/x26.yml
74
+ - lib/unidecoder/data/x27.yml
75
+ - lib/unidecoder/data/x28.yml
76
+ - lib/unidecoder/data/x2e.yml
77
+ - lib/unidecoder/data/x2f.yml
78
+ - lib/unidecoder/data/x30.yml
79
+ - lib/unidecoder/data/x31.yml
80
+ - lib/unidecoder/data/x32.yml
81
+ - lib/unidecoder/data/x33.yml
82
+ - lib/unidecoder/data/x4d.yml
83
+ - lib/unidecoder/data/x4e.yml
84
+ - lib/unidecoder/data/x4f.yml
85
+ - lib/unidecoder/data/x50.yml
86
+ - lib/unidecoder/data/x51.yml
87
+ - lib/unidecoder/data/x52.yml
88
+ - lib/unidecoder/data/x53.yml
89
+ - lib/unidecoder/data/x54.yml
90
+ - lib/unidecoder/data/x55.yml
91
+ - lib/unidecoder/data/x56.yml
92
+ - lib/unidecoder/data/x57.yml
93
+ - lib/unidecoder/data/x58.yml
94
+ - lib/unidecoder/data/x59.yml
95
+ - lib/unidecoder/data/x5a.yml
96
+ - lib/unidecoder/data/x5b.yml
97
+ - lib/unidecoder/data/x5c.yml
98
+ - lib/unidecoder/data/x5d.yml
99
+ - lib/unidecoder/data/x5e.yml
100
+ - lib/unidecoder/data/x5f.yml
101
+ - lib/unidecoder/data/x60.yml
102
+ - lib/unidecoder/data/x61.yml
103
+ - lib/unidecoder/data/x62.yml
104
+ - lib/unidecoder/data/x63.yml
105
+ - lib/unidecoder/data/x64.yml
106
+ - lib/unidecoder/data/x65.yml
107
+ - lib/unidecoder/data/x66.yml
108
+ - lib/unidecoder/data/x67.yml
109
+ - lib/unidecoder/data/x68.yml
110
+ - lib/unidecoder/data/x69.yml
111
+ - lib/unidecoder/data/x6a.yml
112
+ - lib/unidecoder/data/x6b.yml
113
+ - lib/unidecoder/data/x6c.yml
114
+ - lib/unidecoder/data/x6d.yml
115
+ - lib/unidecoder/data/x6e.yml
116
+ - lib/unidecoder/data/x6f.yml
117
+ - lib/unidecoder/data/x70.yml
118
+ - lib/unidecoder/data/x71.yml
119
+ - lib/unidecoder/data/x72.yml
120
+ - lib/unidecoder/data/x73.yml
121
+ - lib/unidecoder/data/x74.yml
122
+ - lib/unidecoder/data/x75.yml
123
+ - lib/unidecoder/data/x76.yml
124
+ - lib/unidecoder/data/x77.yml
125
+ - lib/unidecoder/data/x78.yml
126
+ - lib/unidecoder/data/x79.yml
127
+ - lib/unidecoder/data/x7a.yml
128
+ - lib/unidecoder/data/x7b.yml
129
+ - lib/unidecoder/data/x7c.yml
130
+ - lib/unidecoder/data/x7d.yml
131
+ - lib/unidecoder/data/x7e.yml
132
+ - lib/unidecoder/data/x7f.yml
133
+ - lib/unidecoder/data/x80.yml
134
+ - lib/unidecoder/data/x81.yml
135
+ - lib/unidecoder/data/x82.yml
136
+ - lib/unidecoder/data/x83.yml
137
+ - lib/unidecoder/data/x84.yml
138
+ - lib/unidecoder/data/x85.yml
139
+ - lib/unidecoder/data/x86.yml
140
+ - lib/unidecoder/data/x87.yml
141
+ - lib/unidecoder/data/x88.yml
142
+ - lib/unidecoder/data/x89.yml
143
+ - lib/unidecoder/data/x8a.yml
144
+ - lib/unidecoder/data/x8b.yml
145
+ - lib/unidecoder/data/x8c.yml
146
+ - lib/unidecoder/data/x8d.yml
147
+ - lib/unidecoder/data/x8e.yml
148
+ - lib/unidecoder/data/x8f.yml
149
+ - lib/unidecoder/data/x90.yml
150
+ - lib/unidecoder/data/x91.yml
151
+ - lib/unidecoder/data/x92.yml
152
+ - lib/unidecoder/data/x93.yml
153
+ - lib/unidecoder/data/x94.yml
154
+ - lib/unidecoder/data/x95.yml
155
+ - lib/unidecoder/data/x96.yml
156
+ - lib/unidecoder/data/x97.yml
157
+ - lib/unidecoder/data/x98.yml
158
+ - lib/unidecoder/data/x99.yml
159
+ - lib/unidecoder/data/x9a.yml
160
+ - lib/unidecoder/data/x9b.yml
161
+ - lib/unidecoder/data/x9c.yml
162
+ - lib/unidecoder/data/x9d.yml
163
+ - lib/unidecoder/data/x9e.yml
164
+ - lib/unidecoder/data/x9f.yml
165
+ - lib/unidecoder/data/xa0.yml
166
+ - lib/unidecoder/data/xa1.yml
167
+ - lib/unidecoder/data/xa2.yml
168
+ - lib/unidecoder/data/xa3.yml
169
+ - lib/unidecoder/data/xa4.yml
170
+ - lib/unidecoder/data/xac.yml
171
+ - lib/unidecoder/data/xad.yml
172
+ - lib/unidecoder/data/xae.yml
173
+ - lib/unidecoder/data/xaf.yml
174
+ - lib/unidecoder/data/xb0.yml
175
+ - lib/unidecoder/data/xb1.yml
176
+ - lib/unidecoder/data/xb2.yml
177
+ - lib/unidecoder/data/xb3.yml
178
+ - lib/unidecoder/data/xb4.yml
179
+ - lib/unidecoder/data/xb5.yml
180
+ - lib/unidecoder/data/xb6.yml
181
+ - lib/unidecoder/data/xb7.yml
182
+ - lib/unidecoder/data/xb8.yml
183
+ - lib/unidecoder/data/xb9.yml
184
+ - lib/unidecoder/data/xba.yml
185
+ - lib/unidecoder/data/xbb.yml
186
+ - lib/unidecoder/data/xbc.yml
187
+ - lib/unidecoder/data/xbd.yml
188
+ - lib/unidecoder/data/xbe.yml
189
+ - lib/unidecoder/data/xbf.yml
190
+ - lib/unidecoder/data/xc0.yml
191
+ - lib/unidecoder/data/xc1.yml
192
+ - lib/unidecoder/data/xc2.yml
193
+ - lib/unidecoder/data/xc3.yml
194
+ - lib/unidecoder/data/xc4.yml
195
+ - lib/unidecoder/data/xc5.yml
196
+ - lib/unidecoder/data/xc6.yml
197
+ - lib/unidecoder/data/xc7.yml
198
+ - lib/unidecoder/data/xc8.yml
199
+ - lib/unidecoder/data/xc9.yml
200
+ - lib/unidecoder/data/xca.yml
201
+ - lib/unidecoder/data/xcb.yml
202
+ - lib/unidecoder/data/xcc.yml
203
+ - lib/unidecoder/data/xcd.yml
204
+ - lib/unidecoder/data/xce.yml
205
+ - lib/unidecoder/data/xcf.yml
206
+ - lib/unidecoder/data/xd0.yml
207
+ - lib/unidecoder/data/xd1.yml
208
+ - lib/unidecoder/data/xd2.yml
209
+ - lib/unidecoder/data/xd3.yml
210
+ - lib/unidecoder/data/xd4.yml
211
+ - lib/unidecoder/data/xd5.yml
212
+ - lib/unidecoder/data/xd6.yml
213
+ - lib/unidecoder/data/xd7.yml
214
+ - lib/unidecoder/data/xf9.yml
215
+ - lib/unidecoder/data/xfa.yml
216
+ - lib/unidecoder/data/xfb.yml
217
+ - lib/unidecoder/data/xfc.yml
218
+ - lib/unidecoder/data/xfd.yml
219
+ - lib/unidecoder/data/xfe.yml
220
+ - lib/unidecoder/data/xff.yml
221
+ has_rdoc: true
222
+ homepage: http://github.com/norman/unidecoder
223
+ licenses: []
224
+
225
+ post_install_message:
226
+ rdoc_options: []
227
+
228
+ require_paths:
229
+ - lib
230
+ required_ruby_version: !ruby/object:Gem::Requirement
231
+ none: false
232
+ requirements:
233
+ - - ">="
234
+ - !ruby/object:Gem::Version
235
+ segments:
236
+ - 0
237
+ version: "0"
238
+ required_rubygems_version: !ruby/object:Gem::Requirement
239
+ none: false
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ segments:
244
+ - 0
245
+ version: "0"
246
+ requirements: []
247
+
248
+ rubyforge_project: "[none]"
249
+ rubygems_version: 1.3.7
250
+ signing_key:
251
+ specification_version: 3
252
+ summary: Transliterates Unicode strings to an ASCII approximation.
253
+ test_files:
254
+ - test/unicode_point_suite/basic_latin_test.rb
255
+ - test/unidecoder_test.rb