unidecoder 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. data/Changelog.md +5 -0
  2. data/README.md +31 -0
  3. data/Rakefile +28 -0
  4. data/lib/unidecoder.rb +98 -0
  5. data/lib/unidecoder/data/x00.yml +257 -0
  6. data/lib/unidecoder/data/x01.yml +257 -0
  7. data/lib/unidecoder/data/x02.yml +256 -0
  8. data/lib/unidecoder/data/x03.yml +256 -0
  9. data/lib/unidecoder/data/x04.yml +256 -0
  10. data/lib/unidecoder/data/x05.yml +256 -0
  11. data/lib/unidecoder/data/x06.yml +256 -0
  12. data/lib/unidecoder/data/x07.yml +256 -0
  13. data/lib/unidecoder/data/x09.yml +256 -0
  14. data/lib/unidecoder/data/x0a.yml +256 -0
  15. data/lib/unidecoder/data/x0b.yml +256 -0
  16. data/lib/unidecoder/data/x0c.yml +256 -0
  17. data/lib/unidecoder/data/x0d.yml +256 -0
  18. data/lib/unidecoder/data/x0e.yml +256 -0
  19. data/lib/unidecoder/data/x0f.yml +256 -0
  20. data/lib/unidecoder/data/x10.yml +256 -0
  21. data/lib/unidecoder/data/x11.yml +256 -0
  22. data/lib/unidecoder/data/x12.yml +257 -0
  23. data/lib/unidecoder/data/x13.yml +256 -0
  24. data/lib/unidecoder/data/x14.yml +257 -0
  25. data/lib/unidecoder/data/x15.yml +257 -0
  26. data/lib/unidecoder/data/x16.yml +256 -0
  27. data/lib/unidecoder/data/x17.yml +256 -0
  28. data/lib/unidecoder/data/x18.yml +256 -0
  29. data/lib/unidecoder/data/x1e.yml +256 -0
  30. data/lib/unidecoder/data/x1f.yml +256 -0
  31. data/lib/unidecoder/data/x20.yml +256 -0
  32. data/lib/unidecoder/data/x21.yml +256 -0
  33. data/lib/unidecoder/data/x22.yml +256 -0
  34. data/lib/unidecoder/data/x23.yml +256 -0
  35. data/lib/unidecoder/data/x24.yml +256 -0
  36. data/lib/unidecoder/data/x25.yml +256 -0
  37. data/lib/unidecoder/data/x26.yml +256 -0
  38. data/lib/unidecoder/data/x27.yml +256 -0
  39. data/lib/unidecoder/data/x28.yml +257 -0
  40. data/lib/unidecoder/data/x2e.yml +256 -0
  41. data/lib/unidecoder/data/x2f.yml +256 -0
  42. data/lib/unidecoder/data/x30.yml +256 -0
  43. data/lib/unidecoder/data/x31.yml +256 -0
  44. data/lib/unidecoder/data/x32.yml +256 -0
  45. data/lib/unidecoder/data/x33.yml +256 -0
  46. data/lib/unidecoder/data/x4d.yml +256 -0
  47. data/lib/unidecoder/data/x4e.yml +257 -0
  48. data/lib/unidecoder/data/x4f.yml +257 -0
  49. data/lib/unidecoder/data/x50.yml +257 -0
  50. data/lib/unidecoder/data/x51.yml +257 -0
  51. data/lib/unidecoder/data/x52.yml +257 -0
  52. data/lib/unidecoder/data/x53.yml +257 -0
  53. data/lib/unidecoder/data/x54.yml +257 -0
  54. data/lib/unidecoder/data/x55.yml +257 -0
  55. data/lib/unidecoder/data/x56.yml +257 -0
  56. data/lib/unidecoder/data/x57.yml +257 -0
  57. data/lib/unidecoder/data/x58.yml +257 -0
  58. data/lib/unidecoder/data/x59.yml +257 -0
  59. data/lib/unidecoder/data/x5a.yml +257 -0
  60. data/lib/unidecoder/data/x5b.yml +257 -0
  61. data/lib/unidecoder/data/x5c.yml +257 -0
  62. data/lib/unidecoder/data/x5d.yml +257 -0
  63. data/lib/unidecoder/data/x5e.yml +257 -0
  64. data/lib/unidecoder/data/x5f.yml +257 -0
  65. data/lib/unidecoder/data/x60.yml +257 -0
  66. data/lib/unidecoder/data/x61.yml +257 -0
  67. data/lib/unidecoder/data/x62.yml +257 -0
  68. data/lib/unidecoder/data/x63.yml +257 -0
  69. data/lib/unidecoder/data/x64.yml +257 -0
  70. data/lib/unidecoder/data/x65.yml +257 -0
  71. data/lib/unidecoder/data/x66.yml +257 -0
  72. data/lib/unidecoder/data/x67.yml +257 -0
  73. data/lib/unidecoder/data/x68.yml +257 -0
  74. data/lib/unidecoder/data/x69.yml +257 -0
  75. data/lib/unidecoder/data/x6a.yml +257 -0
  76. data/lib/unidecoder/data/x6b.yml +257 -0
  77. data/lib/unidecoder/data/x6c.yml +257 -0
  78. data/lib/unidecoder/data/x6d.yml +257 -0
  79. data/lib/unidecoder/data/x6e.yml +257 -0
  80. data/lib/unidecoder/data/x6f.yml +257 -0
  81. data/lib/unidecoder/data/x70.yml +257 -0
  82. data/lib/unidecoder/data/x71.yml +257 -0
  83. data/lib/unidecoder/data/x72.yml +257 -0
  84. data/lib/unidecoder/data/x73.yml +257 -0
  85. data/lib/unidecoder/data/x74.yml +257 -0
  86. data/lib/unidecoder/data/x75.yml +257 -0
  87. data/lib/unidecoder/data/x76.yml +257 -0
  88. data/lib/unidecoder/data/x77.yml +257 -0
  89. data/lib/unidecoder/data/x78.yml +257 -0
  90. data/lib/unidecoder/data/x79.yml +257 -0
  91. data/lib/unidecoder/data/x7a.yml +257 -0
  92. data/lib/unidecoder/data/x7b.yml +257 -0
  93. data/lib/unidecoder/data/x7c.yml +257 -0
  94. data/lib/unidecoder/data/x7d.yml +257 -0
  95. data/lib/unidecoder/data/x7e.yml +257 -0
  96. data/lib/unidecoder/data/x7f.yml +257 -0
  97. data/lib/unidecoder/data/x80.yml +257 -0
  98. data/lib/unidecoder/data/x81.yml +257 -0
  99. data/lib/unidecoder/data/x82.yml +257 -0
  100. data/lib/unidecoder/data/x83.yml +257 -0
  101. data/lib/unidecoder/data/x84.yml +257 -0
  102. data/lib/unidecoder/data/x85.yml +257 -0
  103. data/lib/unidecoder/data/x86.yml +257 -0
  104. data/lib/unidecoder/data/x87.yml +257 -0
  105. data/lib/unidecoder/data/x88.yml +257 -0
  106. data/lib/unidecoder/data/x89.yml +257 -0
  107. data/lib/unidecoder/data/x8a.yml +257 -0
  108. data/lib/unidecoder/data/x8b.yml +257 -0
  109. data/lib/unidecoder/data/x8c.yml +257 -0
  110. data/lib/unidecoder/data/x8d.yml +257 -0
  111. data/lib/unidecoder/data/x8e.yml +257 -0
  112. data/lib/unidecoder/data/x8f.yml +257 -0
  113. data/lib/unidecoder/data/x90.yml +257 -0
  114. data/lib/unidecoder/data/x91.yml +257 -0
  115. data/lib/unidecoder/data/x92.yml +257 -0
  116. data/lib/unidecoder/data/x93.yml +257 -0
  117. data/lib/unidecoder/data/x94.yml +257 -0
  118. data/lib/unidecoder/data/x95.yml +257 -0
  119. data/lib/unidecoder/data/x96.yml +257 -0
  120. data/lib/unidecoder/data/x97.yml +257 -0
  121. data/lib/unidecoder/data/x98.yml +257 -0
  122. data/lib/unidecoder/data/x99.yml +257 -0
  123. data/lib/unidecoder/data/x9a.yml +257 -0
  124. data/lib/unidecoder/data/x9b.yml +257 -0
  125. data/lib/unidecoder/data/x9c.yml +257 -0
  126. data/lib/unidecoder/data/x9d.yml +257 -0
  127. data/lib/unidecoder/data/x9e.yml +257 -0
  128. data/lib/unidecoder/data/x9f.yml +256 -0
  129. data/lib/unidecoder/data/xa0.yml +257 -0
  130. data/lib/unidecoder/data/xa1.yml +257 -0
  131. data/lib/unidecoder/data/xa2.yml +257 -0
  132. data/lib/unidecoder/data/xa3.yml +257 -0
  133. data/lib/unidecoder/data/xa4.yml +256 -0
  134. data/lib/unidecoder/data/xac.yml +257 -0
  135. data/lib/unidecoder/data/xad.yml +257 -0
  136. data/lib/unidecoder/data/xae.yml +257 -0
  137. data/lib/unidecoder/data/xaf.yml +257 -0
  138. data/lib/unidecoder/data/xb0.yml +257 -0
  139. data/lib/unidecoder/data/xb1.yml +257 -0
  140. data/lib/unidecoder/data/xb2.yml +257 -0
  141. data/lib/unidecoder/data/xb3.yml +257 -0
  142. data/lib/unidecoder/data/xb4.yml +257 -0
  143. data/lib/unidecoder/data/xb5.yml +257 -0
  144. data/lib/unidecoder/data/xb6.yml +257 -0
  145. data/lib/unidecoder/data/xb7.yml +257 -0
  146. data/lib/unidecoder/data/xb8.yml +257 -0
  147. data/lib/unidecoder/data/xb9.yml +257 -0
  148. data/lib/unidecoder/data/xba.yml +257 -0
  149. data/lib/unidecoder/data/xbb.yml +257 -0
  150. data/lib/unidecoder/data/xbc.yml +257 -0
  151. data/lib/unidecoder/data/xbd.yml +257 -0
  152. data/lib/unidecoder/data/xbe.yml +257 -0
  153. data/lib/unidecoder/data/xbf.yml +257 -0
  154. data/lib/unidecoder/data/xc0.yml +257 -0
  155. data/lib/unidecoder/data/xc1.yml +257 -0
  156. data/lib/unidecoder/data/xc2.yml +257 -0
  157. data/lib/unidecoder/data/xc3.yml +257 -0
  158. data/lib/unidecoder/data/xc4.yml +257 -0
  159. data/lib/unidecoder/data/xc5.yml +257 -0
  160. data/lib/unidecoder/data/xc6.yml +257 -0
  161. data/lib/unidecoder/data/xc7.yml +257 -0
  162. data/lib/unidecoder/data/xc8.yml +257 -0
  163. data/lib/unidecoder/data/xc9.yml +257 -0
  164. data/lib/unidecoder/data/xca.yml +257 -0
  165. data/lib/unidecoder/data/xcb.yml +257 -0
  166. data/lib/unidecoder/data/xcc.yml +257 -0
  167. data/lib/unidecoder/data/xcd.yml +257 -0
  168. data/lib/unidecoder/data/xce.yml +257 -0
  169. data/lib/unidecoder/data/xcf.yml +257 -0
  170. data/lib/unidecoder/data/xd0.yml +257 -0
  171. data/lib/unidecoder/data/xd1.yml +257 -0
  172. data/lib/unidecoder/data/xd2.yml +257 -0
  173. data/lib/unidecoder/data/xd3.yml +257 -0
  174. data/lib/unidecoder/data/xd4.yml +257 -0
  175. data/lib/unidecoder/data/xd5.yml +257 -0
  176. data/lib/unidecoder/data/xd6.yml +257 -0
  177. data/lib/unidecoder/data/xd7.yml +256 -0
  178. data/lib/unidecoder/data/xf9.yml +257 -0
  179. data/lib/unidecoder/data/xfa.yml +256 -0
  180. data/lib/unidecoder/data/xfb.yml +257 -0
  181. data/lib/unidecoder/data/xfc.yml +257 -0
  182. data/lib/unidecoder/data/xfd.yml +256 -0
  183. data/lib/unidecoder/data/xfe.yml +257 -0
  184. data/lib/unidecoder/data/xff.yml +257 -0
  185. data/lib/unidecoder/version.rb +9 -0
  186. data/test/unicode_point_suite/basic_latin_test.rb +144 -0
  187. data/test/unicode_point_suite/codepoint_test_helper.rb +28 -0
  188. data/test/unidecoder_test.rb +114 -0
  189. metadata +255 -0
@@ -0,0 +1,9 @@
1
+ module Unidecoder
2
+ module Version # gem version, kept as separate numeric components
3
+ MAJOR = 1
4
+ MINOR = 1
5
+ TINY = 0
6
+ BUILD = nil # nil is removed by #compact below, so no fourth segment is emitted
7
+ STRING = [MAJOR, MINOR, TINY, BUILD].compact.join('.') # evaluates to "1.1.0"
8
+ end
9
+ end
@@ -0,0 +1,144 @@
1
+ $:.unshift File.expand_path("../../lib", File.dirname(__FILE__))
2
+ $:.unshift File.expand_path(File.dirname(__FILE__))
3
+ $:.uniq!
4
+ require "test/unit"
5
+ require "unidecoder"
6
+ require "codepoint_test_helper"
7
+
8
+ include CodepointTestHelper
9
+
10
+ class BasicLatinTest < Test::Unit::TestCase
11
+ # This suite is a regression and debugging aid for improving the
12
+ # transliteration of characters related to the Basic Latin Unicode codepoints.
13
+ #
14
+ # http://unicode.org/charts/
15
+ # http://unicode.org/charts/PDF/U0000.pdf
16
+
17
+ # NOTE: Control characters are not covered here; packing them
18
+ # to Unicode gave weird results when this suite was written.
19
+
20
+ def test_spaces
21
+ assert_equal_encoded " ", %w{0020 00a0}
22
+ assert_equal_encoded "", %w{200b 2060} # these two are expected to transliterate to an empty string
23
+ end
24
+
25
+ def test_exclamation_marks
26
+ assert_equal_encoded "!", %w{0021 2762}
27
+ assert_equal_encoded "!!", "203c"
28
+ assert_equal_encoded "", "00a1" # U+00A1 (inverted exclamation mark) is expected to yield an empty string
29
+ assert_equal_encoded "?!", "203d"
30
+ end
31
+
32
+ def test_quotation_marks
33
+ assert_equal_encoded "\"", %w{0022 02ba 2033 3003}
34
+ end
35
+
36
+ def test_apostrophes
37
+ assert_equal_encoded "'", %w{0027 02b9 02bc 02c8 2032}
38
+ end
39
+
40
+ def test_asterisks
41
+ assert_equal_encoded "*", %w{002a 066d 204e 2217 26b9 2731}
42
+ end
43
+
44
+ def test_commas
45
+ assert_equal_encoded ",", %w{002c 060c}
46
+ end
47
+
48
+ def test_periods
49
+ assert_equal_encoded ".", %w{002e 06d4}
50
+ end
51
+
52
+ def test_hyphens
53
+ assert_equal_encoded "-", %w{002d 2010 2011 2012 2212}
54
+ end
55
+
56
+ def test_endash
57
+ assert_equal_encoded "--", %w{2013 2015}
58
+ end
59
+
60
+ def test_emdash
61
+ assert_equal_encoded "---", %w{2014}
62
+ end
63
+
64
+ def test_dotleader
65
+ assert_equal_encoded "..", %w{2025}
66
+ end
67
+
68
+ def test_ellipsis
69
+ assert_equal_encoded "...", %w{2026}
70
+ end
71
+
72
+ def test_slashes
73
+ assert_equal_encoded "/", %w{002f 2044 2215}
74
+ assert_equal_encoded "\\", %w{005c 2216}
75
+ end
76
+
77
+ def test_colons
78
+ assert_equal_encoded ":", %w{003a 2236}
79
+ end
80
+
81
+ def test_semicolons
82
+ assert_equal_encoded ";", %w{003b 061b}
83
+ end
84
+
85
+ def test_less_thans
86
+ assert_equal_encoded "<", %w{003c 2039 2329 27e8 3008}
87
+ end
88
+
89
+ def test_equals
90
+ assert_equal_encoded "=", "003d"
91
+ end
92
+
93
+ def test_greater_thans
94
+ assert_equal_encoded ">", %w{003e 203a 232a 27e9 3009}
95
+ end
96
+
97
+ def test_question_marks
98
+ assert_equal_encoded "?", %w{003f 061f}
99
+ assert_equal_encoded "", "00bf" # U+00BF (inverted question mark) is expected to yield an empty string
100
+ assert_equal_encoded "?!", %w{203d 2048}
101
+ assert_equal_encoded "!?", "2049"
102
+ end
103
+
104
+ def test_circumflexes
105
+ assert_equal_encoded "^", %w{005e 2038 2303}
106
+ end
107
+
108
+ def test_underscores
109
+ assert_equal_encoded "_", %w{005f 02cd 2017}
110
+ end
111
+
112
+ def test_grave_accents
113
+ assert_equal_encoded "`", %w{0060 02cb 2035}
114
+ end
115
+
116
+ def test_bars
117
+ assert_equal_encoded "|", %w{007c 2223 2758}
118
+ end
119
+
120
+ def test_tildes
121
+ assert_equal_encoded "~", %w{007e 02dc 2053 223c ff5e}
122
+ end
123
+
124
+ def test_related_letters
125
+ { # expected ASCII letter => one hex codepoint or a list of them
126
+ "B" => "212c",
127
+ "C" => %w{2102 212d},
128
+ "E" => %w{2107 2130},
129
+ "F" => "2131",
130
+ "H" => %w{210b 210c 210d},
131
+ "I" => %w{0130 0406 04c0 2110 2111 2160},
132
+ "K" => "212a",
133
+ "L" => "2112",
134
+ "M" => "2133",
135
+ "N" => "2115",
136
+ "P" => "2119",
137
+ "Q" => "211a",
138
+ "R" => %w{211b 211c 211d},
139
+ "Z" => %w{2124 2128}
140
+ }.each do |expected, encode_mes|
141
+ assert_equal_encoded expected, encode_mes
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,28 @@
1
+ # 100% shorthand
2
+ module CodepointTestHelper
3
+ def assert_equal_encoded(expected, encode_mes)
4
+ # Killing a duck because Ruby 1.9 doesn't mix Enumerable into String
5
+ encode_mes = [encode_mes] if encode_mes.is_a?(String)
6
+ encode_mes.each do |encode_me|
7
+ encoded = encode(encode_me)
8
+ actual = encoded.to_ascii
9
+ if expected == actual
10
+ # Let's not retest it
11
+ assert true
12
+ else
13
+ message = "<#{expected.inspect}> expected but was\n<#{actual.inspect}>\n"
14
+ message << " defined in #{Unidecoder.in_yaml_file(encoded)}"
15
+ fail message
16
+ end
17
+ end
18
+ end
19
+
20
+ private
21
+ def encode(codepoint)
22
+ Unidecoder.encode(codepoint)
23
+ end
24
+
25
+ def which_yaml(codepoint)
26
+ Unidecoder.in_yaml_file(encode(codepoint))
27
+ end
28
+ end
@@ -0,0 +1,114 @@
1
+ # encoding: utf-8
2
+ $:.unshift File.expand_path("../lib", File.dirname(__FILE__))
3
+ $:.unshift File.expand_path(File.dirname(__FILE__))
4
+ $:.uniq!
5
+ require "test/unit"
6
+ require "unidecoder"
7
+
8
+
9
+ class UnidecoderTest < Test::Unit::TestCase
10
+ # Silly phrases courtesy of Frank da Cruz (http://www.columbia.edu/kermit/utf8.html).
11
+
12
+ DONT_CONVERT = [ # pure-ASCII sentences that decode is expected to return unchanged
13
+ "Vitrum edere possum; mihi non nocet.", # Latin
14
+ "Je puis mangier del voirre. Ne me nuit.", # Old French
15
+ "Kristala jan dezaket, ez dit minik ematen.", # Basque
16
+ "Kaya kong kumain nang bubog at hindi ako masaktan.", # Tagalog
17
+ "Ich kann Glas essen, ohne mir weh zu tun.", # German
18
+ "I can eat glass and it doesn't hurt me.", # English
19
+ ]
20
+
21
+ CONVERT_PAIRS = { # Unicode input => expected ASCII transliteration
22
+ # French
23
+ "Je peux manger du verre, ça ne me fait pas de mal." => "Je peux manger du verre, ca ne me fait pas de mal.",
24
+ # Romanian
25
+ "Pot să mănânc sticlă și ea nu mă rănește." => "Pot sa mananc sticla si ea nu ma raneste.",
26
+ # Icelandic
27
+ "Ég get etið gler án þess að meiða mig." => "Eg get etid gler an thess ad meida mig.",
28
+ # Albanian
29
+ "Unë mund të ha qelq dhe nuk më gjen gjë." => "Une mund te ha qelq dhe nuk me gjen gje.",
30
+ # Polish
31
+ "Mogę jeść szkło i mi nie szkodzi." => "Moge jesc szklo i mi nie szkodzi.",
32
+ # Russian
33
+ "Я могу есть стекло, оно мне не вредит." => "Ia moghu iest' stieklo, ono mnie nie vriedit.",
34
+ # Bulgarian
35
+ "Мога да ям стъкло, то не ми вреди." => "Mogha da iam stklo, to nie mi vriedi.",
36
+ # Anglo-Saxon
37
+ "ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬" => "ic.mag.glas.eotacn.ond.hit.ne.heacrmiacth.me:",
38
+ # Classical Greek
39
+ "ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει" => "ualon phagein dunamai; touto ou me blaptei",
40
+ # Hindi
41
+ "मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती" => "maiN kaaNc khaa sktaa huuN aur mujhe usse koii cott nhiiN phuNctii",
42
+ # Thai
43
+ "ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ" => "chankinkracchkaid aetmanaimthamaihchanecchb",
44
+ # Chinese
45
+ "我能吞下玻璃而不伤身体。" => "Wo Neng Tun Xia Bo Li Er Bu Shang Shen Ti . ",
46
+ # Japanese
47
+ "私はガラスを食べられます。それは私を傷つけません。" => "Si hagarasuwoShi beraremasu. sorehaSi woShang tukemasen. ",
48
+ "من می توانم بدونِ احساس درد شيشه بخورم" => # Persian
49
+ "mn my twnm bdwni Hss drd shyshh bkhwrm",
50
+ "أنا قادر على أكل الزجاج و هذا لا يؤلمن" => # Arabic
51
+ "'n qdr `l~ 'kl lzjj w hdh l yw'lmn",
52
+ "אני יכול לאכול זכוכית וזה לא מזיק לי" => # Hebrew
53
+ "ny ykvl lkvl zkvkyt vzh l mzyq ly",
54
+ }
55
+
56
+ def test_should_raise_error_with_invalid_utf8 # each byte is embedded as "a<byte>a" and decode must raise ArgumentError
57
+ [
58
+ "\x80", # Continuation byte, low (cp1252)
59
+ "\x94", # Continuation byte, mid (cp1252)
60
+ "\x9F", # Continuation byte, high (cp1252)
61
+ "\xC0", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
62
+ "\xC1", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
63
+ "\xC2", # Start of 2-byte sequence, low
64
+ "\xC8", # Start of 2-byte sequence, mid
65
+ "\xDF", # Start of 2-byte sequence, high
66
+ "\xE0", # Start of 3-byte sequence, low
67
+ "\xE8", # Start of 3-byte sequence, mid
68
+ "\xEF", # Start of 3-byte sequence, high
69
+ "\xF0", # Start of 4-byte sequence
70
+ "\xF1", # Start of 4-byte sequence (no continuation bytes follow)
71
+ "\xFF", # Restricted byte
72
+ ].each do |byte|
73
+ assert_raise ArgumentError, "#{byte.inspect} did not raise error" do
74
+ Unidecoder.decode("a#{byte}a")
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ def test_unidecoder_decode
81
+ DONT_CONVERT.each do |ascii|
82
+ assert_equal ascii, Unidecoder.decode(ascii)
83
+ end
84
+ CONVERT_PAIRS.each do |unicode, ascii|
85
+ assert_equal ascii, Unidecoder.decode(unicode)
86
+ end
87
+ end
88
+
89
+ def test_unidecoder_encode
90
+ {
91
+ # Hex codepoint string => expected character
92
+ "0041" => "A",
93
+ "00e6" => "æ",
94
+ "042f" => "Я"
95
+ }.each do |codepoint, unicode|
96
+ assert_equal unicode, Unidecoder.encode(codepoint)
97
+ end
98
+ end
99
+
100
+ def test_unidecoder_in_yaml_file
101
+ { # character => expected "<data file> (line N)" string
102
+ "A" => "x00.yml (line 67)",
103
+ "π" => "x03.yml (line 194)",
104
+ "Я" => "x04.yml (line 49)"
105
+ }.each do |character, output|
106
+ assert_equal output, Unidecoder.in_yaml_file(character)
107
+ end
108
+ end
109
+
110
+ def test_override
111
+ assert_equal "Juergen", Unidecoder.decode("Jürgen", "ü" => "ue") # second argument supplies per-call replacements
112
+ end
113
+
114
+ end
metadata ADDED
@@ -0,0 +1,255 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unidecoder
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 1
8
+ - 0
9
+ version: 1.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Russell Norris
13
+ - Norman Clarke
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-06-23 00:00:00 -03:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: A port of Perl's Unidecoder to Ruby. Transliterates Unicode strings to an ASCII approximation.
23
+ email:
24
+ - rsl@luckysneaks.com
25
+ - norman@njclarke.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - lib/unidecoder/version.rb
34
+ - lib/unidecoder.rb
35
+ - Changelog.md
36
+ - README.md
37
+ - Rakefile
38
+ - test/unicode_point_suite/basic_latin_test.rb
39
+ - test/unicode_point_suite/codepoint_test_helper.rb
40
+ - test/unidecoder_test.rb
41
+ - lib/unidecoder/data/x00.yml
42
+ - lib/unidecoder/data/x01.yml
43
+ - lib/unidecoder/data/x02.yml
44
+ - lib/unidecoder/data/x03.yml
45
+ - lib/unidecoder/data/x04.yml
46
+ - lib/unidecoder/data/x05.yml
47
+ - lib/unidecoder/data/x06.yml
48
+ - lib/unidecoder/data/x07.yml
49
+ - lib/unidecoder/data/x09.yml
50
+ - lib/unidecoder/data/x0a.yml
51
+ - lib/unidecoder/data/x0b.yml
52
+ - lib/unidecoder/data/x0c.yml
53
+ - lib/unidecoder/data/x0d.yml
54
+ - lib/unidecoder/data/x0e.yml
55
+ - lib/unidecoder/data/x0f.yml
56
+ - lib/unidecoder/data/x10.yml
57
+ - lib/unidecoder/data/x11.yml
58
+ - lib/unidecoder/data/x12.yml
59
+ - lib/unidecoder/data/x13.yml
60
+ - lib/unidecoder/data/x14.yml
61
+ - lib/unidecoder/data/x15.yml
62
+ - lib/unidecoder/data/x16.yml
63
+ - lib/unidecoder/data/x17.yml
64
+ - lib/unidecoder/data/x18.yml
65
+ - lib/unidecoder/data/x1e.yml
66
+ - lib/unidecoder/data/x1f.yml
67
+ - lib/unidecoder/data/x20.yml
68
+ - lib/unidecoder/data/x21.yml
69
+ - lib/unidecoder/data/x22.yml
70
+ - lib/unidecoder/data/x23.yml
71
+ - lib/unidecoder/data/x24.yml
72
+ - lib/unidecoder/data/x25.yml
73
+ - lib/unidecoder/data/x26.yml
74
+ - lib/unidecoder/data/x27.yml
75
+ - lib/unidecoder/data/x28.yml
76
+ - lib/unidecoder/data/x2e.yml
77
+ - lib/unidecoder/data/x2f.yml
78
+ - lib/unidecoder/data/x30.yml
79
+ - lib/unidecoder/data/x31.yml
80
+ - lib/unidecoder/data/x32.yml
81
+ - lib/unidecoder/data/x33.yml
82
+ - lib/unidecoder/data/x4d.yml
83
+ - lib/unidecoder/data/x4e.yml
84
+ - lib/unidecoder/data/x4f.yml
85
+ - lib/unidecoder/data/x50.yml
86
+ - lib/unidecoder/data/x51.yml
87
+ - lib/unidecoder/data/x52.yml
88
+ - lib/unidecoder/data/x53.yml
89
+ - lib/unidecoder/data/x54.yml
90
+ - lib/unidecoder/data/x55.yml
91
+ - lib/unidecoder/data/x56.yml
92
+ - lib/unidecoder/data/x57.yml
93
+ - lib/unidecoder/data/x58.yml
94
+ - lib/unidecoder/data/x59.yml
95
+ - lib/unidecoder/data/x5a.yml
96
+ - lib/unidecoder/data/x5b.yml
97
+ - lib/unidecoder/data/x5c.yml
98
+ - lib/unidecoder/data/x5d.yml
99
+ - lib/unidecoder/data/x5e.yml
100
+ - lib/unidecoder/data/x5f.yml
101
+ - lib/unidecoder/data/x60.yml
102
+ - lib/unidecoder/data/x61.yml
103
+ - lib/unidecoder/data/x62.yml
104
+ - lib/unidecoder/data/x63.yml
105
+ - lib/unidecoder/data/x64.yml
106
+ - lib/unidecoder/data/x65.yml
107
+ - lib/unidecoder/data/x66.yml
108
+ - lib/unidecoder/data/x67.yml
109
+ - lib/unidecoder/data/x68.yml
110
+ - lib/unidecoder/data/x69.yml
111
+ - lib/unidecoder/data/x6a.yml
112
+ - lib/unidecoder/data/x6b.yml
113
+ - lib/unidecoder/data/x6c.yml
114
+ - lib/unidecoder/data/x6d.yml
115
+ - lib/unidecoder/data/x6e.yml
116
+ - lib/unidecoder/data/x6f.yml
117
+ - lib/unidecoder/data/x70.yml
118
+ - lib/unidecoder/data/x71.yml
119
+ - lib/unidecoder/data/x72.yml
120
+ - lib/unidecoder/data/x73.yml
121
+ - lib/unidecoder/data/x74.yml
122
+ - lib/unidecoder/data/x75.yml
123
+ - lib/unidecoder/data/x76.yml
124
+ - lib/unidecoder/data/x77.yml
125
+ - lib/unidecoder/data/x78.yml
126
+ - lib/unidecoder/data/x79.yml
127
+ - lib/unidecoder/data/x7a.yml
128
+ - lib/unidecoder/data/x7b.yml
129
+ - lib/unidecoder/data/x7c.yml
130
+ - lib/unidecoder/data/x7d.yml
131
+ - lib/unidecoder/data/x7e.yml
132
+ - lib/unidecoder/data/x7f.yml
133
+ - lib/unidecoder/data/x80.yml
134
+ - lib/unidecoder/data/x81.yml
135
+ - lib/unidecoder/data/x82.yml
136
+ - lib/unidecoder/data/x83.yml
137
+ - lib/unidecoder/data/x84.yml
138
+ - lib/unidecoder/data/x85.yml
139
+ - lib/unidecoder/data/x86.yml
140
+ - lib/unidecoder/data/x87.yml
141
+ - lib/unidecoder/data/x88.yml
142
+ - lib/unidecoder/data/x89.yml
143
+ - lib/unidecoder/data/x8a.yml
144
+ - lib/unidecoder/data/x8b.yml
145
+ - lib/unidecoder/data/x8c.yml
146
+ - lib/unidecoder/data/x8d.yml
147
+ - lib/unidecoder/data/x8e.yml
148
+ - lib/unidecoder/data/x8f.yml
149
+ - lib/unidecoder/data/x90.yml
150
+ - lib/unidecoder/data/x91.yml
151
+ - lib/unidecoder/data/x92.yml
152
+ - lib/unidecoder/data/x93.yml
153
+ - lib/unidecoder/data/x94.yml
154
+ - lib/unidecoder/data/x95.yml
155
+ - lib/unidecoder/data/x96.yml
156
+ - lib/unidecoder/data/x97.yml
157
+ - lib/unidecoder/data/x98.yml
158
+ - lib/unidecoder/data/x99.yml
159
+ - lib/unidecoder/data/x9a.yml
160
+ - lib/unidecoder/data/x9b.yml
161
+ - lib/unidecoder/data/x9c.yml
162
+ - lib/unidecoder/data/x9d.yml
163
+ - lib/unidecoder/data/x9e.yml
164
+ - lib/unidecoder/data/x9f.yml
165
+ - lib/unidecoder/data/xa0.yml
166
+ - lib/unidecoder/data/xa1.yml
167
+ - lib/unidecoder/data/xa2.yml
168
+ - lib/unidecoder/data/xa3.yml
169
+ - lib/unidecoder/data/xa4.yml
170
+ - lib/unidecoder/data/xac.yml
171
+ - lib/unidecoder/data/xad.yml
172
+ - lib/unidecoder/data/xae.yml
173
+ - lib/unidecoder/data/xaf.yml
174
+ - lib/unidecoder/data/xb0.yml
175
+ - lib/unidecoder/data/xb1.yml
176
+ - lib/unidecoder/data/xb2.yml
177
+ - lib/unidecoder/data/xb3.yml
178
+ - lib/unidecoder/data/xb4.yml
179
+ - lib/unidecoder/data/xb5.yml
180
+ - lib/unidecoder/data/xb6.yml
181
+ - lib/unidecoder/data/xb7.yml
182
+ - lib/unidecoder/data/xb8.yml
183
+ - lib/unidecoder/data/xb9.yml
184
+ - lib/unidecoder/data/xba.yml
185
+ - lib/unidecoder/data/xbb.yml
186
+ - lib/unidecoder/data/xbc.yml
187
+ - lib/unidecoder/data/xbd.yml
188
+ - lib/unidecoder/data/xbe.yml
189
+ - lib/unidecoder/data/xbf.yml
190
+ - lib/unidecoder/data/xc0.yml
191
+ - lib/unidecoder/data/xc1.yml
192
+ - lib/unidecoder/data/xc2.yml
193
+ - lib/unidecoder/data/xc3.yml
194
+ - lib/unidecoder/data/xc4.yml
195
+ - lib/unidecoder/data/xc5.yml
196
+ - lib/unidecoder/data/xc6.yml
197
+ - lib/unidecoder/data/xc7.yml
198
+ - lib/unidecoder/data/xc8.yml
199
+ - lib/unidecoder/data/xc9.yml
200
+ - lib/unidecoder/data/xca.yml
201
+ - lib/unidecoder/data/xcb.yml
202
+ - lib/unidecoder/data/xcc.yml
203
+ - lib/unidecoder/data/xcd.yml
204
+ - lib/unidecoder/data/xce.yml
205
+ - lib/unidecoder/data/xcf.yml
206
+ - lib/unidecoder/data/xd0.yml
207
+ - lib/unidecoder/data/xd1.yml
208
+ - lib/unidecoder/data/xd2.yml
209
+ - lib/unidecoder/data/xd3.yml
210
+ - lib/unidecoder/data/xd4.yml
211
+ - lib/unidecoder/data/xd5.yml
212
+ - lib/unidecoder/data/xd6.yml
213
+ - lib/unidecoder/data/xd7.yml
214
+ - lib/unidecoder/data/xf9.yml
215
+ - lib/unidecoder/data/xfa.yml
216
+ - lib/unidecoder/data/xfb.yml
217
+ - lib/unidecoder/data/xfc.yml
218
+ - lib/unidecoder/data/xfd.yml
219
+ - lib/unidecoder/data/xfe.yml
220
+ - lib/unidecoder/data/xff.yml
221
+ has_rdoc: true
222
+ homepage: http://github.com/norman/unidecoder
223
+ licenses: []
224
+
225
+ post_install_message:
226
+ rdoc_options: []
227
+
228
+ require_paths:
229
+ - lib
230
+ required_ruby_version: !ruby/object:Gem::Requirement
231
+ none: false
232
+ requirements:
233
+ - - ">="
234
+ - !ruby/object:Gem::Version
235
+ segments:
236
+ - 0
237
+ version: "0"
238
+ required_rubygems_version: !ruby/object:Gem::Requirement
239
+ none: false
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ segments:
244
+ - 0
245
+ version: "0"
246
+ requirements: []
247
+
248
+ rubyforge_project: "[none]"
249
+ rubygems_version: 1.3.7
250
+ signing_key:
251
+ specification_version: 3
252
+ summary: Transliterates Unicode strings to an ASCII approximation.
253
+ test_files:
254
+ - test/unicode_point_suite/basic_latin_test.rb
255
+ - test/unidecoder_test.rb