yosina 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/Gemfile +6 -0
  4. data/README.ja.md +229 -0
  5. data/README.md +229 -0
  6. data/Rakefile +30 -0
  7. data/codegen/dataset.rb +215 -0
  8. data/codegen/emitters/circled_or_squared_transliterator_data.rb +30 -0
  9. data/codegen/emitters/combined_transliterator_data.rb +28 -0
  10. data/codegen/emitters/hyphens_transliterator_data.rb +48 -0
  11. data/codegen/emitters/ivs_svs_base_transliterator_data.rb +121 -0
  12. data/codegen/emitters/simple_transliterator.rb +76 -0
  13. data/codegen/emitters/utils.rb +45 -0
  14. data/codegen/emitters.rb +8 -0
  15. data/codegen/main.rb +109 -0
  16. data/lib/yosina/char.rb +65 -0
  17. data/lib/yosina/chars.rb +152 -0
  18. data/lib/yosina/recipes.rb +359 -0
  19. data/lib/yosina/transliterator.rb +49 -0
  20. data/lib/yosina/transliterators/circled_or_squared.rb +67 -0
  21. data/lib/yosina/transliterators/circled_or_squared_data.rb +469 -0
  22. data/lib/yosina/transliterators/combined.rb +52 -0
  23. data/lib/yosina/transliterators/combined_data.rb +495 -0
  24. data/lib/yosina/transliterators/hira_kata.rb +106 -0
  25. data/lib/yosina/transliterators/hira_kata_composition.rb +103 -0
  26. data/lib/yosina/transliterators/hira_kata_table.rb +116 -0
  27. data/lib/yosina/transliterators/hyphens.rb +83 -0
  28. data/lib/yosina/transliterators/hyphens_data.rb +60 -0
  29. data/lib/yosina/transliterators/ideographic_annotations.rb +73 -0
  30. data/lib/yosina/transliterators/ivs_svs_base.rb +169 -0
  31. data/lib/yosina/transliterators/ivs_svs_base_data.rb +0 -0
  32. data/lib/yosina/transliterators/japanese_iteration_marks.rb +261 -0
  33. data/lib/yosina/transliterators/jisx0201_and_alike.rb +451 -0
  34. data/lib/yosina/transliterators/kanji_old_new.rb +1137 -0
  35. data/lib/yosina/transliterators/mathematical_alphanumerics.rb +799 -0
  36. data/lib/yosina/transliterators/prolonged_sound_marks.rb +206 -0
  37. data/lib/yosina/transliterators/radicals.rb +361 -0
  38. data/lib/yosina/transliterators/spaces.rb +79 -0
  39. data/lib/yosina/transliterators.rb +57 -0
  40. data/lib/yosina/version.rb +5 -0
  41. data/lib/yosina.rb +62 -0
  42. data/yosina.gemspec +41 -0
  43. metadata +159 -0
@@ -0,0 +1,495 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Yosina
4
+ module Transliterators
5
+ # Replace each combined character with its corresponding individual characters
6
+ module Combined
7
+ # Generated mapping data from combined-chars.json
8
+ COMBINED_MAPPINGS = {
9
+ "\u{2116}" => ['N', 'o'],
10
+ "\u{2120}" => ['S', 'M'],
11
+ "\u{2121}" => ['T', 'E', 'L'],
12
+ "\u{2122}" => ['T', 'M'],
13
+ "\u{213b}" => ['F', 'A', 'X'],
14
+ "\u{2150}" => ['1', '/', '7'],
15
+ "\u{2151}" => ['1', '/', '9'],
16
+ "\u{2152}" => ['1', '/', '1', '0'],
17
+ "\u{2153}" => ['1', '/', '3'],
18
+ "\u{2154}" => ['2', '/', '3'],
19
+ "\u{2155}" => ['1', '/', '5'],
20
+ "\u{2156}" => ['2', '/', '5'],
21
+ "\u{2157}" => ['3', '/', '5'],
22
+ "\u{2158}" => ['4', '/', '5'],
23
+ "\u{2159}" => ['1', '/', '6'],
24
+ "\u{215a}" => ['5', '/', '6'],
25
+ "\u{215b}" => ['1', '/', '8'],
26
+ "\u{215c}" => ['3', '/', '8'],
27
+ "\u{215d}" => ['5', '/', '8'],
28
+ "\u{215e}" => ['7', '/', '8'],
29
+ "\u{215f}" => ['1', '/'],
30
+ "\u{2189}" => ['0', '/', '3'],
31
+ "\u{2400}" => ['N', 'U', 'L'],
32
+ "\u{2401}" => ['S', 'O', 'H'],
33
+ "\u{2402}" => ['S', 'T', 'X'],
34
+ "\u{2403}" => ['E', 'T', 'X'],
35
+ "\u{2404}" => ['E', 'O', 'T'],
36
+ "\u{2405}" => ['E', 'N', 'Q'],
37
+ "\u{2406}" => ['A', 'C', 'K'],
38
+ "\u{2407}" => ['B', 'E', 'L'],
39
+ "\u{2408}" => ['B', 'S'],
40
+ "\u{2409}" => ['H', 'T'],
41
+ "\u{240a}" => ['L', 'F'],
42
+ "\u{240b}" => ['V', 'T'],
43
+ "\u{240c}" => ['F', 'F'],
44
+ "\u{240d}" => ['C', 'R'],
45
+ "\u{240e}" => ['S', 'O'],
46
+ "\u{240f}" => ['S', 'I'],
47
+ "\u{2410}" => ['D', 'L', 'E'],
48
+ "\u{2411}" => ['D', 'C', '1'],
49
+ "\u{2412}" => ['D', 'C', '2'],
50
+ "\u{2413}" => ['D', 'C', '3'],
51
+ "\u{2414}" => ['D', 'C', '4'],
52
+ "\u{2415}" => ['N', 'A', 'K'],
53
+ "\u{2416}" => ['S', 'Y', 'N'],
54
+ "\u{2417}" => ['E', 'T', 'B'],
55
+ "\u{2418}" => ['C', 'A', 'N'],
56
+ "\u{2419}" => ['E', 'M'],
57
+ "\u{241a}" => ['S', 'U', 'B'],
58
+ "\u{241b}" => ['E', 'S', 'C'],
59
+ "\u{241c}" => ['F', 'S'],
60
+ "\u{241d}" => ['G', 'S'],
61
+ "\u{241e}" => ['R', 'S'],
62
+ "\u{241f}" => ['U', 'S'],
63
+ "\u{2420}" => ['S', 'P'],
64
+ "\u{2421}" => ['D', 'E', 'L'],
65
+ "\u{2474}" => ['(', '1', ')'],
66
+ "\u{2475}" => ['(', '2', ')'],
67
+ "\u{2476}" => ['(', '3', ')'],
68
+ "\u{2477}" => ['(', '4', ')'],
69
+ "\u{2478}" => ['(', '5', ')'],
70
+ "\u{2479}" => ['(', '6', ')'],
71
+ "\u{247a}" => ['(', '7', ')'],
72
+ "\u{247b}" => ['(', '8', ')'],
73
+ "\u{247c}" => ['(', '9', ')'],
74
+ "\u{247d}" => ['(', '1', '0', ')'],
75
+ "\u{247e}" => ['(', '1', '1', ')'],
76
+ "\u{247f}" => ['(', '1', '2', ')'],
77
+ "\u{2480}" => ['(', '1', '3', ')'],
78
+ "\u{2481}" => ['(', '1', '4', ')'],
79
+ "\u{2482}" => ['(', '1', '5', ')'],
80
+ "\u{2483}" => ['(', '1', '6', ')'],
81
+ "\u{2484}" => ['(', '1', '7', ')'],
82
+ "\u{2485}" => ['(', '1', '8', ')'],
83
+ "\u{2486}" => ['(', '1', '9', ')'],
84
+ "\u{2487}" => ['(', '2', '0', ')'],
85
+ "\u{2488}" => ['1', '.'],
86
+ "\u{2489}" => ['2', '.'],
87
+ "\u{248a}" => ['3', '.'],
88
+ "\u{248b}" => ['4', '.'],
89
+ "\u{248c}" => ['5', '.'],
90
+ "\u{248d}" => ['6', '.'],
91
+ "\u{248e}" => ['7', '.'],
92
+ "\u{248f}" => ['8', '.'],
93
+ "\u{2490}" => ['9', '.'],
94
+ "\u{2491}" => ['1', '0', '.'],
95
+ "\u{2492}" => ['1', '1', '.'],
96
+ "\u{2493}" => ['1', '2', '.'],
97
+ "\u{2494}" => ['1', '3', '.'],
98
+ "\u{2495}" => ['1', '4', '.'],
99
+ "\u{2496}" => ['1', '5', '.'],
100
+ "\u{2497}" => ['1', '6', '.'],
101
+ "\u{2498}" => ['1', '7', '.'],
102
+ "\u{2499}" => ['1', '8', '.'],
103
+ "\u{249a}" => ['1', '9', '.'],
104
+ "\u{249b}" => ['2', '0', '.'],
105
+ "\u{249c}" => ['(', 'a', ')'],
106
+ "\u{249d}" => ['(', 'b', ')'],
107
+ "\u{249e}" => ['(', 'c', ')'],
108
+ "\u{249f}" => ['(', 'd', ')'],
109
+ "\u{24a0}" => ['(', 'e', ')'],
110
+ "\u{24a1}" => ['(', 'f', ')'],
111
+ "\u{24a2}" => ['(', 'g', ')'],
112
+ "\u{24a3}" => ['(', 'h', ')'],
113
+ "\u{24a4}" => ['(', 'i', ')'],
114
+ "\u{24a5}" => ['(', 'j', ')'],
115
+ "\u{24a6}" => ['(', 'k', ')'],
116
+ "\u{24a7}" => ['(', 'l', ')'],
117
+ "\u{24a8}" => ['(', 'm', ')'],
118
+ "\u{24a9}" => ['(', 'n', ')'],
119
+ "\u{24aa}" => ['(', 'o', ')'],
120
+ "\u{24ab}" => ['(', 'p', ')'],
121
+ "\u{24ac}" => ['(', 'q', ')'],
122
+ "\u{24ad}" => ['(', 'r', ')'],
123
+ "\u{24ae}" => ['(', 's', ')'],
124
+ "\u{24af}" => ['(', 't', ')'],
125
+ "\u{24b0}" => ['(', 'u', ')'],
126
+ "\u{24b1}" => ['(', 'v', ')'],
127
+ "\u{24b2}" => ['(', 'w', ')'],
128
+ "\u{24b3}" => ['(', 'x', ')'],
129
+ "\u{24b4}" => ['(', 'y', ')'],
130
+ "\u{24b5}" => ['(', 'z', ')'],
131
+ "\u{3220}" => ['(', "\u{4e00}", ')'],
132
+ "\u{3221}" => ['(', "\u{4e8c}", ')'],
133
+ "\u{3222}" => ['(', "\u{4e09}", ')'],
134
+ "\u{3223}" => ['(', "\u{56db}", ')'],
135
+ "\u{3224}" => ['(', "\u{4e94}", ')'],
136
+ "\u{3225}" => ['(', "\u{516d}", ')'],
137
+ "\u{3226}" => ['(', "\u{4e03}", ')'],
138
+ "\u{3227}" => ['(', "\u{516b}", ')'],
139
+ "\u{3228}" => ['(', "\u{4e5d}", ')'],
140
+ "\u{3229}" => ['(', "\u{5341}", ')'],
141
+ "\u{322a}" => ['(', "\u{6708}", ')'],
142
+ "\u{322b}" => ['(', "\u{706b}", ')'],
143
+ "\u{322c}" => ['(', "\u{6c34}", ')'],
144
+ "\u{322d}" => ['(', "\u{6728}", ')'],
145
+ "\u{322e}" => ['(', "\u{91d1}", ')'],
146
+ "\u{322f}" => ['(', "\u{571f}", ')'],
147
+ "\u{3230}" => ['(', "\u{65e5}", ')'],
148
+ "\u{3231}" => ['(', "\u{682a}", ')'],
149
+ "\u{3232}" => ['(', "\u{6709}", ')'],
150
+ "\u{3233}" => ['(', "\u{793e}", ')'],
151
+ "\u{3234}" => ['(', "\u{540d}", ')'],
152
+ "\u{3235}" => ['(', "\u{7279}", ')'],
153
+ "\u{3236}" => ['(', "\u{8ca1}", ')'],
154
+ "\u{3237}" => ['(', "\u{795d}", ')'],
155
+ "\u{3238}" => ['(', "\u{52b4}", ')'],
156
+ "\u{3239}" => ['(', "\u{4ee3}", ')'],
157
+ "\u{323a}" => ['(', "\u{547c}", ')'],
158
+ "\u{323b}" => ['(', "\u{5b66}", ')'],
159
+ "\u{323c}" => ['(', "\u{76e3}", ')'],
160
+ "\u{323d}" => ['(', "\u{4f01}", ')'],
161
+ "\u{323e}" => ['(', "\u{8cc7}", ')'],
162
+ "\u{323f}" => ['(', "\u{5354}", ')'],
163
+ "\u{3240}" => ['(', "\u{796d}", ')'],
164
+ "\u{3241}" => ['(', "\u{4f11}", ')'],
165
+ "\u{3242}" => ['(', "\u{81ea}", ')'],
166
+ "\u{3243}" => ['(', "\u{81f3}", ')'],
167
+ "\u{3250}" => ['P', 'T', 'E'],
168
+ "\u{32c0}" => ['1', "\u{6708}"],
169
+ "\u{32c1}" => ['2', "\u{6708}"],
170
+ "\u{32c2}" => ['3', "\u{6708}"],
171
+ "\u{32c3}" => ['4', "\u{6708}"],
172
+ "\u{32c4}" => ['5', "\u{6708}"],
173
+ "\u{32c5}" => ['6', "\u{6708}"],
174
+ "\u{32c6}" => ['7', "\u{6708}"],
175
+ "\u{32c7}" => ['8', "\u{6708}"],
176
+ "\u{32c8}" => ['9', "\u{6708}"],
177
+ "\u{32c9}" => ['1', '0', "\u{6708}"],
178
+ "\u{32ca}" => ['1', '1', "\u{6708}"],
179
+ "\u{32cb}" => ['1', '2', "\u{6708}"],
180
+ "\u{32cc}" => ['H', 'g'],
181
+ "\u{32cd}" => ['e', 'r', 'g'],
182
+ "\u{32ce}" => ['e', 'V'],
183
+ "\u{32cf}" => ['L', 'T', 'D'],
184
+ "\u{32ff}" => ["\u{4ee4}", "\u{548c}"],
185
+ "\u{3300}" => ["\u{30a2}", "\u{30d1}", "\u{30fc}", "\u{30c8}"],
186
+ "\u{3301}" => ["\u{30a2}", "\u{30eb}", "\u{30d5}", "\u{30a1}"],
187
+ "\u{3302}" => ["\u{30a2}", "\u{30f3}", "\u{30da}", "\u{30a2}"],
188
+ "\u{3303}" => ["\u{30a2}", "\u{30fc}", "\u{30eb}"],
189
+ "\u{3304}" => ["\u{30a4}", "\u{30cb}", "\u{30f3}", "\u{30b0}"],
190
+ "\u{3305}" => ["\u{30a4}", "\u{30f3}", "\u{30c1}"],
191
+ "\u{3306}" => ["\u{30a6}", "\u{30a9}", "\u{30f3}"],
192
+ "\u{3307}" => ["\u{30a8}", "\u{30b9}", "\u{30af}", "\u{30fc}", "\u{30c9}"],
193
+ "\u{3308}" => ["\u{30a8}", "\u{30fc}", "\u{30ab}", "\u{30fc}"],
194
+ "\u{3309}" => ["\u{30aa}", "\u{30f3}", "\u{30b9}"],
195
+ "\u{330a}" => ["\u{30aa}", "\u{30fc}", "\u{30e0}"],
196
+ "\u{330b}" => ["\u{30ab}", "\u{30a4}", "\u{30ea}"],
197
+ "\u{330c}" => ["\u{30ab}", "\u{30e9}", "\u{30c3}", "\u{30c8}"],
198
+ "\u{330d}" => ["\u{30ab}", "\u{30ed}", "\u{30ea}", "\u{30fc}"],
199
+ "\u{330e}" => ["\u{30ac}", "\u{30ed}", "\u{30f3}"],
200
+ "\u{330f}" => ["\u{30ac}", "\u{30f3}", "\u{30de}"],
201
+ "\u{3310}" => ["\u{30ae}", "\u{30ac}"],
202
+ "\u{3311}" => ["\u{30ae}", "\u{30cb}", "\u{30fc}"],
203
+ "\u{3312}" => ["\u{30ad}", "\u{30e5}", "\u{30ea}", "\u{30fc}"],
204
+ "\u{3313}" => ["\u{30ae}", "\u{30eb}", "\u{30c0}", "\u{30fc}"],
205
+ "\u{3314}" => ["\u{30ad}", "\u{30ed}"],
206
+ "\u{3315}" => ["\u{30ad}", "\u{30ed}", "\u{30b0}", "\u{30e9}", "\u{30e0}"],
207
+ "\u{3316}" => ["\u{30ad}", "\u{30ed}", "\u{30e1}", "\u{30fc}", "\u{30c8}", "\u{30eb}"],
208
+ "\u{3317}" => ["\u{30ad}", "\u{30ed}", "\u{30ef}", "\u{30c3}", "\u{30c8}"],
209
+ "\u{3318}" => ["\u{30b0}", "\u{30e9}", "\u{30e0}"],
210
+ "\u{3319}" => ["\u{30b0}", "\u{30e9}", "\u{30e0}", "\u{30c8}", "\u{30f3}"],
211
+ "\u{331a}" => ["\u{30af}", "\u{30eb}", "\u{30bc}", "\u{30a4}", "\u{30ed}"],
212
+ "\u{331b}" => ["\u{30af}", "\u{30ed}", "\u{30fc}", "\u{30cd}"],
213
+ "\u{331c}" => ["\u{30b1}", "\u{30fc}", "\u{30b9}"],
214
+ "\u{331d}" => ["\u{30b3}", "\u{30eb}", "\u{30ca}"],
215
+ "\u{331e}" => ["\u{30b3}", "\u{30fc}", "\u{30dd}"],
216
+ "\u{331f}" => ["\u{30b5}", "\u{30a4}", "\u{30af}", "\u{30eb}"],
217
+ "\u{3320}" => ["\u{30b5}", "\u{30f3}", "\u{30c1}", "\u{30fc}", "\u{30e0}"],
218
+ "\u{3321}" => ["\u{30b7}", "\u{30ea}", "\u{30f3}", "\u{30b0}"],
219
+ "\u{3322}" => ["\u{30bb}", "\u{30f3}", "\u{30c1}"],
220
+ "\u{3323}" => ["\u{30bb}", "\u{30f3}", "\u{30c8}"],
221
+ "\u{3324}" => ["\u{30c0}", "\u{30fc}", "\u{30b9}"],
222
+ "\u{3325}" => ["\u{30c7}", "\u{30b7}"],
223
+ "\u{3326}" => ["\u{30c9}", "\u{30eb}"],
224
+ "\u{3327}" => ["\u{30c8}", "\u{30f3}"],
225
+ "\u{3328}" => ["\u{30ca}", "\u{30ce}"],
226
+ "\u{3329}" => ["\u{30ce}", "\u{30c3}", "\u{30c8}"],
227
+ "\u{332a}" => ["\u{30cf}", "\u{30a4}", "\u{30c4}"],
228
+ "\u{332b}" => ["\u{30d1}", "\u{30fc}", "\u{30bb}", "\u{30f3}", "\u{30c8}"],
229
+ "\u{332c}" => ["\u{30d1}", "\u{30fc}", "\u{30c4}"],
230
+ "\u{332d}" => ["\u{30d0}", "\u{30fc}", "\u{30ec}", "\u{30eb}"],
231
+ "\u{332e}" => ["\u{30d4}", "\u{30a2}", "\u{30b9}", "\u{30c8}", "\u{30eb}"],
232
+ "\u{332f}" => ["\u{30d4}", "\u{30af}", "\u{30eb}"],
233
+ "\u{3330}" => ["\u{30d4}", "\u{30b3}"],
234
+ "\u{3331}" => ["\u{30d3}", "\u{30eb}"],
235
+ "\u{3332}" => ["\u{30d5}", "\u{30a1}", "\u{30e9}", "\u{30c3}", "\u{30c9}"],
236
+ "\u{3333}" => ["\u{30d5}", "\u{30a3}", "\u{30fc}", "\u{30c8}"],
237
+ "\u{3334}" => ["\u{30d6}", "\u{30c3}", "\u{30b7}", "\u{30a7}", "\u{30eb}"],
238
+ "\u{3335}" => ["\u{30d5}", "\u{30e9}", "\u{30f3}"],
239
+ "\u{3336}" => ["\u{30d8}", "\u{30af}", "\u{30bf}", "\u{30fc}", "\u{30eb}"],
240
+ "\u{3337}" => ["\u{30da}", "\u{30bd}"],
241
+ "\u{3338}" => ["\u{30da}", "\u{30cb}", "\u{30d2}"],
242
+ "\u{3339}" => ["\u{30d8}", "\u{30eb}", "\u{30c4}"],
243
+ "\u{333a}" => ["\u{30da}", "\u{30f3}", "\u{30b9}"],
244
+ "\u{333b}" => ["\u{30da}", "\u{30fc}", "\u{30b8}"],
245
+ "\u{333c}" => ["\u{30d9}", "\u{30fc}", "\u{30bf}"],
246
+ "\u{333d}" => ["\u{30dd}", "\u{30a4}", "\u{30f3}", "\u{30c8}"],
247
+ "\u{333e}" => ["\u{30dc}", "\u{30eb}", "\u{30c8}"],
248
+ "\u{333f}" => ["\u{30db}", "\u{30f3}"],
249
+ "\u{3340}" => ["\u{30dd}", "\u{30f3}", "\u{30c9}"],
250
+ "\u{3341}" => ["\u{30db}", "\u{30fc}", "\u{30eb}"],
251
+ "\u{3342}" => ["\u{30db}", "\u{30fc}", "\u{30f3}"],
252
+ "\u{3343}" => ["\u{30de}", "\u{30a4}", "\u{30af}", "\u{30ed}"],
253
+ "\u{3344}" => ["\u{30de}", "\u{30a4}", "\u{30eb}"],
254
+ "\u{3345}" => ["\u{30de}", "\u{30c3}", "\u{30cf}"],
255
+ "\u{3346}" => ["\u{30de}", "\u{30eb}", "\u{30af}"],
256
+ "\u{3347}" => ["\u{30de}", "\u{30f3}", "\u{30b7}", "\u{30e7}", "\u{30f3}"],
257
+ "\u{3348}" => ["\u{30df}", "\u{30af}", "\u{30ed}", "\u{30f3}"],
258
+ "\u{3349}" => ["\u{30df}", "\u{30ea}"],
259
+ "\u{334a}" => ["\u{30df}", "\u{30ea}", "\u{30d0}", "\u{30fc}", "\u{30eb}"],
260
+ "\u{334b}" => ["\u{30e1}", "\u{30ac}"],
261
+ "\u{334c}" => ["\u{30e1}", "\u{30ac}", "\u{30c8}", "\u{30f3}"],
262
+ "\u{334d}" => ["\u{30e1}", "\u{30fc}", "\u{30c8}", "\u{30eb}"],
263
+ "\u{334e}" => ["\u{30e4}", "\u{30fc}", "\u{30c9}"],
264
+ "\u{334f}" => ["\u{30e4}", "\u{30fc}", "\u{30eb}"],
265
+ "\u{3350}" => ["\u{30e6}", "\u{30a2}", "\u{30f3}"],
266
+ "\u{3351}" => ["\u{30ea}", "\u{30c3}", "\u{30c8}", "\u{30eb}"],
267
+ "\u{3352}" => ["\u{30ea}", "\u{30e9}"],
268
+ "\u{3353}" => ["\u{30eb}", "\u{30d4}", "\u{30fc}"],
269
+ "\u{3354}" => ["\u{30eb}", "\u{30fc}", "\u{30d6}", "\u{30eb}"],
270
+ "\u{3355}" => ["\u{30ec}", "\u{30e0}"],
271
+ "\u{3356}" => ["\u{30ec}", "\u{30f3}", "\u{30c8}", "\u{30b2}", "\u{30f3}"],
272
+ "\u{3357}" => ["\u{30ef}", "\u{30c3}", "\u{30c8}"],
273
+ "\u{3358}" => ['0', "\u{70b9}"],
274
+ "\u{3359}" => ['1', "\u{70b9}"],
275
+ "\u{335a}" => ['2', "\u{70b9}"],
276
+ "\u{335b}" => ['3', "\u{70b9}"],
277
+ "\u{335c}" => ['4', "\u{70b9}"],
278
+ "\u{335d}" => ['5', "\u{70b9}"],
279
+ "\u{335e}" => ['6', "\u{70b9}"],
280
+ "\u{335f}" => ['7', "\u{70b9}"],
281
+ "\u{3360}" => ['8', "\u{70b9}"],
282
+ "\u{3361}" => ['9', "\u{70b9}"],
283
+ "\u{3362}" => ['1', '0', "\u{70b9}"],
284
+ "\u{3363}" => ['1', '1', "\u{70b9}"],
285
+ "\u{3364}" => ['1', '2', "\u{70b9}"],
286
+ "\u{3365}" => ['1', '3', "\u{70b9}"],
287
+ "\u{3366}" => ['1', '4', "\u{70b9}"],
288
+ "\u{3367}" => ['1', '5', "\u{70b9}"],
289
+ "\u{3368}" => ['1', '6', "\u{70b9}"],
290
+ "\u{3369}" => ['1', '7', "\u{70b9}"],
291
+ "\u{336a}" => ['1', '8', "\u{70b9}"],
292
+ "\u{336b}" => ['1', '9', "\u{70b9}"],
293
+ "\u{336c}" => ['2', '0', "\u{70b9}"],
294
+ "\u{336d}" => ['2', '1', "\u{70b9}"],
295
+ "\u{336e}" => ['2', '2', "\u{70b9}"],
296
+ "\u{336f}" => ['2', '3', "\u{70b9}"],
297
+ "\u{3370}" => ['2', '4', "\u{70b9}"],
298
+ "\u{3371}" => ['h', 'P', 'a'],
299
+ "\u{3372}" => ['d', 'a'],
300
+ "\u{3373}" => ['A', 'U'],
301
+ "\u{3374}" => ['b', 'a', 'r'],
302
+ "\u{3375}" => ['o', 'V'],
303
+ "\u{3376}" => ['p', 'c'],
304
+ "\u{3377}" => ['d', 'm'],
305
+ "\u{3378}" => ['d', 'm', '2'],
306
+ "\u{3379}" => ['d', 'm', '3'],
307
+ "\u{337a}" => ['I', 'U'],
308
+ "\u{337b}" => ["\u{5e73}", "\u{6210}"],
309
+ "\u{337c}" => ["\u{662d}", "\u{548c}"],
310
+ "\u{337d}" => ["\u{5927}", "\u{6b63}"],
311
+ "\u{337e}" => ["\u{660e}", "\u{6cbb}"],
312
+ "\u{337f}" => ["\u{682a}", "\u{5f0f}", "\u{4f1a}", "\u{793e}"],
313
+ "\u{3380}" => ['p', 'A'],
314
+ "\u{3381}" => ['n', 'A'],
315
+ "\u{3382}" => ["\u{b5}", 'A'],
316
+ "\u{3383}" => ['m', 'A'],
317
+ "\u{3384}" => ['k', 'A'],
318
+ "\u{3385}" => ['K', 'B'],
319
+ "\u{3386}" => ['M', 'B'],
320
+ "\u{3387}" => ['G', 'B'],
321
+ "\u{3388}" => ['c', 'a', 'l'],
322
+ "\u{3389}" => ['k', 'c', 'a', 'l'],
323
+ "\u{338a}" => ['p', 'F'],
324
+ "\u{338b}" => ['n', 'F'],
325
+ "\u{338c}" => ["\u{b5}", 'F'],
326
+ "\u{338d}" => ["\u{b5}", 'g'],
327
+ "\u{338e}" => ['m', 'g'],
328
+ "\u{338f}" => ['k', 'g'],
329
+ "\u{3390}" => ['H', 'z'],
330
+ "\u{3391}" => ['k', 'H', 'z'],
331
+ "\u{3392}" => ['M', 'H', 'z'],
332
+ "\u{3393}" => ['G', 'H', 'z'],
333
+ "\u{3394}" => ['T', 'H', 'z'],
334
+ "\u{3395}" => ["\u{b5}", 'l'],
335
+ "\u{3396}" => ['m', 'l'],
336
+ "\u{3397}" => ['d', 'l'],
337
+ "\u{3398}" => ['k', 'l'],
338
+ "\u{3399}" => ['f', 'm'],
339
+ "\u{339a}" => ['n', 'm'],
340
+ "\u{339b}" => ["\u{b5}", 'm'],
341
+ "\u{339c}" => ['m', 'm'],
342
+ "\u{339d}" => ['c', 'm'],
343
+ "\u{339e}" => ['k', 'm'],
344
+ "\u{339f}" => ['m', 'm', '2'],
345
+ "\u{33a0}" => ['c', 'm', '2'],
346
+ "\u{33a1}" => ['m', '2'],
347
+ "\u{33a2}" => ['k', 'm', '2'],
348
+ "\u{33a3}" => ['m', 'm', '3'],
349
+ "\u{33a4}" => ['c', 'm', '3'],
350
+ "\u{33a5}" => ['m', '3'],
351
+ "\u{33a6}" => ['k', 'm', '3'],
352
+ "\u{33a7}" => ['m', '/', 's'],
353
+ "\u{33a8}" => ['m', '/', 's', '2'],
354
+ "\u{33a9}" => ['P', 'a'],
355
+ "\u{33aa}" => ['k', 'P', 'a'],
356
+ "\u{33ab}" => ['M', 'P', 'a'],
357
+ "\u{33ac}" => ['G', 'P', 'a'],
358
+ "\u{33ad}" => ['r', 'a', 'd'],
359
+ "\u{33ae}" => ['r', 'a', 'd', '/', 's'],
360
+ "\u{33af}" => ['r', 'a', 'd', '/', 's', '2'],
361
+ "\u{33b0}" => ['p', 's'],
362
+ "\u{33b1}" => ['n', 's'],
363
+ "\u{33b2}" => ["\u{b5}", 's'],
364
+ "\u{33b3}" => ['m', 's'],
365
+ "\u{33b4}" => ['p', 'V'],
366
+ "\u{33b5}" => ['n', 'V'],
367
+ "\u{33b6}" => ["\u{b5}", 'V'],
368
+ "\u{33b7}" => ['m', 'V'],
369
+ "\u{33b8}" => ['k', 'V'],
370
+ "\u{33b9}" => ['M', 'V'],
371
+ "\u{33ba}" => ['p', 'W'],
372
+ "\u{33bb}" => ['n', 'W'],
373
+ "\u{33bc}" => ["\u{b5}", 'W'],
374
+ "\u{33bd}" => ['m', 'W'],
375
+ "\u{33be}" => ['k', 'W'],
376
+ "\u{33bf}" => ['M', 'W'],
377
+ "\u{33c0}" => ['k', "\u{3a9}"],
378
+ "\u{33c1}" => ['M', "\u{3a9}"],
379
+ "\u{33c2}" => ['a', '.', 'm', '.'],
380
+ "\u{33c3}" => ['B', 'q'],
381
+ "\u{33c4}" => ['c', 'c'],
382
+ "\u{33c5}" => ['c', 'd'],
383
+ "\u{33c6}" => ['C', '/', 'k', 'g'],
384
+ "\u{33c7}" => ['C', 'o', '.'],
385
+ "\u{33c8}" => ['d', 'B'],
386
+ "\u{33c9}" => ['G', 'y'],
387
+ "\u{33ca}" => ['h', 'a'],
388
+ "\u{33cb}" => ['H', 'P'],
389
+ "\u{33cc}" => ['i', 'n'],
390
+ "\u{33cd}" => ['K', '.', 'K', '.'],
391
+ "\u{33ce}" => ['K', 'M'],
392
+ "\u{33cf}" => ['k', 't'],
393
+ "\u{33d0}" => ['l', 'm'],
394
+ "\u{33d1}" => ['l', 'n'],
395
+ "\u{33d2}" => ['l', 'o', 'g'],
396
+ "\u{33d3}" => ['l', 'x'],
397
+ "\u{33d4}" => ['m', 'b'],
398
+ "\u{33d5}" => ['m', 'i', 'l'],
399
+ "\u{33d6}" => ['m', 'o', 'l'],
400
+ "\u{33d7}" => ['p', 'H'],
401
+ "\u{33d8}" => ['p', '.', 'm', '.'],
402
+ "\u{33d9}" => ['p', 'p', 'm'],
403
+ "\u{33da}" => ['P', 'R'],
404
+ "\u{33db}" => ['s', 'r'],
405
+ "\u{33dc}" => ['S', 'v'],
406
+ "\u{33dd}" => ['W', 'b'],
407
+ "\u{33de}" => ['V', '/', 'm'],
408
+ "\u{33df}" => ['A', '/', 'm'],
409
+ "\u{33e0}" => ['1', "\u{65e5}"],
410
+ "\u{33e1}" => ['2', "\u{65e5}"],
411
+ "\u{33e2}" => ['3', "\u{65e5}"],
412
+ "\u{33e3}" => ['4', "\u{65e5}"],
413
+ "\u{33e4}" => ['5', "\u{65e5}"],
414
+ "\u{33e5}" => ['6', "\u{65e5}"],
415
+ "\u{33e6}" => ['7', "\u{65e5}"],
416
+ "\u{33e7}" => ['8', "\u{65e5}"],
417
+ "\u{33e8}" => ['9', "\u{65e5}"],
418
+ "\u{33e9}" => ['1', '0', "\u{65e5}"],
419
+ "\u{33ea}" => ['1', '1', "\u{65e5}"],
420
+ "\u{33eb}" => ['1', '2', "\u{65e5}"],
421
+ "\u{33ec}" => ['1', '3', "\u{65e5}"],
422
+ "\u{33ed}" => ['1', '4', "\u{65e5}"],
423
+ "\u{33ee}" => ['1', '5', "\u{65e5}"],
424
+ "\u{33ef}" => ['1', '6', "\u{65e5}"],
425
+ "\u{33f0}" => ['1', '7', "\u{65e5}"],
426
+ "\u{33f1}" => ['1', '8', "\u{65e5}"],
427
+ "\u{33f2}" => ['1', '9', "\u{65e5}"],
428
+ "\u{33f3}" => ['2', '0', "\u{65e5}"],
429
+ "\u{33f4}" => ['2', '1', "\u{65e5}"],
430
+ "\u{33f5}" => ['2', '2', "\u{65e5}"],
431
+ "\u{33f6}" => ['2', '3', "\u{65e5}"],
432
+ "\u{33f7}" => ['2', '4', "\u{65e5}"],
433
+ "\u{33f8}" => ['2', '5', "\u{65e5}"],
434
+ "\u{33f9}" => ['2', '6', "\u{65e5}"],
435
+ "\u{33fa}" => ['2', '7', "\u{65e5}"],
436
+ "\u{33fb}" => ['2', '8', "\u{65e5}"],
437
+ "\u{33fc}" => ['2', '9', "\u{65e5}"],
438
+ "\u{33fd}" => ['3', '0', "\u{65e5}"],
439
+ "\u{33fe}" => ['3', '1', "\u{65e5}"],
440
+ "\u{33ff}" => ['g', 'a', 'l'],
441
+ "\u{1f100}" => ['0', '.'],
442
+ "\u{1f101}" => ['0', ','],
443
+ "\u{1f102}" => ['1', ','],
444
+ "\u{1f103}" => ['2', ','],
445
+ "\u{1f104}" => ['3', ','],
446
+ "\u{1f105}" => ['4', ','],
447
+ "\u{1f106}" => ['5', ','],
448
+ "\u{1f107}" => ['6', ','],
449
+ "\u{1f108}" => ['7', ','],
450
+ "\u{1f109}" => ['8', ','],
451
+ "\u{1f10a}" => ['9', ','],
452
+ "\u{1f110}" => ['(', 'A', ')'],
453
+ "\u{1f111}" => ['(', 'B', ')'],
454
+ "\u{1f112}" => ['(', 'C', ')'],
455
+ "\u{1f113}" => ['(', 'D', ')'],
456
+ "\u{1f114}" => ['(', 'E', ')'],
457
+ "\u{1f115}" => ['(', 'F', ')'],
458
+ "\u{1f116}" => ['(', 'G', ')'],
459
+ "\u{1f117}" => ['(', 'H', ')'],
460
+ "\u{1f118}" => ['(', 'I', ')'],
461
+ "\u{1f119}" => ['(', 'J', ')'],
462
+ "\u{1f11a}" => ['(', 'K', ')'],
463
+ "\u{1f11b}" => ['(', 'L', ')'],
464
+ "\u{1f11c}" => ['(', 'M', ')'],
465
+ "\u{1f11d}" => ['(', 'N', ')'],
466
+ "\u{1f11e}" => ['(', 'O', ')'],
467
+ "\u{1f11f}" => ['(', 'P', ')'],
468
+ "\u{1f120}" => ['(', 'Q', ')'],
469
+ "\u{1f121}" => ['(', 'R', ')'],
470
+ "\u{1f122}" => ['(', 'S', ')'],
471
+ "\u{1f123}" => ['(', 'T', ')'],
472
+ "\u{1f124}" => ['(', 'U', ')'],
473
+ "\u{1f125}" => ['(', 'V', ')'],
474
+ "\u{1f126}" => ['(', 'W', ')'],
475
+ "\u{1f127}" => ['(', 'X', ')'],
476
+ "\u{1f128}" => ['(', 'Y', ')'],
477
+ "\u{1f129}" => ['(', 'Z', ')'],
478
+ "\u{1f12a}" => ["\u{3014}", 'S', "\u{3015}"],
479
+ "\u{1f16a}" => ['M', 'C'],
480
+ "\u{1f16b}" => ['M', 'D'],
481
+ "\u{1f16c}" => ['M', 'R'],
482
+ "\u{1f200}" => ["\u{307b}", "\u{304b}"],
483
+ "\u{1f240}" => ["\u{3014}", "\u{672c}", "\u{3015}"],
484
+ "\u{1f241}" => ["\u{3014}", "\u{4e09}", "\u{3015}"],
485
+ "\u{1f242}" => ["\u{3014}", "\u{4e8c}", "\u{3015}"],
486
+ "\u{1f243}" => ["\u{3014}", "\u{5b89}", "\u{3015}"],
487
+ "\u{1f244}" => ["\u{3014}", "\u{70b9}", "\u{3015}"],
488
+ "\u{1f245}" => ["\u{3014}", "\u{6253}", "\u{3015}"],
489
+ "\u{1f246}" => ["\u{3014}", "\u{76d7}", "\u{3015}"],
490
+ "\u{1f247}" => ["\u{3014}", "\u{52dd}", "\u{3015}"],
491
+ "\u{1f248}" => ["\u{3014}", "\u{6557}", "\u{3015}"]
492
+ }.freeze
493
+ end
494
+ end
495
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'hira_kata_table'
4
+
5
+ module Yosina
6
+ module Transliterators
7
+ # Module for converting between Hiragana and Katakana scripts
8
+ module HiraKata
9
+ include HiraKataTable
10
+
11
class << self
  # Memo of the mapping tables built so far, keyed by conversion mode.
  # Readable and writable from outside the module.
  attr_accessor :mapping_cache
end

# Nothing has been built yet, so start from an empty memo.
self.mapping_cache = {}
17
+
18
# Transliterator that pushes every character through a hiragana/katakana
# lookup table selected by its mode.
class Transliterator < Yosina::BaseTransliterator
  attr_reader :mode

  # Store the conversion mode and obtain the matching lookup table.
  #
  # @param options [Hash] Configuration options
  # @option options [Symbol] :mode Either :hira_to_kata or :kata_to_hira;
  #   :hira_to_kata is used when the key is missing, nil or false
  def initialize(options = {})
    super()
    requested_mode = options[:mode]
    @mode = requested_mode || :hira_to_kata
    @mapping_table = HiraKata.build_mapping_table(@mode)
  end

  # Replace each character that has an entry in the lookup table.
  #
  # Characters without an entry are emitted unchanged. A replaced character
  # keeps the offset of the input character and records it as its source.
  # The first sentinel character is emitted as-is and ends the iteration.
  #
  # @param input_chars [Enumerable<Char>] The characters to transliterate
  # @return [Enumerable<Char>] The transliterated characters
  def call(input_chars)
    Chars.enum do |yielder|
      input_chars.each do |char|
        if char.sentinel?
          yielder << char
          break
        end

        replacement = @mapping_table[char.c]
        yielder << (replacement ? Char.new(c: replacement, offset: char.offset, source: char) : char)
      end
    end
  end
end
54
+
55
# Build the character mapping table for the given mode, or return the one
# that was built earlier.
#
# Tables are memoized per mode in @mapping_cache. The memoized Hash is frozen
# because the very same instance is returned to every caller asking for that
# mode, so an accidental mutation would otherwise affect all of them.
#
# @param mode [Symbol] :hira_to_kata or :kata_to_hira
#   NOTE: any value other than :hira_to_kata selects the
#   katakana-to-hiragana direction.
# @return [Hash<String, String>] Frozen table from source character to
#   target character
def self.build_mapping_table(mode)
  # The cache can be replaced through the public accessor; tolerate nil.
  cache = (@mapping_cache ||= {})
  cached = cache[mode]
  return cached if cached

  # Gather [hiragana, katakana] pairs first; the direction is applied below.
  pairs = []

  # Main table: each row carries a hiragana triple and a katakana triple
  # (plain, voiced, semi-voiced); the third column is not used here.
  HIRAGANA_KATAKANA_TABLE.each do |hiragana_entry, katakana_entry, _|
    # A row lacking either script has nothing to convert between.
    next unless hiragana_entry && katakana_entry

    hira, hira_voiced, hira_semivoiced = hiragana_entry
    kata, kata_voiced, kata_semivoiced = katakana_entry

    [[hira, kata], [hira_voiced, kata_voiced], [hira_semivoiced, kata_semivoiced]].each do |pair|
      # Only forms that exist in both scripts can be mapped.
      pairs << pair if pair.all?
    end
  end

  # Small character mappings
  HIRAGANA_KATAKANA_SMALL_TABLE.each { |hira, kata, _| pairs << [hira, kata] }

  pairs = pairs.map(&:reverse) unless mode == :hira_to_kata

  # Cache the result; the assignment also yields the table as return value.
  cache[mode] = pairs.to_h.freeze
end
96
+
97
# Entry point for obtaining a hiragana/katakana transliterator.
#
# @param options [Hash] Configuration options, handed unchanged to
#   Transliterator.new
# @return [Transliterator] A newly constructed transliterator instance
def self.call(options = {})
  Transliterator.new(options)
end
104
+ end
105
+ end
106
+ end