sekka 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +22 -0
- data/bin/.gitignore +1 -0
- data/bin/sekka-jisyo +98 -0
- data/bin/sekka-server +83 -0
- data/emacs/http-cookies.el +416 -0
- data/emacs/http-get.el +448 -0
- data/emacs/sekka.el +1069 -0
- data/lib/sekka/alphabet-lib.nnd +59 -0
- data/lib/sekka/approximatesearch.rb +72 -0
- data/lib/sekka/convert-jisyo.nnd +129 -0
- data/lib/sekka/henkan.nnd +464 -0
- data/lib/sekka/jisyo-db.nnd +184 -0
- data/lib/sekka/kvs.rb +135 -0
- data/lib/sekka/roman-lib.nnd +660 -0
- data/lib/sekka/sekkaversion.rb +6 -0
- data/lib/sekka/util.nnd +64 -0
- data/lib/sekka.ru +36 -0
- data/lib/sekkaconfig.rb +62 -0
- data/lib/sekkaserver.rb +127 -0
- data/test/alphabet-lib.nnd +188 -0
- data/test/approximate-bench.nnd +83 -0
- data/test/common.nnd +51 -0
- data/test/henkan-main.nnd +942 -0
- data/test/jisyo.nnd +94 -0
- data/test/roman-lib.nnd +422 -0
- data/test/util.nnd +100 -0
- metadata +223 -0
@@ -0,0 +1,660 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; roman-lib.nnd - ローマ字と平仮名、片仮名の相互変換ライブラリ
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
|
38
|
+
;; 子音が1音だけ入ったテーブル
|
39
|
+
;; このテーブルはruby-romkan から変換し、一部不足を追加した。
|
40
|
+
;; クンレイ、日本式、ヘボン式もデータ中に併記し全てを網羅している。
|
41
|
+
(define sekka-kana->roman-alist-short
|
42
|
+
'(
|
43
|
+
;; normal roman ---AZIK---
|
44
|
+
("ぁ" "la")
|
45
|
+
("あ" "a")
|
46
|
+
("ぃ" "li")
|
47
|
+
("い" "i")
|
48
|
+
("ぅ" "lu")
|
49
|
+
("う" "u")
|
50
|
+
("う゛" "vu")
|
51
|
+
("う゛ぁ" "va")
|
52
|
+
("う゛ぃ" "vi")
|
53
|
+
("う゛ぇ" "ve")
|
54
|
+
("う゛ぉ" "vo")
|
55
|
+
("ぇ" "le")
|
56
|
+
("え" "e")
|
57
|
+
("ぉ" "lo")
|
58
|
+
("お" "o")
|
59
|
+
("か" "ka")
|
60
|
+
("が" "ga")
|
61
|
+
("き" "ki")
|
62
|
+
("きゃ" "kya" "kga")
|
63
|
+
("きゅ" "kyu" "kgu")
|
64
|
+
("きぇ" "kye" "kge")
|
65
|
+
("きょ" "kyo" "kgo")
|
66
|
+
("ぎ" "gi")
|
67
|
+
("ぎゃ" "gya")
|
68
|
+
("ぎゅ" "gyu")
|
69
|
+
("ぎぇ" "gye")
|
70
|
+
("ぎょ" "gyo")
|
71
|
+
("く" "ku")
|
72
|
+
("ぐ" "gu")
|
73
|
+
("け" "ke")
|
74
|
+
("げ" "ge")
|
75
|
+
("こ" "ko")
|
76
|
+
("ご" "go")
|
77
|
+
("さ" "sa")
|
78
|
+
("ざ" "za" "zc") ;; AZIKザ行の特別措置
|
79
|
+
("し" "shi" "si")
|
80
|
+
("しゃ" "sha" "sya" "xa")
|
81
|
+
;; hu の例外処理: shu は sfu になってはいけない
|
82
|
+
("しゅ" "shu" "syu" "xu")
|
83
|
+
("しぇ" "she" "sye" "xe")
|
84
|
+
("しょ" "sho" "syo" "xo")
|
85
|
+
("じ" "ji" "zi")
|
86
|
+
("じゃ" "ja" "jya" "zya")
|
87
|
+
("じゅ" "ju" "jyu" "zyu")
|
88
|
+
("じぇ" "je" "jye" "zye")
|
89
|
+
("じょ" "jo" "jyo" "zyo")
|
90
|
+
("す" "su")
|
91
|
+
("ず" "zu")
|
92
|
+
("せ" "se")
|
93
|
+
("ぜ" "ze")
|
94
|
+
("そ" "so")
|
95
|
+
("ぞ" "zo")
|
96
|
+
("た" "ta")
|
97
|
+
("だ" "da")
|
98
|
+
;; normal roman ---AZIK---
|
99
|
+
("ち" "chi" "tyi" "ti")
|
100
|
+
("ちゃ" "cha" "tya" "ca")
|
101
|
+
;; hu の例外処理: chu は cfu になってはいけない
|
102
|
+
("ちゅ" "chu" "tyu" "cu")
|
103
|
+
("ちぇ" "che" "tye" "ce")
|
104
|
+
("ちょ" "cho" "tyo" "co")
|
105
|
+
("ぢ" "di")
|
106
|
+
("ぢゃ" "dya")
|
107
|
+
("ぢゅ" "dyu")
|
108
|
+
("ぢぇ" "dye")
|
109
|
+
("ぢょ" "dyo")
|
110
|
+
("っ" "tt" "@" ";" "ltu")
|
111
|
+
;; normal roman ---AZIK---
|
112
|
+
("つ" "tsu" "tu")
|
113
|
+
("づ" "du")
|
114
|
+
("て" "te")
|
115
|
+
("で" "de")
|
116
|
+
("と" "to")
|
117
|
+
("ど" "do")
|
118
|
+
("な" "na")
|
119
|
+
("に" "ni")
|
120
|
+
("にゃ" "nya")
|
121
|
+
("にゅ" "nyu")
|
122
|
+
("にぇ" "nye")
|
123
|
+
("にょ" "nyo")
|
124
|
+
("ぬ" "nu")
|
125
|
+
("ね" "ne")
|
126
|
+
("の" "no")
|
127
|
+
("は" "ha")
|
128
|
+
("ば" "ba")
|
129
|
+
("ぱ" "pa")
|
130
|
+
("ひ" "hi")
|
131
|
+
("ひゃ" "hya" "hga")
|
132
|
+
("ひゅ" "hyu" "hgu")
|
133
|
+
("ひぇ" "hye" "hge")
|
134
|
+
("ひょ" "hyo" "hgo")
|
135
|
+
("び" "bi")
|
136
|
+
("びゃ" "bya")
|
137
|
+
("びゅ" "byu")
|
138
|
+
("びぇ" "bye")
|
139
|
+
("びょ" "byo")
|
140
|
+
("ぴ" "pi")
|
141
|
+
("ぴゃ" "pya" "pga")
|
142
|
+
("ぴゅ" "pyu" "pgu")
|
143
|
+
("ぴぇ" "pye" "pge")
|
144
|
+
("ぴょ" "pyo" "pgo")
|
145
|
+
("ふ" "fu" "hu")
|
146
|
+
("ふぁ" "fa")
|
147
|
+
("ふぃ" "fi")
|
148
|
+
("ふぇ" "fe")
|
149
|
+
("ふぉ" "fo")
|
150
|
+
("ぶ" "bu")
|
151
|
+
("ぷ" "pu")
|
152
|
+
("へ" "he")
|
153
|
+
("べ" "be")
|
154
|
+
("ぺ" "pe")
|
155
|
+
("ほ" "ho")
|
156
|
+
("ぼ" "bo")
|
157
|
+
("ぽ" "po")
|
158
|
+
("ま" "ma")
|
159
|
+
("み" "mi")
|
160
|
+
("みゃ" "mya" "mga")
|
161
|
+
("みゅ" "myu" "mgu")
|
162
|
+
("みぇ" "mye" "mge")
|
163
|
+
("みょ" "myo" "mgo")
|
164
|
+
("む" "mu")
|
165
|
+
("め" "me")
|
166
|
+
("も" "mo")
|
167
|
+
;; normal roman ---AZIK---
|
168
|
+
("ゃ" "lya")
|
169
|
+
("や" "ya")
|
170
|
+
("ゅ" "lyu")
|
171
|
+
("ゆ" "yu")
|
172
|
+
("ょ" "lyo")
|
173
|
+
("よ" "yo")
|
174
|
+
("ら" "ra")
|
175
|
+
("り" "ri")
|
176
|
+
("りゃ" "rya")
|
177
|
+
("りゅ" "ryu")
|
178
|
+
("りょ" "ryo")
|
179
|
+
("る" "ru")
|
180
|
+
("れ" "re")
|
181
|
+
("ろ" "ro")
|
182
|
+
("ゎ" "lwa")
|
183
|
+
("わ" "wa")
|
184
|
+
("うぃ" "wi")
|
185
|
+
("うぇ" "we")
|
186
|
+
("を" "wo")
|
187
|
+
("うぉ" "wso")
|
188
|
+
;; normal roman ---AZIK---
|
189
|
+
("ん" "nn" "n" "q")
|
190
|
+
("でぃ" "dyi" "dhi" "dci")
|
191
|
+
("でぅ" "dyu" "dhu" "dcu")
|
192
|
+
("ー" "-" "^" ":")
|
193
|
+
|
194
|
+
;; Ruby romkanからの不足分追加
|
195
|
+
("てぃ" "thi" "tgi")
|
196
|
+
("てぅ" "thu" "tgu")
|
197
|
+
;; Sekkaの辞書に入っている特別なキーワード ">あん" など
|
198
|
+
(">" ">")
|
199
|
+
|
200
|
+
;; 撥音から始まるキーワード
|
201
|
+
("っう゛" "vvu" "@vu" ";vu" )
|
202
|
+
("っう゛ぁ" "vva" "@va" ";va" )
|
203
|
+
("っう゛ぃ" "vvi" "@vi" ";vi" )
|
204
|
+
("っう゛ぇ" "vve" "@ve" ";ve" )
|
205
|
+
("っう゛ぉ" "vvo" "@vo" ";vo" )
|
206
|
+
("っか" "kka" "@ka" ";ka" )
|
207
|
+
("っが" "gga" "@ga" ";ga" )
|
208
|
+
("っき" "kki" "@ki" ";ki" )
|
209
|
+
("っきゃ" "kkya" "@kya" ";kya")
|
210
|
+
("っきゅ" "kkyu" "@kyu" ";kyu")
|
211
|
+
("っきぇ" "kkye" "@kye" ";kye")
|
212
|
+
("っきょ" "kkyo" "@kyo" ";kyo")
|
213
|
+
("っぎ" "ggi" "@gi" ";gi" )
|
214
|
+
("っぎゃ" "ggya" "@gya" ";gya")
|
215
|
+
("っぎゅ" "ggyu" "@gyu" ";gyu")
|
216
|
+
("っぎぇ" "ggye" "@gye" ";gye")
|
217
|
+
("っぎょ" "ggyo" "@gyo" ";gyo")
|
218
|
+
("っく" "kku" "@ku" ";ku" )
|
219
|
+
("っぐ" "ggu" "@gu" ";gu" )
|
220
|
+
("っけ" "kke" "@ke" ";ke" )
|
221
|
+
("っげ" "gge" "@ge" ";ge" )
|
222
|
+
("っこ" "kko" "@ko" ";ko" )
|
223
|
+
("っご" "ggo" "@go" ";go" )
|
224
|
+
("っさ" "ssa" "@sa" ";sa" )
|
225
|
+
("っざ" "zza" "@za" ";za" )
|
226
|
+
("っし" "sshi" "sshi" "@shi" "@shi" ";shi" ";shi")
|
227
|
+
("っしゃ" "ssha" "ssya" "@sha" "@sya" ";sha" ";sya")
|
228
|
+
("っしゅ" "sshu" "ssyu" "@shu" "@syu" ";shu" ";syu")
|
229
|
+
("っしぇ" "sshe" "ssye" "@she" "@sye" ";she" ";sye")
|
230
|
+
("っしょ" "ssho" "ssyo" "@sho" "@syo" ";sho" ";syo")
|
231
|
+
("っじ" "jji" "@ji" ";ji" )
|
232
|
+
("っじゃ" "jja" "@ja" ";ja" )
|
233
|
+
("っじゅ" "jju" "@ju" ";ju" )
|
234
|
+
("っじぇ" "jje" "@je" ";je" )
|
235
|
+
("っじょ" "jjo" "@jo" ";jo" )
|
236
|
+
("っす" "ssu" "@su" ";su" )
|
237
|
+
("っず" "zzu" "@zu" ";zu" )
|
238
|
+
("っせ" "sse" "@se" ";se" )
|
239
|
+
("っぜ" "zze" "@ze" ";ze" )
|
240
|
+
("っそ" "sso" "@so" ";so" )
|
241
|
+
("っぞ" "zzo" "@zo" ";zo" )
|
242
|
+
("った" "tta" "@ta" ";ta" )
|
243
|
+
("っだ" "dda" "@da" ";da" )
|
244
|
+
("っち" "cchi" "ttyi" "cci" "@chi" "@tyi" "@ci" ";chi" ";tyi" ";ci")
|
245
|
+
("っちゃ" "ccha" "ttya" "cca" "@cha" "@tya" "@ca" ";cha" ";tya" ";ca")
|
246
|
+
("っちゅ" "cchu" "ttyu" "ccu" "@chu" "@tyu" "@cu" ";chu" ";tyu" ";cu")
|
247
|
+
("っちぇ" "cche" "ttye" "cce" "@che" "@tye" "@ce" ";che" ";tye" ";ce")
|
248
|
+
("っちょ" "ccho" "ttyo" "cco" "@cho" "@tyo" "@co" ";cho" ";tyo" ";co")
|
249
|
+
("っぢ" "ddi" "@di" ";di" )
|
250
|
+
("っぢゃ" "ddya" "@dya" ";dya")
|
251
|
+
("っぢゅ" "ddyu" "@dyu" ";dyu")
|
252
|
+
("っぢぇ" "ddye" "@dye" ";dye")
|
253
|
+
("っぢょ" "ddyo" "@dyo" ";dyo")
|
254
|
+
("っつ" "ttsu" "@tsu" ";tsu")
|
255
|
+
("っづ" "ddu" "@du" ";du" )
|
256
|
+
("って" "tte" "@te" ";te" )
|
257
|
+
("っで" "dde" "@de" ";de" )
|
258
|
+
("っと" "tto" "@to" ";to" )
|
259
|
+
("っど" "ddo" "@do" ";do" )
|
260
|
+
("っは" "hha" "@ha" ";ha" )
|
261
|
+
("っば" "bba" "@ba" ";ba" )
|
262
|
+
("っぱ" "ppa" "@pa" ";pa" )
|
263
|
+
("っひ" "hhi" "@hi" ";hi" )
|
264
|
+
("っひゃ" "hhya" "@hya" ";hya")
|
265
|
+
("っひゅ" "hhyu" "@hyu" ";hyu")
|
266
|
+
("っひぇ" "hhye" "@hye" ";hye")
|
267
|
+
("っひょ" "hhyo" "@hyo" ";hyo")
|
268
|
+
("っび" "bbi" "@bi" ";bi" )
|
269
|
+
("っびゃ" "bbya" "@bya" ";bya")
|
270
|
+
("っびゅ" "bbyu" "@byu" ";byu")
|
271
|
+
("っびぇ" "bbye" "@bye" ";bye")
|
272
|
+
("っびょ" "bbyo" "@byo" ";byo")
|
273
|
+
("っぴ" "ppi" "@pi" ";pi" )
|
274
|
+
("っぴゃ" "ppya" "@pya" ";pya")
|
275
|
+
("っぴゅ" "ppyu" "@pyu" ";pyu")
|
276
|
+
("っぴぇ" "ppye" "@pye" ";pye")
|
277
|
+
("っぴょ" "ppyo" "@pyo" ";pyo")
|
278
|
+
("っふ" "ffu" "hhu" "@fu" "@hu" ";fu" ";hu")
|
279
|
+
("っふぁ" "ffa" "@fa" ";fa" )
|
280
|
+
("っふぃ" "ffi" "@fi" ";fi" )
|
281
|
+
("っふぇ" "ffe" "@fe" ";fe" )
|
282
|
+
("っふぉ" "ffo" "@fo" ";fo" )
|
283
|
+
("っぶ" "bbu" "@bu" ";bu" )
|
284
|
+
("っぷ" "ppu" "@pu" ";pu" )
|
285
|
+
("っへ" "hhe" "@he" ";he" )
|
286
|
+
("っべ" "bbe" "@be" ";be" )
|
287
|
+
("っぺ" "ppe" "@pe" ";pe" )
|
288
|
+
("っほ" "hho" "@ho" ";ho" )
|
289
|
+
("っぼ" "bbo" "@bo" ";bo" )
|
290
|
+
("っぽ" "ppo" "@po" ";po" )
|
291
|
+
("っや" "yya" "@ya" ";ya" )
|
292
|
+
("っゆ" "yyu" "@yu" ";yu" )
|
293
|
+
("っよ" "yyo" "@yo" ";yo" )
|
294
|
+
("っら" "rra" "@ra" ";ra" )
|
295
|
+
("っり" "rri" "@ri" ";ri" )
|
296
|
+
("っりゃ" "rrya" "@rya" ";rya")
|
297
|
+
("っりゅ" "rryu" "@ryu" ";ryu")
|
298
|
+
("っりぇ" "rrye" "@rye" ";rye")
|
299
|
+
("っりょ" "rryo" "@ryo" ";ryo")
|
300
|
+
("っる" "rru" "@ru" ";ru" )
|
301
|
+
("っれ" "rre" "@re" ";re" )
|
302
|
+
("っろ" "rro" "@ro" ";ro" )
|
303
|
+
))
|
304
|
+
|
305
|
+
;; 子音が2音入ったテーブル
|
306
|
+
(define sekka-kana->roman-alist-long
|
307
|
+
`(
|
308
|
+
;; "n" 一つで "nn" を表現する件と被るのでAZIK専用拡張とする(エントリ上書き)
|
309
|
+
("にゃ" "nya" "nga")
|
310
|
+
("にゅ" "nyu" "ngu")
|
311
|
+
("にぇ" "nye" "nge")
|
312
|
+
("にょ" "nyo" "ngo")
|
313
|
+
|
314
|
+
;; ---以下AZIK---
|
315
|
+
;; ------ AZIK 撥音拡張
|
316
|
+
("かん" "kz" "kn")
|
317
|
+
("きん" "kk")
|
318
|
+
("くん" "kj")
|
319
|
+
("けん" "kd")
|
320
|
+
("こん" "kl")
|
321
|
+
("さん" "sz" "sn")
|
322
|
+
("しん" "sk")
|
323
|
+
("すん" "sj")
|
324
|
+
("せん" "sd")
|
325
|
+
("そん" "sl")
|
326
|
+
("たん" "tz" "tn")
|
327
|
+
("ちん" "tk")
|
328
|
+
("つん" "tj")
|
329
|
+
("てん" "td")
|
330
|
+
("とん" "tl")
|
331
|
+
("なん" "nz")
|
332
|
+
;;("さん" "nn") "ん"になる
|
333
|
+
("にん" "nk")
|
334
|
+
("ぬん" "nj")
|
335
|
+
("ねん" "nd")
|
336
|
+
("のん" "nl")
|
337
|
+
("はん" "hz" "hn")
|
338
|
+
("ひん" "hk")
|
339
|
+
("ふん" "hj")
|
340
|
+
("へん" "hd")
|
341
|
+
("ほん" "hl")
|
342
|
+
("ふぁん" "fz" "fn")
|
343
|
+
("ふぃん" "fk")
|
344
|
+
("ふん" "fj")
|
345
|
+
("ふぇん" "fd")
|
346
|
+
("ふぉん" "fl")
|
347
|
+
("まん" "mz")
|
348
|
+
;;("まん" "mn") "もの"になる
|
349
|
+
("みん" "mk")
|
350
|
+
("むん" "mj")
|
351
|
+
("めん" "md")
|
352
|
+
("もん" "ml")
|
353
|
+
("やん" "yz" "yn")
|
354
|
+
("ゆん" "yj")
|
355
|
+
("よん" "yl")
|
356
|
+
("らん" "rz")
|
357
|
+
("らん" "rn")
|
358
|
+
("りん" "rk")
|
359
|
+
("るん" "rj")
|
360
|
+
("れん" "rd")
|
361
|
+
("ろん" "rl" "wz")
|
362
|
+
("わん" "wn")
|
363
|
+
("うぃん" "wk")
|
364
|
+
("うぇん" "wd")
|
365
|
+
("うぉん" "wl")
|
366
|
+
|
367
|
+
;; ------ AZIK 二重母音拡張
|
368
|
+
("かい" "kq")
|
369
|
+
("くう" "kh")
|
370
|
+
("けい" "kw")
|
371
|
+
("こう" "kp")
|
372
|
+
("さい" "sq")
|
373
|
+
("すう" "sh")
|
374
|
+
("せい" "sw")
|
375
|
+
("そう" "sp")
|
376
|
+
("たい" "tq")
|
377
|
+
("つう" "th")
|
378
|
+
("てい" "tw")
|
379
|
+
("とう" "tp")
|
380
|
+
("ない" "nq")
|
381
|
+
("ぬう" "nh")
|
382
|
+
("ねい" "nw")
|
383
|
+
("のう" "np")
|
384
|
+
("はい" "hq")
|
385
|
+
("ふう" "hh")
|
386
|
+
("へい" "hw")
|
387
|
+
("ほう" "hp")
|
388
|
+
("ふぁい" "fq")
|
389
|
+
("ふう" "fh")
|
390
|
+
("ふぇい" "fw")
|
391
|
+
("ふぉー" "fp")
|
392
|
+
("まい" "mq")
|
393
|
+
("むう" "mh")
|
394
|
+
("めい" "mw")
|
395
|
+
("もう" "mp")
|
396
|
+
("やい" "yq")
|
397
|
+
("ゆう" "yh")
|
398
|
+
("よう" "yp")
|
399
|
+
("らい" "rq")
|
400
|
+
("るう" "rh")
|
401
|
+
("れい" "rw")
|
402
|
+
("ろう" "rp")
|
403
|
+
("わい" "wq")
|
404
|
+
("うぉー" "wp")
|
405
|
+
|
406
|
+
;; ------ AZIK 濁音、半濁音
|
407
|
+
("がん" "gz" "gn")
|
408
|
+
("ぎん" "gg")
|
409
|
+
("ぐん" "gj")
|
410
|
+
("げん" "gd")
|
411
|
+
("ごん" "gl")
|
412
|
+
("ざん" "zz" "zn")
|
413
|
+
("じん" "zz")
|
414
|
+
("ずん" "zj")
|
415
|
+
("ぜん" "zd")
|
416
|
+
("ぞん" "zl")
|
417
|
+
("だん" "dz" "dn")
|
418
|
+
("ぢん" "dd")
|
419
|
+
("づん" "dj")
|
420
|
+
("でん" "dd")
|
421
|
+
("どん" "dl")
|
422
|
+
("ばん" "bz" "bn")
|
423
|
+
("びん" "bb")
|
424
|
+
("ぶん" "bj")
|
425
|
+
("べん" "bd")
|
426
|
+
("ぼん" "bl")
|
427
|
+
("ぱん" "pz" "pn")
|
428
|
+
("ぴん" "pp")
|
429
|
+
("ぷん" "pj")
|
430
|
+
("ぺん" "pd")
|
431
|
+
("ぽん" "pl")
|
432
|
+
|
433
|
+
;; ------ AZIK 濁音、半濁音二重母音拡張
|
434
|
+
("がい" "kq")
|
435
|
+
("ぐう" "kh")
|
436
|
+
("げい" "kw")
|
437
|
+
("ごう" "kp")
|
438
|
+
("ざい" "zq" "zv")
|
439
|
+
("ずう" "zh")
|
440
|
+
("ぜい" "zw" "zx")
|
441
|
+
("ぞう" "zp")
|
442
|
+
("だい" "dq")
|
443
|
+
("づう" "dh")
|
444
|
+
("でい" "dw")
|
445
|
+
("どう" "dp")
|
446
|
+
("ばい" "bq")
|
447
|
+
("ぶう" "bh")
|
448
|
+
("べい" "bw")
|
449
|
+
("ぼう" "bp")
|
450
|
+
("ぱい" "pq")
|
451
|
+
("ぷう" "ph")
|
452
|
+
("ぺい" "pw")
|
453
|
+
("ぽう" "pp")
|
454
|
+
|
455
|
+
;; ------ AZIK 特殊拡張
|
456
|
+
("こと" "kt")
|
457
|
+
("わた" "wt")
|
458
|
+
("かも" "km")
|
459
|
+
("する" "sr")
|
460
|
+
("られ" "rr")
|
461
|
+
("ねば" "nb")
|
462
|
+
("にち" "nt")
|
463
|
+
|
464
|
+
("した" "st")
|
465
|
+
("もの" "mn")
|
466
|
+
("ため" "tm")
|
467
|
+
("たら" "tr")
|
468
|
+
("ざる" "zr")
|
469
|
+
("びと" "bt")
|
470
|
+
("だち" "dt")
|
471
|
+
|
472
|
+
("たち" "tt")
|
473
|
+
("ます" "ms")
|
474
|
+
("でも" "dm")
|
475
|
+
("なる" "nr")
|
476
|
+
("また" "mt")
|
477
|
+
("がら" "gr")
|
478
|
+
("われ" "wr")
|
479
|
+
|
480
|
+
("ひと" "ht")
|
481
|
+
("です" "ds")
|
482
|
+
("から" "kr")
|
483
|
+
("よる" "yr")
|
484
|
+
("たび" "tb")
|
485
|
+
("ごと" "gt")
|
486
|
+
))
|
487
|
+
|
488
|
+
|
489
|
+
;; ハッシュテーブル 平仮名 =>ローマ字
|
490
|
+
(define sekka-kana->roman-hash-short
|
491
|
+
(alist->hash-table sekka-kana->roman-alist-short))
|
492
|
+
(define sekka-kana->roman-hash-long
|
493
|
+
(alist->hash-table (append
|
494
|
+
sekka-kana->roman-alist-short
|
495
|
+
sekka-kana->roman-alist-long)))
|
496
|
+
|
497
|
+
;; ハッシュテーブル ローマ字 =>平仮名
|
498
|
+
(define (sekka-alist-swap alist)
|
499
|
+
(append-map
|
500
|
+
(lambda (x)
|
501
|
+
(let ((hira (car x))
|
502
|
+
(romans (cdr x)))
|
503
|
+
(map (lambda (r) (list r hira)) romans)))
|
504
|
+
alist))
|
505
|
+
(define sekka-roman->kana-hash-short
|
506
|
+
(alist->hash-table (sekka-alist-swap sekka-kana->roman-alist-short)))
|
507
|
+
(define sekka-roman->kana-hash-long
|
508
|
+
(alist->hash-table (sekka-alist-swap
|
509
|
+
(append
|
510
|
+
sekka-kana->roman-alist-short
|
511
|
+
sekka-kana->roman-alist-long))))
|
512
|
+
|
513
|
+
;; 平仮名->カタカナ 変換
|
514
|
+
(define (gen-hiragana->katakana str)
|
515
|
+
(str.tr "あ-んぁぃぅぇぉゃゅょっー" "ア-ンァィゥェォャュョッー"))
|
516
|
+
|
517
|
+
;; カタカナ->平仮名 変換
|
518
|
+
(define (gen-katakana->hiragana str)
|
519
|
+
(str.tr "ア-ンァィゥェォャュョッー" "あ-んぁぃぅぇぉゃゅょっー"))
|
520
|
+
|
521
|
+
;; カタカナの文字列かどうかを評価する
|
522
|
+
(define (is-katakana str)
|
523
|
+
(if (rxmatch #/^[ア-ンァィゥェォャュョッー]+$/ str) #t #f))
|
524
|
+
|
525
|
+
;; 平仮名の文字列かどうかを評価する
|
526
|
+
(define (is-hiragana str)
|
527
|
+
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+$/ str) #t #f))
|
528
|
+
|
529
|
+
;; 送り仮名付き平仮名文字列(例:"おこなu") かどうかを評価する
|
530
|
+
(define (is-hiragana-and-okuri str)
|
531
|
+
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+[a-z]$/ str) #t #f))
|
532
|
+
|
533
|
+
;; 送り仮名付き漢字文字列(例:"行う") の送り仮名部分を削除する
|
534
|
+
(define (drop-okuri str)
|
535
|
+
(if-let1 m (rxmatch #/^([^あ-んぁぃぅぇぉゃゅょっー]+)(.+)$/ str)
|
536
|
+
(rxmatch-substring m 1)
|
537
|
+
str))
|
538
|
+
|
539
|
+
;; 小文字を大文字にして返す。 "@"と";"も扱う。
|
540
|
+
(define (sekka-upcase str)
|
541
|
+
(. (str.tr "@;" "`+") upcase))
|
542
|
+
|
543
|
+
;; 大文字を小文字にして返す。 "@"と";"も扱う。
|
544
|
+
(define (sekka-downcase str)
|
545
|
+
(. (str.tr "`+" "@;") downcase))
|
546
|
+
|
547
|
+
(define (gen-hiragana->roman-pattens-with-hash h hiragana)
|
548
|
+
(let1 lst '()
|
549
|
+
(let loop ((str hiragana))
|
550
|
+
(let ((str1 (str.slice 0 1))
|
551
|
+
(str2 (str.slice 0 2))
|
552
|
+
(str3 (str.slice 0 3)))
|
553
|
+
(cond
|
554
|
+
((eq? 0 (str.size))
|
555
|
+
#f)
|
556
|
+
((hash-table-exist? h str3)
|
557
|
+
(set! lst (cons (hash-table-get h str3) lst))
|
558
|
+
(loop (str.slice (str3.size) (str.size))))
|
559
|
+
((hash-table-exist? h str2)
|
560
|
+
(set! lst (cons (hash-table-get h str2) lst))
|
561
|
+
(loop (str.slice (str2.size) (str.size))))
|
562
|
+
((hash-table-exist? h str1)
|
563
|
+
(set! lst (cons (hash-table-get h str1) lst))
|
564
|
+
(loop (str.slice (str1.size) (str.size)))))))
|
565
|
+
(reverse lst)))
|
566
|
+
|
567
|
+
|
568
|
+
(define (gen-hiragana->roman-pattens hiragana)
|
569
|
+
(delete-duplicates
|
570
|
+
(list
|
571
|
+
(gen-hiragana->roman-pattens-with-hash sekka-kana->roman-hash-short hiragana)
|
572
|
+
(gen-hiragana->roman-pattens-with-hash sekka-kana->roman-hash-long hiragana))))
|
573
|
+
|
574
|
+
|
575
|
+
;; if failed, return #f
|
576
|
+
(define (gen-roman->hiragana-with-hash h roman-str)
|
577
|
+
(let ((lst '())
|
578
|
+
(err #f))
|
579
|
+
(let loop ((str roman-str))
|
580
|
+
(let ((str1 (str.slice 0 1))
|
581
|
+
(str2 (str.slice 0 2))
|
582
|
+
(str3 (str.slice 0 3))
|
583
|
+
(str4 (str.slice 0 4)))
|
584
|
+
(cond
|
585
|
+
((eq? 0 (str.size))
|
586
|
+
#f)
|
587
|
+
((hash-table-exist? h str4)
|
588
|
+
(set! lst (cons (hash-table-get h str4) lst))
|
589
|
+
(loop (str.slice (str4.size) (str.size))))
|
590
|
+
((hash-table-exist? h str3)
|
591
|
+
(set! lst (cons (hash-table-get h str3) lst))
|
592
|
+
(loop (str.slice (str3.size) (str.size))))
|
593
|
+
((hash-table-exist? h str2)
|
594
|
+
(set! lst (cons (hash-table-get h str2) lst))
|
595
|
+
(loop (str.slice (str2.size) (str.size))))
|
596
|
+
((hash-table-exist? h str1)
|
597
|
+
(set! lst (cons (hash-table-get h str1) lst))
|
598
|
+
(loop (str.slice (str1.size) (str.size))))
|
599
|
+
(else
|
600
|
+
(set! err #t)))))
|
601
|
+
(if err
|
602
|
+
#f
|
603
|
+
(string-join
|
604
|
+
(map
|
605
|
+
(lambda (x) (car x))
|
606
|
+
(reverse lst))))))
|
607
|
+
|
608
|
+
|
609
|
+
;; if failed, return '()
|
610
|
+
;; roman-methodには :normal か :azik を指定します。
|
611
|
+
;; それにより、通常のローマ字かAZIK(拡張ローマ字)のどちらを優先するかを指定できます。
|
612
|
+
(define (gen-roman->hiragana roman-str roman-method)
|
613
|
+
(let ((s (gen-roman->hiragana-with-hash sekka-roman->kana-hash-short roman-str))
|
614
|
+
(l (gen-roman->hiragana-with-hash sekka-roman->kana-hash-long roman-str)))
|
615
|
+
(delete-duplicates
|
616
|
+
(filter
|
617
|
+
(lambda (x) x)
|
618
|
+
(case roman-method
|
619
|
+
((:azik)
|
620
|
+
(list l s))
|
621
|
+
((:normal)
|
622
|
+
(list s l))
|
623
|
+
(else
|
624
|
+
(error "Error: gen-roman->hiragana got illegal roman-method.")))))))
|
625
|
+
|
626
|
+
|
627
|
+
;; if failed, return '()
|
628
|
+
(define (gen-roman->katakana roman-str roman-method)
|
629
|
+
(filter-map
|
630
|
+
(lambda (x)
|
631
|
+
(gen-hiragana->katakana x))
|
632
|
+
(gen-roman->hiragana roman-str roman-method)))
|
633
|
+
|
634
|
+
;; This function port from Gauche-0.9's util.combinations.
|
635
|
+
(define (cartesian-product lol)
|
636
|
+
(if (null? lol)
|
637
|
+
(list '())
|
638
|
+
(let ((l (car lol))
|
639
|
+
(rest (cartesian-product (cdr lol))))
|
640
|
+
(append-map
|
641
|
+
(lambda (x)
|
642
|
+
(map (lambda (sub-prod) (cons x sub-prod)) rest))
|
643
|
+
l))))
|
644
|
+
|
645
|
+
|
646
|
+
(define (patterns->roman-list patterns)
|
647
|
+
(uniq
|
648
|
+
(sort
|
649
|
+
(append-map
|
650
|
+
(lambda (_pattern)
|
651
|
+
(map
|
652
|
+
(lambda (x)
|
653
|
+
(string-join x))
|
654
|
+
(cartesian-product _pattern)))
|
655
|
+
patterns))))
|
656
|
+
|
657
|
+
|
658
|
+
(define (gen-hiragana->roman-list hiragana)
|
659
|
+
(patterns->roman-list
|
660
|
+
(gen-hiragana->roman-pattens hiragana)))
|