sekka 0.8.8 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/sekka-jisyo +26 -9
- data/emacs/sekka.el +39 -4
- data/lib/sekka/approximatesearch.rb +12 -8
- data/lib/sekka/convert-jisyo.nnd +35 -12
- data/lib/sekka/henkan.nnd +87 -52
- data/lib/sekka/jisyo-db.nnd +63 -42
- data/lib/sekka/kvs.rb +61 -9
- data/lib/sekka/sekkaversion.rb +1 -1
- data/test/approximate-bench.nnd +6 -4
- data/test/henkan-bench.nnd +85 -0
- data/test/henkan-main.nnd +214 -48
- data/test/jisyo.nnd +7 -1
- metadata +127 -28
data/test/henkan-main.nnd
CHANGED
@@ -51,6 +51,10 @@
|
|
51
51
|
(cond
|
52
52
|
((eq? dbtype 'tokyocabinet)
|
53
53
|
(set! target "./test.tch"))
|
54
|
+
((eq? dbtype 'dbm)
|
55
|
+
(set! target "./test"))
|
56
|
+
((eq? dbtype 'pure)
|
57
|
+
(set! target "./test.dump"))
|
54
58
|
((eq? dbtype 'memcache)
|
55
59
|
(error "memcached interface is obsolute."))
|
56
60
|
(else
|
@@ -87,16 +91,18 @@
|
|
87
91
|
(let ((_input (f.read))
|
88
92
|
(_output (StringIO.new)))
|
89
93
|
(_output.set_encoding "utf-8")
|
90
|
-
(when (eq? dbtype 'tokyocabinet)
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
94
|
+
(when (or (eq? dbtype 'tokyocabinet)
|
95
|
+
(eq? dbtype 'dbm)
|
96
|
+
(eq? dbtype 'pure))
|
97
|
+
(test-section "dump db")
|
98
|
+
(test* "dump DB to SEKKA jisyo file. (1)"
|
99
|
+
_input
|
100
|
+
(begin
|
101
|
+
(dump-sekka-jisyo-f _output target)
|
102
|
+
(with-open "./test/sekka-dump-out-1.tmp"
|
103
|
+
(lambda (f) (f.write (_output.string)))
|
104
|
+
"w")
|
105
|
+
(_output.string)))))))
|
100
106
|
|
101
107
|
|
102
108
|
(test-section "Open the test-suite DB")
|
@@ -106,6 +112,11 @@
|
|
106
112
|
((eq? dbtype 'tokyocabinet)
|
107
113
|
(kvs.open target)
|
108
114
|
(require "tokyocabinet"))
|
115
|
+
((eq? dbtype 'dbm)
|
116
|
+
(kvs.open target)
|
117
|
+
(require "dbm"))
|
118
|
+
((eq? dbtype 'pure)
|
119
|
+
(kvs.open target))
|
109
120
|
((eq? dbtype 'memcache)
|
110
121
|
(error "memcached interface is obsolute."))
|
111
122
|
(else
|
@@ -140,6 +151,19 @@
|
|
140
151
|
(#f ,userid "") ;; 空行
|
141
152
|
(#f ,userid "aaaa") ;; 明らかなフォーマットエラー
|
142
153
|
(#f ,userid "単語 /たんご/") ;; フォーマットエラー
|
154
|
+
(#t ,userid "する //") ;; 平仮名フレーズ
|
155
|
+
(#t ,userid "したい //") ;; 平仮名フレーズ
|
156
|
+
(#t ,userid "なります //") ;; 平仮名フレーズ
|
157
|
+
(#t ,userid "なりました //") ;; 平仮名フレーズ
|
158
|
+
(#t ,userid "なりません //") ;; 平仮名フレーズ
|
159
|
+
(#t ,userid "はい //") ;; 平仮名フレーズ
|
160
|
+
(#t ,userid "あい //") ;; 平仮名フレーズ
|
161
|
+
(#t ,userid "あいう //") ;; 平仮名フレーズ
|
162
|
+
(#t ,userid "あいうえ //") ;; 平仮名フレーズ
|
163
|
+
(#t ,userid "あいうえお //") ;; 平仮名フレーズ
|
164
|
+
(#t ,userid "あいうえおか //") ;; 平仮名フレーズ
|
165
|
+
(#t ,userid "あいうえおかき //") ;; 平仮名フレーズ
|
166
|
+
(#t ,userid "なっている //") ;; 平仮名フレーズ
|
143
167
|
))
|
144
168
|
|
145
169
|
|
@@ -189,6 +213,24 @@
|
|
189
213
|
"yu'za'jisho yu'za'jisyo yu'za'jixo yu'za'zisho yu'za'zisyo yu'za'zixo yu'za-jisho yu'za-jisyo yu'za-jixo yu'za-zisho yu'za-zisyo yu'za-zixo yu'za:jisho yu'za:jisyo yu'za:jixo yu'za:zisho yu'za:zisyo yu'za:zixo yu'za^jisho yu'za^jisyo yu'za^jixo yu'za^zisho yu'za^zisyo yu'za^zixo yu'zc'jisho yu'zc'jisyo yu'zc'jixo yu'zc'zisho yu'zc'zisyo yu'zc'zixo yu'zc-jisho yu'zc-jisyo yu'zc-jixo yu'zc-zisho yu'zc-zisyo yu'zc-zixo yu'zc:jisho yu'zc:jisyo yu'zc:jixo yu'zc:zisho yu'zc:zisyo yu'zc:zixo yu'zc^jisho yu'zc^jisyo yu'zc^jixo yu'zc^zisho yu'zc^zisyo yu'zc^zixo yu-za'jisho yu-za'jisyo yu-za'jixo yu-za'zisho yu-za'zisyo yu-za'zixo yu-za-jisho yu-za-jisyo yu-za-jixo yu-za-zisho yu-za-zisyo yu-za-zixo yu-za:jisho yu-za:jisyo yu-za:jixo yu-za:zisho yu-za:zisyo yu-za:zixo yu-za^jisho yu-za^jisyo yu-za^jixo yu-za^zisho yu-za^zisyo yu-za^zixo yu-zc'jisho yu-zc'jisyo yu-zc'jixo yu-zc'zisho yu-zc'zisyo yu-zc'zixo yu-zc-jisho yu-zc-jisyo yu-zc-jixo yu-zc-zisho yu-zc-zisyo yu-zc-zixo yu-zc:jisho yu-zc:jisyo yu-zc:jixo yu-zc:zisho yu-zc:zisyo yu-zc:zixo yu-zc^jisho yu-zc^jisyo yu-zc^jixo yu-zc^zisho yu-zc^zisyo yu-zc^zixo yu:za'jisho yu:za'jisyo yu:za'jixo yu:za'zisho yu:za'zisyo yu:za'zixo yu:za-jisho yu:za-jisyo yu:za-jixo yu:za-zisho yu:za-zisyo yu:za-zixo yu:za:jisho yu:za:jisyo yu:za:jixo yu:za:zisho yu:za:zisyo yu:za:zixo yu:za^jisho yu:za^jisyo yu:za^jixo yu:za^zisho yu:za^zisyo yu:za^zixo yu:zc'jisho yu:zc'jisyo yu:zc'jixo yu:zc'zisho yu:zc'zisyo yu:zc'zixo yu:zc-jisho yu:zc-jisyo yu:zc-jixo yu:zc-zisho yu:zc-zisyo yu:zc-zixo yu:zc:jisho yu:zc:jisyo yu:zc:jixo yu:zc:zisho yu:zc:zisyo yu:zc:zixo yu:zc^jisho yu:zc^jisyo yu:zc^jixo yu:zc^zisho yu:zc^zisyo yu:zc^zixo yu^za'jisho yu^za'jisyo yu^za'jixo yu^za'zisho yu^za'zisyo yu^za'zixo yu^za-jisho yu^za-jisyo yu^za-jixo yu^za-zisho yu^za-zisyo yu^za-zixo yu^za:jisho yu^za:jisyo yu^za:jixo yu^za:zisho yu^za:zisyo yu^za:zixo yu^za^jisho yu^za^jisyo yu^za^jixo yu^za^zisho yu^za^zisyo yu^za^zixo yu^zc'jisho yu^zc'jisyo yu^zc'jixo yu^zc'zisho yu^zc'zisyo yu^zc'zixo yu^zc-jisho yu^zc-jisyo yu^zc-jixo yu^zc-zisho yu^zc-zisyo yu^zc-zixo yu^zc:jisho yu^zc:jisyo yu^zc:jixo yu^zc:zisho yu^zc:zisyo yu^zc:zixo yu^zc^jisho yu^zc^jisyo yu^zc^jixo yu^zc^zisho yu^zc^zisyo yu^zc^zixo"
|
190
214
|
(. (hash-table-get db "dummy::(yu)" #f) force_encoding "UTF-8"))
|
191
215
|
|
216
|
+
(test* "db fetch 12"
|
217
|
+
"する"
|
218
|
+
(. (hash-table-get db "dummy::=sr" #f) force_encoding "UTF-8"))
|
219
|
+
|
220
|
+
(test* "db fetch 13"
|
221
|
+
"=suru"
|
222
|
+
(. (hash-table-get db "dummy::{su}" #f) force_encoding "UTF-8"))
|
223
|
+
|
224
|
+
(test* "db fetch 14"
|
225
|
+
"したい"
|
226
|
+
(. (hash-table-get db "dummy::=shitai" #f) force_encoding "UTF-8"))
|
227
|
+
|
228
|
+
(test* "db fetch 15"
|
229
|
+
"=shita =shitai"
|
230
|
+
(. (hash-table-get db "dummy::{sh}" #f) force_encoding "UTF-8"))
|
231
|
+
|
232
|
+
|
233
|
+
|
192
234
|
|
193
235
|
;;-------------------------------------------------------------------
|
194
236
|
(test-section "Replace user dict entry")
|
@@ -240,103 +282,158 @@
|
|
240
282
|
'((1.0 "henkan") (0.981 "henkann") (0.9722 "henka") (0.9611 "hennka") (0.9444 "henkaq"))
|
241
283
|
(my-round-map
|
242
284
|
(arr->list
|
243
|
-
(a-search.search userid kvs "henkan"
|
285
|
+
(a-search.search userid kvs "henkan" "k"))))
|
244
286
|
|
245
287
|
(test* "search keyword list 2"
|
246
288
|
'((1.0 "hennkan") (0.9875 "hennkann") (0.981 "hennka") (0.9667 "henkann") (0.9619 "hennkaq") (0.9421 "hennkanenzin") (0.9421 "hennkanenjiq") (0.9421 "hennkaneqjin") (0.9421 "hennkaneqjiq") (0.9421 "hennkaneqzin") (0.9421 "hennkaneqziq") (0.9421 "hennkanenjin") (0.9421 "hennkanenziq"))
|
247
289
|
(my-round-map
|
248
290
|
(arr->list
|
249
|
-
(a-search.search userid kvs "hennkan"
|
291
|
+
(a-search.search userid kvs "hennkan" "k"))))
|
250
292
|
|
251
293
|
(test* "search keyword list 3"
|
252
294
|
'((1.0 "henkann") (0.981 "henkan") (0.9667 "hennkan") (0.9524 "henka") (0.9449 "henkanenzin") (0.9449 "henkanenziq") (0.9449 "henkaneqjin") (0.9449 "henkaneqzin") (0.9449 "henkanenjiq") (0.9449 "henkanenjin") (0.9421 "henkannenzin") (0.9421 "henkannenjiq") (0.9421 "henkannenziq") (0.9421 "henkanneqjin") (0.9421 "henkanneqjiq") (0.9421 "henkanneqzin") (0.9421 "henkanneqziq") (0.9421 "henkannenjin"))
|
253
295
|
(my-round-map
|
254
296
|
(arr->list
|
255
|
-
(a-search.search userid kvs "henkann"
|
297
|
+
(a-search.search userid kvs "henkann" "k"))))
|
256
298
|
|
257
299
|
(test* "search keyword list 4"
|
258
300
|
'((1.0 "hennkann") (0.9875 "hennkan") (0.9667 "hennka") (0.9537 "hennkanenjiq") (0.9537 "hennkanenjin") (0.9537 "hennkanenzin") (0.9537 "hennkaneqjin") (0.9537 "hennkanenziq") (0.9537 "hennkaneqzin") (0.9507 "hennkannenjiq") (0.9507 "hennkannenzin") (0.9507 "hennkannenjin") (0.9507 "hennkanneqziq") (0.9507 "hennkanneqzin") (0.9507 "hennkannenziq") (0.9507 "hennkanneqjin") (0.9507 "hennkanneqjiq") (0.9476 "hennkaq") (0.9444 "hennkaqenjin") (0.9444 "hennkaqenzin") (0.9408 "hennkanenjinn") (0.9408 "hennkaneqzinn") (0.9408 "hennkanennjin") (0.9408 "hennkaneqjinn") (0.9408 "hennkanennjiq") (0.9408 "hennkanennzin") (0.9408 "hennkanenzinn") (0.9408 "hennkanennziq"))
|
259
301
|
(my-round-map
|
260
302
|
(arr->list
|
261
|
-
(a-search.search userid kvs "hennkann"
|
303
|
+
(a-search.search userid kvs "hennkann" "k"))))
|
262
304
|
|
263
305
|
(test* "search keyword list 5"
|
264
306
|
'((1.0 "heqkaq") (0.9722 "heqka") (0.9444 "heqkan"))
|
265
307
|
(my-round-map
|
266
308
|
(arr->list
|
267
|
-
(a-search.search userid kvs "heqkaq"
|
309
|
+
(a-search.search userid kvs "heqkaq" "k"))))
|
268
310
|
|
269
311
|
(test* "search keyword list 6"
|
270
312
|
'((0.9762 "henkaS") (0.9667 "hennkaS"))
|
271
313
|
(my-round-map
|
272
314
|
(arr->list
|
273
|
-
(a-search.search userid kvs "henkanS"
|
315
|
+
(a-search.search userid kvs "henkanS" "K"))))
|
274
316
|
|
275
317
|
(test* "search keyword list 7"
|
276
318
|
'((0.9833 "hennkaS") (0.9417 "henkaS"))
|
277
319
|
(my-round-map
|
278
320
|
(arr->list
|
279
|
-
(a-search.search userid kvs "hennkanS"
|
321
|
+
(a-search.search userid kvs "hennkanS" "K"))))
|
280
322
|
|
281
323
|
(test* "approximate search 1"
|
282
324
|
'((1.0 "henka" "Cへんか") (0.9722 "henkan" "Cへんかん") (0.9722 "henkaq" "Cへんかん") (0.9611 "hennka" "Cへんか") (0.9524 "henkann" "Cへんかん"))
|
283
325
|
(my-round-map
|
284
|
-
(approximate-search userid kvs "henka"
|
326
|
+
(approximate-search userid kvs "henka" "k" 0)))
|
285
327
|
|
286
328
|
(test* "approximate search 2"
|
287
329
|
'((1.1 "hennka" "Cへんか") (1.079 "hennkaq" "Cへんかん") (1.079 "hennkan" "Cへんかん") (1.0633 "hennkann" "Cへんかん") (0.9611 "henkan" "Cへんかん") (0.9611 "henka" "Cへんか"))
|
288
330
|
(my-round-map
|
289
|
-
(approximate-search userid kvs "hennka"
|
331
|
+
(approximate-search userid kvs "hennka" "k" 0)))
|
290
332
|
|
291
333
|
(test* "approximate search 3"
|
292
334
|
'((0.9722 "henka" "Cへんか") (0.9444 "henkan" "Cへんかん") (0.9444 "henkaq" "Cへんかん"))
|
293
335
|
(my-round-map
|
294
|
-
(approximate-search userid kvs "henkaS"
|
336
|
+
(approximate-search userid kvs "henkaS" "k" 0)))
|
295
337
|
|
296
338
|
(test* "approximate search 4"
|
297
339
|
'((1.079 "hennka" "Cへんか") (1.0581 "hennkan" "Cへんかん") (1.0581 "hennkaq" "Cへんかん") (1.0424 "hennkann" "Cへんかん"))
|
298
340
|
(my-round-map
|
299
|
-
(approximate-search userid kvs "hennkaS"
|
341
|
+
(approximate-search userid kvs "hennkaS" "k" 0)))
|
300
342
|
|
301
343
|
(test* "approximate search 5"
|
302
344
|
'((1.0 "kani" "Cかに") (1.0 "kani" "Cかんい") (0.9533 "kanni" "Cかんい"))
|
303
345
|
(my-round-map
|
304
|
-
(approximate-search userid kvs "kani"
|
346
|
+
(approximate-search userid kvs "kani" "k" 0)))
|
305
347
|
|
306
348
|
(test* "approximate search 6"
|
307
349
|
'((1.1 "hennka" "Cへんか"))
|
308
350
|
(my-round-map
|
309
|
-
(approximate-search userid kvs "hennka"
|
351
|
+
(approximate-search userid kvs "hennka" "k" 1)))
|
310
352
|
|
311
353
|
(test* "approximate search 7"
|
312
354
|
'((1.1 "hennka" "Cへんか") (1.079 "hennkaq" "Cへんかん"))
|
313
355
|
(my-round-map
|
314
|
-
(approximate-search userid kvs "hennka"
|
356
|
+
(approximate-search userid kvs "hennka" "k" 2)))
|
315
357
|
|
316
358
|
(test* "approximate search 8"
|
317
359
|
'((1.0 "saki" "Cさき"))
|
318
360
|
(my-round-map
|
319
|
-
(approximate-search userid kvs "saki"
|
361
|
+
(approximate-search userid kvs "saki" "k" 0)))
|
320
362
|
|
321
363
|
(test* "approximate search 9"
|
322
364
|
'((1.0 "inyou" "Cいんよう") (0.9556 "innyou" "Cいんにょう") (0.9556 "innyou" "Cいんよう") (0.9429 "inyowhu" "Cいんよう"))
|
323
365
|
(my-round-map
|
324
|
-
(approximate-search userid kvs "inyou"
|
366
|
+
(approximate-search userid kvs "inyou" "k" 0)))
|
325
367
|
|
326
368
|
(test* "approximate search 10"
|
327
369
|
'((1.1 "innyou" "Cいんよう") (1.0542 "innyowhu" "Cいんよう") (1.0 "innyou" "Cいんにょう") (0.9667 "innnyou" "Cいんにょう") (0.9583 "innyowhu" "Cいんにょう") (0.9556 "inyou" "Cいんよう"))
|
328
370
|
(my-round-map
|
329
|
-
(approximate-search userid kvs "innyou"
|
371
|
+
(approximate-search userid kvs "innyou" "k" 0)))
|
330
372
|
|
331
373
|
(test* "approximate search 11"
|
332
374
|
'((0.981 "#gyoume" "C#ぎょうめ"))
|
333
375
|
(my-round-map
|
334
|
-
(approximate-search userid kvs "#gyoum"
|
376
|
+
(approximate-search userid kvs "#gyoum" "k" 0)))
|
335
377
|
|
336
378
|
(test* "approximate search 12"
|
337
379
|
'((0.9722 "#kara#" "C#から#"))
|
338
380
|
(my-round-map
|
339
|
-
(approximate-search userid kvs "#kara"
|
381
|
+
(approximate-search userid kvs "#kara" "k" 0)))
|
382
|
+
|
383
|
+
(test* "approximate search 13"
|
384
|
+
'((1.0 "=shita" "した") (0.981 "=shitai" "したい"))
|
385
|
+
(my-round-map
|
386
|
+
(approximate-search userid kvs "=shita" "h" 0)))
|
387
|
+
|
388
|
+
(test* "approximate search 14"
|
389
|
+
'()
|
390
|
+
(my-round-map
|
391
|
+
(approximate-search userid kvs "=shit" "h" 0)))
|
392
|
+
|
393
|
+
(test* "approximate search 15"
|
394
|
+
'((1.0 "=sr" "する"))
|
395
|
+
(my-round-map
|
396
|
+
(approximate-search userid kvs "=sr" "h" 0)))
|
397
|
+
|
398
|
+
(test* "approximate search 16"
|
399
|
+
'()
|
400
|
+
(my-round-map
|
401
|
+
(approximate-search userid kvs "=sur" "h" 0)))
|
402
|
+
|
403
|
+
(test* "approximate search 17"
|
404
|
+
'()
|
405
|
+
(my-round-map
|
406
|
+
(approximate-search userid kvs "=sure" "h" 0)))
|
407
|
+
|
408
|
+
(test* "approximate search 18"
|
409
|
+
'((0.9926 "=narimasd" "なりません") (0.9926 "=narimast" "なりました") (0.9926 "=narimasu" "なります") (0.9867 "=narimaseq" "なりません") (0.9867 "=narimasen" "なりません") (0.9833 "=narims" "なります") (0.9752 "=narimasenn" "なりません") (0.9752 "=narimasita" "なりました"))
|
410
|
+
(my-round-map
|
411
|
+
(approximate-search userid kvs "=narimas" "h" 0)))
|
412
|
+
|
413
|
+
(test* "approximate search 19"
|
414
|
+
'((0.9967 "=narimaseq" "なりません") (0.9967 "=narimasen" "なりません") (0.989 "=narimasenn" "なりません") (0.9852 "=narimasd" "なりません") (0.9852 "=narimast" "なりました") (0.9852 "=narimasu" "なります"))
|
415
|
+
(my-round-map
|
416
|
+
(approximate-search userid kvs "=narimase" "h" 0)))
|
417
|
+
|
418
|
+
(test* "approximate search 20"
|
419
|
+
'((0.9852 "=narimasd" "なりません") (0.9852 "=narimast" "なりました") (0.9852 "=narimasu" "なります") (0.9793 "=narimasen" "なりません") (0.9793 "=narimaseq" "なりません") (0.9792 "=narimashita" "なりました"))
|
420
|
+
(my-round-map
|
421
|
+
(approximate-search userid kvs "=narimash" "h" 0)))
|
422
|
+
|
423
|
+
(test* "approximate search 21"
|
424
|
+
'((0.9907 "=narimashita" "なりました") (0.9793 "=narimasd" "なりません") (0.9793 "=narimast" "なりました") (0.9793 "=narimasu" "なります"))
|
425
|
+
(my-round-map
|
426
|
+
(approximate-search userid kvs "=narimashi" "h" 0)))
|
427
|
+
|
428
|
+
(test* "approximate search 22"
|
429
|
+
'((1.0 "=natteiru" "なっている"))
|
430
|
+
(my-round-map
|
431
|
+
(approximate-search userid kvs "=natteiru" "h" 0)))
|
432
|
+
|
433
|
+
(test* "approximate search 23"
|
434
|
+
'((0.9778 "=natteiru" "なっている"))
|
435
|
+
(my-round-map
|
436
|
+
(approximate-search userid kvs "=nateiru" "h" 0)))
|
340
437
|
|
341
438
|
|
342
439
|
;;-------------------------------------------------------------------
|
@@ -434,6 +531,10 @@
|
|
434
531
|
'(("ユーザー辞書" #f "ゆーざーじしょ" j))
|
435
532
|
(henkan-okuri-nashi userid kvs "Yu'za'jisyo" 0))
|
436
533
|
|
534
|
+
(test* "okuri nashi 24"
|
535
|
+
'(("組み込み" #f "くみこみ" j) ("組込" #f "くみこみ" j))
|
536
|
+
(henkan-okuri-nashi userid kvs "Kumikomu" 0))
|
537
|
+
|
437
538
|
;;-------------------------------------------------------------------
|
438
539
|
(test-section "henkan (okuri nashi and number)")
|
439
540
|
|
@@ -496,37 +597,37 @@
|
|
496
597
|
(test* "approximate search 1"
|
497
598
|
'((1.0 "henkaS" "Cへんかs") (0.9667 "hennkaS" "Cへんかs"))
|
498
599
|
(my-round-map
|
499
|
-
(approximate-search userid kvs "henkaS"
|
600
|
+
(approximate-search userid kvs "henkaS" "K" 0)))
|
500
601
|
|
501
602
|
(test* "approximate search 2"
|
502
603
|
'((0.9762 "henkaS" "Cへんかs") (0.9667 "hennkaS" "Cへんかs"))
|
503
604
|
(my-round-map
|
504
|
-
(approximate-search userid kvs "henkanS"
|
605
|
+
(approximate-search userid kvs "henkanS" "K" 0)))
|
505
606
|
|
506
607
|
(test* "approximate search 3"
|
507
608
|
'((1.0 "okonaU" "Cおこなu"))
|
508
609
|
(my-round-map
|
509
|
-
(approximate-search userid kvs "okonaU"
|
610
|
+
(approximate-search userid kvs "okonaU" "K" 0)))
|
510
611
|
|
511
612
|
(test* "approximate search 4"
|
512
613
|
'((0.9667 "okonaU" "Cおこなu"))
|
513
614
|
(my-round-map
|
514
|
-
(approximate-search userid kvs "okonU"
|
615
|
+
(approximate-search userid kvs "okonU" "K" 0)))
|
515
616
|
|
516
617
|
(test* "approximate search 5"
|
517
618
|
'((1.0 "eR" "Cえr"))
|
518
619
|
(my-round-map
|
519
|
-
(approximate-search userid kvs "eR"
|
620
|
+
(approximate-search userid kvs "eR" "K" 0)))
|
520
621
|
|
521
622
|
(test* "approximate search 6"
|
522
623
|
'((1.0 "henkaS" "Cへんかs"))
|
523
624
|
(my-round-map
|
524
|
-
(approximate-search userid kvs "henkaS"
|
625
|
+
(approximate-search userid kvs "henkaS" "K" 1)))
|
525
626
|
|
526
627
|
(test* "approximate search 7"
|
527
628
|
'((1.0 "henkaS" "Cへんかs") (0.9667 "hennkaS" "Cへんかs"))
|
528
629
|
(my-round-map
|
529
|
-
(approximate-search userid kvs "henkaS"
|
630
|
+
(approximate-search userid kvs "henkaS" "K" 2)))
|
530
631
|
|
531
632
|
|
532
633
|
(test* "okuri ari 1"
|
@@ -631,45 +732,110 @@
|
|
631
732
|
(test-section "henkan hiragana")
|
632
733
|
|
633
734
|
(test* "hiragana 1"
|
634
|
-
'(("あいうえお" #f "aiueo" h) ("アイウエオ" #f "aiueo" k))
|
635
|
-
(henkan-hiragana kvs "aiueo" :normal))
|
735
|
+
'(("あいうえお" #f "aiueo" h) ("あいうえお" #f "aiueo" h) ("アイウエオ" #f "aiueo" k))
|
736
|
+
(henkan-hiragana userid kvs "aiueo" :normal))
|
636
737
|
|
637
738
|
(test* "hiragana 2"
|
638
739
|
'(("の" #f "no" h) ("ノ" #f "no" k))
|
639
|
-
(henkan-hiragana kvs "no" :normal))
|
740
|
+
(henkan-hiragana userid kvs "no" :normal))
|
640
741
|
|
641
742
|
(test* "hiragana 3"
|
642
743
|
'(("b" #f "b" j))
|
643
|
-
(henkan-hiragana kvs "b" :normal))
|
744
|
+
(henkan-hiragana userid kvs "b" :normal))
|
644
745
|
|
645
746
|
(test* "hiragana 4"
|
646
747
|
'(("if" #f "if" j))
|
647
|
-
(henkan-hiragana kvs "if" :normal))
|
748
|
+
(henkan-hiragana userid kvs "if" :normal))
|
648
749
|
|
649
750
|
(test* "hiragana 5"
|
650
751
|
'(("ぁぃぅぇぉゃゅょ" #f "lalilulelolyalyulyo" h) ("ァィゥェォャュョ" #f "lalilulelolyalyulyo" k))
|
651
|
-
(henkan-hiragana kvs "lalilulelolyalyulyo" :normal))
|
752
|
+
(henkan-hiragana userid kvs "lalilulelolyalyulyo" :normal))
|
652
753
|
|
653
754
|
(test* "hiragana 6"
|
654
755
|
'(("ぁぃぅぇぉゃゅょ" #f "xaxixuxexoxyaxyuxyo" h) ("ァィゥェォャュョ" #f "xaxixuxexoxyaxyuxyo" k) ("しゃししゅしぇしょゃゅょ" #f "xaxixuxexoxyaxyuxyo" h) ("シャシシュシェショャュョ" #f "xaxixuxexoxyaxyuxyo" k))
|
655
|
-
(henkan-hiragana kvs "xaxixuxexoxyaxyuxyo" :normal))
|
756
|
+
(henkan-hiragana userid kvs "xaxixuxexoxyaxyuxyo" :normal))
|
656
757
|
|
657
758
|
(test* "hiragana 7"
|
658
759
|
'(("することです" #f "srktds" h) ("スルコトデス" #f "srktds" k))
|
659
|
-
(henkan-hiragana kvs "srktds" :normal))
|
760
|
+
(henkan-hiragana userid kvs "srktds" :normal))
|
660
761
|
|
661
762
|
(test* "hiragana 8"
|
662
763
|
'(("することです" #f "srktds" h) ("スルコトデス" #f "srktds" k))
|
663
|
-
(henkan-hiragana kvs "srktds" :azik))
|
764
|
+
(henkan-hiragana userid kvs "srktds" :azik))
|
664
765
|
|
665
766
|
(test* "hiragana 9"
|
666
767
|
'(("んんあんはんわんぱ" #f "nqanhanwanpa" h) ("ンンアンハンワンパ" #f "nqanhanwanpa" k) ("ないあぬうあねいあのうあ" #f "nqanhanwanpa" h) ("ナイアヌウアネイアノウア" #f "nqanhanwanpa" k))
|
667
|
-
(henkan-hiragana kvs "nqanhanwanpa" :normal))
|
768
|
+
(henkan-hiragana userid kvs "nqanhanwanpa" :normal))
|
668
769
|
|
669
770
|
(test* "hiragana 10"
|
670
771
|
'(("ないあぬうあねいあのうあ" #f "nqanhanwanpa" h) ("ナイアヌウアネイアノウア" #f "nqanhanwanpa" k) ("んんあんはんわんぱ" #f "nqanhanwanpa" h) ("ンンアンハンワンパ" #f "nqanhanwanpa" k))
|
671
|
-
(henkan-hiragana kvs "nqanhanwanpa" :azik))
|
772
|
+
(henkan-hiragana userid kvs "nqanhanwanpa" :azik))
|
672
773
|
|
774
|
+
;;-------------------------------------------------------------------
|
775
|
+
(test-section "henkan hiragana-phrase")
|
776
|
+
(test* "hiragana-phrase 1"
|
777
|
+
'(("した" #f "shita" h) ("したい" #f "shita" h) ("した" #f "shita" h) ("シタ" #f "shita" k))
|
778
|
+
(henkan-hiragana userid kvs "shita" :normal))
|
779
|
+
|
780
|
+
(test* "hiragana-phrase 2"
|
781
|
+
'(("shit" #f "shit" j))
|
782
|
+
(henkan-hiragana userid kvs "shit" :normal))
|
783
|
+
|
784
|
+
(test* "hiragana-phrase 3"
|
785
|
+
'(("したい" #f "shitai" h) ("した" #f "shitai" h) ("したい" #f "shitai" h) ("シタイ" #f "shitai" k))
|
786
|
+
(henkan-hiragana userid kvs "shitai" :normal))
|
787
|
+
|
788
|
+
(test* "hiragana-phrase 4"
|
789
|
+
'(("したい" #f "shitaii" h) ("したいい" #f "shitaii" h) ("シタイイ" #f "shitaii" k))
|
790
|
+
(henkan-hiragana userid kvs "shitaii" :normal))
|
791
|
+
|
792
|
+
(test* "hiragana-phrase 5"
|
793
|
+
'(("なります" #f "narimasu" h) ("なりました" #f "narimasu" h) ("なりません" #f "narimasu" h) ("なります" #f "narimasu" h) ("ナリマス" #f "narimasu" k))
|
794
|
+
(henkan-hiragana userid kvs "narimasu" :normal))
|
795
|
+
|
796
|
+
(test* "hiragana-phrase 6"
|
797
|
+
'(("なりました" #f "narimashita" h) ("なりました" #f "narimashita" h) ("ナリマシタ" #f "narimashita" k))
|
798
|
+
(henkan-hiragana userid kvs "narimashita" :normal))
|
799
|
+
|
800
|
+
(test* "hiragana-phrase 7"
|
801
|
+
'(("なりません" #f "narimasen" h) ("なりません" #f "narimasen" h) ("ナリマセン" #f "narimasen" k))
|
802
|
+
(henkan-hiragana userid kvs "narimasen" :normal))
|
803
|
+
|
804
|
+
(test* "hiragana-phrase 8"
|
805
|
+
'(("なりました" #f "narimashitaga" h) ("なりましたが" #f "narimashitaga" h) ("ナリマシタガ" #f "narimashitaga" k))
|
806
|
+
(henkan-hiragana userid kvs "narimashitaga" :normal))
|
807
|
+
|
808
|
+
(test* "hiragana-phrase 9"
|
809
|
+
'(("なりました" #f "narimashitanode" h) ("なりましたので" #f "narimashitanode" h) ("ナリマシタノデ" #f "narimashitanode" k))
|
810
|
+
(henkan-hiragana userid kvs "narimashitanode" :normal))
|
811
|
+
|
812
|
+
(test* "hiragana-phrase 10"
|
813
|
+
'(("なります" #f "narimasumai" h) ("なりますまい" #f "narimasumai" h) ("ナリマスマイ" #f "narimasumai" k))
|
814
|
+
(henkan-hiragana userid kvs "narimasumai" :normal))
|
815
|
+
|
816
|
+
(test* "hiragana-phrase 11"
|
817
|
+
'(("は" #f "ha" h) ("ハ" #f "ha" k))
|
818
|
+
(henkan-hiragana userid kvs "ha" :normal))
|
819
|
+
|
820
|
+
(test* "hiragana-phrase 12"
|
821
|
+
'((("あ" #f "a" h) ("ア" #f "a" k))
|
822
|
+
(("あい" #f "ai" h) ("あい" #f "ai" h) ("アイ" #f "ai" k))
|
823
|
+
(("あいう" #f "aiu" h) ("あいう" #f "aiu" h) ("アイウ" #f "aiu" k))
|
824
|
+
(("あいうえ" #f "aiue" h) ("あいうえ" #f "aiue" h) ("アイウエ" #f "aiue" k))
|
825
|
+
(("あいうえお" #f "aiueo" h) ("あいうえお" #f "aiueo" h) ("アイウエオ" #f "aiueo" k))
|
826
|
+
(("あいうえおか" #f "aiueoka" h) ("あいうえおかき" #f "aiueoka" h) ("あいうえおか" #f "aiueoka" h) ("アイウエオカ" #f "aiueoka" k))
|
827
|
+
(("あいうえおかき" #f "aiueokaki" h) ("あいうえおか" #f "aiueokaki" h) ("あいうえおかき" #f "aiueokaki" h) ("アイウエオカキ" #f "aiueokaki" k))
|
828
|
+
(("あいうえおかき" #f "aiueokakiku" h) ("あいうえおかきく" #f "aiueokakiku" h) ("アイウエオカキク" #f "aiueokakiku" k))
|
829
|
+
(("あいうえおかきくけ" #f "aiueokakikuke" h) ("アイウエオカキクケ" #f "aiueokakikuke" k))
|
830
|
+
(("あいうえおかきくけこ" #f "aiueokakikukeko" h) ("アイウエオカキクケコ" #f "aiueokakikukeko" k)))
|
831
|
+
(map
|
832
|
+
(lambda (query)
|
833
|
+
(henkan-hiragana userid kvs query :normal))
|
834
|
+
'("a" "ai" "aiu" "aiue" "aiueo" "aiueoka" "aiueokaki" "aiueokakiku" "aiueokakikuke" "aiueokakikukeko")))
|
835
|
+
|
836
|
+
(test* "hiragana-phrase 13"
|
837
|
+
'(("なっている" #f "nateiru" h) ("なている" #f "nateiru" h) ("ナテイル" #f "nateiru" k))
|
838
|
+
(henkan-hiragana userid kvs "nateiru" :normal))
|
673
839
|
|
674
840
|
|
675
841
|
;;-------------------------------------------------------------------
|
@@ -1031,7 +1197,7 @@
|
|
1031
1197
|
;;===================================================================
|
1032
1198
|
|
1033
1199
|
;; ---後処理---
|
1034
|
-
(
|
1200
|
+
(kvs.close)
|
1035
1201
|
;; 最終的な辞書の状態を目視するためのダンプ
|
1036
1202
|
(dump-sekka-jisyo-f STDOUT target)
|
1037
1203
|
|
data/test/jisyo.nnd
CHANGED
@@ -82,6 +82,11 @@
|
|
82
82
|
'(("kaT" . "Cかt") ("かt" . "/勝/買/飼/刈;(crop)草を刈る/狩;(hunt)兎を狩る/且;-つて/駆;バイクを駆って/交;飛び交って/克;(overcome) 逆境に克つ/糅;-てて加えて/苅;「刈」の異体字/驅;「駆」の旧字/支;ささえる/上;(尺八の)カリ⇔減り(メリ)/搗") ("ka`" . "Cか`") ("か`" . "/勝/買/飼/刈;(crop)草を刈る/狩;(hunt)兎を狩る/且;-つて/駆;バイクを駆って/交;飛び交って/克;(overcome) 逆境に克つ/糅;-てて加えて/苅;「刈」の異体字/驅;「駆」の旧字/支;ささえる/上;(尺八の)カリ⇔減り(メリ)/搗") ("ka+" . "Cか+") ("か+" . "/勝/買/飼/刈;(crop)草を刈る/狩;(hunt)兎を狩る/且;-つて/駆;バイクを駆って/交;飛び交って/克;(overcome) 逆境に克つ/糅;-てて加えて/苅;「刈」の異体字/驅;「駆」の旧字/支;ささえる/上;(尺八の)カリ⇔減り(メリ)/搗"))
|
83
83
|
(expand-okuri-ari-entry "か" "t" "/勝/買/飼/刈;(crop)草を刈る/狩;(hunt)兎を狩る/且;-つて/駆;バイクを駆って/交;飛び交って/克;(overcome) 逆境に克つ/糅;-てて加えて/苅;「刈」の異体字/驅;「駆」の旧字/支;ささえる/上;(尺八の)カリ⇔減り(メリ)/搗"))
|
84
84
|
|
85
|
+
(test* "expanding SKK(1) entry to SEKKA(n) entry. (平仮名フレーズ)"
|
86
|
+
'(("=shita" . "した") ("=sita" . "した") ("=st" . "した"))
|
87
|
+
(expand-hiragana-phrase-entry "した"))
|
88
|
+
|
89
|
+
|
85
90
|
(let1 _input (StringIO.new
|
86
91
|
(string-join
|
87
92
|
'(
|
@@ -95,10 +100,11 @@
|
|
95
100
|
"! /!/感嘆符/"
|
96
101
|
"? /?/"
|
97
102
|
"こーひー /コーヒー/"
|
103
|
+
"しました //"
|
98
104
|
)
|
99
105
|
"\n"))
|
100
106
|
(test* "convert SKK file to SEKKA jisyo file. (1)"
|
101
|
-
'("shiroI Cしろi" "siroI Cしろi" "xiroI Cしろi" "しろi /白" "ai Cあい" "あい /愛/哀/相/挨" "Greek /Α/Β/Γ/Δ/Ε/Ζ/Η/Θ/Ι/Κ/Λ/Μ/Ν/Ξ/Ο/Π/Ρ/Σ/Τ/Υ/Φ/Χ/Ψ/Ω" ">an C>あん" ">ann C>あん" ">aq C>あん" ">あん /案" ">yasuI C>やすi" ">やすi /易" "! /!/感嘆符" "? /?" "ko'hi' Cこーひー" "ko'hi- Cこーひー" "ko'hi: Cこーひー" "ko'hi^ Cこーひー" "ko-hi' Cこーひー" "ko-hi- Cこーひー" "ko-hi: Cこーひー" "ko-hi^ Cこーひー" "ko:hi' Cこーひー" "ko:hi- Cこーひー" "ko:hi: Cこーひー" "ko:hi^ Cこーひー" "ko^hi' Cこーひー" "ko^hi- Cこーひー" "ko^hi: Cこーひー" "ko^hi^ Cこーひー" "こーひー /コーヒー")
|
107
|
+
'("shiroI Cしろi" "siroI Cしろi" "xiroI Cしろi" "しろi /白" "ai Cあい" "あい /愛/哀/相/挨" "Greek /Α/Β/Γ/Δ/Ε/Ζ/Η/Θ/Ι/Κ/Λ/Μ/Ν/Ξ/Ο/Π/Ρ/Σ/Τ/Υ/Φ/Χ/Ψ/Ω" ">an C>あん" ">ann C>あん" ">aq C>あん" ">あん /案" ">yasuI C>やすi" ">やすi /易" "! /!/感嘆符" "? /?" "ko'hi' Cこーひー" "ko'hi- Cこーひー" "ko'hi: Cこーひー" "ko'hi^ Cこーひー" "ko-hi' Cこーひー" "ko-hi- Cこーひー" "ko-hi: Cこーひー" "ko-hi^ Cこーひー" "ko:hi' Cこーひー" "ko:hi- Cこーひー" "ko:hi: Cこーひー" "ko:hi^ Cこーひー" "ko^hi' Cこーひー" "ko^hi- Cこーひー" "ko^hi: Cこーひー" "ko^hi^ Cこーひー" "こーひー /コーヒー" "=shimashita しました" "=shimasita しました" "=shimast しました" "=simashita しました" "=simasita しました" "=simast しました" "=ximast しました")
|
102
108
|
(convert-skk-jisyo-f _input)))
|
103
109
|
|
104
110
|
;;===================================================================
|