sekka 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,464 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; henkan.nnd - 変換エンジンのコア
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+ (use srfi-1)
37
+ (use sekka.util)
38
+ (use sekka.convert-jisyo)
39
+ (use sekka.jisyo-db)
40
+ (use sekka.alphabet-lib)
41
+ (require "sekka/approximatesearch")
42
+
43
+
44
+ ;; 曖昧検索の評価値にヒューリスティックを加える
45
+ ;; "nn" のように n が重なるほど、"ん" の可能性が強くなる。
46
+ ;; "にゃ" "にゅ" "にょ" が出現した場合は、n がそちらの解釈に取られたものと解釈してその件数分引いておく。
47
+ (define (nn-heuristics keyword lst)
48
+ (map
49
+ (lambda (x)
50
+ (let ((point (first x))
51
+ (roman (second x))
52
+ (target (third x)))
53
+ (let* ((nn-number (min
54
+ (vector-length (keyword.scan #/nn/))
55
+ (vector-length (roman.scan #/nn/))
56
+ (- (vector-length (target.scan #/ん/))
57
+ (vector-length (target.scan #/[ゃゅょ]/)))))
58
+ (nn-number (max 0 nn-number)))
59
+ (list
60
+ (if (= 0 nn-number)
61
+ point
62
+ (* point (* 1.1 nn-number)))
63
+ roman
64
+ target))))
65
+ lst))
66
+
67
+
68
+ (define (my-append-many lst)
69
+ (let1 result '()
70
+ (for-each
71
+ (lambda (x)
72
+ (set! result (append result x)))
73
+ lst)
74
+ result))
75
+
76
+
77
+ ;; 曖昧検索
78
+ ;; リストで返す。 (ソート済み検索結果)
79
+ (define (approximate-search userid kvs keyword okuri-ari-flag limit)
80
+ (define jarow-shikii 0.94)
81
+ (let* ((a-search (ApproximateSearch.new jarow-shikii))
82
+ (lst (my-append-many
83
+ (map
84
+ (lambda (x)
85
+ (let* ((val (vector-ref x 0))
86
+ (k (vector-ref x 1))
87
+ (v (dict-get userid kvs k ""))
88
+ (v-pair (string-split-first-and-rest v))
89
+ (v-first (car v-pair))
90
+ (v-rest (cdr v-pair)))
91
+ (map
92
+ (lambda (element)
93
+ (list val k (+ v-first element)))
94
+ (to-list (v-rest.split "/")))))
95
+ (to-list (a-search.search userid kvs keyword okuri-ari-flag)))))
96
+ (nn-heuristics-result (nn-heuristics keyword lst))
97
+ (sorted-lst (sort-by nn-heuristics-result (lambda (item) (- 1.0 (car item))))))
98
+ (if (= limit 0)
99
+ sorted-lst
100
+ (take* sorted-lst limit))))
101
+
102
+
103
+
104
+ ;; 厳密検索
105
+ (define (exact-search userid kvs keyword)
106
+ (let1 v (dict-get userid kvs keyword #f)
107
+ (if v
108
+ `((1.0 ,keyword ,v))
109
+ '())))
110
+
111
+
112
+ ;; split "/a;annotation/b/c/" into `(("a" "annotation" ,src) ("b" #f ,src) ("c" #f ,src)) alist
113
+ (define (split-henkan-kouho str src . okuri)
114
+ (define (append-j-type lst)
115
+ (map
116
+ (lambda (x)
117
+ (append x (list 'j)))
118
+ lst))
119
+
120
+ (let* ((trimmed (cond
121
+ ((rxmatch #/^[\/](.+)$/ str)
122
+ => (lambda (m)
123
+ (rxmatch-substring m 1)))
124
+ (else
125
+ "")))
126
+ (splitted (filter
127
+ (lambda (str) (< 0 str.size))
128
+ (to-list (trimmed.split "/"))))
129
+ (okuri (get-optional okuri "")))
130
+ ;; pickup annotation
131
+ (append-j-type
132
+ (map
133
+ (lambda (entry)
134
+ (let1 vec (entry.split ";")
135
+ (list (+ (vector-ref vec 0) okuri)
136
+ (vector-ref vec 1 #f)
137
+ src)))
138
+ splitted))))
139
+
140
+
141
+ ;; 送り仮名なしの変換
142
+ (define (henkan-okuri-nashi userid kvs keyword limit)
143
+ (let* ((result (approximate-search userid kvs keyword #f limit))
144
+ (kouho (uniq (map
145
+ (lambda (x)
146
+ (third x))
147
+ result))))
148
+ (let1 lst
149
+ (append-map
150
+ (lambda (value)
151
+ (if-let1 m (rxmatch #/^C(.+)$/ value)
152
+ (split-henkan-kouho
153
+ (dict-get userid kvs (string-drop value 1))
154
+ (rxmatch-substring m 1)
155
+ ) ;; continue to fetch
156
+ (split-henkan-kouho
157
+ value
158
+ keyword)))
159
+ kouho)
160
+ (if (= limit 0)
161
+ lst
162
+ (take* lst limit)))))
163
+
164
+
165
+
166
+ ;; 送り仮名ありの変換
167
+ (define (henkan-okuri-ari userid kvs keyword limit roman-method)
168
+ (let* ((keyword (let1 _pair (string-split-first-and-rest keyword)
169
+ (+ (sekka-downcase (car _pair))
170
+ (cdr _pair))))
171
+ (m (rxmatch #/^([a-z])([a-z\-^]*)([A-Z`+])([a-zA-Z]*)$/ keyword)))
172
+ (if (not m)
173
+ '() ;; 変換候補無し
174
+ (let* ((result (approximate-search userid kvs
175
+ (+ (rxmatch-substring m 1)
176
+ (rxmatch-substring m 2)
177
+ (rxmatch-substring m 3))
178
+ #t
179
+ limit))
180
+ (okurigana-lst (gen-roman->hiragana (sekka-downcase
181
+ (+
182
+ (rxmatch-substring m 3)
183
+ (rxmatch-substring m 4)))
184
+ roman-method))
185
+ (okurigana-lst (if (null? okurigana-lst)
186
+ '("")
187
+ okurigana-lst))
188
+ (kouho (uniq (map
189
+ (lambda (x)
190
+ (third x))
191
+ result))))
192
+ (let1 lst
193
+ (append-map
194
+ (lambda (value)
195
+ (append-map
196
+ (lambda (x)
197
+ (if-let1 m (rxmatch #/^C(.+)$/ value)
198
+ (split-henkan-kouho
199
+ (dict-get userid kvs (string-drop value 1))
200
+ (rxmatch-substring m 1)
201
+ x) ;; continue to fetch
202
+ (split-henkan-kouho value keyword x)))
203
+ okurigana-lst))
204
+ kouho)
205
+ (if (= limit 0)
206
+ lst
207
+ (take* lst limit)))))))
208
+
209
+
210
+ ;; 平仮名の変換
211
+ (define (henkan-hiragana kvs keyword roman-method)
212
+ (let* ((str (sekka-downcase keyword))
213
+ (hira (gen-roman->hiragana str roman-method))
214
+ (kata (gen-roman->katakana str roman-method)))
215
+ (if (null? hira)
216
+ `((,keyword #f ,keyword j))
217
+ (append-map (lambda (h k)
218
+ `(
219
+ (,h #f ,keyword h)
220
+ (,k #f ,keyword k)))
221
+ hira
222
+ kata))))
223
+
224
+
225
+ ;; アルファベットの単純変換
226
+ (define (henkan-alphabet kvs keyword)
227
+ (let* ((zen (gen-alphabet-han->zen keyword))
228
+ (han (gen-alphabet-zen->han keyword)))
229
+ `(
230
+ (,zen #f ,keyword z)
231
+ (,han #f ,keyword l))))
232
+
233
+
234
+ ;; 記号を含むキーワードの変換
235
+ (define (henkan-non-kanji userid kvs keyword)
236
+ (let* ((result (exact-search userid kvs keyword))
237
+ (kouho (map
238
+ (lambda (x)
239
+ (third x))
240
+ result)))
241
+ (append-map
242
+ (lambda (value)
243
+ (split-henkan-kouho value keyword))
244
+ kouho)))
245
+
246
+
247
+ ;; keyword には ローマ字のみを受け付ける
248
+ (define (sekka-henkan userid kvs keyword limit roman-method)
249
+ ;; 変換候補にindex番号を付加する
250
+ (define (append-index-number kouho-list)
251
+ (let1 count 0
252
+ (map
253
+ (lambda (x)
254
+ (begin0
255
+ (append x (list count))
256
+ (set! count (+ count 1))))
257
+ kouho-list)))
258
+
259
+ ;; キーワードの種別で処理を分割する
260
+ (append-index-number
261
+ (cond
262
+ ((rxmatch #/[A-Z`+]/ keyword)
263
+ (let1 k (string-downcase-first keyword)
264
+ (cond
265
+ ((rxmatch #/[a-z][A-Z`+]/ k)
266
+ ;; 送りあり
267
+ ;;(let1 result (approximate-search db k #t) (disp-search-result result))
268
+ (append
269
+ (append
270
+ (henkan-okuri-ari userid kvs k limit roman-method)
271
+ (if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
272
+ '()
273
+ (henkan-hiragana kvs (sekka-downcase k) roman-method)))
274
+ (henkan-alphabet kvs keyword)))
275
+ (else
276
+ ;; 送りなし
277
+ ;;(let1 result (approximate-search db k #f) (disp-search-result result))
278
+ (append
279
+ (append
280
+ (henkan-okuri-nashi userid kvs k limit)
281
+ (if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
282
+ '()
283
+ (henkan-hiragana kvs (sekka-downcase k) roman-method)))
284
+ (henkan-alphabet kvs keyword))))))
285
+ ((not (null? (gen-roman->hiragana keyword roman-method)))
286
+ (append
287
+ (append
288
+ (henkan-hiragana kvs keyword roman-method)
289
+ (henkan-alphabet kvs keyword))
290
+ (henkan-okuri-nashi userid kvs keyword limit)))
291
+ (else
292
+ (append
293
+ (henkan-non-kanji userid kvs keyword)
294
+ (henkan-alphabet kvs keyword))))))
295
+
296
+
297
+
298
+ ;; conversion #f => nil for EmacsLisp
299
+ (define (sekkaHenkan userid kvs cachesv keyword limit roman-method)
300
+ (define cache-exp-second (* 60 60))
301
+ (let* ((keyword (keyword.strip))
302
+ (sekka-keyword (+ "sekka::" roman-method "::" (limit.to_s) "::" keyword)))
303
+ (if-let1 fetched (and cachesv
304
+ (cachesv.get sekka-keyword #f))
305
+ (begin
306
+ keyword
307
+ (read-from-string fetched))
308
+ (let1 henkan-result (map
309
+ (lambda (x)
310
+ (map (lambda (val) (if val val nil)) x))
311
+ (sekka-henkan userid kvs keyword limit (make-keyword roman-method)))
312
+ (and cachesv
313
+ (not (null? henkan-result))
314
+ (let1 fetched2 (cachesv.get "sekka::(keys)" #f)
315
+ (cachesv.put! sekka-keyword (write-to-string henkan-result) cache-exp-second)
316
+ (cachesv.put! "sekka::(keys)" (if fetched2
317
+ (+ fetched2 " " sekka-keyword)
318
+ sekka-keyword)
319
+ cache-exp-second)
320
+ (if-let1 v (cachesv.get "sekka::(keys)" #f)
321
+ (begin
322
+ #?=(v.size)
323
+ #?=v)
324
+ #f)))
325
+ henkan-result))))
326
+ ;; Export to Ruby world
327
+ (export-to-ruby sekkaHenkan)
328
+
329
+
330
+ ;; Flush henkan-result cache data on cachesv(memcached)
331
+ (define (flush-cachesv cachesv)
332
+ #?="--- do (flush-cachesv) ---"
333
+ (if-let1 fetched (cachesv.get "sekka::(keys)" #f)
334
+ (begin
335
+ (for-each
336
+ (lambda (x)
337
+ (cachesv.delete #?=x))
338
+ (to-list (fetched.split #/[ ]+/)))
339
+ (cachesv.delete "sekka::(keys)"))
340
+ #f))
341
+
342
+
343
+ ;; 確定処理: 最終確定語を変換候補の先頭に持ってくる。
344
+ ;; key ... "developer" や "へんかん" など、変換候補レコードのキーとなる文字列
345
+ ;; tango ... "変換" など、変換候補から最終確定した、変換候補の文字列
346
+ ;; 登録失敗したら nil を返す
347
+ (define (sekkaKakutei userid kvs cachesv key tango)
348
+ (define (fetch userid kvs key)
349
+ (dict-get userid kvs key #f))
350
+
351
+ (define (write-user-entry userid kvs key value)
352
+ (kvs.put! (+ userid "::" key) value))
353
+
354
+ (define (join-henkan-kouho lst)
355
+ (+ "/"
356
+ (string-join
357
+ (map
358
+ (lambda (x)
359
+ (if (second x)
360
+ (+ (first x) ";" (second x))
361
+ (first x)))
362
+ lst)
363
+ "/")))
364
+
365
+ (let1 tango (if (is-hiragana-and-okuri key)
366
+ (drop-okuri tango)
367
+ tango)
368
+ (if-let1 kouho-str (fetch userid kvs key)
369
+ (if (rxmatch #/^\// kouho-str)
370
+ (let* ((kouho-lst (split-henkan-kouho kouho-str key))
371
+ (no1
372
+ (filter (lambda (x)
373
+ (eq? (car x) tango))
374
+ kouho-lst))
375
+ (other
376
+ (filter (lambda (x)
377
+ (not (eq? (car x) tango)))
378
+ kouho-lst))
379
+ (new-kouho-str
380
+ (join-henkan-kouho (append no1 other))))
381
+ (if (not (= kouho-str new-kouho-str))
382
+ (begin
383
+ (write-user-entry userid kvs key new-kouho-str)
384
+ (and cachesv
385
+ (flush-cachesv cachesv))
386
+ tango)
387
+ nil))
388
+ nil)
389
+ nil)))
390
+ ;; Export to Ruby world
391
+ (export-to-ruby sekkaKakutei)
392
+
393
+
394
+ ;; ユーザー定義語彙の登録処理
395
+ (define (registerUserJisyo userid kvs dict-line)
396
+ (define user-keylist '())
397
+ (define (insert-to-db sekka-jisyo-data)
398
+ (for-each
399
+ (lambda (entry)
400
+ (let* ((kv (to-list (entry.split #/[ ]+/)))
401
+ (k (first kv))
402
+ (v (second kv)))
403
+ (set! user-keylist (cons k user-keylist))
404
+ (append-entry userid kvs k v)))
405
+ sekka-jisyo-data))
406
+
407
+ (define (_create-ready-made-keylist keylist)
408
+ (receive (okuri-ari-hash
409
+ okuri-nashi-hash)
410
+ (create-2char-hash keylist)
411
+
412
+ ;; OKURI-ARI
413
+ (for-each
414
+ (lambda (key)
415
+ (let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
416
+ (kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
417
+ (kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
418
+ (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
419
+ (hash-table-get okuri-ari-hash key))))
420
+ " "))))
421
+ (hash-table-keys okuri-ari-hash))
422
+
423
+ ;; OKURI-NASHI
424
+ (for-each
425
+ (lambda (key)
426
+ (let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
427
+ (kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
428
+ (kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
429
+ (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
430
+ (hash-table-get okuri-nashi-hash key))))
431
+ " "))))
432
+ (hash-table-keys okuri-nashi-hash))))
433
+
434
+ ;; "ユーザー語彙を"(stored)"にpush!する"
435
+ (define (kvs-push! userid kvs entry-str)
436
+ (let* ((orig (kvs.get (+ userid "::(stored)") "()"))
437
+ (orig (read-from-string orig)))
438
+ (kvs.put! (+ userid "::(stored)") (write-to-string (append orig (list entry-str))))
439
+ entry-str))
440
+
441
+ (cond
442
+ ((memv dict-line (read-from-string (kvs.get (+ userid "::(stored)") "()")))
443
+ #?=(+ "already stored userid=[" userid "] tango=[" dict-line "]")
444
+ #f)
445
+ (else
446
+ (cond
447
+ ((rxmatch #/[ ]+[\/]/ dict-line)
448
+ (let1 lst (convert-skk-jisyo-f (StringIO.new (+ dict-line "\n")))
449
+ (if (null? lst)
450
+ (begin
451
+ #?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
452
+ #f)
453
+ (begin
454
+ (insert-to-db lst)
455
+ (_create-ready-made-keylist user-keylist)
456
+ (kvs-push! userid kvs dict-line)
457
+ #?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
458
+ #t))))
459
+ (else
460
+ #?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
461
+ #f)))))
462
+ ;; Export to Ruby world
463
+ (export-to-ruby registerUserJisyo)
464
+
@@ -0,0 +1,184 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; jisyo-db.nnd - 辞書DBの構築、辞書DBアクセスのライブラリ
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+ (use srfi-1)
37
+ (require "sekka/kvs")
38
+ (use sekka.util)
39
+
40
+ (define masterid "MASTER")
41
+ (define workid "WORK")
42
+
43
+ (define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;")
44
+ (define alphabet-lower-list (to-list (alphabet-string.split "")))
45
+ (define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
46
+ (define alphabet-pairs (append-map
47
+ (lambda (a)
48
+ (map
49
+ (lambda (b)
50
+ (+ a b))
51
+ (append alphabet-upper-list alphabet-lower-list)))
52
+ alphabet-lower-list))
53
+
54
+ ;; KVS type setting
55
+ (define *kvs-type* 'tokyocabinet) ;; default
56
+ (define (set-kvs-type type)
57
+ (if (not (symbol? type))
58
+ (error "Error: set-kvs-type requires symbol argument.")
59
+ (set! *kvs-type* type)))
60
+ (define (get-kvs-type) *kvs-type*)
61
+
62
+
63
+ ;; dict-get search sequence
64
+ ;; (1) try "userid::keyword" key
65
+ ;; (2) try "MASTER::keyword" key
66
+ (define (dict-get userid kvs key . fallback)
67
+ (if-let1 value
68
+ (or (kvs.get (+ userid "::" key) #f)
69
+ (kvs.get (+ masterid "::" key) #f))
70
+ value
71
+ (let1 opt (get-optional fallback #f)
72
+ opt)))
73
+
74
+ (define (create-2char-hash keylist)
75
+ (define okuri-ari-hash (make-hash-table))
76
+ (define okuri-nashi-hash (make-hash-table))
77
+ (define (create-hash keylist)
78
+ (for-each
79
+ (lambda (k)
80
+ (when (rxmatch #/^[a-zA-Z^>-@`;+]+$/ k)
81
+ (let1 sliced (sekka-downcase (k.slice 0 2))
82
+ (when (= 2 sliced.size)
83
+ (if (rxmatch #/[A-Z`+]$/ k)
84
+ (hash-table-push! okuri-ari-hash sliced k)
85
+ (hash-table-push! okuri-nashi-hash sliced k))))))
86
+ keylist))
87
+ (create-hash keylist)
88
+ (values okuri-ari-hash
89
+ okuri-nashi-hash))
90
+
91
+ (define (setup-ready-made-keylist kvs keylist)
92
+ (for-each
93
+ (lambda (key)
94
+ (let1 key (+ masterid "::" key)
95
+ (unless (kvs.get key #f)
96
+ (kvs.put! key ""))))
97
+ alphabet-pairs)
98
+
99
+ (receive (okuri-ari-hash
100
+ okuri-nashi-hash)
101
+ (create-2char-hash keylist)
102
+
103
+ ;; OKURI-ARI
104
+ (for-each
105
+ (lambda (key)
106
+ (kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
107
+ (string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
108
+ (hash-table-keys okuri-ari-hash))
109
+
110
+ ;; OKURI-NASHI
111
+ (for-each
112
+ (lambda (key)
113
+ (kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
114
+ (string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
115
+ (hash-table-keys okuri-nashi-hash))))
116
+
117
+
118
+ (define (append-entry userid kvs key value)
119
+ (let1 key (+ userid "::" key)
120
+ (if-let1 got (kvs.get key #f)
121
+ (let* ((trimmed-a-first1 (car (string-split-first-and-rest got)))
122
+ (trimmed-a (if-let1 m (rxmatch #/^[C\/](.+)$/ got)
123
+ (rxmatch-substring m 1)
124
+ got))
125
+ (trimmed-b-first1 (car (string-split-first-and-rest value)))
126
+ (trimmed-b (if-let1 m (rxmatch #/^[C\/](.+)$/ value)
127
+ (rxmatch-substring m 1)
128
+ value))
129
+ (lst (delete-duplicates (append (to-list (trimmed-a.split "/"))
130
+ (to-list (trimmed-b.split "/"))))))
131
+ (if (= trimmed-a-first1 trimmed-b-first1)
132
+ (kvs.put! key (+ trimmed-a-first1 (string-join lst "/")))
133
+ (begin
134
+ ;; "/" よりも "C" を優先する
135
+ (cond
136
+ ((= "C" trimmed-a-first1) ;; aを優先
137
+ (kvs.put! key got))
138
+ ((= "C" trimmed-b-first1) ;; bを優先
139
+ (kvs.put! key value))
140
+ (else
141
+ (errorf "Error: [%s] entry is wrong format" value))))))
142
+ (kvs.put! key value))))
143
+
144
+
145
+ (define (load-sekka-jisyo-f f filename)
146
+ (define keylist '())
147
+ (define (create-keylist kvs lines)
148
+ (for-each
149
+ (lambda (line)
150
+ (let1 fields (split-dict-line line)
151
+ (set! keylist (cons (first fields) keylist))
152
+ (append-entry masterid kvs (first fields) (second fields))))
153
+ lines))
154
+ (let1 kvs (Kvs.new (get-kvs-type))
155
+ (kvs.open filename)
156
+ (kvs.clear)
157
+ (create-keylist kvs (f.readlines.to_list))
158
+ (setup-ready-made-keylist kvs keylist)
159
+ (kvs.close)))
160
+
161
+
162
+ (define (dump-sekka-jisyo-f f filename)
163
+ (let1 kvs (Kvs.new (get-kvs-type))
164
+ (kvs.open filename)
165
+ (for-each
166
+ (lambda (key)
167
+ (f.puts (+ key " " (kvs.get key))))
168
+ (to-list (kvs.keys)))
169
+ (kvs.close)))
170
+
171
+
172
+ (define (openSekkaJisyo dictSource cacheSource)
173
+ (when (not (rxmatch #/tch$/ dictSource))
174
+ (set-kvs-type 'memcache))
175
+ (let1 kvs (Kvs.new (get-kvs-type))
176
+ (kvs.open #?=dictSource)
177
+ (let1 cachesv (if cacheSource
178
+ (let1 obj (Kvs.new 'memcache)
179
+ (obj.open cacheSource)
180
+ obj)
181
+ #f)
182
+ (to-arr (list kvs cachesv)))))
183
+ ;; Export to Ruby world
184
+ (export-to-ruby openSekkaJisyo)