sekka 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,464 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; henkan.nnd - 変換エンジンのコア
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+ (use srfi-1)
37
+ (use sekka.util)
38
+ (use sekka.convert-jisyo)
39
+ (use sekka.jisyo-db)
40
+ (use sekka.alphabet-lib)
41
+ (require "sekka/approximatesearch")
42
+
43
+
44
+ ;; 曖昧検索の評価値にヒューリスティックを加える
45
+ ;; "nn" のように n が重なるほど、"ん" の可能性が強くなる。
46
+ ;; "にゃ" "にゅ" "にょ" が出現した場合は、n がそちらの解釈に取られたものと解釈してその件数分引いておく。
47
+ (define (nn-heuristics keyword lst)
48
+ (map
49
+ (lambda (x)
50
+ (let ((point (first x))
51
+ (roman (second x))
52
+ (target (third x)))
53
+ (let* ((nn-number (min
54
+ (vector-length (keyword.scan #/nn/))
55
+ (vector-length (roman.scan #/nn/))
56
+ (- (vector-length (target.scan #/ん/))
57
+ (vector-length (target.scan #/[ゃゅょ]/)))))
58
+ (nn-number (max 0 nn-number)))
59
+ (list
60
+ (if (= 0 nn-number)
61
+ point
62
+ (* point (* 1.1 nn-number)))
63
+ roman
64
+ target))))
65
+ lst))
66
+
67
+
68
+ (define (my-append-many lst)
69
+ (let1 result '()
70
+ (for-each
71
+ (lambda (x)
72
+ (set! result (append result x)))
73
+ lst)
74
+ result))
75
+
76
+
77
+ ;; 曖昧検索
78
+ ;; リストで返す。 (ソート済み検索結果)
79
+ (define (approximate-search userid kvs keyword okuri-ari-flag limit)
80
+ (define jarow-shikii 0.94)
81
+ (let* ((a-search (ApproximateSearch.new jarow-shikii))
82
+ (lst (my-append-many
83
+ (map
84
+ (lambda (x)
85
+ (let* ((val (vector-ref x 0))
86
+ (k (vector-ref x 1))
87
+ (v (dict-get userid kvs k ""))
88
+ (v-pair (string-split-first-and-rest v))
89
+ (v-first (car v-pair))
90
+ (v-rest (cdr v-pair)))
91
+ (map
92
+ (lambda (element)
93
+ (list val k (+ v-first element)))
94
+ (to-list (v-rest.split "/")))))
95
+ (to-list (a-search.search userid kvs keyword okuri-ari-flag)))))
96
+ (nn-heuristics-result (nn-heuristics keyword lst))
97
+ (sorted-lst (sort-by nn-heuristics-result (lambda (item) (- 1.0 (car item))))))
98
+ (if (= limit 0)
99
+ sorted-lst
100
+ (take* sorted-lst limit))))
101
+
102
+
103
+
104
+ ;; 厳密検索
105
+ (define (exact-search userid kvs keyword)
106
+ (let1 v (dict-get userid kvs keyword #f)
107
+ (if v
108
+ `((1.0 ,keyword ,v))
109
+ '())))
110
+
111
+
112
+ ;; split "/a;annotation/b/c/" into `(("a" "annotation" ,src) ("b" #f ,src) ("c" #f ,src)) alist
113
+ (define (split-henkan-kouho str src . okuri)
114
+ (define (append-j-type lst)
115
+ (map
116
+ (lambda (x)
117
+ (append x (list 'j)))
118
+ lst))
119
+
120
+ (let* ((trimmed (cond
121
+ ((rxmatch #/^[\/](.+)$/ str)
122
+ => (lambda (m)
123
+ (rxmatch-substring m 1)))
124
+ (else
125
+ "")))
126
+ (splitted (filter
127
+ (lambda (str) (< 0 str.size))
128
+ (to-list (trimmed.split "/"))))
129
+ (okuri (get-optional okuri "")))
130
+ ;; pickup annotation
131
+ (append-j-type
132
+ (map
133
+ (lambda (entry)
134
+ (let1 vec (entry.split ";")
135
+ (list (+ (vector-ref vec 0) okuri)
136
+ (vector-ref vec 1 #f)
137
+ src)))
138
+ splitted))))
139
+
140
+
141
+ ;; 送り仮名なしの変換
142
+ (define (henkan-okuri-nashi userid kvs keyword limit)
143
+ (let* ((result (approximate-search userid kvs keyword #f limit))
144
+ (kouho (uniq (map
145
+ (lambda (x)
146
+ (third x))
147
+ result))))
148
+ (let1 lst
149
+ (append-map
150
+ (lambda (value)
151
+ (if-let1 m (rxmatch #/^C(.+)$/ value)
152
+ (split-henkan-kouho
153
+ (dict-get userid kvs (string-drop value 1))
154
+ (rxmatch-substring m 1)
155
+ ) ;; continue to fetch
156
+ (split-henkan-kouho
157
+ value
158
+ keyword)))
159
+ kouho)
160
+ (if (= limit 0)
161
+ lst
162
+ (take* lst limit)))))
163
+
164
+
165
+
166
+ ;; 送り仮名ありの変換
167
+ (define (henkan-okuri-ari userid kvs keyword limit roman-method)
168
+ (let* ((keyword (let1 _pair (string-split-first-and-rest keyword)
169
+ (+ (sekka-downcase (car _pair))
170
+ (cdr _pair))))
171
+ (m (rxmatch #/^([a-z])([a-z\-^]*)([A-Z`+])([a-zA-Z]*)$/ keyword)))
172
+ (if (not m)
173
+ '() ;; 変換候補無し
174
+ (let* ((result (approximate-search userid kvs
175
+ (+ (rxmatch-substring m 1)
176
+ (rxmatch-substring m 2)
177
+ (rxmatch-substring m 3))
178
+ #t
179
+ limit))
180
+ (okurigana-lst (gen-roman->hiragana (sekka-downcase
181
+ (+
182
+ (rxmatch-substring m 3)
183
+ (rxmatch-substring m 4)))
184
+ roman-method))
185
+ (okurigana-lst (if (null? okurigana-lst)
186
+ '("")
187
+ okurigana-lst))
188
+ (kouho (uniq (map
189
+ (lambda (x)
190
+ (third x))
191
+ result))))
192
+ (let1 lst
193
+ (append-map
194
+ (lambda (value)
195
+ (append-map
196
+ (lambda (x)
197
+ (if-let1 m (rxmatch #/^C(.+)$/ value)
198
+ (split-henkan-kouho
199
+ (dict-get userid kvs (string-drop value 1))
200
+ (rxmatch-substring m 1)
201
+ x) ;; continue to fetch
202
+ (split-henkan-kouho value keyword x)))
203
+ okurigana-lst))
204
+ kouho)
205
+ (if (= limit 0)
206
+ lst
207
+ (take* lst limit)))))))
208
+
209
+
210
+ ;; 平仮名の変換
211
+ (define (henkan-hiragana kvs keyword roman-method)
212
+ (let* ((str (sekka-downcase keyword))
213
+ (hira (gen-roman->hiragana str roman-method))
214
+ (kata (gen-roman->katakana str roman-method)))
215
+ (if (null? hira)
216
+ `((,keyword #f ,keyword j))
217
+ (append-map (lambda (h k)
218
+ `(
219
+ (,h #f ,keyword h)
220
+ (,k #f ,keyword k)))
221
+ hira
222
+ kata))))
223
+
224
+
225
+ ;; アルファベットの単純変換
226
+ (define (henkan-alphabet kvs keyword)
227
+ (let* ((zen (gen-alphabet-han->zen keyword))
228
+ (han (gen-alphabet-zen->han keyword)))
229
+ `(
230
+ (,zen #f ,keyword z)
231
+ (,han #f ,keyword l))))
232
+
233
+
234
+ ;; 記号を含むキーワードの変換
235
+ (define (henkan-non-kanji userid kvs keyword)
236
+ (let* ((result (exact-search userid kvs keyword))
237
+ (kouho (map
238
+ (lambda (x)
239
+ (third x))
240
+ result)))
241
+ (append-map
242
+ (lambda (value)
243
+ (split-henkan-kouho value keyword))
244
+ kouho)))
245
+
246
+
247
+ ;; keyword には ローマ字のみを受け付ける
248
+ (define (sekka-henkan userid kvs keyword limit roman-method)
249
+ ;; 変換候補にindex番号を付加する
250
+ (define (append-index-number kouho-list)
251
+ (let1 count 0
252
+ (map
253
+ (lambda (x)
254
+ (begin0
255
+ (append x (list count))
256
+ (set! count (+ count 1))))
257
+ kouho-list)))
258
+
259
+ ;; キーワードの種別で処理を分割する
260
+ (append-index-number
261
+ (cond
262
+ ((rxmatch #/[A-Z`+]/ keyword)
263
+ (let1 k (string-downcase-first keyword)
264
+ (cond
265
+ ((rxmatch #/[a-z][A-Z`+]/ k)
266
+ ;; 送りあり
267
+ ;;(let1 result (approximate-search db k #t) (disp-search-result result))
268
+ (append
269
+ (append
270
+ (henkan-okuri-ari userid kvs k limit roman-method)
271
+ (if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
272
+ '()
273
+ (henkan-hiragana kvs (sekka-downcase k) roman-method)))
274
+ (henkan-alphabet kvs keyword)))
275
+ (else
276
+ ;; 送りなし
277
+ ;;(let1 result (approximate-search db k #f) (disp-search-result result))
278
+ (append
279
+ (append
280
+ (henkan-okuri-nashi userid kvs k limit)
281
+ (if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
282
+ '()
283
+ (henkan-hiragana kvs (sekka-downcase k) roman-method)))
284
+ (henkan-alphabet kvs keyword))))))
285
+ ((not (null? (gen-roman->hiragana keyword roman-method)))
286
+ (append
287
+ (append
288
+ (henkan-hiragana kvs keyword roman-method)
289
+ (henkan-alphabet kvs keyword))
290
+ (henkan-okuri-nashi userid kvs keyword limit)))
291
+ (else
292
+ (append
293
+ (henkan-non-kanji userid kvs keyword)
294
+ (henkan-alphabet kvs keyword))))))
295
+
296
+
297
+
298
+ ;; conversion #f => nil for EmacsLisp
299
+ (define (sekkaHenkan userid kvs cachesv keyword limit roman-method)
300
+ (define cache-exp-second (* 60 60))
301
+ (let* ((keyword (keyword.strip))
302
+ (sekka-keyword (+ "sekka::" roman-method "::" (limit.to_s) "::" keyword)))
303
+ (if-let1 fetched (and cachesv
304
+ (cachesv.get sekka-keyword #f))
305
+ (begin
306
+ keyword
307
+ (read-from-string fetched))
308
+ (let1 henkan-result (map
309
+ (lambda (x)
310
+ (map (lambda (val) (if val val nil)) x))
311
+ (sekka-henkan userid kvs keyword limit (make-keyword roman-method)))
312
+ (and cachesv
313
+ (not (null? henkan-result))
314
+ (let1 fetched2 (cachesv.get "sekka::(keys)" #f)
315
+ (cachesv.put! sekka-keyword (write-to-string henkan-result) cache-exp-second)
316
+ (cachesv.put! "sekka::(keys)" (if fetched2
317
+ (+ fetched2 " " sekka-keyword)
318
+ sekka-keyword)
319
+ cache-exp-second)
320
+ (if-let1 v (cachesv.get "sekka::(keys)" #f)
321
+ (begin
322
+ #?=(v.size)
323
+ #?=v)
324
+ #f)))
325
+ henkan-result))))
326
+ ;; Export to Ruby world
327
+ (export-to-ruby sekkaHenkan)
328
+
329
+
330
+ ;; Flush henkan-result cache data on cachesv(memcached)
331
+ (define (flush-cachesv cachesv)
332
+ #?="--- do (flush-cachesv) ---"
333
+ (if-let1 fetched (cachesv.get "sekka::(keys)" #f)
334
+ (begin
335
+ (for-each
336
+ (lambda (x)
337
+ (cachesv.delete #?=x))
338
+ (to-list (fetched.split #/[ ]+/)))
339
+ (cachesv.delete "sekka::(keys)"))
340
+ #f))
341
+
342
+
343
+ ;; 確定処理: 最終確定語を変換候補の先頭に持ってくる。
344
+ ;; key ... "developer" や "へんかん" など、変換候補レコードのキーとなる文字列
345
+ ;; tango ... "変換" など、変換候補から最終確定した、変換候補の文字列
346
+ ;; 登録失敗したら nil を返す
347
+ (define (sekkaKakutei userid kvs cachesv key tango)
348
+ (define (fetch userid kvs key)
349
+ (dict-get userid kvs key #f))
350
+
351
+ (define (write-user-entry userid kvs key value)
352
+ (kvs.put! (+ userid "::" key) value))
353
+
354
+ (define (join-henkan-kouho lst)
355
+ (+ "/"
356
+ (string-join
357
+ (map
358
+ (lambda (x)
359
+ (if (second x)
360
+ (+ (first x) ";" (second x))
361
+ (first x)))
362
+ lst)
363
+ "/")))
364
+
365
+ (let1 tango (if (is-hiragana-and-okuri key)
366
+ (drop-okuri tango)
367
+ tango)
368
+ (if-let1 kouho-str (fetch userid kvs key)
369
+ (if (rxmatch #/^\// kouho-str)
370
+ (let* ((kouho-lst (split-henkan-kouho kouho-str key))
371
+ (no1
372
+ (filter (lambda (x)
373
+ (eq? (car x) tango))
374
+ kouho-lst))
375
+ (other
376
+ (filter (lambda (x)
377
+ (not (eq? (car x) tango)))
378
+ kouho-lst))
379
+ (new-kouho-str
380
+ (join-henkan-kouho (append no1 other))))
381
+ (if (not (= kouho-str new-kouho-str))
382
+ (begin
383
+ (write-user-entry userid kvs key new-kouho-str)
384
+ (and cachesv
385
+ (flush-cachesv cachesv))
386
+ tango)
387
+ nil))
388
+ nil)
389
+ nil)))
390
+ ;; Export to Ruby world
391
+ (export-to-ruby sekkaKakutei)
392
+
393
+
394
+ ;; ユーザー定義語彙の登録処理
395
+ (define (registerUserJisyo userid kvs dict-line)
396
+ (define user-keylist '())
397
+ (define (insert-to-db sekka-jisyo-data)
398
+ (for-each
399
+ (lambda (entry)
400
+ (let* ((kv (to-list (entry.split #/[ ]+/)))
401
+ (k (first kv))
402
+ (v (second kv)))
403
+ (set! user-keylist (cons k user-keylist))
404
+ (append-entry userid kvs k v)))
405
+ sekka-jisyo-data))
406
+
407
+ (define (_create-ready-made-keylist keylist)
408
+ (receive (okuri-ari-hash
409
+ okuri-nashi-hash)
410
+ (create-2char-hash keylist)
411
+
412
+ ;; OKURI-ARI
413
+ (for-each
414
+ (lambda (key)
415
+ (let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
416
+ (kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
417
+ (kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
418
+ (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
419
+ (hash-table-get okuri-ari-hash key))))
420
+ " "))))
421
+ (hash-table-keys okuri-ari-hash))
422
+
423
+ ;; OKURI-NASHI
424
+ (for-each
425
+ (lambda (key)
426
+ (let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
427
+ (kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
428
+ (kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
429
+ (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
430
+ (hash-table-get okuri-nashi-hash key))))
431
+ " "))))
432
+ (hash-table-keys okuri-nashi-hash))))
433
+
434
+ ;; "ユーザー語彙を"(stored)"にpush!する"
435
+ (define (kvs-push! userid kvs entry-str)
436
+ (let* ((orig (kvs.get (+ userid "::(stored)") "()"))
437
+ (orig (read-from-string orig)))
438
+ (kvs.put! (+ userid "::(stored)") (write-to-string (append orig (list entry-str))))
439
+ entry-str))
440
+
441
+ (cond
442
+ ((memv dict-line (read-from-string (kvs.get (+ userid "::(stored)") "()")))
443
+ #?=(+ "already stored userid=[" userid "] tango=[" dict-line "]")
444
+ #f)
445
+ (else
446
+ (cond
447
+ ((rxmatch #/[ ]+[\/]/ dict-line)
448
+ (let1 lst (convert-skk-jisyo-f (StringIO.new (+ dict-line "\n")))
449
+ (if (null? lst)
450
+ (begin
451
+ #?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
452
+ #f)
453
+ (begin
454
+ (insert-to-db lst)
455
+ (_create-ready-made-keylist user-keylist)
456
+ (kvs-push! userid kvs dict-line)
457
+ #?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
458
+ #t))))
459
+ (else
460
+ #?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
461
+ #f)))))
462
+ ;; Export to Ruby world
463
+ (export-to-ruby registerUserJisyo)
464
+
@@ -0,0 +1,184 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; jisyo-db.nnd - 辞書DBの構築、辞書DBアクセスのライブラリ
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+ (use srfi-1)
37
+ (require "sekka/kvs")
38
+ (use sekka.util)
39
+
40
+ (define masterid "MASTER")
41
+ (define workid "WORK")
42
+
43
+ (define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;")
44
+ (define alphabet-lower-list (to-list (alphabet-string.split "")))
45
+ (define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
46
+ (define alphabet-pairs (append-map
47
+ (lambda (a)
48
+ (map
49
+ (lambda (b)
50
+ (+ a b))
51
+ (append alphabet-upper-list alphabet-lower-list)))
52
+ alphabet-lower-list))
53
+
54
+ ;; KVS type setting
55
+ (define *kvs-type* 'tokyocabinet) ;; default
56
+ (define (set-kvs-type type)
57
+ (if (not (symbol? type))
58
+ (error "Error: set-kvs-type requires symbol argument.")
59
+ (set! *kvs-type* type)))
60
+ (define (get-kvs-type) *kvs-type*)
61
+
62
+
63
+ ;; dict-get search sequence
64
+ ;; (1) try "userid::keyword" key
65
+ ;; (2) try "MASTER::keyword" key
66
+ (define (dict-get userid kvs key . fallback)
67
+ (if-let1 value
68
+ (or (kvs.get (+ userid "::" key) #f)
69
+ (kvs.get (+ masterid "::" key) #f))
70
+ value
71
+ (let1 opt (get-optional fallback #f)
72
+ opt)))
73
+
74
+ (define (create-2char-hash keylist)
75
+ (define okuri-ari-hash (make-hash-table))
76
+ (define okuri-nashi-hash (make-hash-table))
77
+ (define (create-hash keylist)
78
+ (for-each
79
+ (lambda (k)
80
+ (when (rxmatch #/^[a-zA-Z^>-@`;+]+$/ k)
81
+ (let1 sliced (sekka-downcase (k.slice 0 2))
82
+ (when (= 2 sliced.size)
83
+ (if (rxmatch #/[A-Z`+]$/ k)
84
+ (hash-table-push! okuri-ari-hash sliced k)
85
+ (hash-table-push! okuri-nashi-hash sliced k))))))
86
+ keylist))
87
+ (create-hash keylist)
88
+ (values okuri-ari-hash
89
+ okuri-nashi-hash))
90
+
91
+ (define (setup-ready-made-keylist kvs keylist)
92
+ (for-each
93
+ (lambda (key)
94
+ (let1 key (+ masterid "::" key)
95
+ (unless (kvs.get key #f)
96
+ (kvs.put! key ""))))
97
+ alphabet-pairs)
98
+
99
+ (receive (okuri-ari-hash
100
+ okuri-nashi-hash)
101
+ (create-2char-hash keylist)
102
+
103
+ ;; OKURI-ARI
104
+ (for-each
105
+ (lambda (key)
106
+ (kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
107
+ (string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
108
+ (hash-table-keys okuri-ari-hash))
109
+
110
+ ;; OKURI-NASHI
111
+ (for-each
112
+ (lambda (key)
113
+ (kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
114
+ (string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
115
+ (hash-table-keys okuri-nashi-hash))))
116
+
117
+
118
+ (define (append-entry userid kvs key value)
119
+ (let1 key (+ userid "::" key)
120
+ (if-let1 got (kvs.get key #f)
121
+ (let* ((trimmed-a-first1 (car (string-split-first-and-rest got)))
122
+ (trimmed-a (if-let1 m (rxmatch #/^[C\/](.+)$/ got)
123
+ (rxmatch-substring m 1)
124
+ got))
125
+ (trimmed-b-first1 (car (string-split-first-and-rest value)))
126
+ (trimmed-b (if-let1 m (rxmatch #/^[C\/](.+)$/ value)
127
+ (rxmatch-substring m 1)
128
+ value))
129
+ (lst (delete-duplicates (append (to-list (trimmed-a.split "/"))
130
+ (to-list (trimmed-b.split "/"))))))
131
+ (if (= trimmed-a-first1 trimmed-b-first1)
132
+ (kvs.put! key (+ trimmed-a-first1 (string-join lst "/")))
133
+ (begin
134
+ ;; "/" よりも "C" を優先する
135
+ (cond
136
+ ((= "C" trimmed-a-first1) ;; aを優先
137
+ (kvs.put! key got))
138
+ ((= "C" trimmed-b-first1) ;; bを優先
139
+ (kvs.put! key value))
140
+ (else
141
+ (errorf "Error: [%s] entry is wrong format" value))))))
142
+ (kvs.put! key value))))
143
+
144
+
145
+ (define (load-sekka-jisyo-f f filename)
146
+ (define keylist '())
147
+ (define (create-keylist kvs lines)
148
+ (for-each
149
+ (lambda (line)
150
+ (let1 fields (split-dict-line line)
151
+ (set! keylist (cons (first fields) keylist))
152
+ (append-entry masterid kvs (first fields) (second fields))))
153
+ lines))
154
+ (let1 kvs (Kvs.new (get-kvs-type))
155
+ (kvs.open filename)
156
+ (kvs.clear)
157
+ (create-keylist kvs (f.readlines.to_list))
158
+ (setup-ready-made-keylist kvs keylist)
159
+ (kvs.close)))
160
+
161
+
162
+ (define (dump-sekka-jisyo-f f filename)
163
+ (let1 kvs (Kvs.new (get-kvs-type))
164
+ (kvs.open filename)
165
+ (for-each
166
+ (lambda (key)
167
+ (f.puts (+ key " " (kvs.get key))))
168
+ (to-list (kvs.keys)))
169
+ (kvs.close)))
170
+
171
+
172
+ (define (openSekkaJisyo dictSource cacheSource)
173
+ (when (not (rxmatch #/tch$/ dictSource))
174
+ (set-kvs-type 'memcache))
175
+ (let1 kvs (Kvs.new (get-kvs-type))
176
+ (kvs.open #?=dictSource)
177
+ (let1 cachesv (if cacheSource
178
+ (let1 obj (Kvs.new 'memcache)
179
+ (obj.open cacheSource)
180
+ obj)
181
+ #f)
182
+ (to-arr (list kvs cachesv)))))
183
+ ;; Export to Ruby world
184
+ (export-to-ruby openSekkaJisyo)