sekka 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +22 -0
- data/bin/.gitignore +1 -0
- data/bin/sekka-jisyo +98 -0
- data/bin/sekka-server +83 -0
- data/emacs/http-cookies.el +416 -0
- data/emacs/http-get.el +448 -0
- data/emacs/sekka.el +1069 -0
- data/lib/sekka/alphabet-lib.nnd +59 -0
- data/lib/sekka/approximatesearch.rb +72 -0
- data/lib/sekka/convert-jisyo.nnd +129 -0
- data/lib/sekka/henkan.nnd +464 -0
- data/lib/sekka/jisyo-db.nnd +184 -0
- data/lib/sekka/kvs.rb +135 -0
- data/lib/sekka/roman-lib.nnd +660 -0
- data/lib/sekka/sekkaversion.rb +6 -0
- data/lib/sekka/util.nnd +64 -0
- data/lib/sekka.ru +36 -0
- data/lib/sekkaconfig.rb +62 -0
- data/lib/sekkaserver.rb +127 -0
- data/test/alphabet-lib.nnd +188 -0
- data/test/approximate-bench.nnd +83 -0
- data/test/common.nnd +51 -0
- data/test/henkan-main.nnd +942 -0
- data/test/jisyo.nnd +94 -0
- data/test/roman-lib.nnd +422 -0
- data/test/util.nnd +100 -0
- metadata +223 -0
@@ -0,0 +1,464 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; henkan.nnd - 変換エンジンのコア
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(use sekka.util)
|
38
|
+
(use sekka.convert-jisyo)
|
39
|
+
(use sekka.jisyo-db)
|
40
|
+
(use sekka.alphabet-lib)
|
41
|
+
(require "sekka/approximatesearch")
|
42
|
+
|
43
|
+
|
44
|
+
;; 曖昧検索の評価値にヒューリスティックを加える
|
45
|
+
;; "nn" のように n が重なるほど、"ん" の可能性が強くなる。
|
46
|
+
;; "にゃ" "にゅ" "にょ" が出現した場合は、n がそちらの解釈に取られたものと解釈してその件数分引いておく。
|
47
|
+
(define (nn-heuristics keyword lst)
|
48
|
+
(map
|
49
|
+
(lambda (x)
|
50
|
+
(let ((point (first x))
|
51
|
+
(roman (second x))
|
52
|
+
(target (third x)))
|
53
|
+
(let* ((nn-number (min
|
54
|
+
(vector-length (keyword.scan #/nn/))
|
55
|
+
(vector-length (roman.scan #/nn/))
|
56
|
+
(- (vector-length (target.scan #/ん/))
|
57
|
+
(vector-length (target.scan #/[ゃゅょ]/)))))
|
58
|
+
(nn-number (max 0 nn-number)))
|
59
|
+
(list
|
60
|
+
(if (= 0 nn-number)
|
61
|
+
point
|
62
|
+
(* point (* 1.1 nn-number)))
|
63
|
+
roman
|
64
|
+
target))))
|
65
|
+
lst))
|
66
|
+
|
67
|
+
|
68
|
+
(define (my-append-many lst)
|
69
|
+
(let1 result '()
|
70
|
+
(for-each
|
71
|
+
(lambda (x)
|
72
|
+
(set! result (append result x)))
|
73
|
+
lst)
|
74
|
+
result))
|
75
|
+
|
76
|
+
|
77
|
+
;; 曖昧検索
|
78
|
+
;; リストで返す。 (ソート済み検索結果)
|
79
|
+
(define (approximate-search userid kvs keyword okuri-ari-flag limit)
|
80
|
+
(define jarow-shikii 0.94)
|
81
|
+
(let* ((a-search (ApproximateSearch.new jarow-shikii))
|
82
|
+
(lst (my-append-many
|
83
|
+
(map
|
84
|
+
(lambda (x)
|
85
|
+
(let* ((val (vector-ref x 0))
|
86
|
+
(k (vector-ref x 1))
|
87
|
+
(v (dict-get userid kvs k ""))
|
88
|
+
(v-pair (string-split-first-and-rest v))
|
89
|
+
(v-first (car v-pair))
|
90
|
+
(v-rest (cdr v-pair)))
|
91
|
+
(map
|
92
|
+
(lambda (element)
|
93
|
+
(list val k (+ v-first element)))
|
94
|
+
(to-list (v-rest.split "/")))))
|
95
|
+
(to-list (a-search.search userid kvs keyword okuri-ari-flag)))))
|
96
|
+
(nn-heuristics-result (nn-heuristics keyword lst))
|
97
|
+
(sorted-lst (sort-by nn-heuristics-result (lambda (item) (- 1.0 (car item))))))
|
98
|
+
(if (= limit 0)
|
99
|
+
sorted-lst
|
100
|
+
(take* sorted-lst limit))))
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
;; 厳密検索
|
105
|
+
(define (exact-search userid kvs keyword)
|
106
|
+
(let1 v (dict-get userid kvs keyword #f)
|
107
|
+
(if v
|
108
|
+
`((1.0 ,keyword ,v))
|
109
|
+
'())))
|
110
|
+
|
111
|
+
|
112
|
+
;; split "/a;annotation/b/c/" into `(("a" "annotation" ,src) ("b" #f ,src) ("c" #f ,src)) alist
|
113
|
+
(define (split-henkan-kouho str src . okuri)
|
114
|
+
(define (append-j-type lst)
|
115
|
+
(map
|
116
|
+
(lambda (x)
|
117
|
+
(append x (list 'j)))
|
118
|
+
lst))
|
119
|
+
|
120
|
+
(let* ((trimmed (cond
|
121
|
+
((rxmatch #/^[\/](.+)$/ str)
|
122
|
+
=> (lambda (m)
|
123
|
+
(rxmatch-substring m 1)))
|
124
|
+
(else
|
125
|
+
"")))
|
126
|
+
(splitted (filter
|
127
|
+
(lambda (str) (< 0 str.size))
|
128
|
+
(to-list (trimmed.split "/"))))
|
129
|
+
(okuri (get-optional okuri "")))
|
130
|
+
;; pickup annotation
|
131
|
+
(append-j-type
|
132
|
+
(map
|
133
|
+
(lambda (entry)
|
134
|
+
(let1 vec (entry.split ";")
|
135
|
+
(list (+ (vector-ref vec 0) okuri)
|
136
|
+
(vector-ref vec 1 #f)
|
137
|
+
src)))
|
138
|
+
splitted))))
|
139
|
+
|
140
|
+
|
141
|
+
;; 送り仮名なしの変換
|
142
|
+
(define (henkan-okuri-nashi userid kvs keyword limit)
|
143
|
+
(let* ((result (approximate-search userid kvs keyword #f limit))
|
144
|
+
(kouho (uniq (map
|
145
|
+
(lambda (x)
|
146
|
+
(third x))
|
147
|
+
result))))
|
148
|
+
(let1 lst
|
149
|
+
(append-map
|
150
|
+
(lambda (value)
|
151
|
+
(if-let1 m (rxmatch #/^C(.+)$/ value)
|
152
|
+
(split-henkan-kouho
|
153
|
+
(dict-get userid kvs (string-drop value 1))
|
154
|
+
(rxmatch-substring m 1)
|
155
|
+
) ;; continue to fetch
|
156
|
+
(split-henkan-kouho
|
157
|
+
value
|
158
|
+
keyword)))
|
159
|
+
kouho)
|
160
|
+
(if (= limit 0)
|
161
|
+
lst
|
162
|
+
(take* lst limit)))))
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
;; 送り仮名ありの変換
|
167
|
+
(define (henkan-okuri-ari userid kvs keyword limit roman-method)
|
168
|
+
(let* ((keyword (let1 _pair (string-split-first-and-rest keyword)
|
169
|
+
(+ (sekka-downcase (car _pair))
|
170
|
+
(cdr _pair))))
|
171
|
+
(m (rxmatch #/^([a-z])([a-z\-^]*)([A-Z`+])([a-zA-Z]*)$/ keyword)))
|
172
|
+
(if (not m)
|
173
|
+
'() ;; 変換候補無し
|
174
|
+
(let* ((result (approximate-search userid kvs
|
175
|
+
(+ (rxmatch-substring m 1)
|
176
|
+
(rxmatch-substring m 2)
|
177
|
+
(rxmatch-substring m 3))
|
178
|
+
#t
|
179
|
+
limit))
|
180
|
+
(okurigana-lst (gen-roman->hiragana (sekka-downcase
|
181
|
+
(+
|
182
|
+
(rxmatch-substring m 3)
|
183
|
+
(rxmatch-substring m 4)))
|
184
|
+
roman-method))
|
185
|
+
(okurigana-lst (if (null? okurigana-lst)
|
186
|
+
'("")
|
187
|
+
okurigana-lst))
|
188
|
+
(kouho (uniq (map
|
189
|
+
(lambda (x)
|
190
|
+
(third x))
|
191
|
+
result))))
|
192
|
+
(let1 lst
|
193
|
+
(append-map
|
194
|
+
(lambda (value)
|
195
|
+
(append-map
|
196
|
+
(lambda (x)
|
197
|
+
(if-let1 m (rxmatch #/^C(.+)$/ value)
|
198
|
+
(split-henkan-kouho
|
199
|
+
(dict-get userid kvs (string-drop value 1))
|
200
|
+
(rxmatch-substring m 1)
|
201
|
+
x) ;; continue to fetch
|
202
|
+
(split-henkan-kouho value keyword x)))
|
203
|
+
okurigana-lst))
|
204
|
+
kouho)
|
205
|
+
(if (= limit 0)
|
206
|
+
lst
|
207
|
+
(take* lst limit)))))))
|
208
|
+
|
209
|
+
|
210
|
+
;; 平仮名の変換
|
211
|
+
(define (henkan-hiragana kvs keyword roman-method)
|
212
|
+
(let* ((str (sekka-downcase keyword))
|
213
|
+
(hira (gen-roman->hiragana str roman-method))
|
214
|
+
(kata (gen-roman->katakana str roman-method)))
|
215
|
+
(if (null? hira)
|
216
|
+
`((,keyword #f ,keyword j))
|
217
|
+
(append-map (lambda (h k)
|
218
|
+
`(
|
219
|
+
(,h #f ,keyword h)
|
220
|
+
(,k #f ,keyword k)))
|
221
|
+
hira
|
222
|
+
kata))))
|
223
|
+
|
224
|
+
|
225
|
+
;; アルファベットの単純変換
|
226
|
+
(define (henkan-alphabet kvs keyword)
|
227
|
+
(let* ((zen (gen-alphabet-han->zen keyword))
|
228
|
+
(han (gen-alphabet-zen->han keyword)))
|
229
|
+
`(
|
230
|
+
(,zen #f ,keyword z)
|
231
|
+
(,han #f ,keyword l))))
|
232
|
+
|
233
|
+
|
234
|
+
;; 記号を含むキーワードの変換
|
235
|
+
(define (henkan-non-kanji userid kvs keyword)
|
236
|
+
(let* ((result (exact-search userid kvs keyword))
|
237
|
+
(kouho (map
|
238
|
+
(lambda (x)
|
239
|
+
(third x))
|
240
|
+
result)))
|
241
|
+
(append-map
|
242
|
+
(lambda (value)
|
243
|
+
(split-henkan-kouho value keyword))
|
244
|
+
kouho)))
|
245
|
+
|
246
|
+
|
247
|
+
;; keyword には ローマ字のみを受け付ける
|
248
|
+
(define (sekka-henkan userid kvs keyword limit roman-method)
|
249
|
+
;; 変換候補にindex番号を付加する
|
250
|
+
(define (append-index-number kouho-list)
|
251
|
+
(let1 count 0
|
252
|
+
(map
|
253
|
+
(lambda (x)
|
254
|
+
(begin0
|
255
|
+
(append x (list count))
|
256
|
+
(set! count (+ count 1))))
|
257
|
+
kouho-list)))
|
258
|
+
|
259
|
+
;; キーワードの種別で処理を分割する
|
260
|
+
(append-index-number
|
261
|
+
(cond
|
262
|
+
((rxmatch #/[A-Z`+]/ keyword)
|
263
|
+
(let1 k (string-downcase-first keyword)
|
264
|
+
(cond
|
265
|
+
((rxmatch #/[a-z][A-Z`+]/ k)
|
266
|
+
;; 送りあり
|
267
|
+
;;(let1 result (approximate-search db k #t) (disp-search-result result))
|
268
|
+
(append
|
269
|
+
(append
|
270
|
+
(henkan-okuri-ari userid kvs k limit roman-method)
|
271
|
+
(if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
|
272
|
+
'()
|
273
|
+
(henkan-hiragana kvs (sekka-downcase k) roman-method)))
|
274
|
+
(henkan-alphabet kvs keyword)))
|
275
|
+
(else
|
276
|
+
;; 送りなし
|
277
|
+
;;(let1 result (approximate-search db k #f) (disp-search-result result))
|
278
|
+
(append
|
279
|
+
(append
|
280
|
+
(henkan-okuri-nashi userid kvs k limit)
|
281
|
+
(if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
|
282
|
+
'()
|
283
|
+
(henkan-hiragana kvs (sekka-downcase k) roman-method)))
|
284
|
+
(henkan-alphabet kvs keyword))))))
|
285
|
+
((not (null? (gen-roman->hiragana keyword roman-method)))
|
286
|
+
(append
|
287
|
+
(append
|
288
|
+
(henkan-hiragana kvs keyword roman-method)
|
289
|
+
(henkan-alphabet kvs keyword))
|
290
|
+
(henkan-okuri-nashi userid kvs keyword limit)))
|
291
|
+
(else
|
292
|
+
(append
|
293
|
+
(henkan-non-kanji userid kvs keyword)
|
294
|
+
(henkan-alphabet kvs keyword))))))
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
;; conversion #f => nil for EmacsLisp
|
299
|
+
(define (sekkaHenkan userid kvs cachesv keyword limit roman-method)
|
300
|
+
(define cache-exp-second (* 60 60))
|
301
|
+
(let* ((keyword (keyword.strip))
|
302
|
+
(sekka-keyword (+ "sekka::" roman-method "::" (limit.to_s) "::" keyword)))
|
303
|
+
(if-let1 fetched (and cachesv
|
304
|
+
(cachesv.get sekka-keyword #f))
|
305
|
+
(begin
|
306
|
+
keyword
|
307
|
+
(read-from-string fetched))
|
308
|
+
(let1 henkan-result (map
|
309
|
+
(lambda (x)
|
310
|
+
(map (lambda (val) (if val val nil)) x))
|
311
|
+
(sekka-henkan userid kvs keyword limit (make-keyword roman-method)))
|
312
|
+
(and cachesv
|
313
|
+
(not (null? henkan-result))
|
314
|
+
(let1 fetched2 (cachesv.get "sekka::(keys)" #f)
|
315
|
+
(cachesv.put! sekka-keyword (write-to-string henkan-result) cache-exp-second)
|
316
|
+
(cachesv.put! "sekka::(keys)" (if fetched2
|
317
|
+
(+ fetched2 " " sekka-keyword)
|
318
|
+
sekka-keyword)
|
319
|
+
cache-exp-second)
|
320
|
+
(if-let1 v (cachesv.get "sekka::(keys)" #f)
|
321
|
+
(begin
|
322
|
+
#?=(v.size)
|
323
|
+
#?=v)
|
324
|
+
#f)))
|
325
|
+
henkan-result))))
|
326
|
+
;; Export to Ruby world
|
327
|
+
(export-to-ruby sekkaHenkan)
|
328
|
+
|
329
|
+
|
330
|
+
;; Flush henkan-result cache data on cachesv(memcached)
|
331
|
+
(define (flush-cachesv cachesv)
|
332
|
+
#?="--- do (flush-cachesv) ---"
|
333
|
+
(if-let1 fetched (cachesv.get "sekka::(keys)" #f)
|
334
|
+
(begin
|
335
|
+
(for-each
|
336
|
+
(lambda (x)
|
337
|
+
(cachesv.delete #?=x))
|
338
|
+
(to-list (fetched.split #/[ ]+/)))
|
339
|
+
(cachesv.delete "sekka::(keys)"))
|
340
|
+
#f))
|
341
|
+
|
342
|
+
|
343
|
+
;; 確定処理: 最終確定語を変換候補の先頭に持ってくる。
|
344
|
+
;; key ... "developer" や "へんかん" など、変換候補レコードのキーとなる文字列
|
345
|
+
;; tango ... "変換" など、変換候補から最終確定した、変換候補の文字列
|
346
|
+
;; 登録失敗したら nil を返す
|
347
|
+
(define (sekkaKakutei userid kvs cachesv key tango)
|
348
|
+
(define (fetch userid kvs key)
|
349
|
+
(dict-get userid kvs key #f))
|
350
|
+
|
351
|
+
(define (write-user-entry userid kvs key value)
|
352
|
+
(kvs.put! (+ userid "::" key) value))
|
353
|
+
|
354
|
+
(define (join-henkan-kouho lst)
|
355
|
+
(+ "/"
|
356
|
+
(string-join
|
357
|
+
(map
|
358
|
+
(lambda (x)
|
359
|
+
(if (second x)
|
360
|
+
(+ (first x) ";" (second x))
|
361
|
+
(first x)))
|
362
|
+
lst)
|
363
|
+
"/")))
|
364
|
+
|
365
|
+
(let1 tango (if (is-hiragana-and-okuri key)
|
366
|
+
(drop-okuri tango)
|
367
|
+
tango)
|
368
|
+
(if-let1 kouho-str (fetch userid kvs key)
|
369
|
+
(if (rxmatch #/^\// kouho-str)
|
370
|
+
(let* ((kouho-lst (split-henkan-kouho kouho-str key))
|
371
|
+
(no1
|
372
|
+
(filter (lambda (x)
|
373
|
+
(eq? (car x) tango))
|
374
|
+
kouho-lst))
|
375
|
+
(other
|
376
|
+
(filter (lambda (x)
|
377
|
+
(not (eq? (car x) tango)))
|
378
|
+
kouho-lst))
|
379
|
+
(new-kouho-str
|
380
|
+
(join-henkan-kouho (append no1 other))))
|
381
|
+
(if (not (= kouho-str new-kouho-str))
|
382
|
+
(begin
|
383
|
+
(write-user-entry userid kvs key new-kouho-str)
|
384
|
+
(and cachesv
|
385
|
+
(flush-cachesv cachesv))
|
386
|
+
tango)
|
387
|
+
nil))
|
388
|
+
nil)
|
389
|
+
nil)))
|
390
|
+
;; Export to Ruby world
|
391
|
+
(export-to-ruby sekkaKakutei)
|
392
|
+
|
393
|
+
|
394
|
+
;; ユーザー定義語彙の登録処理
|
395
|
+
(define (registerUserJisyo userid kvs dict-line)
|
396
|
+
(define user-keylist '())
|
397
|
+
(define (insert-to-db sekka-jisyo-data)
|
398
|
+
(for-each
|
399
|
+
(lambda (entry)
|
400
|
+
(let* ((kv (to-list (entry.split #/[ ]+/)))
|
401
|
+
(k (first kv))
|
402
|
+
(v (second kv)))
|
403
|
+
(set! user-keylist (cons k user-keylist))
|
404
|
+
(append-entry userid kvs k v)))
|
405
|
+
sekka-jisyo-data))
|
406
|
+
|
407
|
+
(define (_create-ready-made-keylist keylist)
|
408
|
+
(receive (okuri-ari-hash
|
409
|
+
okuri-nashi-hash)
|
410
|
+
(create-2char-hash keylist)
|
411
|
+
|
412
|
+
;; OKURI-ARI
|
413
|
+
(for-each
|
414
|
+
(lambda (key)
|
415
|
+
(let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
|
416
|
+
(kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
|
417
|
+
(kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
|
418
|
+
(string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
|
419
|
+
(hash-table-get okuri-ari-hash key))))
|
420
|
+
" "))))
|
421
|
+
(hash-table-keys okuri-ari-hash))
|
422
|
+
|
423
|
+
;; OKURI-NASHI
|
424
|
+
(for-each
|
425
|
+
(lambda (key)
|
426
|
+
(let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
|
427
|
+
(kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
|
428
|
+
(kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
|
429
|
+
(string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
|
430
|
+
(hash-table-get okuri-nashi-hash key))))
|
431
|
+
" "))))
|
432
|
+
(hash-table-keys okuri-nashi-hash))))
|
433
|
+
|
434
|
+
;; "ユーザー語彙を"(stored)"にpush!する"
|
435
|
+
(define (kvs-push! userid kvs entry-str)
|
436
|
+
(let* ((orig (kvs.get (+ userid "::(stored)") "()"))
|
437
|
+
(orig (read-from-string orig)))
|
438
|
+
(kvs.put! (+ userid "::(stored)") (write-to-string (append orig (list entry-str))))
|
439
|
+
entry-str))
|
440
|
+
|
441
|
+
(cond
|
442
|
+
((memv dict-line (read-from-string (kvs.get (+ userid "::(stored)") "()")))
|
443
|
+
#?=(+ "already stored userid=[" userid "] tango=[" dict-line "]")
|
444
|
+
#f)
|
445
|
+
(else
|
446
|
+
(cond
|
447
|
+
((rxmatch #/[ ]+[\/]/ dict-line)
|
448
|
+
(let1 lst (convert-skk-jisyo-f (StringIO.new (+ dict-line "\n")))
|
449
|
+
(if (null? lst)
|
450
|
+
(begin
|
451
|
+
#?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
|
452
|
+
#f)
|
453
|
+
(begin
|
454
|
+
(insert-to-db lst)
|
455
|
+
(_create-ready-made-keylist user-keylist)
|
456
|
+
(kvs-push! userid kvs dict-line)
|
457
|
+
#?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
|
458
|
+
#t))))
|
459
|
+
(else
|
460
|
+
#?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
|
461
|
+
#f)))))
|
462
|
+
;; Export to Ruby world
|
463
|
+
(export-to-ruby registerUserJisyo)
|
464
|
+
|
@@ -0,0 +1,184 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; jisyo-db.nnd - 辞書DBの構築、辞書DBアクセスのライブラリ
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(require "sekka/kvs")
|
38
|
+
(use sekka.util)
|
39
|
+
|
40
|
+
(define masterid "MASTER")
|
41
|
+
(define workid "WORK")
|
42
|
+
|
43
|
+
(define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;")
|
44
|
+
(define alphabet-lower-list (to-list (alphabet-string.split "")))
|
45
|
+
(define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
|
46
|
+
(define alphabet-pairs (append-map
|
47
|
+
(lambda (a)
|
48
|
+
(map
|
49
|
+
(lambda (b)
|
50
|
+
(+ a b))
|
51
|
+
(append alphabet-upper-list alphabet-lower-list)))
|
52
|
+
alphabet-lower-list))
|
53
|
+
|
54
|
+
;; KVS type setting
|
55
|
+
(define *kvs-type* 'tokyocabinet) ;; default
|
56
|
+
(define (set-kvs-type type)
|
57
|
+
(if (not (symbol? type))
|
58
|
+
(error "Error: set-kvs-type requires symbol argument.")
|
59
|
+
(set! *kvs-type* type)))
|
60
|
+
(define (get-kvs-type) *kvs-type*)
|
61
|
+
|
62
|
+
|
63
|
+
;; dict-get search sequence
|
64
|
+
;; (1) try "userid::keyword" key
|
65
|
+
;; (2) try "MASTER::keyword" key
|
66
|
+
(define (dict-get userid kvs key . fallback)
|
67
|
+
(if-let1 value
|
68
|
+
(or (kvs.get (+ userid "::" key) #f)
|
69
|
+
(kvs.get (+ masterid "::" key) #f))
|
70
|
+
value
|
71
|
+
(let1 opt (get-optional fallback #f)
|
72
|
+
opt)))
|
73
|
+
|
74
|
+
(define (create-2char-hash keylist)
|
75
|
+
(define okuri-ari-hash (make-hash-table))
|
76
|
+
(define okuri-nashi-hash (make-hash-table))
|
77
|
+
(define (create-hash keylist)
|
78
|
+
(for-each
|
79
|
+
(lambda (k)
|
80
|
+
(when (rxmatch #/^[a-zA-Z^>-@`;+]+$/ k)
|
81
|
+
(let1 sliced (sekka-downcase (k.slice 0 2))
|
82
|
+
(when (= 2 sliced.size)
|
83
|
+
(if (rxmatch #/[A-Z`+]$/ k)
|
84
|
+
(hash-table-push! okuri-ari-hash sliced k)
|
85
|
+
(hash-table-push! okuri-nashi-hash sliced k))))))
|
86
|
+
keylist))
|
87
|
+
(create-hash keylist)
|
88
|
+
(values okuri-ari-hash
|
89
|
+
okuri-nashi-hash))
|
90
|
+
|
91
|
+
(define (setup-ready-made-keylist kvs keylist)
|
92
|
+
(for-each
|
93
|
+
(lambda (key)
|
94
|
+
(let1 key (+ masterid "::" key)
|
95
|
+
(unless (kvs.get key #f)
|
96
|
+
(kvs.put! key ""))))
|
97
|
+
alphabet-pairs)
|
98
|
+
|
99
|
+
(receive (okuri-ari-hash
|
100
|
+
okuri-nashi-hash)
|
101
|
+
(create-2char-hash keylist)
|
102
|
+
|
103
|
+
;; OKURI-ARI
|
104
|
+
(for-each
|
105
|
+
(lambda (key)
|
106
|
+
(kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
|
107
|
+
(string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
|
108
|
+
(hash-table-keys okuri-ari-hash))
|
109
|
+
|
110
|
+
;; OKURI-NASHI
|
111
|
+
(for-each
|
112
|
+
(lambda (key)
|
113
|
+
(kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
|
114
|
+
(string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
|
115
|
+
(hash-table-keys okuri-nashi-hash))))
|
116
|
+
|
117
|
+
|
118
|
+
(define (append-entry userid kvs key value)
|
119
|
+
(let1 key (+ userid "::" key)
|
120
|
+
(if-let1 got (kvs.get key #f)
|
121
|
+
(let* ((trimmed-a-first1 (car (string-split-first-and-rest got)))
|
122
|
+
(trimmed-a (if-let1 m (rxmatch #/^[C\/](.+)$/ got)
|
123
|
+
(rxmatch-substring m 1)
|
124
|
+
got))
|
125
|
+
(trimmed-b-first1 (car (string-split-first-and-rest value)))
|
126
|
+
(trimmed-b (if-let1 m (rxmatch #/^[C\/](.+)$/ value)
|
127
|
+
(rxmatch-substring m 1)
|
128
|
+
value))
|
129
|
+
(lst (delete-duplicates (append (to-list (trimmed-a.split "/"))
|
130
|
+
(to-list (trimmed-b.split "/"))))))
|
131
|
+
(if (= trimmed-a-first1 trimmed-b-first1)
|
132
|
+
(kvs.put! key (+ trimmed-a-first1 (string-join lst "/")))
|
133
|
+
(begin
|
134
|
+
;; "/" よりも "C" を優先する
|
135
|
+
(cond
|
136
|
+
((= "C" trimmed-a-first1) ;; aを優先
|
137
|
+
(kvs.put! key got))
|
138
|
+
((= "C" trimmed-b-first1) ;; bを優先
|
139
|
+
(kvs.put! key value))
|
140
|
+
(else
|
141
|
+
(errorf "Error: [%s] entry is wrong format" value))))))
|
142
|
+
(kvs.put! key value))))
|
143
|
+
|
144
|
+
|
145
|
+
(define (load-sekka-jisyo-f f filename)
|
146
|
+
(define keylist '())
|
147
|
+
(define (create-keylist kvs lines)
|
148
|
+
(for-each
|
149
|
+
(lambda (line)
|
150
|
+
(let1 fields (split-dict-line line)
|
151
|
+
(set! keylist (cons (first fields) keylist))
|
152
|
+
(append-entry masterid kvs (first fields) (second fields))))
|
153
|
+
lines))
|
154
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
155
|
+
(kvs.open filename)
|
156
|
+
(kvs.clear)
|
157
|
+
(create-keylist kvs (f.readlines.to_list))
|
158
|
+
(setup-ready-made-keylist kvs keylist)
|
159
|
+
(kvs.close)))
|
160
|
+
|
161
|
+
|
162
|
+
(define (dump-sekka-jisyo-f f filename)
|
163
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
164
|
+
(kvs.open filename)
|
165
|
+
(for-each
|
166
|
+
(lambda (key)
|
167
|
+
(f.puts (+ key " " (kvs.get key))))
|
168
|
+
(to-list (kvs.keys)))
|
169
|
+
(kvs.close)))
|
170
|
+
|
171
|
+
|
172
|
+
(define (openSekkaJisyo dictSource cacheSource)
|
173
|
+
(when (not (rxmatch #/tch$/ dictSource))
|
174
|
+
(set-kvs-type 'memcache))
|
175
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
176
|
+
(kvs.open #?=dictSource)
|
177
|
+
(let1 cachesv (if cacheSource
|
178
|
+
(let1 obj (Kvs.new 'memcache)
|
179
|
+
(obj.open cacheSource)
|
180
|
+
obj)
|
181
|
+
#f)
|
182
|
+
(to-arr (list kvs cachesv)))))
|
183
|
+
;; Export to Ruby world
|
184
|
+
(export-to-ruby openSekkaJisyo)
|