sekka 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +22 -0
- data/bin/.gitignore +1 -0
- data/bin/sekka-jisyo +98 -0
- data/bin/sekka-server +83 -0
- data/emacs/http-cookies.el +416 -0
- data/emacs/http-get.el +448 -0
- data/emacs/sekka.el +1069 -0
- data/lib/sekka/alphabet-lib.nnd +59 -0
- data/lib/sekka/approximatesearch.rb +72 -0
- data/lib/sekka/convert-jisyo.nnd +129 -0
- data/lib/sekka/henkan.nnd +464 -0
- data/lib/sekka/jisyo-db.nnd +184 -0
- data/lib/sekka/kvs.rb +135 -0
- data/lib/sekka/roman-lib.nnd +660 -0
- data/lib/sekka/sekkaversion.rb +6 -0
- data/lib/sekka/util.nnd +64 -0
- data/lib/sekka.ru +36 -0
- data/lib/sekkaconfig.rb +62 -0
- data/lib/sekkaserver.rb +127 -0
- data/test/alphabet-lib.nnd +188 -0
- data/test/approximate-bench.nnd +83 -0
- data/test/common.nnd +51 -0
- data/test/henkan-main.nnd +942 -0
- data/test/jisyo.nnd +94 -0
- data/test/roman-lib.nnd +422 -0
- data/test/util.nnd +100 -0
- metadata +223 -0
@@ -0,0 +1,464 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; henkan.nnd - 変換エンジンのコア
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(use sekka.util)
|
38
|
+
(use sekka.convert-jisyo)
|
39
|
+
(use sekka.jisyo-db)
|
40
|
+
(use sekka.alphabet-lib)
|
41
|
+
(require "sekka/approximatesearch")
|
42
|
+
|
43
|
+
|
44
|
+
;; 曖昧検索の評価値にヒューリスティックを加える
|
45
|
+
;; "nn" のように n が重なるほど、"ん" の可能性が強くなる。
|
46
|
+
;; "にゃ" "にゅ" "にょ" が出現した場合は、n がそちらの解釈に取られたものと解釈してその件数分引いておく。
|
47
|
+
(define (nn-heuristics keyword lst)
|
48
|
+
(map
|
49
|
+
(lambda (x)
|
50
|
+
(let ((point (first x))
|
51
|
+
(roman (second x))
|
52
|
+
(target (third x)))
|
53
|
+
(let* ((nn-number (min
|
54
|
+
(vector-length (keyword.scan #/nn/))
|
55
|
+
(vector-length (roman.scan #/nn/))
|
56
|
+
(- (vector-length (target.scan #/ん/))
|
57
|
+
(vector-length (target.scan #/[ゃゅょ]/)))))
|
58
|
+
(nn-number (max 0 nn-number)))
|
59
|
+
(list
|
60
|
+
(if (= 0 nn-number)
|
61
|
+
point
|
62
|
+
(* point (* 1.1 nn-number)))
|
63
|
+
roman
|
64
|
+
target))))
|
65
|
+
lst))
|
66
|
+
|
67
|
+
|
68
|
+
(define (my-append-many lst)
|
69
|
+
(let1 result '()
|
70
|
+
(for-each
|
71
|
+
(lambda (x)
|
72
|
+
(set! result (append result x)))
|
73
|
+
lst)
|
74
|
+
result))
|
75
|
+
|
76
|
+
|
77
|
+
;; 曖昧検索
|
78
|
+
;; リストで返す。 (ソート済み検索結果)
|
79
|
+
(define (approximate-search userid kvs keyword okuri-ari-flag limit)
|
80
|
+
(define jarow-shikii 0.94)
|
81
|
+
(let* ((a-search (ApproximateSearch.new jarow-shikii))
|
82
|
+
(lst (my-append-many
|
83
|
+
(map
|
84
|
+
(lambda (x)
|
85
|
+
(let* ((val (vector-ref x 0))
|
86
|
+
(k (vector-ref x 1))
|
87
|
+
(v (dict-get userid kvs k ""))
|
88
|
+
(v-pair (string-split-first-and-rest v))
|
89
|
+
(v-first (car v-pair))
|
90
|
+
(v-rest (cdr v-pair)))
|
91
|
+
(map
|
92
|
+
(lambda (element)
|
93
|
+
(list val k (+ v-first element)))
|
94
|
+
(to-list (v-rest.split "/")))))
|
95
|
+
(to-list (a-search.search userid kvs keyword okuri-ari-flag)))))
|
96
|
+
(nn-heuristics-result (nn-heuristics keyword lst))
|
97
|
+
(sorted-lst (sort-by nn-heuristics-result (lambda (item) (- 1.0 (car item))))))
|
98
|
+
(if (= limit 0)
|
99
|
+
sorted-lst
|
100
|
+
(take* sorted-lst limit))))
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
;; 厳密検索
|
105
|
+
(define (exact-search userid kvs keyword)
|
106
|
+
(let1 v (dict-get userid kvs keyword #f)
|
107
|
+
(if v
|
108
|
+
`((1.0 ,keyword ,v))
|
109
|
+
'())))
|
110
|
+
|
111
|
+
|
112
|
+
;; split "/a;annotation/b/c/" into `(("a" "annotation" ,src) ("b" #f ,src) ("c" #f ,src)) alist
|
113
|
+
(define (split-henkan-kouho str src . okuri)
|
114
|
+
(define (append-j-type lst)
|
115
|
+
(map
|
116
|
+
(lambda (x)
|
117
|
+
(append x (list 'j)))
|
118
|
+
lst))
|
119
|
+
|
120
|
+
(let* ((trimmed (cond
|
121
|
+
((rxmatch #/^[\/](.+)$/ str)
|
122
|
+
=> (lambda (m)
|
123
|
+
(rxmatch-substring m 1)))
|
124
|
+
(else
|
125
|
+
"")))
|
126
|
+
(splitted (filter
|
127
|
+
(lambda (str) (< 0 str.size))
|
128
|
+
(to-list (trimmed.split "/"))))
|
129
|
+
(okuri (get-optional okuri "")))
|
130
|
+
;; pickup annotation
|
131
|
+
(append-j-type
|
132
|
+
(map
|
133
|
+
(lambda (entry)
|
134
|
+
(let1 vec (entry.split ";")
|
135
|
+
(list (+ (vector-ref vec 0) okuri)
|
136
|
+
(vector-ref vec 1 #f)
|
137
|
+
src)))
|
138
|
+
splitted))))
|
139
|
+
|
140
|
+
|
141
|
+
;; 送り仮名なしの変換
|
142
|
+
(define (henkan-okuri-nashi userid kvs keyword limit)
|
143
|
+
(let* ((result (approximate-search userid kvs keyword #f limit))
|
144
|
+
(kouho (uniq (map
|
145
|
+
(lambda (x)
|
146
|
+
(third x))
|
147
|
+
result))))
|
148
|
+
(let1 lst
|
149
|
+
(append-map
|
150
|
+
(lambda (value)
|
151
|
+
(if-let1 m (rxmatch #/^C(.+)$/ value)
|
152
|
+
(split-henkan-kouho
|
153
|
+
(dict-get userid kvs (string-drop value 1))
|
154
|
+
(rxmatch-substring m 1)
|
155
|
+
) ;; continue to fetch
|
156
|
+
(split-henkan-kouho
|
157
|
+
value
|
158
|
+
keyword)))
|
159
|
+
kouho)
|
160
|
+
(if (= limit 0)
|
161
|
+
lst
|
162
|
+
(take* lst limit)))))
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
;; 送り仮名ありの変換
|
167
|
+
(define (henkan-okuri-ari userid kvs keyword limit roman-method)
|
168
|
+
(let* ((keyword (let1 _pair (string-split-first-and-rest keyword)
|
169
|
+
(+ (sekka-downcase (car _pair))
|
170
|
+
(cdr _pair))))
|
171
|
+
(m (rxmatch #/^([a-z])([a-z\-^]*)([A-Z`+])([a-zA-Z]*)$/ keyword)))
|
172
|
+
(if (not m)
|
173
|
+
'() ;; 変換候補無し
|
174
|
+
(let* ((result (approximate-search userid kvs
|
175
|
+
(+ (rxmatch-substring m 1)
|
176
|
+
(rxmatch-substring m 2)
|
177
|
+
(rxmatch-substring m 3))
|
178
|
+
#t
|
179
|
+
limit))
|
180
|
+
(okurigana-lst (gen-roman->hiragana (sekka-downcase
|
181
|
+
(+
|
182
|
+
(rxmatch-substring m 3)
|
183
|
+
(rxmatch-substring m 4)))
|
184
|
+
roman-method))
|
185
|
+
(okurigana-lst (if (null? okurigana-lst)
|
186
|
+
'("")
|
187
|
+
okurigana-lst))
|
188
|
+
(kouho (uniq (map
|
189
|
+
(lambda (x)
|
190
|
+
(third x))
|
191
|
+
result))))
|
192
|
+
(let1 lst
|
193
|
+
(append-map
|
194
|
+
(lambda (value)
|
195
|
+
(append-map
|
196
|
+
(lambda (x)
|
197
|
+
(if-let1 m (rxmatch #/^C(.+)$/ value)
|
198
|
+
(split-henkan-kouho
|
199
|
+
(dict-get userid kvs (string-drop value 1))
|
200
|
+
(rxmatch-substring m 1)
|
201
|
+
x) ;; continue to fetch
|
202
|
+
(split-henkan-kouho value keyword x)))
|
203
|
+
okurigana-lst))
|
204
|
+
kouho)
|
205
|
+
(if (= limit 0)
|
206
|
+
lst
|
207
|
+
(take* lst limit)))))))
|
208
|
+
|
209
|
+
|
210
|
+
;; 平仮名の変換
|
211
|
+
(define (henkan-hiragana kvs keyword roman-method)
|
212
|
+
(let* ((str (sekka-downcase keyword))
|
213
|
+
(hira (gen-roman->hiragana str roman-method))
|
214
|
+
(kata (gen-roman->katakana str roman-method)))
|
215
|
+
(if (null? hira)
|
216
|
+
`((,keyword #f ,keyword j))
|
217
|
+
(append-map (lambda (h k)
|
218
|
+
`(
|
219
|
+
(,h #f ,keyword h)
|
220
|
+
(,k #f ,keyword k)))
|
221
|
+
hira
|
222
|
+
kata))))
|
223
|
+
|
224
|
+
|
225
|
+
;; アルファベットの単純変換
|
226
|
+
(define (henkan-alphabet kvs keyword)
|
227
|
+
(let* ((zen (gen-alphabet-han->zen keyword))
|
228
|
+
(han (gen-alphabet-zen->han keyword)))
|
229
|
+
`(
|
230
|
+
(,zen #f ,keyword z)
|
231
|
+
(,han #f ,keyword l))))
|
232
|
+
|
233
|
+
|
234
|
+
;; 記号を含むキーワードの変換
|
235
|
+
(define (henkan-non-kanji userid kvs keyword)
|
236
|
+
(let* ((result (exact-search userid kvs keyword))
|
237
|
+
(kouho (map
|
238
|
+
(lambda (x)
|
239
|
+
(third x))
|
240
|
+
result)))
|
241
|
+
(append-map
|
242
|
+
(lambda (value)
|
243
|
+
(split-henkan-kouho value keyword))
|
244
|
+
kouho)))
|
245
|
+
|
246
|
+
|
247
|
+
;; keyword には ローマ字のみを受け付ける
|
248
|
+
(define (sekka-henkan userid kvs keyword limit roman-method)
|
249
|
+
;; 変換候補にindex番号を付加する
|
250
|
+
(define (append-index-number kouho-list)
|
251
|
+
(let1 count 0
|
252
|
+
(map
|
253
|
+
(lambda (x)
|
254
|
+
(begin0
|
255
|
+
(append x (list count))
|
256
|
+
(set! count (+ count 1))))
|
257
|
+
kouho-list)))
|
258
|
+
|
259
|
+
;; キーワードの種別で処理を分割する
|
260
|
+
(append-index-number
|
261
|
+
(cond
|
262
|
+
((rxmatch #/[A-Z`+]/ keyword)
|
263
|
+
(let1 k (string-downcase-first keyword)
|
264
|
+
(cond
|
265
|
+
((rxmatch #/[a-z][A-Z`+]/ k)
|
266
|
+
;; 送りあり
|
267
|
+
;;(let1 result (approximate-search db k #t) (disp-search-result result))
|
268
|
+
(append
|
269
|
+
(append
|
270
|
+
(henkan-okuri-ari userid kvs k limit roman-method)
|
271
|
+
(if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
|
272
|
+
'()
|
273
|
+
(henkan-hiragana kvs (sekka-downcase k) roman-method)))
|
274
|
+
(henkan-alphabet kvs keyword)))
|
275
|
+
(else
|
276
|
+
;; 送りなし
|
277
|
+
;;(let1 result (approximate-search db k #f) (disp-search-result result))
|
278
|
+
(append
|
279
|
+
(append
|
280
|
+
(henkan-okuri-nashi userid kvs k limit)
|
281
|
+
(if (null? (gen-roman->hiragana (sekka-downcase k) roman-method))
|
282
|
+
'()
|
283
|
+
(henkan-hiragana kvs (sekka-downcase k) roman-method)))
|
284
|
+
(henkan-alphabet kvs keyword))))))
|
285
|
+
((not (null? (gen-roman->hiragana keyword roman-method)))
|
286
|
+
(append
|
287
|
+
(append
|
288
|
+
(henkan-hiragana kvs keyword roman-method)
|
289
|
+
(henkan-alphabet kvs keyword))
|
290
|
+
(henkan-okuri-nashi userid kvs keyword limit)))
|
291
|
+
(else
|
292
|
+
(append
|
293
|
+
(henkan-non-kanji userid kvs keyword)
|
294
|
+
(henkan-alphabet kvs keyword))))))
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
;; conversion #f => nil for EmacsLisp
|
299
|
+
(define (sekkaHenkan userid kvs cachesv keyword limit roman-method)
|
300
|
+
(define cache-exp-second (* 60 60))
|
301
|
+
(let* ((keyword (keyword.strip))
|
302
|
+
(sekka-keyword (+ "sekka::" roman-method "::" (limit.to_s) "::" keyword)))
|
303
|
+
(if-let1 fetched (and cachesv
|
304
|
+
(cachesv.get sekka-keyword #f))
|
305
|
+
(begin
|
306
|
+
keyword
|
307
|
+
(read-from-string fetched))
|
308
|
+
(let1 henkan-result (map
|
309
|
+
(lambda (x)
|
310
|
+
(map (lambda (val) (if val val nil)) x))
|
311
|
+
(sekka-henkan userid kvs keyword limit (make-keyword roman-method)))
|
312
|
+
(and cachesv
|
313
|
+
(not (null? henkan-result))
|
314
|
+
(let1 fetched2 (cachesv.get "sekka::(keys)" #f)
|
315
|
+
(cachesv.put! sekka-keyword (write-to-string henkan-result) cache-exp-second)
|
316
|
+
(cachesv.put! "sekka::(keys)" (if fetched2
|
317
|
+
(+ fetched2 " " sekka-keyword)
|
318
|
+
sekka-keyword)
|
319
|
+
cache-exp-second)
|
320
|
+
(if-let1 v (cachesv.get "sekka::(keys)" #f)
|
321
|
+
(begin
|
322
|
+
#?=(v.size)
|
323
|
+
#?=v)
|
324
|
+
#f)))
|
325
|
+
henkan-result))))
|
326
|
+
;; Export to Ruby world
|
327
|
+
(export-to-ruby sekkaHenkan)
|
328
|
+
|
329
|
+
|
330
|
+
;; Flush henkan-result cache data on cachesv(memcached)
|
331
|
+
(define (flush-cachesv cachesv)
|
332
|
+
#?="--- do (flush-cachesv) ---"
|
333
|
+
(if-let1 fetched (cachesv.get "sekka::(keys)" #f)
|
334
|
+
(begin
|
335
|
+
(for-each
|
336
|
+
(lambda (x)
|
337
|
+
(cachesv.delete #?=x))
|
338
|
+
(to-list (fetched.split #/[ ]+/)))
|
339
|
+
(cachesv.delete "sekka::(keys)"))
|
340
|
+
#f))
|
341
|
+
|
342
|
+
|
343
|
+
;; 確定処理: 最終確定語を変換候補の先頭に持ってくる。
|
344
|
+
;; key ... "developer" や "へんかん" など、変換候補レコードのキーとなる文字列
|
345
|
+
;; tango ... "変換" など、変換候補から最終確定した、変換候補の文字列
|
346
|
+
;; 登録失敗したら nil を返す
|
347
|
+
(define (sekkaKakutei userid kvs cachesv key tango)
|
348
|
+
(define (fetch userid kvs key)
|
349
|
+
(dict-get userid kvs key #f))
|
350
|
+
|
351
|
+
(define (write-user-entry userid kvs key value)
|
352
|
+
(kvs.put! (+ userid "::" key) value))
|
353
|
+
|
354
|
+
(define (join-henkan-kouho lst)
|
355
|
+
(+ "/"
|
356
|
+
(string-join
|
357
|
+
(map
|
358
|
+
(lambda (x)
|
359
|
+
(if (second x)
|
360
|
+
(+ (first x) ";" (second x))
|
361
|
+
(first x)))
|
362
|
+
lst)
|
363
|
+
"/")))
|
364
|
+
|
365
|
+
(let1 tango (if (is-hiragana-and-okuri key)
|
366
|
+
(drop-okuri tango)
|
367
|
+
tango)
|
368
|
+
(if-let1 kouho-str (fetch userid kvs key)
|
369
|
+
(if (rxmatch #/^\// kouho-str)
|
370
|
+
(let* ((kouho-lst (split-henkan-kouho kouho-str key))
|
371
|
+
(no1
|
372
|
+
(filter (lambda (x)
|
373
|
+
(eq? (car x) tango))
|
374
|
+
kouho-lst))
|
375
|
+
(other
|
376
|
+
(filter (lambda (x)
|
377
|
+
(not (eq? (car x) tango)))
|
378
|
+
kouho-lst))
|
379
|
+
(new-kouho-str
|
380
|
+
(join-henkan-kouho (append no1 other))))
|
381
|
+
(if (not (= kouho-str new-kouho-str))
|
382
|
+
(begin
|
383
|
+
(write-user-entry userid kvs key new-kouho-str)
|
384
|
+
(and cachesv
|
385
|
+
(flush-cachesv cachesv))
|
386
|
+
tango)
|
387
|
+
nil))
|
388
|
+
nil)
|
389
|
+
nil)))
|
390
|
+
;; Export to Ruby world
|
391
|
+
(export-to-ruby sekkaKakutei)
|
392
|
+
|
393
|
+
|
394
|
+
;; ユーザー定義語彙の登録処理
|
395
|
+
(define (registerUserJisyo userid kvs dict-line)
|
396
|
+
(define user-keylist '())
|
397
|
+
(define (insert-to-db sekka-jisyo-data)
|
398
|
+
(for-each
|
399
|
+
(lambda (entry)
|
400
|
+
(let* ((kv (to-list (entry.split #/[ ]+/)))
|
401
|
+
(k (first kv))
|
402
|
+
(v (second kv)))
|
403
|
+
(set! user-keylist (cons k user-keylist))
|
404
|
+
(append-entry userid kvs k v)))
|
405
|
+
sekka-jisyo-data))
|
406
|
+
|
407
|
+
(define (_create-ready-made-keylist keylist)
|
408
|
+
(receive (okuri-ari-hash
|
409
|
+
okuri-nashi-hash)
|
410
|
+
(create-2char-hash keylist)
|
411
|
+
|
412
|
+
;; OKURI-ARI
|
413
|
+
(for-each
|
414
|
+
(lambda (key)
|
415
|
+
(let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
|
416
|
+
(kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
|
417
|
+
(kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
|
418
|
+
(string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
|
419
|
+
(hash-table-get okuri-ari-hash key))))
|
420
|
+
" "))))
|
421
|
+
(hash-table-keys okuri-ari-hash))
|
422
|
+
|
423
|
+
;; OKURI-NASHI
|
424
|
+
(for-each
|
425
|
+
(lambda (key)
|
426
|
+
(let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
|
427
|
+
(kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
|
428
|
+
(kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
|
429
|
+
(string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
|
430
|
+
(hash-table-get okuri-nashi-hash key))))
|
431
|
+
" "))))
|
432
|
+
(hash-table-keys okuri-nashi-hash))))
|
433
|
+
|
434
|
+
;; "ユーザー語彙を"(stored)"にpush!する"
|
435
|
+
(define (kvs-push! userid kvs entry-str)
|
436
|
+
(let* ((orig (kvs.get (+ userid "::(stored)") "()"))
|
437
|
+
(orig (read-from-string orig)))
|
438
|
+
(kvs.put! (+ userid "::(stored)") (write-to-string (append orig (list entry-str))))
|
439
|
+
entry-str))
|
440
|
+
|
441
|
+
(cond
|
442
|
+
((memv dict-line (read-from-string (kvs.get (+ userid "::(stored)") "()")))
|
443
|
+
#?=(+ "already stored userid=[" userid "] tango=[" dict-line "]")
|
444
|
+
#f)
|
445
|
+
(else
|
446
|
+
(cond
|
447
|
+
((rxmatch #/[ ]+[\/]/ dict-line)
|
448
|
+
(let1 lst (convert-skk-jisyo-f (StringIO.new (+ dict-line "\n")))
|
449
|
+
(if (null? lst)
|
450
|
+
(begin
|
451
|
+
#?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
|
452
|
+
#f)
|
453
|
+
(begin
|
454
|
+
(insert-to-db lst)
|
455
|
+
(_create-ready-made-keylist user-keylist)
|
456
|
+
(kvs-push! userid kvs dict-line)
|
457
|
+
#?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
|
458
|
+
#t))))
|
459
|
+
(else
|
460
|
+
#?=(+ "Error user dict format error userid=[" userid "] tango=[" dict-line "]")
|
461
|
+
#f)))))
|
462
|
+
;; Export to Ruby world
|
463
|
+
(export-to-ruby registerUserJisyo)
|
464
|
+
|
@@ -0,0 +1,184 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; jisyo-db.nnd - 辞書DBの構築、辞書DBアクセスのライブラリ
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(require "sekka/kvs")
|
38
|
+
(use sekka.util)
|
39
|
+
|
40
|
+
(define masterid "MASTER")
|
41
|
+
(define workid "WORK")
|
42
|
+
|
43
|
+
(define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;")
|
44
|
+
(define alphabet-lower-list (to-list (alphabet-string.split "")))
|
45
|
+
(define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
|
46
|
+
(define alphabet-pairs (append-map
|
47
|
+
(lambda (a)
|
48
|
+
(map
|
49
|
+
(lambda (b)
|
50
|
+
(+ a b))
|
51
|
+
(append alphabet-upper-list alphabet-lower-list)))
|
52
|
+
alphabet-lower-list))
|
53
|
+
|
54
|
+
;; KVS type setting
|
55
|
+
(define *kvs-type* 'tokyocabinet) ;; default
|
56
|
+
(define (set-kvs-type type)
|
57
|
+
(if (not (symbol? type))
|
58
|
+
(error "Error: set-kvs-type requires symbol argument.")
|
59
|
+
(set! *kvs-type* type)))
|
60
|
+
(define (get-kvs-type) *kvs-type*)
|
61
|
+
|
62
|
+
|
63
|
+
;; dict-get search sequence
|
64
|
+
;; (1) try "userid::keyword" key
|
65
|
+
;; (2) try "MASTER::keyword" key
|
66
|
+
(define (dict-get userid kvs key . fallback)
|
67
|
+
(if-let1 value
|
68
|
+
(or (kvs.get (+ userid "::" key) #f)
|
69
|
+
(kvs.get (+ masterid "::" key) #f))
|
70
|
+
value
|
71
|
+
(let1 opt (get-optional fallback #f)
|
72
|
+
opt)))
|
73
|
+
|
74
|
+
(define (create-2char-hash keylist)
|
75
|
+
(define okuri-ari-hash (make-hash-table))
|
76
|
+
(define okuri-nashi-hash (make-hash-table))
|
77
|
+
(define (create-hash keylist)
|
78
|
+
(for-each
|
79
|
+
(lambda (k)
|
80
|
+
(when (rxmatch #/^[a-zA-Z^>-@`;+]+$/ k)
|
81
|
+
(let1 sliced (sekka-downcase (k.slice 0 2))
|
82
|
+
(when (= 2 sliced.size)
|
83
|
+
(if (rxmatch #/[A-Z`+]$/ k)
|
84
|
+
(hash-table-push! okuri-ari-hash sliced k)
|
85
|
+
(hash-table-push! okuri-nashi-hash sliced k))))))
|
86
|
+
keylist))
|
87
|
+
(create-hash keylist)
|
88
|
+
(values okuri-ari-hash
|
89
|
+
okuri-nashi-hash))
|
90
|
+
|
91
|
+
(define (setup-ready-made-keylist kvs keylist)
|
92
|
+
(for-each
|
93
|
+
(lambda (key)
|
94
|
+
(let1 key (+ masterid "::" key)
|
95
|
+
(unless (kvs.get key #f)
|
96
|
+
(kvs.put! key ""))))
|
97
|
+
alphabet-pairs)
|
98
|
+
|
99
|
+
(receive (okuri-ari-hash
|
100
|
+
okuri-nashi-hash)
|
101
|
+
(create-2char-hash keylist)
|
102
|
+
|
103
|
+
;; OKURI-ARI
|
104
|
+
(for-each
|
105
|
+
(lambda (key)
|
106
|
+
(kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
|
107
|
+
(string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
|
108
|
+
(hash-table-keys okuri-ari-hash))
|
109
|
+
|
110
|
+
;; OKURI-NASHI
|
111
|
+
(for-each
|
112
|
+
(lambda (key)
|
113
|
+
(kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
|
114
|
+
(string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
|
115
|
+
(hash-table-keys okuri-nashi-hash))))
|
116
|
+
|
117
|
+
|
118
|
+
(define (append-entry userid kvs key value)
|
119
|
+
(let1 key (+ userid "::" key)
|
120
|
+
(if-let1 got (kvs.get key #f)
|
121
|
+
(let* ((trimmed-a-first1 (car (string-split-first-and-rest got)))
|
122
|
+
(trimmed-a (if-let1 m (rxmatch #/^[C\/](.+)$/ got)
|
123
|
+
(rxmatch-substring m 1)
|
124
|
+
got))
|
125
|
+
(trimmed-b-first1 (car (string-split-first-and-rest value)))
|
126
|
+
(trimmed-b (if-let1 m (rxmatch #/^[C\/](.+)$/ value)
|
127
|
+
(rxmatch-substring m 1)
|
128
|
+
value))
|
129
|
+
(lst (delete-duplicates (append (to-list (trimmed-a.split "/"))
|
130
|
+
(to-list (trimmed-b.split "/"))))))
|
131
|
+
(if (= trimmed-a-first1 trimmed-b-first1)
|
132
|
+
(kvs.put! key (+ trimmed-a-first1 (string-join lst "/")))
|
133
|
+
(begin
|
134
|
+
;; "/" よりも "C" を優先する
|
135
|
+
(cond
|
136
|
+
((= "C" trimmed-a-first1) ;; aを優先
|
137
|
+
(kvs.put! key got))
|
138
|
+
((= "C" trimmed-b-first1) ;; bを優先
|
139
|
+
(kvs.put! key value))
|
140
|
+
(else
|
141
|
+
(errorf "Error: [%s] entry is wrong format" value))))))
|
142
|
+
(kvs.put! key value))))
|
143
|
+
|
144
|
+
|
145
|
+
(define (load-sekka-jisyo-f f filename)
|
146
|
+
(define keylist '())
|
147
|
+
(define (create-keylist kvs lines)
|
148
|
+
(for-each
|
149
|
+
(lambda (line)
|
150
|
+
(let1 fields (split-dict-line line)
|
151
|
+
(set! keylist (cons (first fields) keylist))
|
152
|
+
(append-entry masterid kvs (first fields) (second fields))))
|
153
|
+
lines))
|
154
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
155
|
+
(kvs.open filename)
|
156
|
+
(kvs.clear)
|
157
|
+
(create-keylist kvs (f.readlines.to_list))
|
158
|
+
(setup-ready-made-keylist kvs keylist)
|
159
|
+
(kvs.close)))
|
160
|
+
|
161
|
+
|
162
|
+
(define (dump-sekka-jisyo-f f filename)
|
163
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
164
|
+
(kvs.open filename)
|
165
|
+
(for-each
|
166
|
+
(lambda (key)
|
167
|
+
(f.puts (+ key " " (kvs.get key))))
|
168
|
+
(to-list (kvs.keys)))
|
169
|
+
(kvs.close)))
|
170
|
+
|
171
|
+
|
172
|
+
(define (openSekkaJisyo dictSource cacheSource)
|
173
|
+
(when (not (rxmatch #/tch$/ dictSource))
|
174
|
+
(set-kvs-type 'memcache))
|
175
|
+
(let1 kvs (Kvs.new (get-kvs-type))
|
176
|
+
(kvs.open #?=dictSource)
|
177
|
+
(let1 cachesv (if cacheSource
|
178
|
+
(let1 obj (Kvs.new 'memcache)
|
179
|
+
(obj.open cacheSource)
|
180
|
+
obj)
|
181
|
+
#f)
|
182
|
+
(to-arr (list kvs cachesv)))))
|
183
|
+
;; Export to Ruby world
|
184
|
+
(export-to-ruby openSekkaJisyo)
|