sekka 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +22 -0
- data/bin/.gitignore +1 -0
- data/bin/sekka-jisyo +98 -0
- data/bin/sekka-server +83 -0
- data/emacs/http-cookies.el +416 -0
- data/emacs/http-get.el +448 -0
- data/emacs/sekka.el +1069 -0
- data/lib/sekka/alphabet-lib.nnd +59 -0
- data/lib/sekka/approximatesearch.rb +72 -0
- data/lib/sekka/convert-jisyo.nnd +129 -0
- data/lib/sekka/henkan.nnd +464 -0
- data/lib/sekka/jisyo-db.nnd +184 -0
- data/lib/sekka/kvs.rb +135 -0
- data/lib/sekka/roman-lib.nnd +660 -0
- data/lib/sekka/sekkaversion.rb +6 -0
- data/lib/sekka/util.nnd +64 -0
- data/lib/sekka.ru +36 -0
- data/lib/sekkaconfig.rb +62 -0
- data/lib/sekkaserver.rb +127 -0
- data/test/alphabet-lib.nnd +188 -0
- data/test/approximate-bench.nnd +83 -0
- data/test/common.nnd +51 -0
- data/test/henkan-main.nnd +942 -0
- data/test/jisyo.nnd +94 -0
- data/test/roman-lib.nnd +422 -0
- data/test/util.nnd +100 -0
- metadata +223 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; alphabet-lib.nnd - アルファベットの変換ライブラリ
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
|
37
|
+
;; Convert half-width (ASCII) characters to full-width.
;; Every character of str in the ASCII range "!".."}" is translated to the
;; character at the same offset in the full-width range "！".."｝";
;; characters outside that range are returned unchanged.
;; The target range must be written with full-width characters: with an
;; ASCII target range the translation maps each character onto itself.
(define (gen-alphabet-han->zen str)
  (str.tr "!-}" "！-｝"))
|
40
|
+
|
41
|
+
;; Convert full-width characters to half-width (ASCII).
;; Every character of str in the full-width range "！".."｝" is translated
;; to the character at the same offset in the ASCII range "!".."}";
;; characters outside that range are returned unchanged.
;; The source range must be written with full-width characters: with an
;; ASCII source range the translation maps each character onto itself.
(define (gen-alphabet-zen->han str)
  (str.tr "！-｝" "!-}"))
|
44
|
+
|
45
|
+
;; Check whether str is written in full-width characters.
;; Returns #t when str matches ^[！-｝]+$ (one or more characters from the
;; full-width range "！".."｝"), #f otherwise.
;; The character class must use full-width characters; an ASCII class
;; would make this predicate identical to is-alphabet-hankaku.
(define (is-alphabet-zenkaku str)
  (if (rxmatch #/^[！-｝]+$/ str) #t #f))
|
48
|
+
|
49
|
+
;; Check whether str is written in half-width (ASCII) characters.
;; Returns #t when str matches ^[!-}]+$ (one or more characters from the
;; ASCII range "!".."}"), #f otherwise.
(define (is-alphabet-hankaku str)
  (cond
   ((rxmatch #/^[!-}]+$/ str) #t)
   (else #f)))
|
52
|
+
|
53
|
+
;; Check whether str contains any full-width character.
;; Returns #t when at least one character from the full-width range
;; "！".."｝" occurs anywhere in str, #f otherwise.
;; The character class must use full-width characters; an ASCII class
;; would make this predicate identical to include-alphabet-hankaku.
(define (include-alphabet-zenkaku str)
  (if (rxmatch #/[！-｝]+/ str) #t #f))
|
56
|
+
|
57
|
+
;; Check whether str contains any half-width (ASCII) character.
;; Returns #t when at least one character from the ASCII range "!".."}"
;; occurs anywhere in str, #f otherwise.
(define (include-alphabet-hankaku str)
  (cond
   ((rxmatch #/[!-}]+/ str) #t)
   (else #f)))
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# approximatesearch.rb - "approximate search library"
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions
|
7
|
+
# are met:
|
8
|
+
#
|
9
|
+
# 1. Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in the
|
14
|
+
# documentation and/or other materials provided with the distribution.
|
15
|
+
#
|
16
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
17
|
+
# may be used to endorse or promote products derived from this
|
18
|
+
# software without specific prior written permission.
|
19
|
+
#
|
20
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
24
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
25
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
26
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
27
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
28
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
29
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
30
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
#
|
32
|
+
# $Id:
|
33
|
+
#
|
34
|
+
require 'fuzzystringmatch'
|
35
|
+
require 'sekka/kvs'
|
36
|
+
|
37
|
+
# Fuzzy lookup of dictionary keys: candidate keys are read from a
# key-value store and ranked by their Jaro-Winkler score against the keyword.
class ApproximateSearch
  # jarow_shikii:: score threshold; a candidate is kept only when its
  #                score is strictly greater than this value.
  def initialize( jarow_shikii )
    @jarow        = FuzzyStringMatch::JaroWinkler.new.create( :native )
    @jarow_shikii = jarow_shikii
  end

  # Score every string of +arr+ against +keyword+ (both compared in
  # lower case) and return the survivors as [ score, original_string ]
  # pairs, ordered by ascending (1.0 - score), i.e. best score first.
  def filtering( keyword, arr )
    needle = keyword.downcase
    hits   = []
    arr.each do |candidate|
      score = @jarow.getDistance( needle, candidate.downcase )
      #printf( " [%s] vs [%s] => %f\n", needle, candidate.downcase, score )
      hits << [ score, candidate ] if score > @jarow_shikii
    end
    hits.sort_by { |pair| 1.0 - pair[0] }
  end

  # Find the keys that approximately match +keyword+.
  #
  # The candidate list is stored under "(xx)", where xx is the first two
  # characters of +keyword+ -- upper-cased when +okuri_ari+ is truthy,
  # lower-cased otherwise. The entry "<userid>::(xx)" is tried first and
  # "MASTER::(xx)" is used when that lookup yields nil/false.
  # The stored value is split on runs of spaces and passed to #filtering.
  # Returns [ ] when neither entry exists.
  def search( userid, kvs, keyword, okuri_ari )
    head          = keyword.slice( 0, 2 )
    head          = okuri_ari ? head.upcase : head.downcase
    readymade_key = "(" + head + ")"

    # second argument is presumably the value returned on a miss -- TODO confirm against Kvs#get
    str = kvs.get( userid + "::" + readymade_key, false ) ||
          kvs.get( "MASTER::" + readymade_key )

    #printf( "#readymade_key %s : %s\n", readymade_key, str )
    return [ ] unless str

    filtering( keyword, str.split( /[ ]+/ ))
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; convert-jisyo.nnd - SKK-JISYO形式から SEKKA-JISYO形式へのコンバートロジック
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(use sekka.util)
|
38
|
+
(use sekka.roman-lib)
|
39
|
+
|
40
|
+
|
41
|
+
;; Expand one dictionary entry that has no okuri part.
;;   key   : reading of the entry; it is handed to gen-hiragana->roman-list
;;   value : value stored for that reading
;; Returns a list of pairs: one (roman . "C"+key) pair for every element of
;; the roman list (presumably a reference back to key -- TODO confirm the
;; meaning of the "C" prefix), followed by the (key . value) pair itself.
;; Returns #f, after printing a warning to STDERR, when key expands to more
;; than 1000 romanizations.
(define (expand-okuri-nashi-entry key value)
  (let1 roman-list (gen-hiragana->roman-list key)
    (if (< 1000 (length roman-list))
        (begin
          (STDERR.printf " Warning: ignored entry [%s %s] , because too many patterns.\n" key value)
          #f) ;; words whose pattern count explodes are ignored
        (append
         (map
          (lambda (x)
            (cons x (+ "C" key)))
          roman-list)
         (list (cons key value))))))
|
53
|
+
|
54
|
+
|
55
|
+
;; Expand one dictionary entry that carries an okuri part.
;;   key   : reading of the entry; it is handed to gen-hiragana->roman-list
;;   okuri : okuri string appended to the reading
;;   value : value stored for key+okuri
;; Returns a list of pairs: one (roman+UPCASE(okuri) . "C"+key+okuri) pair
;; for every element of the roman list, followed by (key+okuri . value).
(define (expand-okuri-ari-entry-internal key okuri value)
  ;; Pair built for a single romanization of key.
  (define (roman->pair roman)
    (cons (+ roman (sekka-upcase okuri))
          (+ "C" key okuri)))
  (let* ((romans      (gen-hiragana->roman-list key))
         (roman-pairs (map roman->pair romans))
         (own-pair    (cons (+ key okuri) value)))
    (append roman-pairs (list own-pair))))
|
63
|
+
|
64
|
+
;; Expand one dictionary entry that carries an okuri part.
;;   key   : reading of the entry
;;   okuri : okuri string taken from the dictionary key
;;   value : value stored for the entry
;; When okuri is "t" the entry is expanded three times -- with okuri itself
;; and with (sekka-upcase "@") and (sekka-upcase ";") in its place -- and
;; the three result lists are concatenated. Any other okuri is expanded once.
;; The okuri test uses equal? because it compares string contents; eq? on
;; strings is not guaranteed to compare by content.
(define (expand-okuri-ari-entry key okuri value)
  (cond
   ((equal? "t" okuri)
    (append-map (lambda (x) x)
                (list
                 (expand-okuri-ari-entry-internal key okuri value)
                 (expand-okuri-ari-entry-internal key (sekka-upcase "@") value)
                 (expand-okuri-ari-entry-internal key (sekka-upcase ";") value))))
   (else
    (expand-okuri-ari-entry-internal key okuri value))))
|
74
|
+
|
75
|
+
|
76
|
+
;; Convert an SKK-JISYO stream into a list of SEKKA-JISYO lines.
;;   f : object whose readlines method yields the dictionary lines
;; Returns a list of strings, each formatted as "<key> <value>".
;; Progress is reported on STDERR every 10000 input lines.
(define (convert-skk-jisyo-f f)
  (define total 0)
  (define current 0)

  ;; Count one processed line and print a progress report every 10000 lines.
  ;; The literal percent sign is written as "%%": a bare "%" followed by ")"
  ;; is not a valid format specifier.
  (define (display-progress line)
    (set! current (+ current 1))
    (when (= 0 (% current 10000))
      (STDERR.printf " %7d/%7d (%3.3f%%)\n" current total (* (/ current (total.to_f)) 100.0))))

  ;; Turn one dictionary line into a list of (key . value) pairs,
  ;; or #f when the line produces no entry.
  (define (gen-sekka-entries line)
    (display-progress line)

    (let* ((line (line.sub #/\/$/ ""))   ; drop the trailing "/"
           (fields (split-dict-line line)))
      (cond
       ((rxmatch #/^\;/ line)
        ;; comment line
        #f)
       ((not fields)
        ;; format error
        #f)
       ((or (is-hiragana (first fields))
            (rxmatch #/^([>あ-ん]+)$/ (first fields)))
        ;; entry without okuri
        (expand-okuri-nashi-entry (first fields) (second fields)))
       ((rxmatch #/^([>あ-ん]+)([a-z])$/ (first fields))
        => (lambda (m)
             ;; entry with okuri: group 1 is the reading, group 2 the okuri letter
             (expand-okuri-ari-entry (rxmatch-substring m 1)
                                     (rxmatch-substring m 2)
                                     (second fields))))
       ((rxmatch #/[亜-瑤]+/ (first fields))
        ;; the key contains at least one kanji
        #f)
       (else
        (list (cons (first fields) (second fields)))))))

  (let* ((lines
          (map
           (lambda (line)
             (line.chomp))
           (f.readlines.to_list)))
         ;; total must be set before gen-sekka-entries runs, for the progress report
         (_ (set! total (length lines)))
         (entry-list
          ;; drop the #f results of skipped lines
          (filter
           (lambda (x) x)
           (map gen-sekka-entries lines))))
    (map
     (lambda (entry)
       (sprintf "%s %s" (car entry) (cdr entry)))
     (apply append! entry-list))))
|
127
|
+
|
128
|
+
|
129
|
+
|