sekka 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +22 -0
- data/bin/.gitignore +1 -0
- data/bin/sekka-jisyo +98 -0
- data/bin/sekka-server +83 -0
- data/emacs/http-cookies.el +416 -0
- data/emacs/http-get.el +448 -0
- data/emacs/sekka.el +1069 -0
- data/lib/sekka/alphabet-lib.nnd +59 -0
- data/lib/sekka/approximatesearch.rb +72 -0
- data/lib/sekka/convert-jisyo.nnd +129 -0
- data/lib/sekka/henkan.nnd +464 -0
- data/lib/sekka/jisyo-db.nnd +184 -0
- data/lib/sekka/kvs.rb +135 -0
- data/lib/sekka/roman-lib.nnd +660 -0
- data/lib/sekka/sekkaversion.rb +6 -0
- data/lib/sekka/util.nnd +64 -0
- data/lib/sekka.ru +36 -0
- data/lib/sekkaconfig.rb +62 -0
- data/lib/sekkaserver.rb +127 -0
- data/test/alphabet-lib.nnd +188 -0
- data/test/approximate-bench.nnd +83 -0
- data/test/common.nnd +51 -0
- data/test/henkan-main.nnd +942 -0
- data/test/jisyo.nnd +94 -0
- data/test/roman-lib.nnd +422 -0
- data/test/util.nnd +100 -0
- metadata +223 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; alphabet-lib.nnd - アルファベットの変換ライブラリ
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
|
37
|
+
;; Convert half-width (hankaku) ASCII characters to full-width (zenkaku).
;;
;; Each character of str in the range "!" .. "}" is translated to the
;; character at the same position in the full-width range "！" .. "｝";
;; the translation is delegated to the tr method of str.
;; Returns the translated string.
(define (gen-alphabet-han->zen str)
  (str.tr "!-}" "！-｝"))
|
40
|
+
|
41
|
+
;; Convert full-width (zenkaku) characters to half-width (hankaku) ASCII.
;;
;; Each character of str in the full-width range "！" .. "｝" is translated
;; to the character at the same position in the ASCII range "!" .. "}";
;; the translation is delegated to the tr method of str.
;; Returns the translated string.
(define (gen-alphabet-zen->han str)
  (str.tr "！-｝" "!-}"))
|
44
|
+
|
45
|
+
;; Check whether str is made of full-width (zenkaku) characters only.
;;
;; Returns #t when str matches ^[！-｝]+$ (one or more characters of the
;; full-width range "！" .. "｝"), otherwise #f.
(define (is-alphabet-zenkaku str)
  (if (rxmatch #/^[！-｝]+$/ str) #t #f))
|
48
|
+
|
49
|
+
;; Check whether str is made of half-width (hankaku) characters only.
;;
;; Returns #t when str matches ^[!-}]+$ (one or more characters of the
;; ASCII range "!" .. "}"), otherwise #f.
(define (is-alphabet-hankaku str)
  (cond
   ((rxmatch #/^[!-}]+$/ str) #t)
   (else #f)))
|
52
|
+
|
53
|
+
;; Check whether str contains at least one full-width (zenkaku) character.
;;
;; Returns #t when some part of str matches [！-｝]+ (the full-width range
;; "！" .. "｝"), otherwise #f.
(define (include-alphabet-zenkaku str)
  (if (rxmatch #/[！-｝]+/ str) #t #f))
|
56
|
+
|
57
|
+
;; Check whether str contains at least one half-width (hankaku) character.
;;
;; Returns #t when some part of str matches [!-}]+ (the ASCII range
;; "!" .. "}"), otherwise #f.
(define (include-alphabet-hankaku str)
  (cond
   ((rxmatch #/[!-}]+/ str) #t)
   (else #f)))
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# approximatesearch.rb - "approximate search library"
|
2
|
+
#
|
3
|
+
# Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions
|
7
|
+
# are met:
|
8
|
+
#
|
9
|
+
# 1. Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
#
|
12
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer in the
|
14
|
+
# documentation and/or other materials provided with the distribution.
|
15
|
+
#
|
16
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
17
|
+
# may be used to endorse or promote products derived from this
|
18
|
+
# software without specific prior written permission.
|
19
|
+
#
|
20
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
24
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
25
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
26
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
27
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
28
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
29
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
30
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
#
|
32
|
+
# $Id:
|
33
|
+
#
|
34
|
+
require 'fuzzystringmatch'
|
35
|
+
require 'sekka/kvs'
|
36
|
+
|
37
|
+
# Approximate (fuzzy) lookup of dictionary keys using a Jaro-Winkler score.
class ApproximateSearch
  # jarow_shikii:: score threshold; only candidates scoring strictly above
  #                it are kept by #filtering.
  def initialize( jarow_shikii )
    @jarow_shikii = jarow_shikii
    @jarow        = FuzzyStringMatch::JaroWinkler.new.create( :native )
  end

  # Score every string of +arr+ against +keyword+ (both downcased for the
  # comparison) and keep those whose score exceeds the threshold.
  #
  # Returns an array of [ score, original_string ] pairs ordered by
  # ascending (1.0 - score), i.e. best match first.
  def filtering( keyword, arr )
    needle = keyword.downcase
    scored = []
    arr.each do |candidate|
      score = @jarow.getDistance( needle, candidate.downcase )
      #printf( " [%s] vs [%s] => %f\n", needle, candidate.downcase, score )
      scored << [ score, candidate ] if score > @jarow_shikii
    end
    scored.sort_by { |score, _candidate| 1.0 - score }
  end

  # Look up the candidate list stored under the first two characters of
  # +keyword+ and filter it with #filtering.
  #
  # The lookup key is "(" + prefix + ")", where prefix is upcased when
  # +okuri_ari+ is truthy and downcased otherwise.  The user's own entry
  # (userid + "::" + key) is tried first, then "MASTER::" + key.
  #
  # Returns the #filtering result, or an empty array when neither entry
  # exists.
  def search( userid, kvs, keyword, okuri_ari )
    prefix        = keyword.slice( 0, 2 )
    prefix        = okuri_ari ? prefix.upcase : prefix.downcase
    readymade_key = "(" + prefix + ")"

    entry = kvs.get( userid + "::" + readymade_key, false ) ||
            kvs.get( "MASTER::" + readymade_key )

    #printf( "#readymade_key %s : %s\n", readymade_key, entry )
    return [ ] unless entry

    # Candidates are stored as a single space-separated string.
    filtering( keyword, entry.split( /[ ]+/ ))
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
:; #-*- mode: nendo; syntax: scheme -*-;;
|
2
|
+
;;;
|
3
|
+
;;; convert-jisyo.nnd - SKK-JISYO形式から SEKKA-JISYO形式へのコンバートロジック
|
4
|
+
;;;
|
5
|
+
;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
;;;
|
7
|
+
;;; Redistribution and use in source and binary forms, with or without
|
8
|
+
;;; modification, are permitted provided that the following conditions
|
9
|
+
;;; are met:
|
10
|
+
;;;
|
11
|
+
;;; 1. Redistributions of source code must retain the above copyright
|
12
|
+
;;; notice, this list of conditions and the following disclaimer.
|
13
|
+
;;;
|
14
|
+
;;; 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
;;; notice, this list of conditions and the following disclaimer in the
|
16
|
+
;;; documentation and/or other materials provided with the distribution.
|
17
|
+
;;;
|
18
|
+
;;; 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
;;; may be used to endorse or promote products derived from this
|
20
|
+
;;; software without specific prior written permission.
|
21
|
+
;;;
|
22
|
+
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
;;;
|
34
|
+
;;; $Id:
|
35
|
+
;;;
|
36
|
+
(use srfi-1)
|
37
|
+
(use sekka.util)
|
38
|
+
(use sekka.roman-lib)
|
39
|
+
|
40
|
+
|
41
|
+
;; Expand one okuri-nashi (no okurigana) dictionary entry.
;;
;;   key   : reading, passed to gen-hiragana->roman-list
;;   value : candidate string stored for that reading
;;
;; Returns a list of pairs: one (roman . "C"+key) pair for every element
;; produced by gen-hiragana->roman-list, followed by the (key . value)
;; pair itself.
;; Returns #f, after printing a warning to STDERR, when more than 1000
;; romanizations are produced.
(define (expand-okuri-nashi-entry key value)
  (let ((roman-list (gen-hiragana->roman-list key)))
    (cond
     ((< 1000 (length roman-list))
      ;; Ignore words whose number of romanization patterns explodes.
      (STDERR.printf " Warning: ignored entry [%s %s] , because too many patterns.\n" key value)
      #f)
     (else
      (append
       (map
        (lambda (roman)
          (cons roman (+ "C" key)))
        roman-list)
       (list (cons key value)))))))
|
53
|
+
|
54
|
+
|
55
|
+
;; Expand one okuri-ari (with okurigana) entry for a single okuri marker.
;;
;;   key   : reading stem, passed to gen-hiragana->roman-list
;;   okuri : okurigana marker appended to the stem
;;   value : candidate string stored for key+okuri
;;
;; Returns a list of pairs: for every romanization r of key a pair
;; (r + (sekka-upcase okuri) . "C"+key+okuri), followed by the
;; (key+okuri . value) pair itself.
(define (expand-okuri-ari-entry-internal key okuri value)
  ;; Build the pair that maps one romanized spelling to the "C"-prefixed key.
  (define (roman->pair roman)
    (cons (+ roman (sekka-upcase okuri))
          (+ "C" key okuri)))
  (let ((romans (gen-hiragana->roman-list key)))
    (append
     (map roman->pair romans)
     (list (cons (+ key okuri) value)))))
|
63
|
+
|
64
|
+
;; Expand one okuri-ari (with okurigana) dictionary entry.
;;
;; When okuri is the string "t" the entry is expanded three times -- for
;; okuri itself, for (sekka-upcase "@") and for (sekka-upcase ";") -- and
;; the three resulting lists are concatenated in that order.  Any other
;; okuri is expanded once.
;;
;; okuri is compared with equal? so that the check depends on the string's
;; content rather than on object identity.
(define (expand-okuri-ari-entry key okuri value)
  (if (equal? "t" okuri)
      (append-map (lambda (x) x)
                  (list
                   (expand-okuri-ari-entry-internal key okuri value)
                   (expand-okuri-ari-entry-internal key (sekka-upcase "@") value)
                   (expand-okuri-ari-entry-internal key (sekka-upcase ";") value)))
      (expand-okuri-ari-entry-internal key okuri value)))
|
74
|
+
|
75
|
+
|
76
|
+
;; Convert the contents of an SKK-JISYO stream into SEKKA-JISYO lines.
;;
;;   f : object whose readlines method yields the dictionary lines
;;
;; Every input line is chomped and classified; lines that produce entries
;; are expanded into (key . value) pairs, and each pair is rendered as the
;; string "KEY VALUE".  Returns the list of those strings.
;; Progress is reported on STDERR every 10000 input lines.
(define (convert-skk-jisyo-f f)
  (define total 0)     ; number of input lines, set once the input is read
  (define current 0)   ; number of lines processed so far

  ;; Count one processed line and print progress every 10000 lines.
  (define (display-progress)
    (set! current (+ current 1))
    (when (= 0 (% current 10000))
      ;; "%%" emits a literal percent sign; a bare "%)" is not a valid
      ;; printf conversion.
      (STDERR.printf " %7d/%7d (%3.3f%%)\n" current total (* (/ current (total.to_f)) 100.0))))

  ;; Turn one dictionary line into a list of (key . value) pairs,
  ;; or #f when the line yields no entry.
  (define (gen-sekka-entries line)
    (display-progress)

    (let* ((line (line.sub #/\/$/ ""))          ; drop one trailing "/"
           (fields (split-dict-line line)))
      (cond
       ((rxmatch #/^\;/ line)
        ;; comment line
        #f)
       ((not fields)
        ;; format error
        #f)
       ((or (is-hiragana (first fields))
            (rxmatch #/^([>あ-ん]+)$/ (first fields)))
        ;; entry without okurigana
        (expand-okuri-nashi-entry (first fields) (second fields)))
       ((rxmatch #/^([>あ-ん]+)([a-z])$/ (first fields))
        => (lambda (m)
             ;; entry with okurigana: group 1 is the stem, group 2 the okuri letter
             (expand-okuri-ari-entry (rxmatch-substring m 1)
                                     (rxmatch-substring m 2)
                                     (second fields))))
       ((rxmatch #/[亜-瑤]+/ (first fields))
        ;; the key contains at least one kanji character
        #f)
       (else
        (list (cons (first fields) (second fields)))))))

  (let* ((lines
          (map
           (lambda (line)
             (line.chomp))
           (f.readlines.to_list)))
         ;; total must be known before the first progress report
         (_ (set! total (length lines)))
         (entry-list
          (filter
           (lambda (x) x)                       ; drop the #f results
           (map gen-sekka-entries lines))))
    (map
     (lambda (entry)
       (sprintf "%s %s" (car entry) (cdr entry)))
     (apply append! entry-list))))
|
127
|
+
|
128
|
+
|
129
|
+
|