sekka 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,59 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; alphabet-lib.nnd - アルファベットの変換ライブラリ
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+
37
;; Convert half-width alphabet characters to full-width.
;;
;; str : the string to convert
;;
;; Every character in the half-width range "!" through "}" is replaced by
;; the full-width character at the same offset in the range "！" through
;; "｝"; all other characters are passed through unchanged.
(define (gen-alphabet-han->zen str)
  ;; The target set must be the full-width range; with two identical
  ;; half-width sets the translation would be an identity.
  (str.tr "!-}" "！-｝"))
40
+
41
;; Convert full-width alphabet characters to half-width.
;;
;; str : the string to convert
;;
;; Every character in the full-width range "！" through "｝" is replaced by
;; the half-width character at the same offset in the range "!" through
;; "}"; all other characters are passed through unchanged.
(define (gen-alphabet-zen->han str)
  ;; The source set must be the full-width range; with two identical
  ;; half-width sets the translation would be an identity.
  (str.tr "！-｝" "!-}"))
44
+
45
;; Test whether str is made up only of full-width alphabet characters.
;;
;; str : the string to test
;;
;; Returns #t when the pattern below (one or more characters in the
;; full-width range "！" through "｝", anchored with ^ and $) matches,
;; otherwise #f.
(define (is-alphabet-zenkaku str)
  ;; The character class must be the full-width range; the half-width
  ;; range would make this predicate identical to is-alphabet-hankaku.
  (if (rxmatch #/^[！-｝]+$/ str) #t #f))
48
+
49
;; Test whether str is made up only of half-width alphabet characters.
;;
;; str : the string to test
;;
;; Returns #t when the pattern below (one or more characters in the
;; half-width range "!" through "}", anchored with ^ and $) matches,
;; otherwise #f.
(define (is-alphabet-hankaku str)
  (cond
   ((rxmatch #/^[!-}]+$/ str) #t)
   (else                      #f)))
52
+
53
;; Test whether str contains at least one full-width alphabet character.
;;
;; str : the string to test
;;
;; Returns #t when any character of str lies in the full-width range
;; "！" through "｝", otherwise #f.
(define (include-alphabet-zenkaku str)
  ;; The character class must be the full-width range; the half-width
  ;; range would make this predicate identical to include-alphabet-hankaku.
  (if (rxmatch #/[！-｝]+/ str) #t #f))
56
+
57
;; Test whether str contains at least one half-width alphabet character.
;;
;; str : the string to test
;;
;; Returns #t when any character of str lies in the half-width range
;; "!" through "}", otherwise #f.
(define (include-alphabet-hankaku str)
  (cond
   ((rxmatch #/[!-}]+/ str) #t)
   (else                    #f)))
@@ -0,0 +1,72 @@
1
+ # approximatesearch.rb - "approximate search library"
2
+ #
3
+ # Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ #
12
+ # 2. Redistributions in binary form must reproduce the above copyright
13
+ # notice, this list of conditions and the following disclaimer in the
14
+ # documentation and/or other materials provided with the distribution.
15
+ #
16
+ # 3. Neither the name of the authors nor the names of its contributors
17
+ # may be used to endorse or promote products derived from this
18
+ # software without specific prior written permission.
19
+ #
20
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ #
32
+ # $Id:
33
+ #
34
+ require 'fuzzystringmatch'
35
+ require 'sekka/kvs'
36
+
37
# Approximate (fuzzy) keyword lookup.
#
# Candidate words are read from a key-value store as one space-separated
# string, scored against the keyword with a FuzzyStringMatch::JaroWinkler
# matcher, and returned best-first.
class ApproximateSearch
  # jarow_shikii:: score threshold; only candidates whose score is strictly
  #                greater than this value are kept by #filtering.
  def initialize( jarow_shikii )
    @jarow_shikii = jarow_shikii
    @jarow        = FuzzyStringMatch::JaroWinkler.new.create( :native )
  end

  # Score every string of +arr+ against +keyword+ (both downcased for the
  # comparison) and keep those scoring above the threshold.
  #
  # keyword:: the word being searched for
  # arr::     array of candidate strings
  #
  # Returns an array of [ score, candidate ] pairs sorted by descending
  # score.  Candidates with equal scores keep their relative order from
  # +arr+, so the result is deterministic.
  def filtering( keyword, arr )
    keyword = keyword.downcase
    scored  = []
    arr.each_with_index { |str, idx|
      val = @jarow.getDistance( keyword, str.downcase )
      #printf( " [%s] vs [%s] => %f\n", keyword, str.downcase, val )
      scored << [ val, idx, str ] if val > @jarow_shikii
    }
    # sort_by is not stable, so the original index is used as a tie-break.
    scored.sort_by { |val, idx, _str| [ -val, idx ] }.map { |val, _idx, str| [ val, str ] }
  end

  # Look up the candidate list for +keyword+ and filter it.
  #
  # userid::    prefix of the per-user key ("<userid>::<key>")
  # kvs::       store object responding to +get+
  # keyword::   the word being searched for
  # okuri_ari:: when true the lookup key is upcased, otherwise downcased
  #
  # The lookup key is the first two characters of +keyword+ wrapped in
  # parentheses.  The per-user entry is tried first and the "MASTER::"
  # entry is used when that lookup yields nothing.
  #
  # Returns the result of #filtering, or an empty array when neither
  # entry exists.
  def search( userid, kvs, keyword, okuri_ari )
    prefix        = keyword.slice( 0, 2 )
    readymade_key = "(" + ( okuri_ari ? prefix.upcase : prefix.downcase ) + ")"

    # NOTE(review): the second argument is presumably the value returned on
    # a miss -- confirm against sekka/kvs.
    str = kvs.get( userid + "::" + readymade_key, false )
    str = kvs.get( "MASTER::" + readymade_key ) if not str

    #printf( "#readymade_key %s : %s\n", readymade_key, str )
    if str
      filtering( keyword, str.split( /[ ]+/ ))
    else
      [ ]
    end
  end
end
@@ -0,0 +1,129 @@
1
+ :; #-*- mode: nendo; syntax: scheme -*-;;
2
+ ;;;
3
+ ;;; convert-jisyo.nnd - SKK-JISYO形式から SEKKA-JISYO形式へのコンバートロジック
4
+ ;;;
5
+ ;;; Copyright (c) 2010 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ ;;;
7
+ ;;; Redistribution and use in source and binary forms, with or without
8
+ ;;; modification, are permitted provided that the following conditions
9
+ ;;; are met:
10
+ ;;;
11
+ ;;; 1. Redistributions of source code must retain the above copyright
12
+ ;;; notice, this list of conditions and the following disclaimer.
13
+ ;;;
14
+ ;;; 2. Redistributions in binary form must reproduce the above copyright
15
+ ;;; notice, this list of conditions and the following disclaimer in the
16
+ ;;; documentation and/or other materials provided with the distribution.
17
+ ;;;
18
+ ;;; 3. Neither the name of the authors nor the names of its contributors
19
+ ;;; may be used to endorse or promote products derived from this
20
+ ;;; software without specific prior written permission.
21
+ ;;;
22
+ ;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ ;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ ;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ ;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ ;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ ;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ ;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ ;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ ;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ ;;;
34
+ ;;; $Id:
35
+ ;;;
36
+ (use srfi-1)
37
+ (use sekka.util)
38
+ (use sekka.roman-lib)
39
+
40
+
41
;; Expand one entry without okurigana into SEKKA dictionary pairs.
;;
;; key   : reading of the entry, passed to gen-hiragana->roman-list
;; value : candidate string stored for that reading
;;
;; Returns a list of pairs: one (roman . "C"+key) pair for every element
;; of the roman list, followed by the (key . value) pair itself.
;; Returns #f, after printing a warning to STDERR, when the roman list
;; holds more than 1000 patterns.
(define (expand-okuri-nashi-entry key value)
  (let1 roman-list (gen-hiragana->roman-list key)
    (if (< 1000 (length roman-list))
        (begin
          (STDERR.printf " Warning: ignored entry [%s %s] , because too many patterns.\n" key value)
          #f) ;; entries whose pattern count explodes are ignored
        (append
         (map
          (lambda (x)
            (cons x (+ "C" key)))
          roman-list)
         (list (cons key value))))))
53
+
54
+
55
;; Expand one entry with okurigana into SEKKA dictionary pairs.
;;
;; key   : reading of the entry, passed to gen-hiragana->roman-list
;; okuri : okurigana marker appended to the reading
;; value : candidate string stored for that reading
;;
;; Returns a list of pairs: one (roman+upcased-okuri . "C"+key+okuri)
;; pair for every element of the roman list, followed by the
;; (key+okuri . value) pair.
(define (expand-okuri-ari-entry-internal key okuri value)
  (let* ((romans      (gen-hiragana->roman-list key))
         (roman-pairs (map
                       (lambda (roman)
                         (cons (+ roman (sekka-upcase okuri))
                               (+ "C" key okuri)))
                       romans))
         (kana-pair   (cons (+ key okuri) value)))
    (append roman-pairs (list kana-pair))))
63
+
64
;; Expand one entry with okurigana, adding extra variants for okuri "t".
;;
;; key   : reading of the entry
;; okuri : okurigana marker (a one-letter string)
;; value : candidate string stored for that reading
;;
;; When okuri is "t" the result is the concatenation of the expansions
;; for "t", for (sekka-upcase "@") and for (sekka-upcase ";").
;; For every other okuri it is the plain expansion.
(define (expand-okuri-ari-entry key okuri value)
  (cond
   ;; equal? compares string contents; eq? would only test object identity.
   ((equal? "t" okuri)
    (append-map (lambda (x) x)
                (list
                 (expand-okuri-ari-entry-internal key okuri value)
                 (expand-okuri-ari-entry-internal key (sekka-upcase "@") value)
                 (expand-okuri-ari-entry-internal key (sekka-upcase ";") value))))
   (else
    (expand-okuri-ari-entry-internal key okuri value))))
74
+
75
+
76
;; Convert the lines of an SKK-JISYO stream into SEKKA-JISYO lines.
;;
;; f : object responding to readlines (the dictionary being read)
;;
;; Returns a list of strings, each formatted as "<key> <value>".
;; Progress is printed to STDERR after every 10000 input lines.
(define (convert-skk-jisyo-f f)
  (define total 0)
  (define current 0)

  ;; Count one processed line and report progress every 10000 lines.
  (define (display-progress line)
    (set! current (+ current 1))
    (when (= 0 (% current 10000))
      ;; "%%" prints a literal percent sign; a bare "%" followed by ")"
      ;; is not a valid format directive.
      (STDERR.printf " %7d/%7d (%3.3f%%)\n" current total (* (/ current (total.to_f)) 100.0))))

  ;; Turn one dictionary line into a list of (key . value) pairs,
  ;; or #f when the line produces no entry.
  (define (gen-sekka-entries line)
    (display-progress line)

    ;; Strip the trailing "/" before splitting the line into fields.
    (let* ((line (line.sub #/\/$/ ""))
           (fields (split-dict-line line)))
      (cond
       ((rxmatch #/^\;/ line)
        ;; comment line
        #f)
       ((not fields)
        ;; format error
        #f)
       ((or (is-hiragana (first fields))
            (rxmatch #/^([>あ-ん]+)$/ (first fields)))
        ;; entry without okurigana
        (expand-okuri-nashi-entry (first fields) (second fields)))
       ((rxmatch #/^([>あ-ん]+)([a-z])$/ (first fields))
        => (lambda (m)
             ;; entry with okurigana: group 1 is the reading,
             ;; group 2 the trailing okuri letter
             (expand-okuri-ari-entry (rxmatch-substring m 1)
                                     (rxmatch-substring m 2)
                                     (second fields))))
       ((rxmatch #/[亜-瑤]+/ (first fields))
        ;; the key contains at least one kanji
        #f)
       (else
        (list (cons (first fields) (second fields)))))))

  (let* ((lines
          (map
           (lambda (line)
             (line.chomp))
           (f.readlines.to_list)))
         ;; dummy binding: records the line count before conversion starts
         (_ (set! total (length lines)))
         (entry-list
          ;; drop the #f results of skipped lines
          (filter
           (lambda (x) x)
           (map gen-sekka-entries lines))))
    (map
     (lambda (entry)
       (sprintf "%s %s" (car entry) (cdr entry)))
     (apply append! entry-list))))
127
+
128
+
129
+