sekka 0.9.6 → 0.9.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +1 -0
- data/Rakefile +217 -0
- data/VERSION.yml +4 -0
- data/bin/sekka-server +11 -5
- data/emacs/sekka.el +1 -1
- data/lib/sekka/approximatesearch.rb +1 -1
- data/lib/sekka/convert-jisyo.nnd +2 -2
- data/lib/sekka/jisyo-db.nnd +3 -3
- data/lib/sekka/sekkaversion.rb +1 -1
- data/lib/sekka.ru +1 -1
- data/lib/sekkaserver.rb +2 -2
- data/test/henkan-main.nnd +30 -26
- data/test/jisyo.nnd +1 -0
- data/test/sekka-dump-out-1.txt +2172 -0
- data/test/sekka-jisyo-out-1.txt +3 -0
- data/test/skk-jisyo-in-1.txt +51 -0
- metadata +69 -255
data/.gemtest
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
# -*- mode: ruby; -*-
|
2
|
+
# Rakefile for Sekka
|
3
|
+
#
|
4
|
+
# Release Engineering:
|
5
|
+
# 1. edit the VERSION.yml file
|
6
|
+
# 2. rake compile && rake test && rake gemspec && rake build
|
7
|
+
# to generate sekka-x.x.x.gem
|
8
|
+
# 3. install sekka-x.x.x.gem to clean environment and test
|
9
|
+
# 4. rake release
|
10
|
+
# 5. gem push pkg/sekka-x.x.x.gem ( needs gem version 1.3.6 or higher. Please run "gem update --system" to update )
|
11
|
+
#
|
12
|
+
# Environment Variables:
|
13
|
+
# Please select from
|
14
|
+
# DB=dbm
|
15
|
+
# DB=tokyocabinet
|
16
|
+
# DB=redis
|
17
|
+
# DB= (default) tokyocabinet/pure
|
18
|
+
# DB=all dbm/tokyocabinet/redis/pure
|
19
|
+
#
|
20
|
+
|
21
|
+
require 'rake'
|
22
|
+
begin
|
23
|
+
require 'jeweler'
|
24
|
+
Jeweler::Tasks.new do |gemspec|
|
25
|
+
gemspec.name = "sekka"
|
26
|
+
gemspec.summary = "Sekka is a SKK like input method."
|
27
|
+
gemspec.description = "Sekka is a SKK like input method. Sekka server provides REST Based API. If you are SKK user, let's try it."
|
28
|
+
gemspec.email = "kiyoka@sumibi.org"
|
29
|
+
gemspec.homepage = "http://github.com/kiyoka/sekka"
|
30
|
+
gemspec.authors = ["Kiyoka Nishiyama"]
|
31
|
+
gemspec.files = FileList['Rakefile',
|
32
|
+
'.gemtest',
|
33
|
+
'VERSION.yml',
|
34
|
+
'README',
|
35
|
+
'COPYING',
|
36
|
+
'lib/*.rb',
|
37
|
+
'lib/*.ru',
|
38
|
+
'lib/sekka/*.rb',
|
39
|
+
'lib/sekka/*.nnd',
|
40
|
+
'bin/sekka-jisyo',
|
41
|
+
'bin/sekka-server',
|
42
|
+
'bin/sekka-benchmark',
|
43
|
+
'bin/sekka-path',
|
44
|
+
'test/*.nnd',
|
45
|
+
'test/*.rb',
|
46
|
+
'test/*.txt',
|
47
|
+
'script/sekkaserver.*',
|
48
|
+
'emacs/*.el'].to_a
|
49
|
+
gemspec.executables = ["sekka-jisyo",
|
50
|
+
"sekka-server",
|
51
|
+
"sekka-benchmark",
|
52
|
+
"sekka-path"]
|
53
|
+
gemspec.required_ruby_version = '>= 1.9.1'
|
54
|
+
gemspec.add_dependency( "eventmachine" )
|
55
|
+
gemspec.add_dependency( "fuzzy-string-match", ">= 0.9.2" )
|
56
|
+
gemspec.add_dependency( "memcache-client" )
|
57
|
+
gemspec.add_dependency( "nendo", "= 0.6.4" )
|
58
|
+
gemspec.add_dependency( "rack" )
|
59
|
+
gemspec.add_dependency( "ruby-progressbar" )
|
60
|
+
end
|
61
|
+
rescue LoadError
|
62
|
+
puts "Jeweler not available. Install it with: sudo gem install jeweler"
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
task :default => [:test] do
|
67
|
+
end
|
68
|
+
|
69
|
+
task :compile do
|
70
|
+
# generate version.rb
|
71
|
+
dictVersion = "0.9.2"
|
72
|
+
vh = Jeweler::VersionHelper.new "."
|
73
|
+
open( "./lib/sekka/sekkaversion.rb", "w" ) {|f|
|
74
|
+
f.puts( "class SekkaVersion" )
|
75
|
+
f.printf( " def self.version() \"%s\" end\n", vh )
|
76
|
+
f.printf( " def self.dictVersion() \"%s\" end\n", dictVersion )
|
77
|
+
f.puts( "end" )
|
78
|
+
}
|
79
|
+
|
80
|
+
# Replace Version Number
|
81
|
+
targetFile = "./emacs/sekka.el"
|
82
|
+
vh = Jeweler::VersionHelper.new "."
|
83
|
+
(original, modified) = open( targetFile, "r:utf-8" ) {|f|
|
84
|
+
lines = f.readlines
|
85
|
+
[ lines,
|
86
|
+
lines.map {|line|
|
87
|
+
if line.match( /;;SEKKA-VERSION/ )
|
88
|
+
sprintf( ' "%s" ;;SEKKA-VERSION', vh.to_s ) + "\n"
|
89
|
+
else
|
90
|
+
line
|
91
|
+
end
|
92
|
+
} ]
|
93
|
+
}
|
94
|
+
if original.join != modified.join
|
95
|
+
puts "Info: " + targetFile + " was updated."
|
96
|
+
open( targetFile, "w" ) {|f|
|
97
|
+
f.write( modified.join )
|
98
|
+
}
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
task :test do
|
103
|
+
sh "ruby -I ./lib ./bin/sekka-path > /tmp/path1"
|
104
|
+
path1 = open( "/tmp/path1" ) {|f|
|
105
|
+
f.readline.chomp
|
106
|
+
}
|
107
|
+
path2 = File.dirname( __FILE__ )
|
108
|
+
unless path1 == path2
|
109
|
+
puts STDERR.printf( "Error: on <sekka-path> requires [%s] but got [%s].", path2, path1 )
|
110
|
+
exit 1
|
111
|
+
end
|
112
|
+
|
113
|
+
sh "/bin/rm -f test.record test.tch"
|
114
|
+
files = []
|
115
|
+
files << "./test/memcache.nnd"
|
116
|
+
files << "./test/util.nnd"
|
117
|
+
files << "./test/alphabet-lib.nnd"
|
118
|
+
files << "./test/sharp-number.nnd"
|
119
|
+
files << "./test/roman-lib.nnd"
|
120
|
+
files << "./test/azik-verification.nnd"
|
121
|
+
files << "./test/jisyo.nnd"
|
122
|
+
files << "./test/google-ime.nnd"
|
123
|
+
STDERR.printf( "Info: env DB=%s\n", ENV['DB'] )
|
124
|
+
case ENV['DB']
|
125
|
+
when 'dbm'
|
126
|
+
files << "./test/henkan-main.nnd dbm"
|
127
|
+
when 'tokyocabinet'
|
128
|
+
files << "./test/henkan-main.nnd tokyocabinet"
|
129
|
+
when 'redis'
|
130
|
+
files << "./test/redis.nnd"
|
131
|
+
files << "./test/henkan-main.nnd redis"
|
132
|
+
when 'pure'
|
133
|
+
files << "./test/henkan-main.nnd pure"
|
134
|
+
when 'all'
|
135
|
+
files << "./test/henkan-main.nnd dbm"
|
136
|
+
files << "./test/henkan-main.nnd tokyocabinet"
|
137
|
+
files << "./test/henkan-main.nnd redis"
|
138
|
+
files << "./test/henkan-main.nnd pure"
|
139
|
+
else # default
|
140
|
+
files << "./test/henkan-main.nnd tokyocabinet"
|
141
|
+
files << "./test/henkan-main.nnd pure"
|
142
|
+
end
|
143
|
+
files.each {|filename|
|
144
|
+
nendopath = `which nendo`.chomp
|
145
|
+
sh sprintf( "time ruby -I ./lib %s %s", nendopath, filename )
|
146
|
+
}
|
147
|
+
sh "cat test.record"
|
148
|
+
end
|
149
|
+
|
150
|
+
task :bench do
|
151
|
+
sh "time ruby -I ./lib /usr/local/bin/nendo ./test/approximate-bench.nnd"
|
152
|
+
sh "time ruby -I ./lib /usr/local/bin/nendo ./test/henkan-bench.nnd"
|
153
|
+
end
|
154
|
+
|
155
|
+
task :alljisyo => [ :jisyoS, :jisyoL, :load, :dump ]
|
156
|
+
|
157
|
+
task :jisyoS do
|
158
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.L.201008 > ./data/SEKKA-JISYO.SMALL"
|
159
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.SMALL"
|
160
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.SMALL"
|
161
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.SMALL"
|
162
|
+
end
|
163
|
+
|
164
|
+
task :jisyoL do
|
165
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.L.201008 > ./data/SEKKA-JISYO.LARGE"
|
166
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.LARGE"
|
167
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.fullname >> ./data/SEKKA-JISYO.LARGE"
|
168
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.jinmei >> ./data/SEKKA-JISYO.LARGE"
|
169
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.station >> ./data/SEKKA-JISYO.LARGE"
|
170
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.LARGE"
|
171
|
+
sh "time ./bin/sekka-jisyo convert ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.LARGE"
|
172
|
+
end
|
173
|
+
|
174
|
+
task :load do
|
175
|
+
sh "time ./bin/sekka-jisyo load ./data/SEKKA-JISYO.SMALL ./data/SEKKA-JISYO.SMALL.tch"
|
176
|
+
sh "time ./bin/sekka-jisyo load ./data/SEKKA-JISYO.LARGE ./data/SEKKA-JISYO.LARGE.tch"
|
177
|
+
end
|
178
|
+
|
179
|
+
task :dump do
|
180
|
+
sh "time ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.SMALL.tch > ./data/SEKKA-JISYO.SMALL.tsv"
|
181
|
+
sh "time ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.LARGE.tch > ./data/SEKKA-JISYO.LARGE.tsv"
|
182
|
+
end
|
183
|
+
|
184
|
+
|
185
|
+
# Fetched data from
|
186
|
+
# http://s-yata.jp/corpus/nwc2010/ngrams/
|
187
|
+
task :phrase => [ "./data/6gm-0000.txt" ] do
|
188
|
+
sh "time ruby -I ./lib /usr/local/bin/nendo ./data/hiragana_phrase_in_webcorpus.nnd ./data/6gm-0000.txt | sort | uniq > /tmp/tmp.txt"
|
189
|
+
sh "time ruby -I ./lib /usr/local/bin/nendo ./data/writing_phrase_filter.nnd /tmp/tmp.txt | sort | uniq > ./data/SKK-JISYO.hiragana-phrase"
|
190
|
+
end
|
191
|
+
|
192
|
+
file "./data/6gm-0000.txt" do
|
193
|
+
sh "wget http://dist.s-yata.jp/corpus/nwc2010/ngrams/word/over999/6gms/6gm-0000.xz -O /tmp/6gm-0000.xz"
|
194
|
+
sh "xz -cd /tmp/6gm-0000.xz > ./data/6gm-0000.txt"
|
195
|
+
end
|
196
|
+
|
197
|
+
task :phrase2 => [ "./data/ipadic.all.utf8.txt" ] do
|
198
|
+
sh "time ruby -I ./lib /usr/local/bin/nendo ./data/hiragana_phrase_in_ipadic.nnd ./data/ipadic.all.utf8.txt | sort | uniq > ./data/SKK-JISYO.hiragana-phrase2"
|
199
|
+
end
|
200
|
+
|
201
|
+
file "./data/ipadic.all.utf8.txt" do
|
202
|
+
sh "wget http://chasen.aist-nara.ac.jp/stable/ipadic/ipadic-2.7.0.tar.gz -O /tmp/ipadic-2.7.0.tar.gz"
|
203
|
+
sh "tar zxfC /tmp/ipadic-2.7.0.tar.gz /tmp"
|
204
|
+
sh "iconv -f euc-jp -t utf-8 /tmp/ipadic-2.7.0/*.dic > ./data/ipadic.all.utf8.txt"
|
205
|
+
end
|
206
|
+
|
207
|
+
|
208
|
+
task :rackup do
|
209
|
+
# To install mongrel, run "gem install mongrel --pre"
|
210
|
+
sh "ruby -I ./lib ./bin/sekka-server"
|
211
|
+
end
|
212
|
+
|
213
|
+
task :katakanago do
|
214
|
+
sh "nkf --euc ./data/SKK-JISYO.L.201008 > tmpfile.euc"
|
215
|
+
sh "/usr/share/skktools/filters/abbrev-convert.rb -k tmpfile.euc | skkdic-expr2 | iconv -f=EUC-JP -t=UTF-8 > ./data/SKK-JISYO.L.hira-kata"
|
216
|
+
sh "/bin/rm -f tmpfile.euc"
|
217
|
+
end
|
data/VERSION.yml
ADDED
data/bin/sekka-server
CHANGED
@@ -5,8 +5,8 @@ require 'digest/md5'
|
|
5
5
|
require 'fileutils'
|
6
6
|
require 'rack'
|
7
7
|
require 'uri'
|
8
|
-
require
|
9
|
-
require
|
8
|
+
require 'sekkaconfig'
|
9
|
+
require 'sekka/sekkaversion'
|
10
10
|
|
11
11
|
|
12
12
|
|
@@ -160,8 +160,14 @@ def main
|
|
160
160
|
# 設定項目をConfigオブジェクトに代入
|
161
161
|
SekkaServer::Config.setup( dictType, dictSource, MEMCACHED, 12929, proxyHost, proxyPort )
|
162
162
|
|
163
|
-
#
|
164
|
-
|
163
|
+
# rackに渡すための sekka.ru のインストールパスを求める。
|
164
|
+
vendordir = File.expand_path(File.dirname(__FILE__) + "/../lib")
|
165
|
+
if RbConfig::CONFIG[ 'vendordir' ]
|
166
|
+
if File.exists? RbConfig::CONFIG[ 'vendordir' ] + "/sekka.ru"
|
167
|
+
vendordir = RbConfig::CONFIG[ 'vendordir' ]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
# print "vendordir = " + vendordir + "\n"
|
165
171
|
|
166
172
|
# サーバー起動
|
167
173
|
Rack::Server.start(
|
@@ -170,7 +176,7 @@ def main
|
|
170
176
|
:Port => SekkaServer::Config.listenPort,
|
171
177
|
:Host => "0.0.0.0",
|
172
178
|
:AccessLog => [],
|
173
|
-
:config => "
|
179
|
+
:config => vendordir + "/sekka.ru"
|
174
180
|
)
|
175
181
|
end
|
176
182
|
|
data/emacs/sekka.el
CHANGED
@@ -37,7 +37,7 @@ require 'sekka/kvs'
|
|
37
37
|
class ApproximateSearch
|
38
38
|
def initialize( jarow_shikii )
|
39
39
|
@jarow_shikii = jarow_shikii
|
40
|
-
@jarow = FuzzyStringMatch::JaroWinkler.
|
40
|
+
@jarow = FuzzyStringMatch::JaroWinkler.create( :native )
|
41
41
|
end
|
42
42
|
|
43
43
|
def filtering( keyword, arr )
|
data/lib/sekka/convert-jisyo.nnd
CHANGED
@@ -42,7 +42,7 @@
|
|
42
42
|
(let1 roman-list (gen-hiragana->roman-list key)
|
43
43
|
(if (< 1000 (length roman-list))
|
44
44
|
(begin
|
45
|
-
(
|
45
|
+
#?=(sprintf " Warning: ignored entry [%s] (hiragana phrase), because too many pattens.\n" key)
|
46
46
|
#f) ;; パターン数が爆発した単語は無視する
|
47
47
|
(map
|
48
48
|
(lambda (x)
|
@@ -53,7 +53,7 @@
|
|
53
53
|
(let1 roman-list (gen-hiragana->roman-list key)
|
54
54
|
(if (< 1000 (length roman-list))
|
55
55
|
(begin
|
56
|
-
(
|
56
|
+
#?=(sprintf " Warning: ignored entry [%s %s] , because too many pattens.\n" key value)
|
57
57
|
#f) ;; パターン数が爆発した単語は無視する
|
58
58
|
(append
|
59
59
|
(map
|
data/lib/sekka/jisyo-db.nnd
CHANGED
@@ -242,12 +242,12 @@
|
|
242
242
|
(let ([d (. (Date.new 0) to_s)])
|
243
243
|
(if (and (kvs.pure_put! "key_for_ping" d)
|
244
244
|
(string=? (kvs.get "key_for_ping") d))
|
245
|
-
|
245
|
+
#?="Info: database file is clean"
|
246
246
|
;; 正常に読み書きできないようであれば、データベースを修復する
|
247
247
|
(begin
|
248
|
-
|
248
|
+
#?="Info: database file is NOT clean. try to fix..."
|
249
249
|
(kvs.fixdb)
|
250
|
-
|
250
|
+
#?="Info: done.")))
|
251
251
|
(to-arr (list kvs cachesv)))))
|
252
252
|
;; Export to Ruby world
|
253
253
|
(export-to-ruby openSekkaJisyo)
|
data/lib/sekka/sekkaversion.rb
CHANGED
data/lib/sekka.ru
CHANGED
data/lib/sekkaserver.rb
CHANGED
data/test/henkan-main.nnd
CHANGED
@@ -35,6 +35,7 @@
|
|
35
35
|
;;;
|
36
36
|
(require "sekka/kvs")
|
37
37
|
(use nendo.test)
|
38
|
+
(use debug.null)
|
38
39
|
(use sekka.convert-jisyo)
|
39
40
|
(use sekka.henkan)
|
40
41
|
(load "./test/common.nnd")
|
@@ -90,7 +91,7 @@
|
|
90
91
|
;;-------------------------------------------------------------------
|
91
92
|
(with-open "./test/sekka-dump-out-1.txt"
|
92
93
|
(lambda (f)
|
93
|
-
(let ((
|
94
|
+
(let ((_answer (Digest::MD5.hexdigest (f.read)))
|
94
95
|
(_output1 (StringIO.new))
|
95
96
|
(_output2 (StringIO.new)))
|
96
97
|
(_output1.set_encoding "utf-8")
|
@@ -99,30 +100,33 @@
|
|
99
100
|
(eq? dbtype 'redis)
|
100
101
|
(eq? dbtype 'dbm)
|
101
102
|
(eq? dbtype 'pure))
|
102
|
-
(
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
(
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
103
|
+
(begin
|
104
|
+
(test-section "dump db")
|
105
|
+
|
106
|
+
(test* "dump DB to SEKKA jisyo file. (1)"
|
107
|
+
_answer
|
108
|
+
(begin
|
109
|
+
(dump-sekka-jisyo-f _output1 target)
|
110
|
+
(with-open "./test/sekka-dump-out-1.tmp"
|
111
|
+
(lambda (f) (f.write (_output1.string)))
|
112
|
+
"w")
|
113
|
+
(Digest::MD5.hexdigest
|
114
|
+
(_output1.string))))
|
115
|
+
|
116
|
+
(test* "dump/restore DB to SEKKA jisyo file. (1)"
|
117
|
+
_answer
|
118
|
+
(begin
|
119
|
+
;; ---- RESTORE ----
|
120
|
+
(f.rewind)
|
121
|
+
(restore-sekka-jisyo-f f target)
|
122
|
+
|
123
|
+
;; ---- DUMP ----
|
124
|
+
(dump-sekka-jisyo-f _output2 target)
|
125
|
+
(with-open "./test/sekka-dump-out-2.tmp"
|
126
|
+
(lambda (f) (f.write (_output2.string)))
|
127
|
+
"w")
|
128
|
+
(Digest::MD5.hexdigest
|
129
|
+
(_output2.string)))))))))
|
126
130
|
|
127
131
|
|
128
132
|
|
@@ -1252,6 +1256,6 @@
|
|
1252
1256
|
;; ---後処理---
|
1253
1257
|
(kvs.close)
|
1254
1258
|
;; 最終的な辞書の状態を目視するためのダンプ
|
1255
|
-
(dump-sekka-jisyo-f STDOUT target)
|
1259
|
+
;;(dump-sekka-jisyo-f STDOUT target)
|
1256
1260
|
|
1257
1261
|
(test-end)
|