sekka 0.9.6 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +1 -0
- data/Rakefile +217 -0
- data/VERSION.yml +4 -0
- data/bin/sekka-server +11 -5
- data/emacs/sekka.el +1 -1
- data/lib/sekka/approximatesearch.rb +1 -1
- data/lib/sekka/convert-jisyo.nnd +2 -2
- data/lib/sekka/jisyo-db.nnd +3 -3
- data/lib/sekka/sekkaversion.rb +1 -1
- data/lib/sekka.ru +1 -1
- data/lib/sekkaserver.rb +2 -2
- data/test/henkan-main.nnd +30 -26
- data/test/jisyo.nnd +1 -0
- data/test/sekka-dump-out-1.txt +2172 -0
- data/test/sekka-jisyo-out-1.txt +3 -0
- data/test/skk-jisyo-in-1.txt +51 -0
- metadata +69 -255
data/.gemtest
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
# -*- mode: ruby; -*-
|
2
|
+
# Rakefile for Sekka
|
3
|
+
#
|
4
|
+
# Release Engineering:
|
5
|
+
# 1. edit the VERSION.yml file
|
6
|
+
# 2. rake compile && rake test && rake gemspec && rake build
|
7
|
+
# to generate sekka-x.x.x.gem
|
8
|
+
# 3. install sekka-x.x.x.gem to clean environment and test
|
9
|
+
# 4. rake release
|
10
|
+
# 5. gem push pkg/sekka-x.x.x.gem ( need gem version 1.3.6 or higher. Please "gem update --system" to update )
|
11
|
+
#
|
12
|
+
# Environment Variables:
|
13
|
+
# Please select from
|
14
|
+
# DB=dbm
|
15
|
+
# DB=tokyocabinet
|
16
|
+
# DB=redis
|
17
|
+
# DB= (default) tokyocabinet/pure
|
18
|
+
# DB=all dbm/tokyocabinet/redis/pure (DB=pure alone is also accepted)
|
19
|
+
#
|
20
|
+
|
21
|
+
require 'rake'
|
22
|
+
# Define the gem packaging tasks through Jeweler.
# When Jeweler cannot be loaded, an installation hint is printed instead.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name        = "sekka"
    spec.summary     = "Sekka is a SKK like input method."
    spec.description = "Sekka is a SKK like input method. Sekka server provides REST Based API. If you are SKK user, let's try it."
    spec.email       = "kiyoka@sumibi.org"
    spec.homepage    = "http://github.com/kiyoka/sekka"
    spec.authors     = ["Kiyoka Nishiyama"]

    # Files and glob patterns that make up the packaged gem.
    packaged = %w[
      Rakefile
      .gemtest
      VERSION.yml
      README
      COPYING
      lib/*.rb
      lib/*.ru
      lib/sekka/*.rb
      lib/sekka/*.nnd
      bin/sekka-jisyo
      bin/sekka-server
      bin/sekka-benchmark
      bin/sekka-path
      test/*.nnd
      test/*.rb
      test/*.txt
      script/sekkaserver.*
      emacs/*.el
    ]
    spec.files = FileList[*packaged].to_a

    spec.executables = %w[sekka-jisyo sekka-server sekka-benchmark sekka-path]
    spec.required_ruby_version = '>= 1.9.1'

    # Runtime dependencies: gem name followed by an optional version requirement.
    [
      ["eventmachine"],
      ["fuzzy-string-match", ">= 0.9.2"],
      ["memcache-client"],
      ["nendo", "= 0.6.4"],
      ["rack"],
      ["ruby-progressbar"]
    ].each { |dependency| spec.add_dependency( *dependency ) }
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install jeweler"
end
|
64
|
+
|
65
|
+
|
66
|
+
# Running plain `rake` runs the :test task; the task itself does nothing else.
task( :default => [:test] ) {}
|
68
|
+
|
69
|
+
# Regenerate the version-dependent sources from the project version, which is
# obtained through Jeweler::VersionHelper for the current directory:
#   * ./lib/sekka/sekkaversion.rb is rewritten from scratch.
#   * In ./emacs/sekka.el every line containing ";;SEKKA-VERSION" is replaced;
#     the file is only rewritten when its content actually changes.
task :compile do
  # generate version.rb
  dict_version = "0.9.2"
  version = Jeweler::VersionHelper.new "."
  File.open( "./lib/sekka/sekkaversion.rb", "w" ) do |out|
    out.puts( "class SekkaVersion" )
    out.printf( " def self.version() \"%s\" end\n", version )
    out.printf( " def self.dictVersion() \"%s\" end\n", dict_version )
    out.puts( "end" )
  end

  # Replace Version Number
  el_file = "./emacs/sekka.el"
  version = Jeweler::VersionHelper.new "."
  before  = File.open( el_file, "r:utf-8" ) { |src| src.readlines }
  after   = before.map do |line|
    next line unless line =~ /;;SEKKA-VERSION/
    sprintf( ' "%s" ;;SEKKA-VERSION', version.to_s ) + "\n"
  end

  unless before.join == after.join
    puts "Info: " + el_file + " was updated."
    File.open( el_file, "w" ) { |out| out.write( after.join ) }
  end
end
|
101
|
+
|
102
|
+
# Run the test suite.
#
# 1. Verify that bin/sekka-path reports the directory of this Rakefile;
#    otherwise print an error to STDERR and exit with status 1.
# 2. Remove test.record and test.tch if present.
# 3. Run every test script through the nendo found on PATH. ENV['DB'] selects
#    the backends ./test/henkan-main.nnd is run against:
#    dbm, tokyocabinet, redis, pure, all (= all four); any other value,
#    including an unset variable, runs tokyocabinet and pure.
# 4. Print test.record.
task :test do
  sh "ruby -I ./lib ./bin/sekka-path > /tmp/path1"
  path1 = open( "/tmp/path1" ) {|f|
    f.readline.chomp
  }
  path2 = File.dirname( __FILE__ )
  unless path1 == path2
    # printf returns nil, so wrapping it in puts only emitted a stray blank
    # line on stdout; the message now carries its own newline instead.
    STDERR.printf( "Error: on <sekka-path> requires [%s] but got [%s].\n", path2, path1 )
    exit 1
  end

  sh "/bin/rm -f test.record test.tch"
  files = []
  files << "./test/memcache.nnd"
  files << "./test/util.nnd"
  files << "./test/alphabet-lib.nnd"
  files << "./test/sharp-number.nnd"
  files << "./test/roman-lib.nnd"
  files << "./test/azik-verification.nnd"
  files << "./test/jisyo.nnd"
  files << "./test/google-ime.nnd"
  STDERR.printf( "Info: env DB=%s\n", ENV['DB'] )
  case ENV['DB']
  when 'dbm'
    files << "./test/henkan-main.nnd dbm"
  when 'tokyocabinet'
    files << "./test/henkan-main.nnd tokyocabinet"
  when 'redis'
    files << "./test/redis.nnd"
    files << "./test/henkan-main.nnd redis"
  when 'pure'
    files << "./test/henkan-main.nnd pure"
  when 'all'
    files << "./test/henkan-main.nnd dbm"
    files << "./test/henkan-main.nnd tokyocabinet"
    files << "./test/henkan-main.nnd redis"
    files << "./test/henkan-main.nnd pure"
  else # default
    files << "./test/henkan-main.nnd tokyocabinet"
    files << "./test/henkan-main.nnd pure"
  end

  # Resolve the nendo executable once instead of once per test script, and
  # stop early when it is missing: an empty path would make ruby try to
  # execute the .nnd script itself.
  nendopath = `which nendo`.chomp
  if nendopath.empty?
    STDERR.puts( "Error: nendo command was not found in PATH." )
    exit 1
  end
  files.each {|filename|
    sh sprintf( "time ruby -I ./lib %s %s", nendopath, filename )
  }
  sh "cat test.record"
end
|
149
|
+
|
150
|
+
# Run the two benchmark scripts with the nendo at /usr/local/bin/nendo.
task :bench do
  %w[approximate-bench henkan-bench].each do |script|
    sh "time ruby -I ./lib /usr/local/bin/nendo ./test/#{script}.nnd"
  end
end
|
154
|
+
|
155
|
+
# Aggregate task: depends on both dictionary conversions, then :load and :dump.
task( :alljisyo => [:jisyoS, :jisyoL, :load, :dump] )
|
156
|
+
|
157
|
+
# Build ./data/SEKKA-JISYO.SMALL by converting each source dictionary with
# bin/sekka-jisyo. The first conversion truncates the target, the rest append.
task :jisyoS do
  target  = "./data/SEKKA-JISYO.SMALL"
  sources = %w[
    SKK-JISYO.L.201008
    SKK-JISYO.L.hira-kata
    SKK-JISYO.hiragana-phrase
    SKK-JISYO.hiragana-phrase2
  ]
  sources.each_with_index do |source, index|
    redirect = index.zero? ? ">" : ">>"
    sh "time ./bin/sekka-jisyo convert ./data/#{source} #{redirect} #{target}"
  end
end
|
163
|
+
|
164
|
+
# Build ./data/SEKKA-JISYO.LARGE by converting each source dictionary with
# bin/sekka-jisyo. The first conversion truncates the target, the rest append.
task :jisyoL do
  target  = "./data/SEKKA-JISYO.LARGE"
  sources = %w[
    SKK-JISYO.L.201008
    SKK-JISYO.L.hira-kata
    SKK-JISYO.fullname
    SKK-JISYO.jinmei
    SKK-JISYO.station
    SKK-JISYO.hiragana-phrase
    SKK-JISYO.hiragana-phrase2
  ]
  sources.each_with_index do |source, index|
    redirect = index.zero? ? ">" : ">>"
    sh "time ./bin/sekka-jisyo convert ./data/#{source} #{redirect} #{target}"
  end
end
|
173
|
+
|
174
|
+
# Load the SMALL and LARGE Sekka dictionaries into their .tch files with
# `bin/sekka-jisyo load`.
task :load do
  %w[SMALL LARGE].each do |size|
    jisyo = "./data/SEKKA-JISYO.#{size}"
    sh "time ./bin/sekka-jisyo load #{jisyo} #{jisyo}.tch"
  end
end
|
178
|
+
|
179
|
+
# Dump the SMALL and LARGE .tch files to .tsv files with `bin/sekka-jisyo dump`.
task :dump do
  %w[SMALL LARGE].each do |size|
    base = "./data/SEKKA-JISYO.#{size}"
    sh "time ./bin/sekka-jisyo dump #{base}.tch > #{base}.tsv"
  end
end
|
183
|
+
|
184
|
+
|
185
|
+
# Fetched data from
|
186
|
+
# http://s-yata.jp/corpus/nwc2010/ngrams/
|
187
|
+
# Generate ./data/SKK-JISYO.hiragana-phrase from ./data/6gm-0000.txt in two
# nendo passes; the sorted, de-duplicated output of the first pass is kept in
# /tmp/tmp.txt and fed to the second.
task :phrase => [ "./data/6gm-0000.txt" ] do
  nendo       = "time ruby -I ./lib /usr/local/bin/nendo"
  sorted_uniq = "| sort | uniq >"
  scratch     = "/tmp/tmp.txt"
  sh "#{nendo} ./data/hiragana_phrase_in_webcorpus.nnd ./data/6gm-0000.txt #{sorted_uniq} #{scratch}"
  sh "#{nendo} ./data/writing_phrase_filter.nnd #{scratch} #{sorted_uniq} ./data/SKK-JISYO.hiragana-phrase"
end
|
191
|
+
|
192
|
+
# Download the 6-gram archive with wget to /tmp and decompress it with xz
# into ./data/6gm-0000.txt.
file "./data/6gm-0000.txt" do
  archive = "/tmp/6gm-0000.xz"
  sh "wget http://dist.s-yata.jp/corpus/nwc2010/ngrams/word/over999/6gms/6gm-0000.xz -O #{archive}"
  sh "xz -cd #{archive} > ./data/6gm-0000.txt"
end
|
196
|
+
|
197
|
+
# Generate ./data/SKK-JISYO.hiragana-phrase2 by running a nendo script over
# ./data/ipadic.all.utf8.txt and sorting / de-duplicating its output.
task :phrase2 => [ "./data/ipadic.all.utf8.txt" ] do
  extractor = "./data/hiragana_phrase_in_ipadic.nnd"
  corpus    = "./data/ipadic.all.utf8.txt"
  sh "time ruby -I ./lib /usr/local/bin/nendo #{extractor} #{corpus} | sort | uniq > ./data/SKK-JISYO.hiragana-phrase2"
end
|
200
|
+
|
201
|
+
# Download the ipadic 2.7.0 tarball to /tmp, unpack it there, and convert its
# *.dic files from EUC-JP to UTF-8 into ./data/ipadic.all.utf8.txt.
file "./data/ipadic.all.utf8.txt" do
  tarball = "/tmp/ipadic-2.7.0.tar.gz"
  [
    "wget http://chasen.aist-nara.ac.jp/stable/ipadic/ipadic-2.7.0.tar.gz -O #{tarball}",
    "tar zxfC #{tarball} /tmp",
    "iconv -f euc-jp -t utf-8 /tmp/ipadic-2.7.0/*.dic > ./data/ipadic.all.utf8.txt"
  ].each { |command| sh command }
end
|
206
|
+
|
207
|
+
|
208
|
+
# Start bin/sekka-server with ./lib on the Ruby load path.
# Note: mongrel can be installed with "gem install mongrel --pre".
task :rackup do
  server_command = %w[ruby -I ./lib ./bin/sekka-server].join( " " )
  sh server_command
end
|
212
|
+
|
213
|
+
# Derive ./data/SKK-JISYO.L.hira-kata from ./data/SKK-JISYO.L.201008:
# the dictionary is converted to EUC with nkf into a temporary file, passed
# through the skktools abbrev-convert.rb filter and skkdic-expr2, and the
# result is converted from EUC-JP to UTF-8 with iconv.
task :katakanago do
  tmpfile = "tmpfile.euc"
  begin
    sh "nkf --euc ./data/SKK-JISYO.L.201008 > #{tmpfile}"
    sh "/usr/share/skktools/filters/abbrev-convert.rb -k #{tmpfile} | skkdic-expr2 | iconv -f=EUC-JP -t=UTF-8 > ./data/SKK-JISYO.L.hira-kata"
  ensure
    # Remove the temporary file even when one of the commands above fails,
    # so a failed run does not leave tmpfile.euc behind in the working tree.
    sh "/bin/rm -f #{tmpfile}"
  end
end
|
data/VERSION.yml
ADDED
data/bin/sekka-server
CHANGED
@@ -5,8 +5,8 @@ require 'digest/md5'
|
|
5
5
|
require 'fileutils'
|
6
6
|
require 'rack'
|
7
7
|
require 'uri'
|
8
|
-
require
|
9
|
-
require
|
8
|
+
require 'sekkaconfig'
|
9
|
+
require 'sekka/sekkaversion'
|
10
10
|
|
11
11
|
|
12
12
|
|
@@ -160,8 +160,14 @@ def main
|
|
160
160
|
# 設定項目をConfigオブジェクトに代入
|
161
161
|
SekkaServer::Config.setup( dictType, dictSource, MEMCACHED, 12929, proxyHost, proxyPort )
|
162
162
|
|
163
|
-
#
|
164
|
-
|
163
|
+
# rackに渡すための sekka.ru のインストールパスを求める。
|
164
|
+
vendordir = File.expand_path(File.dirname(__FILE__) + "/../lib")
|
165
|
+
if RbConfig::CONFIG[ 'vendordir' ]
|
166
|
+
if File.exists? RbConfig::CONFIG[ 'vendordir' ] + "/sekka.ru"
|
167
|
+
vendordir = RbConfig::CONFIG[ 'vendordir' ]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
# print "vendordir = " + vendordir + "\n"
|
165
171
|
|
166
172
|
# サーバー起動
|
167
173
|
Rack::Server.start(
|
@@ -170,7 +176,7 @@ def main
|
|
170
176
|
:Port => SekkaServer::Config.listenPort,
|
171
177
|
:Host => "0.0.0.0",
|
172
178
|
:AccessLog => [],
|
173
|
-
:config => "
|
179
|
+
:config => vendordir + "/sekka.ru"
|
174
180
|
)
|
175
181
|
end
|
176
182
|
|
data/emacs/sekka.el
CHANGED
@@ -37,7 +37,7 @@ require 'sekka/kvs'
|
|
37
37
|
class ApproximateSearch
|
38
38
|
def initialize( jarow_shikii )
|
39
39
|
@jarow_shikii = jarow_shikii
|
40
|
-
@jarow = FuzzyStringMatch::JaroWinkler.
|
40
|
+
@jarow = FuzzyStringMatch::JaroWinkler.create( :native )
|
41
41
|
end
|
42
42
|
|
43
43
|
def filtering( keyword, arr )
|
data/lib/sekka/convert-jisyo.nnd
CHANGED
@@ -42,7 +42,7 @@
|
|
42
42
|
(let1 roman-list (gen-hiragana->roman-list key)
|
43
43
|
(if (< 1000 (length roman-list))
|
44
44
|
(begin
|
45
|
-
(
|
45
|
+
#?=(sprintf " Warning: ignored entry [%s] (hiragana phrase), because too many pattens.\n" key)
|
46
46
|
#f) ;; パターン数が爆発した単語は無視する
|
47
47
|
(map
|
48
48
|
(lambda (x)
|
@@ -53,7 +53,7 @@
|
|
53
53
|
(let1 roman-list (gen-hiragana->roman-list key)
|
54
54
|
(if (< 1000 (length roman-list))
|
55
55
|
(begin
|
56
|
-
(
|
56
|
+
#?=(sprintf " Warning: ignored entry [%s %s] , because too many pattens.\n" key value)
|
57
57
|
#f) ;; パターン数が爆発した単語は無視する
|
58
58
|
(append
|
59
59
|
(map
|
data/lib/sekka/jisyo-db.nnd
CHANGED
@@ -242,12 +242,12 @@
|
|
242
242
|
(let ([d (. (Date.new 0) to_s)])
|
243
243
|
(if (and (kvs.pure_put! "key_for_ping" d)
|
244
244
|
(string=? (kvs.get "key_for_ping") d))
|
245
|
-
|
245
|
+
#?="Info: database file is clean"
|
246
246
|
;; 正常に読み書きできないようであれば、データベースを修復する
|
247
247
|
(begin
|
248
|
-
|
248
|
+
#?="Info: database file is NOT clean. try to fix..."
|
249
249
|
(kvs.fixdb)
|
250
|
-
|
250
|
+
#?="Info: done.")))
|
251
251
|
(to-arr (list kvs cachesv)))))
|
252
252
|
;; Export to Ruby world
|
253
253
|
(export-to-ruby openSekkaJisyo)
|
data/lib/sekka/sekkaversion.rb
CHANGED
data/lib/sekka.ru
CHANGED
data/lib/sekkaserver.rb
CHANGED
data/test/henkan-main.nnd
CHANGED
@@ -35,6 +35,7 @@
|
|
35
35
|
;;;
|
36
36
|
(require "sekka/kvs")
|
37
37
|
(use nendo.test)
|
38
|
+
(use debug.null)
|
38
39
|
(use sekka.convert-jisyo)
|
39
40
|
(use sekka.henkan)
|
40
41
|
(load "./test/common.nnd")
|
@@ -90,7 +91,7 @@
|
|
90
91
|
;;-------------------------------------------------------------------
|
91
92
|
(with-open "./test/sekka-dump-out-1.txt"
|
92
93
|
(lambda (f)
|
93
|
-
(let ((
|
94
|
+
(let ((_answer (Digest::MD5.hexdigest (f.read)))
|
94
95
|
(_output1 (StringIO.new))
|
95
96
|
(_output2 (StringIO.new)))
|
96
97
|
(_output1.set_encoding "utf-8")
|
@@ -99,30 +100,33 @@
|
|
99
100
|
(eq? dbtype 'redis)
|
100
101
|
(eq? dbtype 'dbm)
|
101
102
|
(eq? dbtype 'pure))
|
102
|
-
(
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
(
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
(
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
103
|
+
(begin
|
104
|
+
(test-section "dump db")
|
105
|
+
|
106
|
+
(test* "dump DB to SEKKA jisyo file. (1)"
|
107
|
+
_answer
|
108
|
+
(begin
|
109
|
+
(dump-sekka-jisyo-f _output1 target)
|
110
|
+
(with-open "./test/sekka-dump-out-1.tmp"
|
111
|
+
(lambda (f) (f.write (_output1.string)))
|
112
|
+
"w")
|
113
|
+
(Digest::MD5.hexdigest
|
114
|
+
(_output1.string))))
|
115
|
+
|
116
|
+
(test* "dump/restore DB to SEKKA jisyo file. (1)"
|
117
|
+
_answer
|
118
|
+
(begin
|
119
|
+
;; ---- RESTORE ----
|
120
|
+
(f.rewind)
|
121
|
+
(restore-sekka-jisyo-f f target)
|
122
|
+
|
123
|
+
;; ---- DUMP ----
|
124
|
+
(dump-sekka-jisyo-f _output2 target)
|
125
|
+
(with-open "./test/sekka-dump-out-2.tmp"
|
126
|
+
(lambda (f) (f.write (_output2.string)))
|
127
|
+
"w")
|
128
|
+
(Digest::MD5.hexdigest
|
129
|
+
(_output2.string)))))))))
|
126
130
|
|
127
131
|
|
128
132
|
|
@@ -1252,6 +1256,6 @@
|
|
1252
1256
|
;; ---後処理---
|
1253
1257
|
(kvs.close)
|
1254
1258
|
;; 最終的な辞書の状態を目視するためのダンプ
|
1255
|
-
(dump-sekka-jisyo-f STDOUT target)
|
1259
|
+
;;(dump-sekka-jisyo-f STDOUT target)
|
1256
1260
|
|
1257
1261
|
(test-end)
|