sekka 1.5.9 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/Rakefile +22 -57
- data/VERSION.yml +2 -2
- data/bin/sekka-server +6 -21
- data/data/.gitignore +2 -0
- data/emacs/sekka.el +2 -2
- data/lib/sekka/roman-lib.nnd +13 -1
- data/lib/sekka/sekkaversion.rb +2 -2
- data/public_dict/1.5.0/SEKKA-JISYO.N.md5 +1 -0
- data/public_dict/1.5.0/SEKKA-JISYO.N.url +1 -0
- data/sekka.gemspec +1 -1
- data/test/roman-lib.nnd +68 -0
- data/test/sekka-dump-out-1.txt +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 059e9182b4e6dc1dc9cd996f1840f7ba6f819a9b
|
4
|
+
data.tar.gz: 7d7dc6eeee190b860c2d3c43d61e140672069d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da64491bc34442de5b0abe7798cdfa3afc797a64bae0650fd0ab0f97897273db9777fe68bf9665ae0811d00634c4faaeb0563b00efc3688023f771a580af2019
|
7
|
+
data.tar.gz: 6b38924ce6b313babe7461d9df811d447902a234f5a4cf1346d87fb7f214f518c97b77a8c4b4e712cd103af57c9b7dbe4d7134da05e0fb0044217d7f50626914
|
data/.travis.yml
CHANGED
data/Rakefile
CHANGED
@@ -23,17 +23,13 @@ require 'rake'
|
|
23
23
|
require "bundler/gem_tasks"
|
24
24
|
require 'jeweler2'
|
25
25
|
|
26
|
-
# generate `normal' and `azik' dictionary
|
27
|
-
# (generateTypes = [ "N", "A" ])
|
28
|
-
generateTypes = [ "N" ]
|
29
|
-
|
30
26
|
|
31
27
|
task :default => [:test] do
|
32
28
|
end
|
33
29
|
|
34
30
|
task :compile do
|
35
31
|
# generate version.rb
|
36
|
-
dictVersion = "1.
|
32
|
+
dictVersion = "1.5.0"
|
37
33
|
vh = Jeweler::VersionHelper.new "."
|
38
34
|
open( "./lib/sekka/sekkaversion.rb", "w" ) {|f|
|
39
35
|
f.puts( "class SekkaVersion" )
|
@@ -112,82 +108,51 @@ task :test do
|
|
112
108
|
files << "./test/henkan-main.nnd pure"
|
113
109
|
end
|
114
110
|
files.each {|filename|
|
115
|
-
sh sprintf( "ruby -I ./lib -S nendo -d %s", filename )
|
111
|
+
sh sprintf( "ruby -I ./lib -S nendo -I ./lib -d %s", filename )
|
116
112
|
}
|
117
113
|
sh "cat test.record"
|
118
114
|
end
|
119
115
|
|
120
116
|
task :bench do
|
121
|
-
sh "time
|
122
|
-
sh "time
|
117
|
+
sh "time nendo -I ./lib ./test/approximate-bench.nnd"
|
118
|
+
sh "time nendo -I ./lib ./test/henkan-bench.nnd"
|
123
119
|
end
|
124
120
|
|
125
|
-
task :alljisyo
|
126
|
-
task :alljisyoS => [ :jisyoS, :loadS, :dumpS, :md5 ]
|
127
|
-
task :alljisyoL => [ :jisyoL, :loadL, :dumpL, :md5 ]
|
121
|
+
task :alljisyo => [ :jisyo, :load, :dump, :md5 ]
|
128
122
|
|
129
123
|
task :md5 do
|
130
|
-
sh "md5sum ./data/SEKKA-JISYO.
|
131
|
-
sh "md5sum ./data/SEKKA-JISYO.LARGE.N.tsv > ./data/SEKKA-JISYO.LARGE.N.md5"
|
124
|
+
sh "md5sum ./data/SEKKA-JISYO.N.tsv > ./data/SEKKA-JISYO.N.md5"
|
132
125
|
end
|
133
126
|
|
134
|
-
task :
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
127
|
+
task :jisyo do
|
128
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.L.201501 > ./data/SEKKA-JISYO.N"
|
129
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.N"
|
130
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.fullname >> ./data/SEKKA-JISYO.N"
|
131
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.jinmei >> ./data/SEKKA-JISYO.N"
|
132
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.station >> ./data/SEKKA-JISYO.N"
|
133
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.N"
|
134
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.N"
|
135
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase3 >> ./data/SEKKA-JISYO.N"
|
142
136
|
end
|
143
137
|
|
144
|
-
task :
|
145
|
-
|
146
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.L.201501 > ./data/SEKKA-JISYO.LARGE.#{x}"
|
147
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
148
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.fullname >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
149
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.jinmei >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
150
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.station >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
151
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
152
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
153
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase3 >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
154
|
-
}
|
138
|
+
task :load do
|
139
|
+
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.N ./data/SEKKA-JISYO.N.tch#xmsiz=1024m"
|
155
140
|
end
|
156
141
|
|
157
|
-
task :
|
158
|
-
|
159
|
-
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.SMALL.#{x} ./data/SEKKA-JISYO.SMALL.#{x}.tch#xmsiz=1024m"
|
160
|
-
}
|
161
|
-
end
|
162
|
-
|
163
|
-
task :loadL do
|
164
|
-
generateTypes.each {|x|
|
165
|
-
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.LARGE.#{x} ./data/SEKKA-JISYO.LARGE.#{x}.tch#xmsiz=1024m"
|
166
|
-
}
|
167
|
-
end
|
168
|
-
|
169
|
-
task :dumpS do
|
170
|
-
generateTypes.each {|x|
|
171
|
-
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.SMALL.#{x}.tch#xmsiz=1024m > ./data/SEKKA-JISYO.SMALL.#{x}.tsv"
|
172
|
-
}
|
173
|
-
end
|
174
|
-
|
175
|
-
task :dumpL do
|
176
|
-
generateTypes.each {|x|
|
177
|
-
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.LARGE.#{x}.tch#xmsiz=1024m > ./data/SEKKA-JISYO.LARGE.#{x}.tsv"
|
178
|
-
}
|
142
|
+
task :dump do
|
143
|
+
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.N.tch#xmsiz=1024m > ./data/SEKKA-JISYO.N.tsv"
|
179
144
|
end
|
180
145
|
|
181
146
|
# SKK-JISYO.hiragana-phrase はWikipediaから作られる。
|
182
147
|
task :phrase => [ "/tmp/jawiki.txt.gz", "./data/wikipedia/jawiki.hiragana.txt" ] do
|
183
148
|
sh "sort ./data/wikipedia/jawiki.hiragana.txt | uniq -c | sort > ./data/wikipedia/ranking.txt"
|
184
|
-
sh "
|
149
|
+
sh "nendo -I ./lib ./data/hiragana_phrase_in_wikipedia2.nnd ./data/wikipedia/ranking.txt > ./data/SKK-JISYO.hiragana-phrase"
|
185
150
|
sh "echo 'して //' >> ./data/SKK-JISYO.hiragana-phrase"
|
186
151
|
end
|
187
152
|
|
188
153
|
file "./data/wikipedia/jawiki.hiragana.txt" do
|
189
154
|
sh "zcat /tmp/jawiki.txt.gz | mecab --input-buffer-size=65536 -O wakati --output=/tmp/jawiki.wakati.txt"
|
190
|
-
sh "
|
155
|
+
sh "nendo -I ./lib ./data/hiragana_phrase_in_wikipedia.nnd /tmp/jawiki.wakati.txt > ./data/wikipedia/jawiki.hiragana.txt"
|
191
156
|
sh "rm -f /tmp/jawiki.wakati.txt"
|
192
157
|
end
|
193
158
|
|
@@ -203,7 +168,7 @@ end
|
|
203
168
|
|
204
169
|
# SKK-JISYO.hiragana-phrase2 はIPADicから作られる。
|
205
170
|
task :phrase2 => [ "./data/ipadic.all.utf8.txt" ] do
|
206
|
-
sh "time
|
171
|
+
# Give nendo's -I option its load path (./lib), matching the other nendo calls in this Rakefile; without it the .nnd script path is consumed as the -I argument instead of being run.
sh "time nendo -I ./lib ./data/hiragana_phrase_in_ipadic.nnd ./data/ipadic.all.utf8.txt | sort | uniq > ./data/SKK-JISYO.hiragana-phrase2"
|
207
172
|
end
|
208
173
|
|
209
174
|
file "./data/ipadic.all.utf8.txt" do
|
data/VERSION.yml
CHANGED
data/bin/sekka-server
CHANGED
@@ -16,31 +16,16 @@ DICTURL = "https://raw.githubusercontent.com/kiyoka/sekka/master/public_dict/" +
|
|
16
16
|
|
17
17
|
PIDFILE = DICTDIR + "/pid"
|
18
18
|
|
19
|
-
DICTTYPE = "N"
|
20
|
-
if ENV.has_key?( 'SEKKA_AZIK' )
|
21
|
-
STDERR.printf( "Error: SEKKA_AZIK became obsolute. sekka-server use normal dictionary.\n" )
|
22
|
-
end
|
23
|
-
|
24
|
-
DICTSIZE = if ENV.has_key?( 'SEKKA_LARGE' )
|
25
|
-
if 1 == ENV[ 'SEKKA_LARGE' ].to_i
|
26
|
-
"LARGE"
|
27
|
-
else
|
28
|
-
"SMALL"
|
29
|
-
end
|
30
|
-
else
|
31
|
-
"SMALL"
|
32
|
-
end
|
33
|
-
|
34
19
|
TC_OPTS = "#xmsiz=256m"
|
35
|
-
TC_FILE = DICTDIR + "/SEKKA-JISYO
|
36
|
-
TSVFILE = DICTDIR + "/SEKKA-JISYO
|
37
|
-
SUMFILE = DICTDIR + "/SEKKA-JISYO
|
20
|
+
TC_FILE = DICTDIR + "/SEKKA-JISYO.N.tch" + TC_OPTS
|
21
|
+
TSVFILE = DICTDIR + "/SEKKA-JISYO.N.tsv"
|
22
|
+
SUMFILE = DICTDIR + "/SEKKA-JISYO.N.md5"
|
38
23
|
|
39
|
-
GDBM_FILE = DICTDIR + "/SEKKA-JISYO
|
24
|
+
GDBM_FILE = DICTDIR + "/SEKKA-JISYO.N.db"
|
40
25
|
|
41
26
|
|
42
|
-
URLURL = DICTURL + "/SEKKA-JISYO
|
43
|
-
SUMURL = DICTURL + "/SEKKA-JISYO
|
27
|
+
URLURL = DICTURL + "/SEKKA-JISYO.N.url"
|
28
|
+
SUMURL = DICTURL + "/SEKKA-JISYO.N.md5"
|
44
29
|
|
45
30
|
MEMCACHED = "localhost:11211" # memcached
|
46
31
|
|
data/data/.gitignore
CHANGED
data/emacs/sekka.el
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
;; Copyright (C) 2010-2014 Kiyoka Nishiyama
|
4
4
|
;;
|
5
5
|
;; Author: Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
-
;; Version: 1.
|
6
|
+
;; Version: 1.6.0 ;;SEKKA-VERSION
|
7
7
|
;; Keywords: ime, skk, japanese
|
8
8
|
;; Package-Requires: ((cl-lib "0.3") (concurrent "0.3.1") (popup "0.5.0"))
|
9
9
|
;; URL: https://github.com/kiyoka/sekka
|
@@ -1746,7 +1746,7 @@ point から行頭方向に同種の文字列が続く間を漢字変換しま
|
|
1746
1746
|
(setq default-input-method "japanese-sekka")
|
1747
1747
|
|
1748
1748
|
(defconst sekka-version
|
1749
|
-
"1.
|
1749
|
+
"1.6.0" ;;SEKKA-VERSION
|
1750
1750
|
)
|
1751
1751
|
(defun sekka-version (&optional arg)
|
1752
1752
|
"入力モード変更"
|
data/lib/sekka/roman-lib.nnd
CHANGED
@@ -737,10 +737,22 @@
|
|
737
737
|
(define (is-katakana str)
|
738
738
|
(if (rxmatch #/^[ア-ンァィゥェォャュョッー]+$/ str) #t #f))
|
739
739
|
|
740
|
-
;;
|
740
|
+
;; ひらがなの文字列かどうかを評価する
|
741
741
|
(define (is-hiragana str)
|
742
742
|
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+$/ str) #t #f))
|
743
743
|
|
744
|
+
;; ひらがなの文字列を含むかどうかを評価する
|
745
|
+
(define (include-hiragana str)
|
746
|
+
(if (rxmatch #/[あ-んぁぃぅぇぉゃゅょっー]+/ str) #t #f))
|
747
|
+
|
748
|
+
;; 漢字の文字列かどうかを評価する
|
749
|
+
(define (is-kanji str)
|
750
|
+
(if (rxmatch #/^[\p{Han}]+$/ str) #t #f))
|
751
|
+
|
752
|
+
;; 漢字の文字列を含むかどうか評価する
|
753
|
+
(define (include-kanji str)
|
754
|
+
(if (rxmatch #/[\p{Han}]/ str) #t #f))
|
755
|
+
|
744
756
|
;; 送り仮名付き平仮名文字列(例:"おこなu") かどうかを評価する
|
745
757
|
(define (is-hiragana-and-okuri str)
|
746
758
|
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+[a-z]$/ str) #t #f))
|
data/lib/sekka/sekkaversion.rb
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
3cbe0e13da141bda955fe8e53f378dd0 ./data/SEKKA-JISYO.N.tsv
|
@@ -0,0 +1 @@
|
|
1
|
+
https://s3-ap-northeast-1.amazonaws.com/sekkadict/1.5.0/SEKKA-JISYO.N.tsv
|
data/sekka.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.add_dependency "eventmachine", "~> 1.0"
|
21
21
|
spec.add_dependency "memcache-client", "~> 1.8"
|
22
|
-
spec.add_dependency "nendo", "= 0.7.
|
22
|
+
spec.add_dependency "nendo", "= 0.7.2"
|
23
23
|
spec.add_dependency "distributed-trie", "= 0.8.0"
|
24
24
|
spec.add_dependency "rack", "~> 1.5"
|
25
25
|
spec.add_dependency "ruby-progressbar", "~> 1.4"
|
data/test/roman-lib.nnd
CHANGED
@@ -251,6 +251,74 @@
|
|
251
251
|
#f
|
252
252
|
(is-hiragana-and-okuri "123"))
|
253
253
|
|
254
|
+
;;-------------------------------------------------------------------
|
255
|
+
(test-section "other judgement functions")
|
256
|
+
|
257
|
+
(test* "include hiragana 1"
|
258
|
+
#t
|
259
|
+
(include-hiragana "123あ456"))
|
260
|
+
|
261
|
+
(test* "include hiragana 2"
|
262
|
+
#f
|
263
|
+
(include-hiragana "123A456"))
|
264
|
+
|
265
|
+
(test* "include hiragana 3"
|
266
|
+
#f
|
267
|
+
(include-hiragana "漢字"))
|
268
|
+
|
269
|
+
(test* "include hiragana 4"
|
270
|
+
#f
|
271
|
+
(include-hiragana "カタカナ"))
|
272
|
+
|
273
|
+
(test* "is kanji 1"
|
274
|
+
#t
|
275
|
+
(is-kanji "漢字"))
|
276
|
+
|
277
|
+
(test* "is kanji 2"
|
278
|
+
#t
|
279
|
+
(is-kanji "薔薇"))
|
280
|
+
|
281
|
+
(test* "is kanji 3"
|
282
|
+
#f
|
283
|
+
(is-kanji "感じ"))
|
284
|
+
|
285
|
+
(test* "is kanji 4"
|
286
|
+
#f
|
287
|
+
(is-kanji "ひらがな"))
|
288
|
+
|
289
|
+
(test* "is kanji 5"
|
290
|
+
#f
|
291
|
+
(is-kanji "ABCDE"))
|
292
|
+
|
293
|
+
(test* "is kanji 6"
|
294
|
+
#f
|
295
|
+
(is-kanji "¢"))
|
296
|
+
|
297
|
+
(test* "is kanji 7"
|
298
|
+
#f
|
299
|
+
(is-kanji "з"))
|
300
|
+
|
301
|
+
(test* "include kanji 1"
|
302
|
+
#t
|
303
|
+
(include-kanji "感じ"))
|
304
|
+
|
305
|
+
(test* "include kanji 2"
|
306
|
+
#t
|
307
|
+
(include-kanji "ABC漢字DEF"))
|
308
|
+
|
309
|
+
(test* "include kanji 3"
|
310
|
+
#f
|
311
|
+
(include-kanji "ABCDEF"))
|
312
|
+
|
313
|
+
(test* "include kanji 4"
|
314
|
+
#f
|
315
|
+
(include-kanji "ひらがな"))
|
316
|
+
|
317
|
+
(test* "include kanji 5"
|
318
|
+
#f
|
319
|
+
(include-kanji "カタカナ"))
|
320
|
+
|
321
|
+
|
254
322
|
;;-------------------------------------------------------------------
|
255
323
|
(test-section "drop okurigana functions")
|
256
324
|
(test* "drop okri 1"
|
data/test/sekka-dump-out-1.txt
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sekka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kiyoka Nishiyama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: eventmachine
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - '='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.7.
|
47
|
+
version: 0.7.2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.7.
|
54
|
+
version: 0.7.2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: distributed-trie
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -229,6 +229,8 @@ files:
|
|
229
229
|
- public_dict/1.4.2/SEKKA-JISYO.LARGE.N.url
|
230
230
|
- public_dict/1.4.2/SEKKA-JISYO.SMALL.N.md5
|
231
231
|
- public_dict/1.4.2/SEKKA-JISYO.SMALL.N.url
|
232
|
+
- public_dict/1.5.0/SEKKA-JISYO.N.md5
|
233
|
+
- public_dict/1.5.0/SEKKA-JISYO.N.url
|
232
234
|
- script/sekkaserver.debian
|
233
235
|
- sekka.gemspec
|
234
236
|
- test/.gitignore
|