sekka 1.5.9 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/Rakefile +22 -57
- data/VERSION.yml +2 -2
- data/bin/sekka-server +6 -21
- data/data/.gitignore +2 -0
- data/emacs/sekka.el +2 -2
- data/lib/sekka/roman-lib.nnd +13 -1
- data/lib/sekka/sekkaversion.rb +2 -2
- data/public_dict/1.5.0/SEKKA-JISYO.N.md5 +1 -0
- data/public_dict/1.5.0/SEKKA-JISYO.N.url +1 -0
- data/sekka.gemspec +1 -1
- data/test/roman-lib.nnd +68 -0
- data/test/sekka-dump-out-1.txt +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 059e9182b4e6dc1dc9cd996f1840f7ba6f819a9b
|
4
|
+
data.tar.gz: 7d7dc6eeee190b860c2d3c43d61e140672069d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da64491bc34442de5b0abe7798cdfa3afc797a64bae0650fd0ab0f97897273db9777fe68bf9665ae0811d00634c4faaeb0563b00efc3688023f771a580af2019
|
7
|
+
data.tar.gz: 6b38924ce6b313babe7461d9df811d447902a234f5a4cf1346d87fb7f214f518c97b77a8c4b4e712cd103af57c9b7dbe4d7134da05e0fb0044217d7f50626914
|
data/.travis.yml
CHANGED
data/Rakefile
CHANGED
@@ -23,17 +23,13 @@ require 'rake'
|
|
23
23
|
require "bundler/gem_tasks"
|
24
24
|
require 'jeweler2'
|
25
25
|
|
26
|
-
# generate `normal' and `azik' dictionary
|
27
|
-
# (generateTypes = [ "N", "A" ])
|
28
|
-
generateTypes = [ "N" ]
|
29
|
-
|
30
26
|
|
31
27
|
task :default => [:test] do
|
32
28
|
end
|
33
29
|
|
34
30
|
task :compile do
|
35
31
|
# generate version.rb
|
36
|
-
dictVersion = "1.
|
32
|
+
dictVersion = "1.5.0"
|
37
33
|
vh = Jeweler::VersionHelper.new "."
|
38
34
|
open( "./lib/sekka/sekkaversion.rb", "w" ) {|f|
|
39
35
|
f.puts( "class SekkaVersion" )
|
@@ -112,82 +108,51 @@ task :test do
|
|
112
108
|
files << "./test/henkan-main.nnd pure"
|
113
109
|
end
|
114
110
|
files.each {|filename|
|
115
|
-
sh sprintf( "ruby -I ./lib -S nendo -d %s", filename )
|
111
|
+
sh sprintf( "ruby -I ./lib -S nendo -I ./lib -d %s", filename )
|
116
112
|
}
|
117
113
|
sh "cat test.record"
|
118
114
|
end
|
119
115
|
|
120
116
|
task :bench do
|
121
|
-
sh "time
|
122
|
-
sh "time
|
117
|
+
sh "time nendo -I ./lib ./test/approximate-bench.nnd"
|
118
|
+
sh "time nendo -I ./lib ./test/henkan-bench.nnd"
|
123
119
|
end
|
124
120
|
|
125
|
-
task :alljisyo
|
126
|
-
task :alljisyoS => [ :jisyoS, :loadS, :dumpS, :md5 ]
|
127
|
-
task :alljisyoL => [ :jisyoL, :loadL, :dumpL, :md5 ]
|
121
|
+
task :alljisyo => [ :jisyo, :load, :dump, :md5 ]
|
128
122
|
|
129
123
|
task :md5 do
|
130
|
-
sh "md5sum ./data/SEKKA-JISYO.
|
131
|
-
sh "md5sum ./data/SEKKA-JISYO.LARGE.N.tsv > ./data/SEKKA-JISYO.LARGE.N.md5"
|
124
|
+
sh "md5sum ./data/SEKKA-JISYO.N.tsv > ./data/SEKKA-JISYO.N.md5"
|
132
125
|
end
|
133
126
|
|
134
|
-
task :
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
127
|
+
task :jisyo do
|
128
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.L.201501 > ./data/SEKKA-JISYO.N"
|
129
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.N"
|
130
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.fullname >> ./data/SEKKA-JISYO.N"
|
131
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.jinmei >> ./data/SEKKA-JISYO.N"
|
132
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.station >> ./data/SEKKA-JISYO.N"
|
133
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.N"
|
134
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.N"
|
135
|
+
sh "ruby ./bin/sekka-jisyo convertN ./data/SKK-JISYO.hiragana-phrase3 >> ./data/SEKKA-JISYO.N"
|
142
136
|
end
|
143
137
|
|
144
|
-
task :
|
145
|
-
|
146
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.L.201501 > ./data/SEKKA-JISYO.LARGE.#{x}"
|
147
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.L.hira-kata >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
148
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.fullname >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
149
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.jinmei >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
150
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.station >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
151
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
152
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase2 >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
153
|
-
sh "ruby ./bin/sekka-jisyo convert#{x} ./data/SKK-JISYO.hiragana-phrase3 >> ./data/SEKKA-JISYO.LARGE.#{x}"
|
154
|
-
}
|
138
|
+
task :load do
|
139
|
+
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.N ./data/SEKKA-JISYO.N.tch#xmsiz=1024m"
|
155
140
|
end
|
156
141
|
|
157
|
-
task :
|
158
|
-
|
159
|
-
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.SMALL.#{x} ./data/SEKKA-JISYO.SMALL.#{x}.tch#xmsiz=1024m"
|
160
|
-
}
|
161
|
-
end
|
162
|
-
|
163
|
-
task :loadL do
|
164
|
-
generateTypes.each {|x|
|
165
|
-
sh "ruby ./bin/sekka-jisyo load ./data/SEKKA-JISYO.LARGE.#{x} ./data/SEKKA-JISYO.LARGE.#{x}.tch#xmsiz=1024m"
|
166
|
-
}
|
167
|
-
end
|
168
|
-
|
169
|
-
task :dumpS do
|
170
|
-
generateTypes.each {|x|
|
171
|
-
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.SMALL.#{x}.tch#xmsiz=1024m > ./data/SEKKA-JISYO.SMALL.#{x}.tsv"
|
172
|
-
}
|
173
|
-
end
|
174
|
-
|
175
|
-
task :dumpL do
|
176
|
-
generateTypes.each {|x|
|
177
|
-
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.LARGE.#{x}.tch#xmsiz=1024m > ./data/SEKKA-JISYO.LARGE.#{x}.tsv"
|
178
|
-
}
|
142
|
+
task :dump do
|
143
|
+
sh "ruby ./bin/sekka-jisyo dump ./data/SEKKA-JISYO.N.tch#xmsiz=1024m > ./data/SEKKA-JISYO.N.tsv"
|
179
144
|
end
|
180
145
|
|
181
146
|
# SKK-JISYO.hiragana-phrase はWikipediaから作られる。
|
182
147
|
task :phrase => [ "/tmp/jawiki.txt.gz", "./data/wikipedia/jawiki.hiragana.txt" ] do
|
183
148
|
sh "sort ./data/wikipedia/jawiki.hiragana.txt | uniq -c | sort > ./data/wikipedia/ranking.txt"
|
184
|
-
sh "
|
149
|
+
sh "nendo -I ./lib ./data/hiragana_phrase_in_wikipedia2.nnd ./data/wikipedia/ranking.txt > ./data/SKK-JISYO.hiragana-phrase"
|
185
150
|
sh "echo 'して //' >> ./data/SKK-JISYO.hiragana-phrase"
|
186
151
|
end
|
187
152
|
|
188
153
|
file "./data/wikipedia/jawiki.hiragana.txt" do
|
189
154
|
sh "zcat /tmp/jawiki.txt.gz | mecab --input-buffer-size=65536 -O wakati --output=/tmp/jawiki.wakati.txt"
|
190
|
-
sh "
|
155
|
+
sh "nendo -I ./lib ./data/hiragana_phrase_in_wikipedia.nnd /tmp/jawiki.wakati.txt > ./data/wikipedia/jawiki.hiragana.txt"
|
191
156
|
sh "rm -f /tmp/jawiki.wakati.txt"
|
192
157
|
end
|
193
158
|
|
@@ -203,7 +168,7 @@ end
|
|
203
168
|
|
204
169
|
# SKK-JISYO.hiragana-phrase2 はIPADicから作られる。
|
205
170
|
task :phrase2 => [ "./data/ipadic.all.utf8.txt" ] do
|
206
|
-
sh "time
|
171
|
+
sh "time nendo -I ./data/hiragana_phrase_in_ipadic.nnd ./data/ipadic.all.utf8.txt | sort | uniq > ./data/SKK-JISYO.hiragana-phrase2"
|
207
172
|
end
|
208
173
|
|
209
174
|
file "./data/ipadic.all.utf8.txt" do
|
data/VERSION.yml
CHANGED
data/bin/sekka-server
CHANGED
@@ -16,31 +16,16 @@ DICTURL = "https://raw.githubusercontent.com/kiyoka/sekka/master/public_dict/" +
|
|
16
16
|
|
17
17
|
PIDFILE = DICTDIR + "/pid"
|
18
18
|
|
19
|
-
DICTTYPE = "N"
|
20
|
-
if ENV.has_key?( 'SEKKA_AZIK' )
|
21
|
-
STDERR.printf( "Error: SEKKA_AZIK became obsolute. sekka-server use normal dictionary.\n" )
|
22
|
-
end
|
23
|
-
|
24
|
-
DICTSIZE = if ENV.has_key?( 'SEKKA_LARGE' )
|
25
|
-
if 1 == ENV[ 'SEKKA_LARGE' ].to_i
|
26
|
-
"LARGE"
|
27
|
-
else
|
28
|
-
"SMALL"
|
29
|
-
end
|
30
|
-
else
|
31
|
-
"SMALL"
|
32
|
-
end
|
33
|
-
|
34
19
|
TC_OPTS = "#xmsiz=256m"
|
35
|
-
TC_FILE = DICTDIR + "/SEKKA-JISYO
|
36
|
-
TSVFILE = DICTDIR + "/SEKKA-JISYO
|
37
|
-
SUMFILE = DICTDIR + "/SEKKA-JISYO
|
20
|
+
TC_FILE = DICTDIR + "/SEKKA-JISYO.N.tch" + TC_OPTS
|
21
|
+
TSVFILE = DICTDIR + "/SEKKA-JISYO.N.tsv"
|
22
|
+
SUMFILE = DICTDIR + "/SEKKA-JISYO.N.md5"
|
38
23
|
|
39
|
-
GDBM_FILE = DICTDIR + "/SEKKA-JISYO
|
24
|
+
GDBM_FILE = DICTDIR + "/SEKKA-JISYO.N.db"
|
40
25
|
|
41
26
|
|
42
|
-
URLURL = DICTURL + "/SEKKA-JISYO
|
43
|
-
SUMURL = DICTURL + "/SEKKA-JISYO
|
27
|
+
URLURL = DICTURL + "/SEKKA-JISYO.N.url"
|
28
|
+
SUMURL = DICTURL + "/SEKKA-JISYO.N.md5"
|
44
29
|
|
45
30
|
MEMCACHED = "localhost:11211" # memcahced
|
46
31
|
|
data/data/.gitignore
CHANGED
data/emacs/sekka.el
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
;; Copyright (C) 2010-2014 Kiyoka Nishiyama
|
4
4
|
;;
|
5
5
|
;; Author: Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
-
;; Version: 1.
|
6
|
+
;; Version: 1.6.0 ;;SEKKA-VERSION
|
7
7
|
;; Keywords: ime, skk, japanese
|
8
8
|
;; Package-Requires: ((cl-lib "0.3") (concurrent "0.3.1") (popup "0.5.0"))
|
9
9
|
;; URL: https://github.com/kiyoka/sekka
|
@@ -1746,7 +1746,7 @@ point から行頭方向に同種の文字列が続く間を漢字変換しま
|
|
1746
1746
|
(setq default-input-method "japanese-sekka")
|
1747
1747
|
|
1748
1748
|
(defconst sekka-version
|
1749
|
-
"1.
|
1749
|
+
"1.6.0" ;;SEKKA-VERSION
|
1750
1750
|
)
|
1751
1751
|
(defun sekka-version (&optional arg)
|
1752
1752
|
"入力モード変更"
|
data/lib/sekka/roman-lib.nnd
CHANGED
@@ -737,10 +737,22 @@
|
|
737
737
|
(define (is-katakana str)
|
738
738
|
(if (rxmatch #/^[ア-ンァィゥェォャュョッー]+$/ str) #t #f))
|
739
739
|
|
740
|
-
;;
|
740
|
+
;; ひらがなの文字列かどうかを評価する
|
741
741
|
(define (is-hiragana str)
|
742
742
|
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+$/ str) #t #f))
|
743
743
|
|
744
|
+
;; ひらがなの文字列を含むかどうかを評価する
|
745
|
+
(define (include-hiragana str)
|
746
|
+
(if (rxmatch #/[あ-んぁぃぅぇぉゃゅょっー]+/ str) #t #f))
|
747
|
+
|
748
|
+
;; 漢字の文字列かどうかを評価する
|
749
|
+
(define (is-kanji str)
|
750
|
+
(if (rxmatch #/^[\p{Han}]+$/ str) #t #f))
|
751
|
+
|
752
|
+
;; 漢字の文字列を含むかどうか評価する
|
753
|
+
(define (include-kanji str)
|
754
|
+
(if (rxmatch #/[\p{Han}]/ str) #t #f))
|
755
|
+
|
744
756
|
;; 送り仮名付き平仮名文字列(例:"おこなu") かどうかを評価する
|
745
757
|
(define (is-hiragana-and-okuri str)
|
746
758
|
(if (rxmatch #/^[あ-んぁぃぅぇぉゃゅょっー]+[a-z]$/ str) #t #f))
|
data/lib/sekka/sekkaversion.rb
CHANGED
data/public_dict/1.5.0/SEKKA-JISYO.N.md5
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3cbe0e13da141bda955fe8e53f378dd0 ./data/SEKKA-JISYO.N.tsv
|
data/public_dict/1.5.0/SEKKA-JISYO.N.url
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
https://s3-ap-northeast-1.amazonaws.com/sekkadict/1.5.0/SEKKA-JISYO.N.tsv
|
data/sekka.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.add_dependency "eventmachine", "~> 1.0"
|
21
21
|
spec.add_dependency "memcache-client", "~> 1.8"
|
22
|
-
spec.add_dependency "nendo", "= 0.7.
|
22
|
+
spec.add_dependency "nendo", "= 0.7.2"
|
23
23
|
spec.add_dependency "distributed-trie", "= 0.8.0"
|
24
24
|
spec.add_dependency "rack", "~> 1.5"
|
25
25
|
spec.add_dependency "ruby-progressbar", "~> 1.4"
|
data/test/roman-lib.nnd
CHANGED
@@ -251,6 +251,74 @@
|
|
251
251
|
#f
|
252
252
|
(is-hiragana-and-okuri "123"))
|
253
253
|
|
254
|
+
;;-------------------------------------------------------------------
|
255
|
+
(test-section "other judgement functions")
|
256
|
+
|
257
|
+
(test* "include hiragana 1"
|
258
|
+
#t
|
259
|
+
(include-hiragana "123あ456"))
|
260
|
+
|
261
|
+
(test* "include hiragana 2"
|
262
|
+
#f
|
263
|
+
(include-hiragana "123A456"))
|
264
|
+
|
265
|
+
(test* "include hiragana 3"
|
266
|
+
#f
|
267
|
+
(include-hiragana "漢字"))
|
268
|
+
|
269
|
+
(test* "include hiragana 4"
|
270
|
+
#f
|
271
|
+
(include-hiragana "カタカナ"))
|
272
|
+
|
273
|
+
(test* "is kanji 1"
|
274
|
+
#t
|
275
|
+
(is-kanji "漢字"))
|
276
|
+
|
277
|
+
(test* "is kanji 2"
|
278
|
+
#t
|
279
|
+
(is-kanji "薔薇"))
|
280
|
+
|
281
|
+
(test* "is kanji 3"
|
282
|
+
#f
|
283
|
+
(is-kanji "感じ"))
|
284
|
+
|
285
|
+
(test* "is kanji 4"
|
286
|
+
#f
|
287
|
+
(is-kanji "ひらがな"))
|
288
|
+
|
289
|
+
(test* "is kanji 5"
|
290
|
+
#f
|
291
|
+
(is-kanji "ABCDE"))
|
292
|
+
|
293
|
+
(test* "is kanji 6"
|
294
|
+
#f
|
295
|
+
(is-kanji "¢"))
|
296
|
+
|
297
|
+
(test* "is kanji 7"
|
298
|
+
#f
|
299
|
+
(is-kanji "з"))
|
300
|
+
|
301
|
+
(test* "include kanji 1"
|
302
|
+
#t
|
303
|
+
(include-kanji "感じ"))
|
304
|
+
|
305
|
+
(test* "include kanji 2"
|
306
|
+
#t
|
307
|
+
(include-kanji "ABC漢字DEF"))
|
308
|
+
|
309
|
+
(test* "include kanji 3"
|
310
|
+
#f
|
311
|
+
(include-kanji "ABCDEF"))
|
312
|
+
|
313
|
+
(test* "include kanji 4"
|
314
|
+
#f
|
315
|
+
(include-kanji "ひらがな"))
|
316
|
+
|
317
|
+
(test* "include kanji 5"
|
318
|
+
#f
|
319
|
+
(include-kanji "カタカナ"))
|
320
|
+
|
321
|
+
|
254
322
|
;;-------------------------------------------------------------------
|
255
323
|
(test-section "drop okurigana functions")
|
256
324
|
(test* "drop okri 1"
|
data/test/sekka-dump-out-1.txt
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sekka
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kiyoka Nishiyama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: eventmachine
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - '='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.7.
|
47
|
+
version: 0.7.2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.7.
|
54
|
+
version: 0.7.2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: distributed-trie
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -229,6 +229,8 @@ files:
|
|
229
229
|
- public_dict/1.4.2/SEKKA-JISYO.LARGE.N.url
|
230
230
|
- public_dict/1.4.2/SEKKA-JISYO.SMALL.N.md5
|
231
231
|
- public_dict/1.4.2/SEKKA-JISYO.SMALL.N.url
|
232
|
+
- public_dict/1.5.0/SEKKA-JISYO.N.md5
|
233
|
+
- public_dict/1.5.0/SEKKA-JISYO.N.url
|
232
234
|
- script/sekkaserver.debian
|
233
235
|
- sekka.gemspec
|
234
236
|
- test/.gitignore
|