sekka 1.0.0 → 1.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ # Sekka (石火) SKK like Japanese input method
2
+
3
+ [![Build Status](https://secure.travis-ci.org/kiyoka/sekka.png)](http://travis-ci.org/kiyoka/sekka)
4
+
5
+ ## Sekka(石火)はSKKライクな日本語入力メソッドです。
6
+ <http://oldtype.sumibi.org/show-page/Sekka>
7
+
8
+ ![Logo]( http://pix.am/u2Hv.png )
data/Rakefile CHANGED
@@ -20,8 +20,10 @@
20
20
 
21
21
  require 'rake'
22
22
  begin
23
- require 'jeweler'
23
+ require 'jeweler2'
24
24
  Jeweler::Tasks.new do |gemspec|
25
+ vh = Jeweler::VersionHelper.new "."
26
+ gemspec.version = vh.to_s + ".pre"
25
27
  gemspec.name = "sekka"
26
28
  gemspec.summary = "Sekka is a SKK like input method."
27
29
  gemspec.description = "Sekka is a SKK like input method. Sekka server provides REST Based API. If you are SKK user, let's try it."
@@ -52,14 +54,14 @@ begin
52
54
  "sekka-path"]
53
55
  gemspec.required_ruby_version = '>= 1.9.1'
54
56
  gemspec.add_dependency( "eventmachine" )
55
- gemspec.add_dependency( "fuzzy-string-match", ">= 0.9.2" )
56
57
  gemspec.add_dependency( "memcache-client" )
57
58
  gemspec.add_dependency( "nendo", "= 0.6.4" )
59
+ gemspec.add_dependency( "distributed-trie" )
58
60
  gemspec.add_dependency( "rack" )
59
61
  gemspec.add_dependency( "ruby-progressbar" )
60
62
  end
61
63
  rescue LoadError
62
- puts "Jeweler not available. Install it with: sudo gem install jeweler"
64
+ puts "Jeweler2 not available. Install it with: sudo gem install jeweler2"
63
65
  end
64
66
 
65
67
 
@@ -68,7 +70,7 @@ end
68
70
 
69
71
  task :compile do
70
72
  # generate version.rb
71
- dictVersion = "0.9.2"
73
+ dictVersion = "1.2.0"
72
74
  vh = Jeweler::VersionHelper.new "."
73
75
  open( "./lib/sekka/sekkaversion.rb", "w" ) {|f|
74
76
  f.puts( "class SekkaVersion" )
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 1
3
- :minor: 0
3
+ :minor: 1
4
4
  :patch: 0
data/bin/sekka-server CHANGED
@@ -16,16 +16,17 @@ DICTURL = "https://raw.github.com/kiyoka/sekka/master/public_dict/" + SekkaVersi
16
16
 
17
17
  PIDFILE = DICTDIR + "/pid"
18
18
 
19
- TC_FILE = DICTDIR + "/SEKKA-JISYO.SMALL.tch"
19
+ TC_OPTS = "#xmsiz=256m"
20
+ TC_FILE = DICTDIR + "/SEKKA-JISYO.SMALL.tch" + TC_OPTS
20
21
  TSVFILE = DICTDIR + "/SEKKA-JISYO.SMALL.tsv"
21
22
  SUMFILE = DICTDIR + "/SEKKA-JISYO.SMALL.md5"
22
23
 
23
24
  URLURL = DICTURL + "/SEKKA-JISYO.SMALL.url"
24
25
  SUMURL = DICTURL + "/SEKKA-JISYO.SMALL.md5"
25
26
 
26
- TC_FILE_LIST = [ DICTDIR + "/SEKKA-JISYO.CUSTOM.tch",
27
- DICTDIR + "/SEKKA-JISYO.LARGE.tch",
28
- DICTDIR + "/SEKKA-JISYO.SMALL.tch" ]
27
+ TC_FILE_LIST = [ DICTDIR + "/SEKKA-JISYO.CUSTOM.tch" + TC_OPTS,
28
+ DICTDIR + "/SEKKA-JISYO.LARGE.tch" + TC_OPTS,
29
+ DICTDIR + "/SEKKA-JISYO.SMALL.tch" + TC_OPTS ]
29
30
 
30
31
  MEMCACHED = "localhost:11211" # memcached
31
32
 
@@ -111,7 +112,7 @@ def main
111
112
 
112
113
  case dictType
113
114
  when :tokyocabinet
114
- # .tchファイルが存在すうるか調べる
115
+ # .tchファイルが存在するか調べる
115
116
  list = TC_FILE_LIST.select { |name| File.exist?( name ) }
116
117
 
117
118
  if 0 == list.size
data/emacs/sekka.el CHANGED
@@ -1620,7 +1620,7 @@ point から行頭方向に同種の文字列が続く間を漢字変換しま
1620
1620
  (setq default-input-method "japanese-sekka")
1621
1621
 
1622
1622
  (defconst sekka-version
1623
- "1.0.0" ;;SEKKA-VERSION
1623
+ "1.1.0" ;;SEKKA-VERSION
1624
1624
  )
1625
1625
  (defun sekka-version (&optional arg)
1626
1626
  "入力モード変更"
@@ -31,46 +31,42 @@
31
31
  #
32
32
  # $Id:
33
33
  #
34
- require 'fuzzystringmatch'
34
+ require 'distributedtrie'
35
35
  require 'sekka/kvs'
36
36
 
37
37
  class ApproximateSearch
38
38
  def initialize( jarow_shikii )
39
39
  @jarow_shikii = jarow_shikii
40
- @jarow = FuzzyStringMatch::JaroWinkler.create( :native )
41
- end
42
-
43
- def filtering( keyword, arr )
44
- keyword = keyword.downcase
45
- arr.map { |str|
46
- val = @jarow.getDistance( keyword, str.downcase )
47
- #printf( " [%s] vs [%s] => %f\n", keyword, str.downcase, val )
48
- (val > @jarow_shikii) ? [ val, str ] : false
49
- }.select { |v| v }.sort_by {|item| 1.0 - item[0]}
50
40
  end
51
41
 
52
42
  def search( userid, kvs, keyword, type )
53
- readymade_key = case type
54
- when 'k' # okuri nashi kanji entry
55
- "(" + keyword.slice( 0, 2 ).downcase + ")"
56
- when 'K' # okuri ari kanji entry
57
- "(" + keyword.slice( 0, 2 ).upcase + ")"
58
- when 'h' # hiragana phrase entry
59
- "{" + keyword.slice( 1, 2 ).downcase + "}"
60
- else
61
- raise sprintf( "Error: ApproximateSearch#search unknown type %s ", type )
62
- end
63
-
64
- str = kvs.get( userid + "::" + readymade_key, false )
65
- if not str
66
- str = kvs.get( "MASTER::" + readymade_key )
67
- end
68
-
69
- #printf( "#readymade_key %s : %s\n", readymade_key, str )
70
- if str
71
- filtering( keyword, str.split( /[ ]+/ ))
43
+ arr = []
44
+ case userid
45
+ when "MASTER"
46
+ arr = searchByUser( "MASTER", kvs, keyword, type )
72
47
  else
73
- [ ]
48
+ h = {}
49
+ searchByUser( "MASTER", kvs, keyword, type ).each { |item| h[ item[1] ] = item[0] }
50
+ searchByUser( userid, kvs, keyword, type ).each { |item| h[ item[1] ] = item[0] }
51
+ h.keys.each { |k| arr << [ h[k], k ] }
74
52
  end
53
+ arr.sort_by {|item| [1.0 - item[0], item[1]]}
54
+ end
55
+
56
+ def searchByUser( userid, kvs, keyword, type )
57
+ pair = case type
58
+ when 'k' # okuri nashi kanji entry
59
+ ["Ik::" + userid + "::", keyword.downcase]
60
+ when 'K' # okuri ari kanji entry
61
+ ["IK::" + userid + "::", keyword]
62
+ when 'h' # hiragana phrase entry
63
+ ["Ih::" + userid + "::", keyword.downcase]
64
+ else
65
+ raise sprintf( "Error: ApproximateSearch#search unknown type %s ", type )
66
+ end
67
+ prefix = pair[0]
68
+ _keyword = pair[1]
69
+ trie = DistributedTrie::Trie.new( kvs, prefix )
70
+ trie.fuzzySearch( _keyword, @jarow_shikii )
75
71
  end
76
72
  end
data/lib/sekka/henkan.nnd CHANGED
@@ -496,45 +496,6 @@
496
496
  (set! user-keylist (cons k user-keylist))
497
497
  (append-entry userid kvs k v)))
498
498
  sekka-jisyo-data))
499
-
500
- (define (_create-ready-made-keylist keylist)
501
- (receive (okuri-ari-hash
502
- okuri-nashi-hash
503
- hiragana-phrase-hash)
504
- (create-2char-hash keylist)
505
-
506
- ;; OKURI-ARI
507
- (for-each
508
- (lambda (key)
509
- (let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
510
- (kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
511
- (kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
512
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
513
- (hash-table-get okuri-ari-hash key))))
514
- " "))))
515
- (hash-table-keys okuri-ari-hash))
516
-
517
- ;; OKURI-NASHI
518
- (for-each
519
- (lambda (key)
520
- (let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
521
- (kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
522
- (kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
523
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
524
- (hash-table-get okuri-nashi-hash key))))
525
- " "))))
526
- (hash-table-keys okuri-nashi-hash))
527
-
528
- ;; HIRAGANA-PHRASE
529
- (for-each
530
- (lambda (key)
531
- (let1 fetched (kvs.get (+ userid "::" "{" (sekka-downcase key) "}")
532
- (kvs.get (+ masterid "::" "{" (sekka-downcase key) "}") ""))
533
- (kvs.put! (+ userid "::" "{" (sekka-downcase key) "}")
534
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
535
- (hash-table-get hiragana-phrase-hash key))))
536
- " "))))
537
- (hash-table-keys hiragana-phrase-hash))))
538
499
 
539
500
  ;; "ユーザー語彙を"(stored)"にpush!する"
540
501
  (define (kvs-push! userid kvs entry-str)
@@ -557,7 +518,7 @@
557
518
  #f)
558
519
  (begin
559
520
  (insert-to-db lst)
560
- (_create-ready-made-keylist user-keylist)
521
+ (setup-trie-index userid kvs user-keylist #f)
561
522
  (kvs-push! userid kvs dict-line)
562
523
  #?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
563
524
  #t))))
@@ -37,22 +37,14 @@
37
37
  (require "progressbar")
38
38
  (require "sekka/kvs")
39
39
  (require "sekka/sekkaversion")
40
+ (require "distributedtrie")
40
41
  (use sekka.util)
41
42
 
42
- (define masterid "MASTER")
43
- (define workid "WORK")
44
- (define versionid "SEKKA::VERSION")
45
-
46
- (define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;#")
47
- (define alphabet-lower-list (to-list (alphabet-string.split "")))
48
- (define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
49
- (define alphabet-pairs (append-map
50
- (lambda (a)
51
- (map
52
- (lambda (b)
53
- (+ a b))
54
- (append alphabet-upper-list alphabet-lower-list)))
55
- alphabet-lower-list))
43
+ (define master-prefix "MASTER")
44
+ (define version-prefix "SEKKA::VERSION")
45
+ (define okuri-ari-prefix "IK")
46
+ (define okuri-nashi-prefix "Ik")
47
+ (define hiragana-phrase-prefix "Ih")
56
48
 
57
49
  ;; KVS type setting
58
50
  (define *kvs-type* 'tokyocabinet) ;; default
@@ -69,69 +61,72 @@
69
61
  (define (dict-get userid kvs key . fallback)
70
62
  (if-let1 value
71
63
  (or (kvs.get (+ userid "::" key) #f)
72
- (kvs.get (+ masterid "::" key) #f))
64
+ (kvs.get (+ master-prefix "::" key) #f))
73
65
  value
74
66
  (let1 opt (get-optional fallback #f)
75
67
  opt)))
76
68
 
77
- (define (create-2char-hash keylist)
78
- (define okuri-ari-hash (make-hash-table))
79
- (define okuri-nashi-hash (make-hash-table))
80
- (define hiragana-phrase-hash (make-hash-table))
81
- (define (create-hash keylist)
82
- (for-each
83
- (lambda (k)
84
- (when (rxmatch #/^[=a-zA-Z#^>-@`\;+:'\-]+$/ k)
85
- (if (rxmatch #/^=/ k)
86
- ;; HIRAGANA-PHRASE
87
- (let1 sliced (sekka-downcase (k.slice 1 2))
88
- (when (= 2 sliced.size)
89
- (hash-table-push! hiragana-phrase-hash sliced k)))
90
- ;; OKURI-ARI and OKURI-NASHI
91
- (let1 sliced (sekka-downcase (k.slice 0 2))
92
- (when (= 2 sliced.size)
93
- (if (rxmatch #/[A-Z`+]$/ k)
94
- (hash-table-push! okuri-ari-hash sliced k)
95
- (hash-table-push! okuri-nashi-hash sliced k)))))))
96
- keylist))
97
- (create-hash keylist)
98
- (values okuri-ari-hash
99
- okuri-nashi-hash
100
- hiragana-phrase-hash))
101
-
102
- (define (setup-ready-made-keylist kvs keylist)
103
- (for-each
104
- (lambda (key)
105
- (let1 key (+ masterid "::" key)
106
- (unless (kvs.get key #f)
107
- (kvs.put! key ""))))
108
- alphabet-pairs)
109
-
110
- (receive (okuri-ari-hash
111
- okuri-nashi-hash
112
- hiragana-phrase-hash)
113
- (create-2char-hash keylist)
114
-
115
- ;; OKURI-ARI
116
- (for-each
117
- (lambda (key)
118
- (kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
119
- (string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
120
- (hash-table-keys okuri-ari-hash))
121
69
 
122
- ;; OKURI-NASHI
123
- (for-each
124
- (lambda (key)
125
- (kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
126
- (string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
127
- (hash-table-keys okuri-nashi-hash))
70
+ (define (setup-trie-index userid kvs keylist displayProgress)
128
71
 
129
- ;; HIRAGANA-PHRASE
130
- (for-each
131
- (lambda (key)
132
- (kvs.put! (+ masterid "::" "{" (sekka-downcase key) "}")
133
- (string-join (uniq (sort (hash-table-get hiragana-phrase-hash key))) " ")))
134
- (hash-table-keys hiragana-phrase-hash))))
72
+ (define (trie-add-autocommit trie keylist progress)
73
+ (let ([i 0])
74
+ (for-each
75
+ (lambda (k)
76
+ (trie.addKey! k)
77
+ (set! i (+ i 1))
78
+ (when (= 0 (% i 100))
79
+ (trie.commit!))
80
+ (when progress
81
+ (progress.inc)))
82
+ keylist))
83
+ trie.commit!)
84
+
85
+ (define (grouping-keylist keylist progress)
86
+ (let ([okuri-ari-list '()]
87
+ [okuri-nashi-list '()]
88
+ [hiragana-phrase-list '()])
89
+ (for-each
90
+ (lambda (k)
91
+ (if (rxmatch #/^[=a-zA-Z#^>-@`\;+:'\-]+$/ k)
92
+ (if (rxmatch #/^=/ k)
93
+ ;; HIRAGANA-PHRASE
94
+ (push! hiragana-phrase-list k)
95
+ ;; OKURI-ARI and OKURI-NASHI
96
+ (if (rxmatch #/[A-Z`+]$/ k)
97
+ (push! okuri-ari-list k)
98
+ (push! okuri-nashi-list k))))
99
+ (when progress
100
+ (progress.inc)))
101
+ keylist)
102
+ (values okuri-ari-list
103
+ okuri-nashi-list
104
+ hiragana-phrase-list)))
105
+
106
+ (let1 progress (if displayProgress
107
+ (ProgressBar.new "trie-tree " (* 2 (length keylist)) STDERR)
108
+ #f)
109
+ (receive (okuri-ari-list
110
+ okuri-nashi-list
111
+ hiragana-phrase-list)
112
+ (grouping-keylist keylist progress)
113
+
114
+ (when progress
115
+ (progress.set (+ (length keylist)
116
+ (- (length keylist)
117
+ (+
118
+ (length okuri-ari-list)
119
+ (length okuri-nashi-list)
120
+ (length hiragana-phrase-list))))))
121
+
122
+ (let1 trie (DistributedTrie::Trie.new kvs (+ okuri-ari-prefix "::" userid "::"))
123
+ (trie-add-autocommit trie okuri-ari-list progress))
124
+
125
+ (let1 trie (DistributedTrie::Trie.new kvs (+ okuri-nashi-prefix "::" userid "::"))
126
+ (trie-add-autocommit trie okuri-nashi-list progress))
127
+
128
+ (let1 trie (DistributedTrie::Trie.new kvs (+ hiragana-phrase-prefix "::" userid "::"))
129
+ (trie-add-autocommit trie hiragana-phrase-list progress)))))
135
130
 
136
131
 
137
132
  (define (append-entry userid kvs _key value)
@@ -166,21 +161,34 @@
166
161
  (kvs.put! key value))))))
167
162
 
168
163
 
164
+ (define (file-length f)
165
+ (let1 total 0
166
+ (for-each
167
+ (lambda (x)
168
+ (set! total (+ 1 total)))
169
+ f)
170
+ (f.rewind)
171
+ total))
172
+
169
173
  (define (load-sekka-jisyo-f f filename)
170
174
  (define keylist '())
171
175
  (define (create-keylist kvs f)
172
- (for-each
173
- (lambda (line)
174
- (let1 fields (split-dict-line line)
175
- (set! keylist (cons (first fields) keylist))
176
- (append-entry masterid kvs (first fields) (second fields))))
177
- (f.readlines)))
176
+ (let1 progress (ProgressBar.new "data-body " (file-length f) STDERR)
177
+ (for-each
178
+ (lambda (line)
179
+ (let* (
180
+ [line (line.chomp)]
181
+ [fields (split-dict-line line)])
182
+ (push! keylist (first fields))
183
+ (append-entry master-prefix kvs (first fields) (second fields)))
184
+ (progress.inc))
185
+ f)))
178
186
  (let1 kvs (Kvs.new (get-kvs-type))
179
187
  (kvs.open filename)
180
188
  (kvs.clear)
181
189
  (create-keylist kvs f)
182
- (setup-ready-made-keylist kvs keylist)
183
- (kvs.put! versionid SekkaVersion.version)
190
+ (setup-trie-index master-prefix kvs keylist #t)
191
+ (kvs.put! version-prefix SekkaVersion.version)
184
192
  (kvs.close)))
185
193
 
186
194
 
@@ -196,15 +204,6 @@
196
204
 
197
205
 
198
206
  (define (restore-sekka-jisyo-f f filename-or-hostname)
199
- (define (file-length f)
200
- (let1 total 0
201
- (for-each
202
- (lambda (x)
203
- (set! total (+ 1 total)))
204
- f)
205
- (f.rewind)
206
- total))
207
-
208
207
  (define (restore f kvs progress)
209
208
  (let loop ([line (f.readline.chomp)])
210
209
  (let* (
@@ -1,4 +1,4 @@
1
1
  class SekkaVersion
2
- def self.version() "1.0.0" end
3
- def self.dictVersion() "0.9.2" end
2
+ def self.version() "1.1.0" end
3
+ def self.dictVersion() "1.2.0" end
4
4
  end
data/lib/sekkaserver.rb CHANGED
@@ -57,12 +57,20 @@ module SekkaServer
57
57
  (@kvs,@cachesv) = @core.openSekkaJisyo( SekkaServer::Config.dictType,
58
58
  SekkaServer::Config.dictSource,
59
59
  SekkaServer::Config.cacheSource )
60
+
61
+ version = @kvs.get( "SEKKA::VERSION" )
62
+ if not SekkaVersion.dictVersion == version
63
+ STDERR.printf( "Sekka Error: require dict version [%s] but got [%s].\n", SekkaVersion.dictVersion, version )
64
+ exit( 1 )
65
+ end
66
+
60
67
  @queue = EM::Queue.new
61
68
  @mutex = Mutex.new
62
69
 
63
70
  STDERR.puts( "----- Sekka Server Started -----" )
64
71
  STDERR.printf( " Sekka version : %s\n", SekkaVersion.version )
65
72
  STDERR.printf( " Nendo version : %s\n", Nendo::Core.version )
73
+ STDERR.printf( " dict version : %s\n", SekkaVersion.dictVersion )
66
74
  STDERR.printf( " dict-type : %s\n", SekkaServer::Config.dictType )
67
75
  STDERR.printf( " dict-db : %s\n", SekkaServer::Config.dictSource )
68
76
  STDERR.printf( " memcached : %s\n", SekkaServer::Config.cacheSource )