sekka 1.0.0 → 1.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ # Sekka (石火) SKK like Japanese input method
2
+
3
+ [![Build Status](https://secure.travis-ci.org/kiyoka/sekka.png)](http://travis-ci.org/kiyoka/sekka)
4
+
5
+ ## Sekka(石火)はSKKライクな日本語入力メソッドです。
6
+ <http://oldtype.sumibi.org/show-page/Sekka>
7
+
8
+ ![Logo]( http://pix.am/u2Hv.png )
data/Rakefile CHANGED
@@ -20,8 +20,10 @@
20
20
 
21
21
  require 'rake'
22
22
  begin
23
- require 'jeweler'
23
+ require 'jeweler2'
24
24
  Jeweler::Tasks.new do |gemspec|
25
+ vh = Jeweler::VersionHelper.new "."
26
+ gemspec.version = vh.to_s + ".pre"
25
27
  gemspec.name = "sekka"
26
28
  gemspec.summary = "Sekka is a SKK like input method."
27
29
  gemspec.description = "Sekka is a SKK like input method. Sekka server provides REST Based API. If you are SKK user, let's try it."
@@ -52,14 +54,14 @@ begin
52
54
  "sekka-path"]
53
55
  gemspec.required_ruby_version = '>= 1.9.1'
54
56
  gemspec.add_dependency( "eventmachine" )
55
- gemspec.add_dependency( "fuzzy-string-match", ">= 0.9.2" )
56
57
  gemspec.add_dependency( "memcache-client" )
57
58
  gemspec.add_dependency( "nendo", "= 0.6.4" )
59
+ gemspec.add_dependency( "distributed-trie" )
58
60
  gemspec.add_dependency( "rack" )
59
61
  gemspec.add_dependency( "ruby-progressbar" )
60
62
  end
61
63
  rescue LoadError
62
- puts "Jeweler not available. Install it with: sudo gem install jeweler"
64
+ puts "Jeweler2 not available. Install it with: sudo gem install jeweler2"
63
65
  end
64
66
 
65
67
 
@@ -68,7 +70,7 @@ end
68
70
 
69
71
  task :compile do
70
72
  # generate version.rb
71
- dictVersion = "0.9.2"
73
+ dictVersion = "1.2.0"
72
74
  vh = Jeweler::VersionHelper.new "."
73
75
  open( "./lib/sekka/sekkaversion.rb", "w" ) {|f|
74
76
  f.puts( "class SekkaVersion" )
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
2
  :major: 1
3
- :minor: 0
3
+ :minor: 1
4
4
  :patch: 0
data/bin/sekka-server CHANGED
@@ -16,16 +16,17 @@ DICTURL = "https://raw.github.com/kiyoka/sekka/master/public_dict/" + SekkaVersi
16
16
 
17
17
  PIDFILE = DICTDIR + "/pid"
18
18
 
19
- TC_FILE = DICTDIR + "/SEKKA-JISYO.SMALL.tch"
19
+ TC_OPTS = "#xmsiz=256m"
20
+ TC_FILE = DICTDIR + "/SEKKA-JISYO.SMALL.tch" + TC_OPTS
20
21
  TSVFILE = DICTDIR + "/SEKKA-JISYO.SMALL.tsv"
21
22
  SUMFILE = DICTDIR + "/SEKKA-JISYO.SMALL.md5"
22
23
 
23
24
  URLURL = DICTURL + "/SEKKA-JISYO.SMALL.url"
24
25
  SUMURL = DICTURL + "/SEKKA-JISYO.SMALL.md5"
25
26
 
26
- TC_FILE_LIST = [ DICTDIR + "/SEKKA-JISYO.CUSTOM.tch",
27
- DICTDIR + "/SEKKA-JISYO.LARGE.tch",
28
- DICTDIR + "/SEKKA-JISYO.SMALL.tch" ]
27
+ TC_FILE_LIST = [ DICTDIR + "/SEKKA-JISYO.CUSTOM.tch" + TC_OPTS,
28
+ DICTDIR + "/SEKKA-JISYO.LARGE.tch" + TC_OPTS,
29
+ DICTDIR + "/SEKKA-JISYO.SMALL.tch" + TC_OPTS ]
29
30
 
30
31
  MEMCACHED = "localhost:11211" # memcahced
31
32
 
@@ -111,7 +112,7 @@ def main
111
112
 
112
113
  case dictType
113
114
  when :tokyocabinet
114
- # .tchファイルが存在すうるか調べる
115
+ # .tchファイルが存在するか調べる
115
116
  list = TC_FILE_LIST.select { |name| File.exist?( name ) }
116
117
 
117
118
  if 0 == list.size
data/emacs/sekka.el CHANGED
@@ -1620,7 +1620,7 @@ point から行頭方向に同種の文字列が続く間を漢字変換しま
1620
1620
  (setq default-input-method "japanese-sekka")
1621
1621
 
1622
1622
  (defconst sekka-version
1623
- "1.0.0" ;;SEKKA-VERSION
1623
+ "1.1.0" ;;SEKKA-VERSION
1624
1624
  )
1625
1625
  (defun sekka-version (&optional arg)
1626
1626
  "入力モード変更"
@@ -31,46 +31,42 @@
31
31
  #
32
32
  # $Id:
33
33
  #
34
- require 'fuzzystringmatch'
34
+ require 'distributedtrie'
35
35
  require 'sekka/kvs'
36
36
 
37
37
  class ApproximateSearch
38
38
  def initialize( jarow_shikii )
39
39
  @jarow_shikii = jarow_shikii
40
- @jarow = FuzzyStringMatch::JaroWinkler.create( :native )
41
- end
42
-
43
- def filtering( keyword, arr )
44
- keyword = keyword.downcase
45
- arr.map { |str|
46
- val = @jarow.getDistance( keyword, str.downcase )
47
- #printf( " [%s] vs [%s] => %f\n", keyword, str.downcase, val )
48
- (val > @jarow_shikii) ? [ val, str ] : false
49
- }.select { |v| v }.sort_by {|item| 1.0 - item[0]}
50
40
  end
51
41
 
52
42
  def search( userid, kvs, keyword, type )
53
- readymade_key = case type
54
- when 'k' # okuri nashi kanji entry
55
- "(" + keyword.slice( 0, 2 ).downcase + ")"
56
- when 'K' # okuri ari kanji entry
57
- "(" + keyword.slice( 0, 2 ).upcase + ")"
58
- when 'h' # hiragana phrase entry
59
- "{" + keyword.slice( 1, 2 ).downcase + "}"
60
- else
61
- raise sprintf( "Error: ApproximateSearch#search unknown type %s ", type )
62
- end
63
-
64
- str = kvs.get( userid + "::" + readymade_key, false )
65
- if not str
66
- str = kvs.get( "MASTER::" + readymade_key )
67
- end
68
-
69
- #printf( "#readymade_key %s : %s\n", readymade_key, str )
70
- if str
71
- filtering( keyword, str.split( /[ ]+/ ))
43
+ arr = []
44
+ case userid
45
+ when "MASTER"
46
+ arr = searchByUser( "MASTER", kvs, keyword, type )
72
47
  else
73
- [ ]
48
+ h = {}
49
+ searchByUser( "MASTER", kvs, keyword, type ).each { |item| h[ item[1] ] = item[0] }
50
+ searchByUser( userid, kvs, keyword, type ).each { |item| h[ item[1] ] = item[0] }
51
+ h.keys.each { |k| arr << [ h[k], k ] }
74
52
  end
53
+ arr.sort_by {|item| [1.0 - item[0], item[1]]}
54
+ end
55
+
56
+ def searchByUser( userid, kvs, keyword, type )
57
+ pair = case type
58
+ when 'k' # okuri nashi kanji entry
59
+ ["Ik::" + userid + "::", keyword.downcase]
60
+ when 'K' # okuri ari kanji entry
61
+ ["IK::" + userid + "::", keyword]
62
+ when 'h' # hiragana phrase entry
63
+ ["Ih::" + userid + "::", keyword.downcase]
64
+ else
65
+ raise sprintf( "Error: ApproximateSearch#search unknown type %s ", type )
66
+ end
67
+ prefix = pair[0]
68
+ _keyword = pair[1]
69
+ trie = DistributedTrie::Trie.new( kvs, prefix )
70
+ trie.fuzzySearch( _keyword, @jarow_shikii )
75
71
  end
76
72
  end
data/lib/sekka/henkan.nnd CHANGED
@@ -496,45 +496,6 @@
496
496
  (set! user-keylist (cons k user-keylist))
497
497
  (append-entry userid kvs k v)))
498
498
  sekka-jisyo-data))
499
-
500
- (define (_create-ready-made-keylist keylist)
501
- (receive (okuri-ari-hash
502
- okuri-nashi-hash
503
- hiragana-phrase-hash)
504
- (create-2char-hash keylist)
505
-
506
- ;; OKURI-ARI
507
- (for-each
508
- (lambda (key)
509
- (let1 fetched (kvs.get (+ userid "::" "(" (sekka-upcase key) ")")
510
- (kvs.get (+ masterid "::" "(" (sekka-upcase key) ")") ""))
511
- (kvs.put! (+ userid "::" "(" (sekka-upcase key) ")")
512
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
513
- (hash-table-get okuri-ari-hash key))))
514
- " "))))
515
- (hash-table-keys okuri-ari-hash))
516
-
517
- ;; OKURI-NASHI
518
- (for-each
519
- (lambda (key)
520
- (let1 fetched (kvs.get (+ userid "::" "(" (sekka-downcase key) ")")
521
- (kvs.get (+ masterid "::" "(" (sekka-downcase key) ")") ""))
522
- (kvs.put! (+ userid "::" "(" (sekka-downcase key) ")")
523
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
524
- (hash-table-get okuri-nashi-hash key))))
525
- " "))))
526
- (hash-table-keys okuri-nashi-hash))
527
-
528
- ;; HIRAGANA-PHRASE
529
- (for-each
530
- (lambda (key)
531
- (let1 fetched (kvs.get (+ userid "::" "{" (sekka-downcase key) "}")
532
- (kvs.get (+ masterid "::" "{" (sekka-downcase key) "}") ""))
533
- (kvs.put! (+ userid "::" "{" (sekka-downcase key) "}")
534
- (string-join (uniq (sort (append (to-list (fetched.split #/[ ]+/))
535
- (hash-table-get hiragana-phrase-hash key))))
536
- " "))))
537
- (hash-table-keys hiragana-phrase-hash))))
538
499
 
539
500
  ;; "ユーザー語彙を"(stored)"にpush!する"
540
501
  (define (kvs-push! userid kvs entry-str)
@@ -557,7 +518,7 @@
557
518
  #f)
558
519
  (begin
559
520
  (insert-to-db lst)
560
- (_create-ready-made-keylist user-keylist)
521
+ (setup-trie-index userid kvs user-keylist #f)
561
522
  (kvs-push! userid kvs dict-line)
562
523
  #?=(+ "user dict stored userid=[" userid "] tango=[" dict-line "]")
563
524
  #t))))
@@ -37,22 +37,14 @@
37
37
  (require "progressbar")
38
38
  (require "sekka/kvs")
39
39
  (require "sekka/sekkaversion")
40
+ (require "distributedtrie")
40
41
  (use sekka.util)
41
42
 
42
- (define masterid "MASTER")
43
- (define workid "WORK")
44
- (define versionid "SEKKA::VERSION")
45
-
46
- (define alphabet-string "abcdefghijklmnopqrstuvwxyz>@;#")
47
- (define alphabet-lower-list (to-list (alphabet-string.split "")))
48
- (define alphabet-upper-list (to-list (. (sekka-upcase alphabet-string) split "")))
49
- (define alphabet-pairs (append-map
50
- (lambda (a)
51
- (map
52
- (lambda (b)
53
- (+ a b))
54
- (append alphabet-upper-list alphabet-lower-list)))
55
- alphabet-lower-list))
43
+ (define master-prefix "MASTER")
44
+ (define version-prefix "SEKKA::VERSION")
45
+ (define okuri-ari-prefix "IK")
46
+ (define okuri-nashi-prefix "Ik")
47
+ (define hiragana-phrase-prefix "Ih")
56
48
 
57
49
  ;; KVS type setting
58
50
  (define *kvs-type* 'tokyocabinet) ;; default
@@ -69,69 +61,72 @@
69
61
  (define (dict-get userid kvs key . fallback)
70
62
  (if-let1 value
71
63
  (or (kvs.get (+ userid "::" key) #f)
72
- (kvs.get (+ masterid "::" key) #f))
64
+ (kvs.get (+ master-prefix "::" key) #f))
73
65
  value
74
66
  (let1 opt (get-optional fallback #f)
75
67
  opt)))
76
68
 
77
- (define (create-2char-hash keylist)
78
- (define okuri-ari-hash (make-hash-table))
79
- (define okuri-nashi-hash (make-hash-table))
80
- (define hiragana-phrase-hash (make-hash-table))
81
- (define (create-hash keylist)
82
- (for-each
83
- (lambda (k)
84
- (when (rxmatch #/^[=a-zA-Z#^>-@`\;+:'\-]+$/ k)
85
- (if (rxmatch #/^=/ k)
86
- ;; HIRAGANA-PHRASE
87
- (let1 sliced (sekka-downcase (k.slice 1 2))
88
- (when (= 2 sliced.size)
89
- (hash-table-push! hiragana-phrase-hash sliced k)))
90
- ;; OKURI-ARI and OKURI-NASHI
91
- (let1 sliced (sekka-downcase (k.slice 0 2))
92
- (when (= 2 sliced.size)
93
- (if (rxmatch #/[A-Z`+]$/ k)
94
- (hash-table-push! okuri-ari-hash sliced k)
95
- (hash-table-push! okuri-nashi-hash sliced k)))))))
96
- keylist))
97
- (create-hash keylist)
98
- (values okuri-ari-hash
99
- okuri-nashi-hash
100
- hiragana-phrase-hash))
101
-
102
- (define (setup-ready-made-keylist kvs keylist)
103
- (for-each
104
- (lambda (key)
105
- (let1 key (+ masterid "::" key)
106
- (unless (kvs.get key #f)
107
- (kvs.put! key ""))))
108
- alphabet-pairs)
109
-
110
- (receive (okuri-ari-hash
111
- okuri-nashi-hash
112
- hiragana-phrase-hash)
113
- (create-2char-hash keylist)
114
-
115
- ;; OKURI-ARI
116
- (for-each
117
- (lambda (key)
118
- (kvs.put! (+ masterid "::" "(" (sekka-upcase key) ")")
119
- (string-join (uniq (sort (hash-table-get okuri-ari-hash key))) " ")))
120
- (hash-table-keys okuri-ari-hash))
121
69
 
122
- ;; OKURI-NASHI
123
- (for-each
124
- (lambda (key)
125
- (kvs.put! (+ masterid "::" "(" (sekka-downcase key) ")")
126
- (string-join (uniq (sort (hash-table-get okuri-nashi-hash key))) " ")))
127
- (hash-table-keys okuri-nashi-hash))
70
+ (define (setup-trie-index userid kvs keylist displayProgress)
128
71
 
129
- ;; HIRAGANA-PHRASE
130
- (for-each
131
- (lambda (key)
132
- (kvs.put! (+ masterid "::" "{" (sekka-downcase key) "}")
133
- (string-join (uniq (sort (hash-table-get hiragana-phrase-hash key))) " ")))
134
- (hash-table-keys hiragana-phrase-hash))))
72
+ (define (trie-add-autocommit trie keylist progress)
73
+ (let ([i 0])
74
+ (for-each
75
+ (lambda (k)
76
+ (trie.addKey! k)
77
+ (set! i (+ i 1))
78
+ (when (= 0 (% i 100))
79
+ (trie.commit!))
80
+ (when progress
81
+ (progress.inc)))
82
+ keylist))
83
+ trie.commit!)
84
+
85
+ (define (grouping-keylist keylist progress)
86
+ (let ([okuri-ari-list '()]
87
+ [okuri-nashi-list '()]
88
+ [hiragana-phrase-list '()])
89
+ (for-each
90
+ (lambda (k)
91
+ (if (rxmatch #/^[=a-zA-Z#^>-@`\;+:'\-]+$/ k)
92
+ (if (rxmatch #/^=/ k)
93
+ ;; HIRAGANA-PHRASE
94
+ (push! hiragana-phrase-list k)
95
+ ;; OKURI-ARI and OKURI-NASHI
96
+ (if (rxmatch #/[A-Z`+]$/ k)
97
+ (push! okuri-ari-list k)
98
+ (push! okuri-nashi-list k))))
99
+ (when progress
100
+ (progress.inc)))
101
+ keylist)
102
+ (values okuri-ari-list
103
+ okuri-nashi-list
104
+ hiragana-phrase-list)))
105
+
106
+ (let1 progress (if displayProgress
107
+ (ProgressBar.new "trie-tree " (* 2 (length keylist)) STDERR)
108
+ #f)
109
+ (receive (okuri-ari-list
110
+ okuri-nashi-list
111
+ hiragana-phrase-list)
112
+ (grouping-keylist keylist progress)
113
+
114
+ (when progress
115
+ (progress.set (+ (length keylist)
116
+ (- (length keylist)
117
+ (+
118
+ (length okuri-ari-list)
119
+ (length okuri-nashi-list)
120
+ (length hiragana-phrase-list))))))
121
+
122
+ (let1 trie (DistributedTrie::Trie.new kvs (+ okuri-ari-prefix "::" userid "::"))
123
+ (trie-add-autocommit trie okuri-ari-list progress))
124
+
125
+ (let1 trie (DistributedTrie::Trie.new kvs (+ okuri-nashi-prefix "::" userid "::"))
126
+ (trie-add-autocommit trie okuri-nashi-list progress))
127
+
128
+ (let1 trie (DistributedTrie::Trie.new kvs (+ hiragana-phrase-prefix "::" userid "::"))
129
+ (trie-add-autocommit trie hiragana-phrase-list progress)))))
135
130
 
136
131
 
137
132
  (define (append-entry userid kvs _key value)
@@ -166,21 +161,34 @@
166
161
  (kvs.put! key value))))))
167
162
 
168
163
 
164
+ (define (file-length f)
165
+ (let1 total 0
166
+ (for-each
167
+ (lambda (x)
168
+ (set! total (+ 1 total)))
169
+ f)
170
+ (f.rewind)
171
+ total))
172
+
169
173
  (define (load-sekka-jisyo-f f filename)
170
174
  (define keylist '())
171
175
  (define (create-keylist kvs f)
172
- (for-each
173
- (lambda (line)
174
- (let1 fields (split-dict-line line)
175
- (set! keylist (cons (first fields) keylist))
176
- (append-entry masterid kvs (first fields) (second fields))))
177
- (f.readlines)))
176
+ (let1 progress (ProgressBar.new "data-body " (file-length f) STDERR)
177
+ (for-each
178
+ (lambda (line)
179
+ (let* (
180
+ [line (line.chomp)]
181
+ [fields (split-dict-line line)])
182
+ (push! keylist (first fields))
183
+ (append-entry master-prefix kvs (first fields) (second fields)))
184
+ (progress.inc))
185
+ f)))
178
186
  (let1 kvs (Kvs.new (get-kvs-type))
179
187
  (kvs.open filename)
180
188
  (kvs.clear)
181
189
  (create-keylist kvs f)
182
- (setup-ready-made-keylist kvs keylist)
183
- (kvs.put! versionid SekkaVersion.version)
190
+ (setup-trie-index master-prefix kvs keylist #t)
191
+ (kvs.put! version-prefix SekkaVersion.version)
184
192
  (kvs.close)))
185
193
 
186
194
 
@@ -196,15 +204,6 @@
196
204
 
197
205
 
198
206
  (define (restore-sekka-jisyo-f f filename-or-hostname)
199
- (define (file-length f)
200
- (let1 total 0
201
- (for-each
202
- (lambda (x)
203
- (set! total (+ 1 total)))
204
- f)
205
- (f.rewind)
206
- total))
207
-
208
207
  (define (restore f kvs progress)
209
208
  (let loop ([line (f.readline.chomp)])
210
209
  (let* (
@@ -1,4 +1,4 @@
1
1
  class SekkaVersion
2
- def self.version() "1.0.0" end
3
- def self.dictVersion() "0.9.2" end
2
+ def self.version() "1.1.0" end
3
+ def self.dictVersion() "1.2.0" end
4
4
  end
data/lib/sekkaserver.rb CHANGED
@@ -57,12 +57,20 @@ module SekkaServer
57
57
  (@kvs,@cachesv) = @core.openSekkaJisyo( SekkaServer::Config.dictType,
58
58
  SekkaServer::Config.dictSource,
59
59
  SekkaServer::Config.cacheSource )
60
+
61
+ version = @kvs.get( "SEKKA::VERSION" )
62
+ if not SekkaVersion.dictVersion == version
63
+ STDERR.printf( "Sekka Error: require dict version [%s] but got [%s].\n", SekkaVersion.dictVersion, version )
64
+ exit( 1 )
65
+ end
66
+
60
67
  @queue = EM::Queue.new
61
68
  @mutex = Mutex.new
62
69
 
63
70
  STDERR.puts( "----- Sekka Server Started -----" )
64
71
  STDERR.printf( " Sekka version : %s\n", SekkaVersion.version )
65
72
  STDERR.printf( " Nendo version : %s\n", Nendo::Core.version )
73
+ STDERR.printf( " dict version : %s\n", SekkaVersion.dictVersion )
66
74
  STDERR.printf( " dict-type : %s\n", SekkaServer::Config.dictType )
67
75
  STDERR.printf( " dict-db : %s\n", SekkaServer::Config.dictSource )
68
76
  STDERR.printf( " memcached : %s\n", SekkaServer::Config.cacheSource )