tataki 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/Guardfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +55 -0
- data/Rakefile +6 -0
- data/data/alphabet.yml +27 -0
- data/data/jisyo/.gitignore +2 -0
- data/data/roman.yml +142 -0
- data/data/skk-jisyo.yml +24 -0
- data/lib/tataki.rb +15 -0
- data/lib/tataki/base.rb +13 -0
- data/lib/tataki/converters.rb +16 -0
- data/lib/tataki/converters/alphabet.rb +24 -0
- data/lib/tataki/converters/combine.rb +20 -0
- data/lib/tataki/converters/roman.rb +67 -0
- data/lib/tataki/converters/skk_jisyo.rb +129 -0
- data/lib/tataki/version.rb +3 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/tataki/converters/alphabet_spec.rb +17 -0
- data/spec/tataki/converters/combine_spec.rb +35 -0
- data/spec/tataki/converters/roman_spec.rb +30 -0
- data/spec/tataki/converters/skk_jisyo_spec.rb +22 -0
- data/spec/tataki_spec.rb +25 -0
- data/tataki.gemspec +30 -0
- metadata +190 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a2ee32cd3111b09f4e62781debac62e4b5685017
|
4
|
+
data.tar.gz: 90a9ae1f2b96841d50ec13cefc9740fb43f7557c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ba6ba513abcfbd2b1b31250cc592a094a39ab9c5dc995741dcbfa864091f7e96f1f73119ea3965f5b49ec4329589731219c62fca4c6df8096ec10b3001bfe6c1
|
7
|
+
data.tar.gz: 94070b7ba996462447f5e942c6193829ab8c01cb10687e5812d7d97954710168fcf6fe2443a6b736f79595bc7b21af29830ea4cbfec74842165a68060ea00832
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Sunao Komuro
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Tataki
|
2
|
+
|
3
|
+
Tataki is a pure-Ruby kana converter.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'tataki'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install tataki
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### Basic usage
|
22
|
+
```ruby
|
23
|
+
require "tataki"
|
24
|
+
|
25
|
+
"漢字をひらがなに変換".to_kana # => "かんじをひらがなにへんかん"
|
26
|
+
"X線研究者".to_kana # => "えっくすせんけんきゅうしゃ"
|
27
|
+
"肉を食べるだけの簡単なお仕事".to_kana # => "にくをたべるだけのかんたんなおしごと"
|
28
|
+
```
|
29
|
+
|
30
|
+
The first time you run `require "tataki"`, it is slow because it builds the dictionary cache.
|
31
|
+
|
32
|
+
### Configure converter
|
33
|
+
```ruby
|
34
|
+
require "tataki/base"
|
35
|
+
|
36
|
+
alphabet_converter = Tataki::Converter::Alphabet.new
|
37
|
+
alphabet_converter.to_kana("abcde") # => "えーびーしーでぃーいー"
|
38
|
+
|
39
|
+
roman_alphabet_converter = Tataki::Converter::Combine.new(Tataki::Converter::Roman.new, Tataki::Converter::Alphabet.new)
|
40
|
+
roman_alphabet_converter.to_kana("robottotaisennf") # => "ろぼっとたいせんえふ"
|
41
|
+
|
42
|
+
skk_converter = Tataki::Converter::SkkJisyo.new
|
43
|
+
skk_converter.to_kana("研究者") # => "けんきゅうしゃ"
|
44
|
+
```
|
45
|
+
|
46
|
+
## TODO
|
47
|
+
- Support more configuration options
|
48
|
+
|
49
|
+
## Contributing
|
50
|
+
|
51
|
+
1. Fork it
|
52
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
53
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
54
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
55
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/data/alphabet.yml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
table:
|
2
|
+
"a": えー
|
3
|
+
"b": びー
|
4
|
+
"c": しー
|
5
|
+
"d": でぃー
|
6
|
+
"e": いー
|
7
|
+
"f": えふ
|
8
|
+
"g": じー
|
9
|
+
"h": えいち
|
10
|
+
"i": あい
|
11
|
+
"j": じぇい
|
12
|
+
"k": けー
|
13
|
+
"l": える
|
14
|
+
"m": えむ
|
15
|
+
"n": えぬ
|
16
|
+
"o": おー
|
17
|
+
"p": ぴー
|
18
|
+
"q": きゅー
|
19
|
+
"r": あーる
|
20
|
+
"s": えす
|
21
|
+
"t": てぃー
|
22
|
+
"u": ゆー
|
23
|
+
"v": ぶい
|
24
|
+
"w": だぶりゅー
|
25
|
+
"x": えっくす
|
26
|
+
"y": わい
|
27
|
+
"z": ぜっと
|
data/data/roman.yml
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
table:
|
2
|
+
"a": あ
|
3
|
+
"i": い
|
4
|
+
"u": う
|
5
|
+
"e": え
|
6
|
+
"o": お
|
7
|
+
"ka": か
|
8
|
+
"ki": き
|
9
|
+
"ku": く
|
10
|
+
"ke": け
|
11
|
+
"ko": こ
|
12
|
+
"ga": が
|
13
|
+
"gi": ぎ
|
14
|
+
"gu": ぐ
|
15
|
+
"ge": げ
|
16
|
+
"go": ご
|
17
|
+
"sa": さ
|
18
|
+
"si": し
|
19
|
+
"shi": し
|
20
|
+
"su": す
|
21
|
+
"se": せ
|
22
|
+
"so": そ
|
23
|
+
"za": ざ
|
24
|
+
"zi": じ
|
25
|
+
"ji": じ
|
26
|
+
"zu": ず
|
27
|
+
"ze": ぜ
|
28
|
+
"zo": ぞ
|
29
|
+
"ta": た
|
30
|
+
"ti": ち
|
31
|
+
"chi": ち
|
32
|
+
"tu": つ
|
33
|
+
"tsu": つ
|
34
|
+
"te": て
|
35
|
+
"to": と
|
36
|
+
"da": だ
|
37
|
+
"di": ぢ
|
38
|
+
"du": づ
|
39
|
+
"de": で
|
40
|
+
"do": ど
|
41
|
+
"na": な
|
42
|
+
"ni": に
|
43
|
+
"nu": ぬ
|
44
|
+
"ne": ね
|
45
|
+
"no": の
|
46
|
+
"ha": は
|
47
|
+
"hi": ひ
|
48
|
+
"hu": ふ
|
49
|
+
"fu": ふ
|
50
|
+
"he": へ
|
51
|
+
"ho": ほ
|
52
|
+
"ba": ば
|
53
|
+
"bi": び
|
54
|
+
"bu": ぶ
|
55
|
+
"be": べ
|
56
|
+
"bo": ぼ
|
57
|
+
"pa": ぱ
|
58
|
+
"pi": ぴ
|
59
|
+
"pu": ぷ
|
60
|
+
"pe": ぺ
|
61
|
+
"po": ぽ
|
62
|
+
"ma": ま
|
63
|
+
"mi": み
|
64
|
+
"mu": む
|
65
|
+
"me": め
|
66
|
+
"mo": も
|
67
|
+
"ya": や
|
68
|
+
"yu": ゆ
|
69
|
+
"yo": よ
|
70
|
+
"ra": ら
|
71
|
+
"ri": り
|
72
|
+
"ru": る
|
73
|
+
"re": れ
|
74
|
+
"ro": ろ
|
75
|
+
"wa": わ
|
76
|
+
"wo": を
|
77
|
+
"n": ん
|
78
|
+
"nn": ん
|
79
|
+
"xa": ぁ
|
80
|
+
"la": ぁ
|
81
|
+
"xi": ぃ
|
82
|
+
"li": ぃ
|
83
|
+
"xu": ぅ
|
84
|
+
"lu": ぅ
|
85
|
+
"xe": ぇ
|
86
|
+
"le": ぇ
|
87
|
+
"xo": ぉ
|
88
|
+
"lo": ぉ
|
89
|
+
"kya": きゃ
|
90
|
+
"kyu": きゅ
|
91
|
+
"kyo": きょ
|
92
|
+
"gya": ぎゃ
|
93
|
+
"gyu": ぎゅ
|
94
|
+
"gyo": ぎょ
|
95
|
+
"zya": じゃ
|
96
|
+
"sya": しゃ
|
97
|
+
"sha": しゃ
|
98
|
+
"syu": しゅ
|
99
|
+
"shu": しゅ
|
100
|
+
"syo": しょ
|
101
|
+
"sho": しょ
|
102
|
+
"ja": じゃ
|
103
|
+
"zyu": じゅ
|
104
|
+
"ju": じゅ
|
105
|
+
"zyo": じょ
|
106
|
+
"jo": じょ
|
107
|
+
"tya": ちゃ
|
108
|
+
"cha": ちゃ
|
109
|
+
"tyu": ちゅ
|
110
|
+
"chu": ちゅ
|
111
|
+
"tyo": ちょ
|
112
|
+
"cho": ちょ
|
113
|
+
"dya": ぢゃ
|
114
|
+
"dyu": ぢゅ
|
115
|
+
"dyo": ぢょ
|
116
|
+
"nya": にゃ
|
117
|
+
"nyu": にゅ
|
118
|
+
"nyo": にょ
|
119
|
+
"hya": ひゃ
|
120
|
+
"hyu": ひゅ
|
121
|
+
"hyo": ひょ
|
122
|
+
"bya": びゃ
|
123
|
+
"byu": びゅ
|
124
|
+
"byo": びょ
|
125
|
+
"pya": ぴゃ
|
126
|
+
"pyu": ぴゅ
|
127
|
+
"pyo": ぴょ
|
128
|
+
"mya": みゃ
|
129
|
+
"myu": みゅ
|
130
|
+
"myo": みょ
|
131
|
+
"xya": ゃ
|
132
|
+
"lya": ゃ
|
133
|
+
"xyu": ゅ
|
134
|
+
"lyu": ゅ
|
135
|
+
"xyo": ょ
|
136
|
+
"lyo": ょ
|
137
|
+
"rya": りゃ
|
138
|
+
"ryu": りゅ
|
139
|
+
"ryo": りょ
|
140
|
+
"xwa": ゎ
|
141
|
+
"lwa": ゎ
|
142
|
+
consonant: [k, g, s, j, t, c, d, n, h, f, b, p, m, y, r, w, x, l]
|
data/data/skk-jisyo.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
roman_table:
|
2
|
+
"i": [い]
|
3
|
+
"u": [う]
|
4
|
+
"e": [え]
|
5
|
+
"o": [お]
|
6
|
+
"k": [か, き, く, け, こ]
|
7
|
+
"g": [が, ぎ, ぐ, げ, ご]
|
8
|
+
"s": [さ, し, す, せ, そ]
|
9
|
+
"z": [ざ, じ, ず, ぜ, ぞ]
|
10
|
+
"j": [じ]
|
11
|
+
"t": [た, ち, つ, て, と]
|
12
|
+
"c": [ち]
|
13
|
+
"d": [だ, ぢ, づ, で, ど]
|
14
|
+
"n": [な, に, ぬ, ね, の]
|
15
|
+
"h": [は, ひ, ふ, へ, ほ]
|
16
|
+
"f": [ふ]
|
17
|
+
"b": [ば, び, ぶ, べ, ぼ]
|
18
|
+
"p": [ぱ, ぴ, ぷ, ぺ, ぽ]
|
19
|
+
"m": [ま, み, む, め, も]
|
20
|
+
"y": [や, ゆ, よ]
|
21
|
+
"r": [ら, り, る, れ, ろ]
|
22
|
+
"w": [わ]
|
23
|
+
ignore_kana:
|
24
|
+
- きごう
|
data/lib/tataki.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "tataki/base"
|
3
|
+
|
4
|
+
# Convenience layer: builds one default converter pipeline at load time and
# exposes it on every String as #to_kana.
module Tataki
  # Default pipeline handed to Combine: an SkkJisyo converter followed by an
  # Alphabet converter. The Combine instance is frozen after construction.
  EASY_CONVERTER = Tataki::Converter::Combine.new(
    Tataki::Converter::SkkJisyo.new,
    Tataki::Converter::Alphabet.new
  ).freeze

  # Reopens the core String class to add the convenience method.
  class ::String
    # Converts the receiver with Tataki::EASY_CONVERTER and returns the
    # converter's result.
    def to_kana
      Tataki::EASY_CONVERTER.to_kana(self)
    end
  end
end
|
data/lib/tataki/base.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Root namespace of the gem.
module Tataki
  # Registry of converter classes. It starts empty and is deliberately left
  # unfrozen so that converter files can append their class to it.
  CONVERTERS = []

  # Namespace holding the individual converter implementations.
  module Converter
    # Abstract parent of all converters; subclasses override #to_kana.
    class Base
      # Converts +sentence+ to kana.
      #
      # The base implementation is only a placeholder and always raises a
      # RuntimeError with the message below.
      def to_kana(sentence)
        raise RuntimeError, "TODO: implement .to_kana"
      end
    end
  end
end
|
13
|
+
|
14
|
+
# Load every converter implementation found in the converters/ directory that
# sits next to this file.
#
# The glob result is sorted explicitly: Dir[] only returns sorted results by
# default on Ruby 3.0 and later, and on older Rubies the order depends on the
# filesystem. Sorting keeps the require order the same on every platform.
converters_dir = File.expand_path("../converters", __FILE__)
Dir[File.join(converters_dir, "*.rb")].sort.each do |file|
  require file
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "yaml"
|
3
|
+
|
4
|
+
module Tataki
  module Converter
    # Spells out text character by character using the lookup table stored
    # under the "table" key of data/alphabet.yml.
    class Alphabet < Base
      # Reads the bundled YAML file (located relative to this source file)
      # and keeps its "table" mapping for later lookups.
      def initialize
        table_path = File.expand_path("../../../../data/alphabet.yml", __FILE__)
        @table = YAML.load_file(table_path)["table"]
      end

      # Downcases +sentence+ and replaces every character that has an entry
      # in the table with that entry; characters without an entry are copied
      # through unchanged. Returns the newly built String.
      def to_kana(sentence)
        sentence.downcase.each_char.each_with_object("") do |letter, converted|
          converted << (@table[letter] || letter)
        end
      end
    end
  end

  # Register this converter class in the gem-wide list.
  Tataki::CONVERTERS << Converter::Alphabet
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Tataki
  module Converter
    # Chains several converters: the output of each one becomes the input of
    # the next.
    class Combine < Base
      # converters - any number of objects responding to #to_kana, applied in
      #              the order given.
      def initialize(*converters)
        @converters = converters
      end

      # Feeds +sentence+ through every converter in order and returns the
      # final result. With no converters the sentence is returned as is.
      def to_kana(sentence)
        @converters.inject(sentence) do |text, converter|
          converter.to_kana(text)
        end
      end
    end
  end

  # Register this converter class in the gem-wide list.
  Tataki::CONVERTERS << Converter::Combine
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "trie"
|
3
|
+
require "yaml"
|
4
|
+
|
5
|
+
module Tataki
  module Converter
    # Converts romaji text to kana by walking a trie whose entries come from
    # the "table" section of data/roman.yml.
    class Roman < Base
      # Emitted in place of the first letter of a doubled consonant.
      SOKUON = "っ"

      # Loads data/roman.yml, inserts every romaji => kana pair into a new
      # trie, stores the "consonant" list and freezes the trie.
      def initialize
        @trie = Trie.new
        yaml_path = File.expand_path("../../../../data/roman.yml", __FILE__)
        config = YAML.load_file(yaml_path)
        config["table"].each { |romaji, kana| @trie.insert(romaji, kana) }
        @consonant = config["consonant"]
        @trie.freeze
      end

      # Downcases +sentence+ and converts it, starting at the trie root with
      # empty output and empty pending prefix.
      def to_kana(sentence)
        _to_kana(sentence.downcase, "", "", @trie)
      end

      private

      # Recursive matcher.
      #
      # sentence         - input that is still to be consumed
      # kana             - output produced so far
      # prefix           - characters consumed along the current trie path
      #                    that have not been turned into output yet
      # trie             - trie (or sub-trie) the next character is looked up in
      # through_alphabet - whether an unmatched character may be copied to the
      #                    output verbatim; recursive descents pass false,
      #                    restarts at the root use the default true
      #
      # Returns the converted String, or nil when +trie+ is empty or no
      # branch below applies.
      #
      # NOTE(review): find_prefix(ch) is presumably the sub-trie of keys that
      # start with ch, and find([]) the entries whose key ends exactly at this
      # node - confirm against the trie gem.
      def _to_kana(sentence, kana, prefix, trie, through_alphabet = true)
        return if trie.empty?
        return kana if sentence.empty?

        head = sentence[0]
        rest = sentence[1..-1]
        subtrie = trie.find_prefix(head)
        exact = subtrie.find([])
        has_exact = exact.size > 0

        # Everything left in the sub-trie is the exact entry: emit it and
        # restart from the root.
        if has_exact && exact.size == subtrie.size
          return _to_kana(rest, kana + exact.values.first, "", @trie)
        end

        # Last character of the input: emit the exact entry if there is one,
        # otherwise the raw character, after the pending prefix.
        if rest.empty?
          return kana + prefix + (has_exact ? exact.values.first : head)
        end

        # Try to extend the match with the following characters first.
        extended = _to_kana(rest, kana, prefix + head, subtrie, false)
        return extended if extended

        if has_exact
          _to_kana(rest, kana + exact.values.first, "", @trie)
        elsif @consonant.include?(head) && rest.start_with?(head)
          # Listed consonant followed by the same letter.
          _to_kana(rest, kana + SOKUON, "", @trie)
        elsif through_alphabet
          _to_kana(rest, kana + prefix + head, "", @trie)
        end
      end
    end
  end

  # Register this converter class in the gem-wide list.
  Tataki::CONVERTERS << Converter::Roman
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "yaml"
|
3
|
+
require "time"
|
4
|
+
require "skk/jisyo"
|
5
|
+
require "trie"
|
6
|
+
|
7
|
+
module Tataki
  module Converter
    # Converts text to kana readings looked up in a trie that is built from
    # SKK dictionary files (written text => reading).
    class SkkJisyo < Base
      # Dictionary suffixes used when the caller supplies no :jisyo_paths.
      DEFAULT_JISYO_SUFFIXES = %w[M]

      # options - Hash overriding the defaults:
      #           :jisyo_paths     - Array of dictionary file paths; defaults
      #                              to Skk::Jisyo.path for each default suffix
      #           :trie_cache_path - file used to cache the marshalled trie
      #
      # Also loads "roman_table" and "ignore_kana" from data/skk-jisyo.yml.
      def initialize(options = {})
        options = {
          :jisyo_paths => DEFAULT_JISYO_SUFFIXES.map { |suffix| Skk::Jisyo.path(suffix) },
          :trie_cache_path => trie_cache_path(DEFAULT_JISYO_SUFFIXES.join("_")),
        }.merge(options)
        config_file = File.expand_path("../../../../data/skk-jisyo.yml", __FILE__)
        config_data = YAML.load_file(config_file)
        @roman_data = config_data["roman_table"]
        @ignore_kana = config_data["ignore_kana"]
        @trie = setup_jisyo(options).freeze
      end

      # Returns the dictionary trie. If the cache file exists it is loaded
      # from there; otherwise the trie is built from every dictionary in
      # options[:jisyo_paths], and the cache plus a "<cache>.timestamp" file
      # are written.
      def setup_jisyo(options)
        cache_path = options[:trie_cache_path]
        if File.exist?(cache_path)
          # The cache is written with File.binwrite, so it must be read back
          # in binary mode too; a text-mode read may apply newline conversion
          # on some platforms and corrupt the Marshal stream.
          #
          # NOTE(review): Marshal.load must only ever see trusted data; make
          # sure :trie_cache_path never points at a file others can write.
          Marshal.load(File.binread(cache_path))
        else
          trie = Trie.new
          options[:jisyo_paths].each do |jisyo_path|
            add_jisyo(trie, jisyo_path)
          end
          File.binwrite(cache_path, Marshal.dump(trie))
          File.write("#{cache_path}.timestamp", Time.now.to_s)
          trie
        end
      end

      # Reads the EUC-JP encoded dictionary at +jisyo_path+ and inserts one
      # trie entry (candidate => reading) for every candidate of every
      # accepted line.
      #
      # Lines are skipped when they are empty, start with ";" or contain "#".
      # The reading is reduced to hiragana and a-z; it must start with
      # hiragana and must not be listed in @ignore_kana.
      def add_jisyo(trie, jisyo_path)
        File.open(jisyo_path, "rb:euc-jp") do |jisyo_file|
          jisyo_file.each_line do |line|
            next if line.empty? || line[0] == ";" || line.include?("#")
            kana, kanji_part = line.encode("utf-8").split(" ")
            next unless kana && kanji_part
            kana.gsub!(/[^ぁ-んa-z]/, "")
            next if kana.empty? || !(kana =~ /^[ぁ-ん]+[a-z]?/) || @ignore_kana.include?(kana)
            # Strip the leading and trailing "/" and every ";annotation".
            # An annotation ends at the next "/"; matching up to the end of
            # the line instead would also delete all candidates that follow
            # the first annotated one.
            kanji_part.gsub!(/^\/|;[^\/]*|\/$/, "")
            kanji_part.split("/").each do |kanji|
              trie.insert(kanji, kana)
            end
          end
        end
      end

      # Directory (relative to this file) in which trie caches are stored.
      def jisyo_path
        File.expand_path("../../../../data/jisyo", __FILE__)
      end

      # Cache file path for the dictionary set called +name+.
      def trie_cache_path(name)
        File.join(jisyo_path, "SKK-JISYO.#{name}.trie.cache")
      end

      # Parses and returns the Time stored in "<path>.timestamp".
      def jisyo_timestamp(path)
        Time.parse(File.read("#{path}.timestamp"))
      end

      # Converts +sentence+, starting at the trie root with empty output and
      # empty pending prefix.
      def to_kana(sentence)
        _to_kana(sentence, "", "", @trie)
      end

      private

      # Recursive matcher.
      #
      # sentence         - input that is still to be consumed
      # kana             - output produced so far
      # prefix           - characters consumed along the current trie path
      #                    that have not been turned into output yet
      # trie             - trie (or sub-trie) the next character is looked up in
      # through_alphabet - whether an unmatched character may be copied to the
      #                    output verbatim; recursive descents pass false
      #
      # Returns the converted String, or nil when +trie+ is empty or no
      # branch applies. When several readings are available one is picked
      # with Array#sample, so the result can differ between calls.
      #
      # NOTE(review): find_prefix(ch) is presumably the sub-trie of keys that
      # start with ch, and find([]) the entries whose key ends exactly at this
      # node - confirm against the trie gem.
      def _to_kana(sentence, kana, prefix, trie, through_alphabet = true)
        return if trie.empty?
        return kana if sentence.empty?

        next_ch = sentence[0]
        next_sentence = sentence[1..-1]
        next_trie = trie.find_prefix(next_ch)
        # Readings containing a-z are only used for the okurigana lookup
        # below; they are removed from the plain candidate lists.
        next_trie_values = next_trie.values
        next_trie_values.reject! { |value| value =~ /[a-z]/ }
        next_set = next_trie.find([])
        next_set_values = next_set.values
        # Must run before the a-z readings are rejected from next_set_values.
        okurigana = find_okurigana(next_set_values, next_sentence)
        next_set_values.reject! { |value| value =~ /[a-z]/ }
        if okurigana
          return _to_kana(next_sentence, kana + okurigana, "", @trie)
        elsif next_set_values.size > 0 && next_set_values.size == next_trie_values.size
          # No longer entries remain below this node: emit and restart.
          return _to_kana(next_sentence, kana + next_set_values.sample, "", @trie)
        end

        if next_sentence.empty?
          if next_set_values.size > 0
            return kana + next_set_values.sample
          elsif through_alphabet
            return kana + prefix + next_ch
          end
          # NOTE(review): when neither branch applies, execution continues
          # with an empty sentence; the recursive call below then returns
          # `kana` without `prefix` if next_trie is non-empty, which looks
          # like it could drop pending characters - confirm.
        end

        # Try to extend the match with the following characters first.
        next_kana = _to_kana(next_sentence, kana, prefix + next_ch, next_trie, false)

        if next_kana
          return next_kana
        end

        if next_set_values.size > 0
          return _to_kana(next_sentence, kana + next_set_values.sample, "", @trie)
        elsif through_alphabet
          return _to_kana(next_sentence, kana + prefix + next_ch, "", @trie)
        else
          return nil
        end
      end

      # Looks for a reading in +yomi_candidates+ that ends with a lowercase
      # letter for which @roman_data lists kana, one of which +next_sentence+
      # starts with. Returns that reading without its trailing letter, or nil
      # when no candidate qualifies.
      def find_okurigana(yomi_candidates, next_sentence)
        yomi_candidates.each do |yomi|
          next unless yomi =~ /.+([a-z])$/
          okurigana_yomi = @roman_data[$1]
          next unless okurigana_yomi
          okurigana_yomi.each do |okurigana|
            return yomi.gsub(/[a-z]$/, "") if next_sentence.start_with?(okurigana)
          end
        end
        nil
      end
    end
  end

  # Register this converter class in the gem-wide list.
  Tataki::CONVERTERS << Converter::SkkJisyo
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Table-driven examples for the alphabet converter.
describe Tataki::Converter::Alphabet do
  let(:converter) { Tataki::Converter::Alphabet.new }

  describe ".to_kana" do
    # Each pair is [input sentence, expected conversion].
    [
      ["", ""],
      ["hoge!", "えいちおーじーいー!"],
    ].each do |sentence, kana|
      it "converts #{sentence.inspect} to #{kana.inspect}" do
        expect(converter.to_kana(sentence)).to eq(kana)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Table-driven examples for chained converters.
describe Tataki::Converter::Combine do
  let(:skk_converter) { Tataki::Converter::SkkJisyo.new }
  let(:roman_converter) { Tataki::Converter::Roman.new }
  let(:alphabet_converter) { Tataki::Converter::Alphabet.new }

  describe ".to_kana" do
    context "when roman + alphabet" do
      let(:converter) do
        Tataki::Converter::Combine.new(roman_converter, alphabet_converter)
      end

      # Each pair is [input sentence, expected conversion].
      [
        ["robottotaisennf", "ろぼっとたいせんえふ"],
      ].each do |sentence, kana|
        it "converts #{sentence.inspect} to #{kana.inspect}" do
          expect(converter.to_kana(sentence)).to eq(kana)
        end
      end
    end

    context "when skk-jisyo + alphabet" do
      let(:converter) do
        Tataki::Converter::Combine.new(skk_converter, alphabet_converter)
      end

      # Each pair is [input sentence, expected conversion].
      [
        ["X線の研究をしています", "えっくすせんのけんきゅうをしています"],
        ["X線研究者", "えっくすせんけんきゅうしゃ"],
        ["電気通信大学X線研究", "でんきつうしんだいがくえっくすせんけんきゅう"],
        ["DNAは螺旋状の構造らしい", "でぃーえぬえーはらせんじょうのこうぞうらしい"],
      ].each do |sentence, kana|
        it "converts #{sentence.inspect} to #{kana.inspect}" do
          expect(converter.to_kana(sentence)).to eq(kana)
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Table-driven examples for the romaji converter.
describe Tataki::Converter::Roman do
  let(:converter) { Tataki::Converter::Roman.new }

  describe ".to_kana" do
    # Each pair is [input sentence, expected conversion].
    [
      ["", ""],
      ["hoge", "ほげ"],
      ["hogelog", "ほげぉg"],
      ["hogge", "ほっげ"],
      ["hogs", "ほgs"],
      ["nanka", "なんか"],
      ["nannnan", "なんなん"],
      ["nannnann", "なんなん"],
      ["nannnannsei", "なんなんせい"],
      ["kukkingu", "くっきんぐ"],
      ["kukkingu papa", "くっきんぐ ぱぱ"],
      ["toukyoutokkyokyokakyoku", "とうきょうとっきょきょかきょく"],

      ["kku", "っく"],
      [",,", ",,"],
    ].each do |sentence, kana|
      it "converts #{sentence.inspect} to #{kana.inspect}" do
        expect(converter.to_kana(sentence)).to eq(kana)
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Table-driven examples for the SKK dictionary converter.
describe Tataki::Converter::SkkJisyo do
  let(:converter) { Tataki::Converter::SkkJisyo.new }

  describe ".to_kana" do
    # Each pair is [input sentence, expected conversion].
    [
      ["", ""],
      ["漢字", "かんじ"],
      ["漢字変換する", "かんじへんかんする"],
      ["隣りの", "となりの"],
      ["隣りはよく柿食う", "となりはよくかきくう"],
      ["安心安全", "あんしんあんぜん"],
      ["毎朝新聞配達をしています", "まいあさしんぶんはいたつをしています"],
    ].each do |sentence, kana|
      it "converts #{sentence.inspect} to #{kana.inspect}" do
        expect(converter.to_kana(sentence)).to eq(kana)
      end
    end
  end
end
|
data/spec/tataki_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
# Top-level examples: version constant, converter registry and the
# String#to_kana convenience method.
describe Tataki do
  it "has a version number" do
    # Uses the expect syntax like every other example in this suite instead
    # of the legacy `should_not` form.
    expect(Tataki::VERSION).not_to be_nil
  end

  describe ".converters" do
    it "returns converters" do
      # match_array ignores ordering, so the load order of the converter
      # files does not matter here.
      expect(Tataki.converters).to match_array([
        Tataki::Converter::Roman,
        Tataki::Converter::Alphabet,
        Tataki::Converter::Combine,
        Tataki::Converter::SkkJisyo,
      ])
    end
  end

  describe "String.to_kana" do
    it "converts to kana" do
      expect("X線研究者".to_kana).to eq("えっくすせんけんきゅうしゃ")
    end
  end
end
|
data/tataki.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put the gem's own lib/ directory on the load path so that the version file
# can be required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tataki/version'

Gem::Specification.new do |spec|
  spec.name          = "tataki"
  spec.version       = Tataki::VERSION
  spec.authors       = ["hogelog"]
  spec.email         = ["konbu.komuro@gmail.com"]
  spec.description   = %q{Kanji to Kana converter}
  spec.summary       = %q{Tataki is pure ruby Kanji to Kana converter.}
  spec.homepage      = "https://github.com/hogelog/tataki"
  spec.license       = "MIT"

  # `-z` makes git emit NUL-terminated, unquoted paths. Without it git may
  # quote unusual or non-ASCII file names, and splitting on the global `$/`
  # depends on nobody having reassigned the record separator.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "skk-jisyo", "~> 0.0.5"
  spec.add_dependency "trie"

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "guard-rspec"
  spec.add_development_dependency "spring"
end
|
metadata
ADDED
@@ -0,0 +1,190 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tataki
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hogelog
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: skk-jisyo
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.5
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: trie
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: guard-rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: spring
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description: Kanji to Kana converter
|
126
|
+
email:
|
127
|
+
- konbu.komuro@gmail.com
|
128
|
+
executables: []
|
129
|
+
extensions: []
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- .gitignore
|
133
|
+
- .rspec
|
134
|
+
- .travis.yml
|
135
|
+
- Gemfile
|
136
|
+
- Guardfile
|
137
|
+
- LICENSE.txt
|
138
|
+
- README.md
|
139
|
+
- Rakefile
|
140
|
+
- data/alphabet.yml
|
141
|
+
- data/jisyo/.gitignore
|
142
|
+
- data/roman.yml
|
143
|
+
- data/skk-jisyo.yml
|
144
|
+
- lib/tataki.rb
|
145
|
+
- lib/tataki/base.rb
|
146
|
+
- lib/tataki/converters.rb
|
147
|
+
- lib/tataki/converters/alphabet.rb
|
148
|
+
- lib/tataki/converters/combine.rb
|
149
|
+
- lib/tataki/converters/roman.rb
|
150
|
+
- lib/tataki/converters/skk_jisyo.rb
|
151
|
+
- lib/tataki/version.rb
|
152
|
+
- spec/spec_helper.rb
|
153
|
+
- spec/tataki/converters/alphabet_spec.rb
|
154
|
+
- spec/tataki/converters/combine_spec.rb
|
155
|
+
- spec/tataki/converters/roman_spec.rb
|
156
|
+
- spec/tataki/converters/skk_jisyo_spec.rb
|
157
|
+
- spec/tataki_spec.rb
|
158
|
+
- tataki.gemspec
|
159
|
+
homepage: https://github.com/hogelog/tataki
|
160
|
+
licenses:
|
161
|
+
- MIT
|
162
|
+
metadata: {}
|
163
|
+
post_install_message:
|
164
|
+
rdoc_options: []
|
165
|
+
require_paths:
|
166
|
+
- lib
|
167
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
168
|
+
requirements:
|
169
|
+
- - '>='
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: '0'
|
172
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
173
|
+
requirements:
|
174
|
+
- - '>='
|
175
|
+
- !ruby/object:Gem::Version
|
176
|
+
version: '0'
|
177
|
+
requirements: []
|
178
|
+
rubyforge_project:
|
179
|
+
rubygems_version: 2.0.3
|
180
|
+
signing_key:
|
181
|
+
specification_version: 4
|
182
|
+
summary: Tataki is pure ruby Kanji to Kana converter.
|
183
|
+
test_files:
|
184
|
+
- spec/spec_helper.rb
|
185
|
+
- spec/tataki/converters/alphabet_spec.rb
|
186
|
+
- spec/tataki/converters/combine_spec.rb
|
187
|
+
- spec/tataki/converters/roman_spec.rb
|
188
|
+
- spec/tataki/converters/skk_jisyo_spec.rb
|
189
|
+
- spec/tataki_spec.rb
|
190
|
+
has_rdoc:
|