tataki 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/benchmark/001-M_jinmei.rb +17 -0
- data/lib/tataki/converters/skk_jisyo.rb +54 -57
- data/lib/tataki/version.rb +1 -1
- data/spec/spec_helper.rb +2 -1
- data/spec/tataki/converters/combine_spec.rb +0 -9
- data/spec/tataki/converters/skk_jisyo_spec.rb +10 -1
- data/spec/tataki_spec.rb +2 -1
- data/tataki.gemspec +0 -1
- metadata +2 -19
- data/data/roman.yml +0 -142
- data/lib/tataki/converters/roman.rb +0 -67
- data/spec/tataki/converters/roman_spec.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c75759d9be482c52fc40c1b59d543fa709877f9d
|
4
|
+
data.tar.gz: d2f4bc1514cd2eb3d4a8b57286509b2e91442db3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b1cc2f50a0302cb959b6414d56a8c3e565286b9284ec07ecf328caefb6735a23d1aa83f09e1abdf5be208c5811a0b5936cb422beb192119209678a2c45ef599
|
7
|
+
data.tar.gz: 315cf59f199dfa3532293ae19252083ce58a2e305ea0ee3421bebd1fb66f99c0d89c75ea3df93943e7fb3f0060a732c1c4519dbb10aeb76d6313a6d3b5743224
|
data/README.md
CHANGED
@@ -36,11 +36,12 @@ require "tataki/base"
|
|
36
36
|
alphabet_converter = Tataki::Converter::Alphabet.new
|
37
37
|
alphabet_converter.to_kana("abcde") # => "えーびーしーでぃーいー"
|
38
38
|
|
39
|
-
roman_alphabet_converter = Tataki::Converter::Combine.new(Tataki::Converter::Roman.new, Tataki::Converter::Alphabet.new)
|
40
|
-
roman_alphabet_converter.to_kana("robottotaisennf") # => "ろぼっとたいせんえふ"
|
41
|
-
|
42
39
|
skk_converter = Tataki::Converter::SkkJisyo.new
|
43
40
|
skk_converter.to_kana("研究者") # => "けんきゅうしゃ"
|
41
|
+
|
42
|
+
alphabet_skk_converter = Tataki::Converter::Combine.new(Tataki::Converter::Alphabet.new, Tataki::Converter::SkkJisyo.new)
|
43
|
+
alphabet_skk_converter.to_kana("X線研究者") # => "えっくすせんけんきゅうしゃ"
|
44
|
+
|
44
45
|
```
|
45
46
|
|
46
47
|
## TODO
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "benchmark"
|
2
|
+
|
3
|
+
N = 1000
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
6
|
+
|
7
|
+
require 'tataki/base'
|
8
|
+
converter = Tataki::Converter::SkkJisyo.new(%w[M jinmei])
|
9
|
+
|
10
|
+
source = "かな漢字変換" * 100
|
11
|
+
|
12
|
+
puts Benchmark::CAPTION
|
13
|
+
puts Benchmark.measure {
|
14
|
+
N.times do
|
15
|
+
converter.to_kana(source)
|
16
|
+
end
|
17
|
+
}
|
@@ -2,7 +2,6 @@
|
|
2
2
|
require "yaml"
|
3
3
|
require "time"
|
4
4
|
require "skk/jisyo"
|
5
|
-
require "trie"
|
6
5
|
|
7
6
|
module Tataki
|
8
7
|
module Converter
|
@@ -12,30 +11,34 @@ module Tataki
|
|
12
11
|
|
13
12
|
def initialize(jisyo_types = DEFAULT_JISYO_SUFFIXES)
|
14
13
|
@jisyo_paths = jisyo_types.map{|suffix| Skk::Jisyo.path(suffix) }
|
15
|
-
@
|
14
|
+
@table_cache_path = table_cache_path(jisyo_types.join("_"))
|
16
15
|
|
17
16
|
config_file = File.expand_path(DEFAULT_CONFIG_PATH, __FILE__)
|
18
17
|
config_data = YAML.load_file(config_file)
|
19
18
|
@roman_data = config_data["roman_table"]
|
20
19
|
@ignore_kana = config_data["ignore_kana"]
|
21
|
-
|
20
|
+
tables = setup_jisyo
|
21
|
+
@match_table = tables[0].freeze
|
22
|
+
@okurigana_table = tables[1].freeze
|
22
23
|
end
|
23
24
|
|
24
25
|
def setup_jisyo
|
25
|
-
if File.exist?(@
|
26
|
-
|
26
|
+
if File.exist?(@table_cache_path)
|
27
|
+
tables = Marshal.load(File.read(@table_cache_path))
|
27
28
|
else
|
28
|
-
|
29
|
+
match_table = {}
|
30
|
+
okurigana_table = {}
|
29
31
|
@jisyo_paths.each do |jisyo_path|
|
30
|
-
add_jisyo(
|
32
|
+
add_jisyo(match_table, okurigana_table, jisyo_path)
|
31
33
|
end
|
32
|
-
|
33
|
-
File.
|
34
|
+
tables = [match_table, okurigana_table]
|
35
|
+
File.binwrite(@table_cache_path, Marshal.dump(tables))
|
36
|
+
File.write("#{@table_cache_path}.timestamp", Time.now.to_s)
|
34
37
|
end
|
35
|
-
|
38
|
+
tables
|
36
39
|
end
|
37
40
|
|
38
|
-
def add_jisyo(
|
41
|
+
def add_jisyo(match_table, okurigana_table, jisyo_path)
|
39
42
|
File.open(jisyo_path, "rb:euc-jp") do |jisyo_file|
|
40
43
|
jisyo_file.each_line do |line|
|
41
44
|
next if line.empty? || line[0] == ";" || line.include?("#")
|
@@ -44,8 +47,14 @@ module Tataki
|
|
44
47
|
kana.gsub!(/[^ぁ-んa-z]/, "")
|
45
48
|
next if kana.empty? || !(kana =~ /^[ぁ-ん]+[a-z]?/) || @ignore_kana.include?(kana)
|
46
49
|
kanji_part.gsub!(/^\/|;.+|\/$/, "")
|
50
|
+
|
51
|
+
table = kana =~ /^(.+)([a-z])$/ ? okurigana_table : match_table
|
47
52
|
kanji_part.split("/").each do |kanji|
|
48
|
-
|
53
|
+
kanji_prefix = kanji[0]
|
54
|
+
table_entry = table[kanji_prefix]
|
55
|
+
table[kanji_prefix] = table_entry = [] unless table_entry
|
56
|
+
table_entry.push($2 ? [kanji, $1, $2] : [kanji, kana])
|
57
|
+
table_entry.sort_by!{|entry| - (entry[0].size) }
|
49
58
|
end
|
50
59
|
end
|
51
60
|
end
|
@@ -55,8 +64,8 @@ module Tataki
|
|
55
64
|
File.expand_path("../../../../data/jisyo", __FILE__)
|
56
65
|
end
|
57
66
|
|
58
|
-
def
|
59
|
-
File.join(jisyo_path, "SKK-JISYO.#{name}.
|
67
|
+
def table_cache_path(name)
|
68
|
+
File.join(jisyo_path, "SKK-JISYO.#{name}.table.cache")
|
60
69
|
end
|
61
70
|
|
62
71
|
def jisyo_timestamp(path)
|
@@ -64,61 +73,49 @@ module Tataki
|
|
64
73
|
end
|
65
74
|
|
66
75
|
def to_kana(sentence)
|
67
|
-
_to_kana(sentence, ""
|
76
|
+
_to_kana(sentence, "")
|
68
77
|
end
|
69
78
|
|
70
79
|
private
|
71
80
|
|
72
|
-
def _to_kana(sentence, kana
|
73
|
-
return if trie.empty?
|
81
|
+
def _to_kana(sentence, kana)
|
74
82
|
return kana if sentence.empty?
|
75
83
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
next_set_values = next_set.values
|
83
|
-
okurigana = find_okurigana(next_set_values, next_sentence)
|
84
|
-
next_set_values.reject!{|value| value =~ /[a-z]/ }
|
85
|
-
if okurigana
|
86
|
-
return _to_kana(next_sentence, kana + okurigana, "", @trie)
|
87
|
-
elsif next_set_values.size > 0 && next_set_values.size == next_trie_values.size
|
88
|
-
return _to_kana(next_sentence, kana + next_set_values.sample, "", @trie)
|
89
|
-
end
|
90
|
-
|
91
|
-
if next_sentence.empty?
|
92
|
-
if next_set_values.size > 0
|
93
|
-
return kana + next_set_values.sample
|
94
|
-
elsif through_alphabet
|
95
|
-
return kana + prefix + next_ch
|
96
|
-
end
|
84
|
+
table_entry = find_okurigana_entry(sentence) || find_match_entry(sentence)
|
85
|
+
if table_entry
|
86
|
+
next_kanji = table_entry[0]
|
87
|
+
next_kana = table_entry[1]
|
88
|
+
next_sentence = sentence[next_kanji.size .. -1]
|
89
|
+
return _to_kana(next_sentence, kana + next_kana)
|
97
90
|
end
|
98
91
|
|
99
|
-
|
100
|
-
|
101
|
-
if next_kana
|
102
|
-
return next_kana
|
103
|
-
end
|
92
|
+
return _to_kana(sentence[1 .. -1], kana + sentence[0])
|
93
|
+
end
|
104
94
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
95
|
+
def find_okurigana_entry(sentence)
|
96
|
+
entries = @okurigana_table[sentence[0]]
|
97
|
+
return unless entries
|
98
|
+
|
99
|
+
entries.each do |entry|
|
100
|
+
kanji, yomi, alphabet = *entry
|
101
|
+
next unless sentence.start_with?(kanji)
|
102
|
+
next_ch = sentence[kanji.size]
|
103
|
+
okurigana_candidates = @roman_data[alphabet]
|
104
|
+
next unless okurigana_candidates
|
105
|
+
okurigana_candidates.each do |okurigana|
|
106
|
+
return entry if okurigana == next_ch
|
107
|
+
end
|
111
108
|
end
|
109
|
+
nil
|
112
110
|
end
|
113
111
|
|
114
|
-
def
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
end
|
112
|
+
def find_match_entry(sentence)
|
113
|
+
entries = @match_table[sentence[0]]
|
114
|
+
return unless entries
|
115
|
+
|
116
|
+
entries.each do |entry|
|
117
|
+
kanji, yomi = *entry
|
118
|
+
return entry if sentence.start_with?(kanji)
|
122
119
|
end
|
123
120
|
nil
|
124
121
|
end
|
data/lib/tataki/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -3,7 +3,6 @@ require "spec_helper"
|
|
3
3
|
|
4
4
|
describe Tataki::Converter::Combine do
|
5
5
|
let(:skk_converter) { Tataki::Converter::SkkJisyo.new }
|
6
|
-
let(:roman_converter) { Tataki::Converter::Roman.new }
|
7
6
|
let(:alphabet_converter) { Tataki::Converter::Alphabet.new }
|
8
7
|
|
9
8
|
describe ".to_kana" do
|
@@ -13,14 +12,6 @@ describe Tataki::Converter::Combine do
|
|
13
12
|
end
|
14
13
|
end
|
15
14
|
|
16
|
-
context "when roman + alphabet" do
|
17
|
-
let(:converter) do
|
18
|
-
Tataki::Converter::Combine.new(roman_converter, alphabet_converter)
|
19
|
-
end
|
20
|
-
|
21
|
-
include_examples "converts_kana", "robottotaisennf", "ろぼっとたいせんえふ"
|
22
|
-
end
|
23
|
-
|
24
15
|
context "when skk-jisyo + alphabet" do
|
25
16
|
let(:converter) do
|
26
17
|
Tataki::Converter::Combine.new(skk_converter, alphabet_converter)
|
@@ -2,7 +2,6 @@
|
|
2
2
|
require "spec_helper"
|
3
3
|
|
4
4
|
describe Tataki::Converter::SkkJisyo do
|
5
|
-
|
6
5
|
describe ".to_kana" do
|
7
6
|
shared_examples "converts_kana" do |sentence, kana|
|
8
7
|
it "converts #{sentence.inspect} to #{kana.inspect}" do
|
@@ -29,5 +28,15 @@ describe Tataki::Converter::SkkJisyo do
|
|
29
28
|
include_examples "converts_kana", "漢字", "漢字"
|
30
29
|
include_examples "converts_kana", "半澤直樹", "はんざわなおき"
|
31
30
|
end
|
31
|
+
|
32
|
+
context "with M, jinmei jisyo" do
|
33
|
+
let(:converter) { Tataki::Converter::SkkJisyo.new(%w[M jinmei]) }
|
34
|
+
|
35
|
+
include_examples "converts_kana", "", ""
|
36
|
+
include_examples "converts_kana", "漢字", "かんじ"
|
37
|
+
include_examples "converts_kana", "半澤直樹", "はんざわなおき"
|
38
|
+
include_examples "converts_kana", "半澤直樹倍返し", "はんざわなおきばいかえし"
|
39
|
+
include_examples "converts_kana", "半澤直樹、銀行を買う", "はんざわなおき、ぎんこうをかう"
|
40
|
+
end
|
32
41
|
end
|
33
42
|
end
|
data/spec/tataki_spec.rb
CHANGED
@@ -9,7 +9,6 @@ describe Tataki do
|
|
9
9
|
describe ".converters" do
|
10
10
|
it "returns converters" do
|
11
11
|
expect(Tataki.converters).to match_array([
|
12
|
-
Tataki::Converter::Roman,
|
13
12
|
Tataki::Converter::Alphabet,
|
14
13
|
Tataki::Converter::Combine,
|
15
14
|
Tataki::Converter::SkkJisyo,
|
@@ -18,6 +17,8 @@ describe Tataki do
|
|
18
17
|
end
|
19
18
|
|
20
19
|
describe "String.to_kana" do
|
20
|
+
before { require "tataki" }
|
21
|
+
|
21
22
|
it "converts to kana" do
|
22
23
|
expect("X線研究者".to_kana).to eq("えっくすせんけんきゅうしゃ")
|
23
24
|
end
|
data/tataki.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tataki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hogelog
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.0.5
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: trie
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - '>='
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - '>='
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,22 +123,20 @@ files:
|
|
137
123
|
- LICENSE.txt
|
138
124
|
- README.md
|
139
125
|
- Rakefile
|
126
|
+
- benchmark/001-M_jinmei.rb
|
140
127
|
- data/alphabet.yml
|
141
128
|
- data/jisyo/.gitignore
|
142
|
-
- data/roman.yml
|
143
129
|
- data/skk-jisyo.yml
|
144
130
|
- lib/tataki.rb
|
145
131
|
- lib/tataki/base.rb
|
146
132
|
- lib/tataki/converters.rb
|
147
133
|
- lib/tataki/converters/alphabet.rb
|
148
134
|
- lib/tataki/converters/combine.rb
|
149
|
-
- lib/tataki/converters/roman.rb
|
150
135
|
- lib/tataki/converters/skk_jisyo.rb
|
151
136
|
- lib/tataki/version.rb
|
152
137
|
- spec/spec_helper.rb
|
153
138
|
- spec/tataki/converters/alphabet_spec.rb
|
154
139
|
- spec/tataki/converters/combine_spec.rb
|
155
|
-
- spec/tataki/converters/roman_spec.rb
|
156
140
|
- spec/tataki/converters/skk_jisyo_spec.rb
|
157
141
|
- spec/tataki_spec.rb
|
158
142
|
- tataki.gemspec
|
@@ -184,7 +168,6 @@ test_files:
|
|
184
168
|
- spec/spec_helper.rb
|
185
169
|
- spec/tataki/converters/alphabet_spec.rb
|
186
170
|
- spec/tataki/converters/combine_spec.rb
|
187
|
-
- spec/tataki/converters/roman_spec.rb
|
188
171
|
- spec/tataki/converters/skk_jisyo_spec.rb
|
189
172
|
- spec/tataki_spec.rb
|
190
173
|
has_rdoc:
|
data/data/roman.yml
DELETED
@@ -1,142 +0,0 @@
|
|
1
|
-
table:
|
2
|
-
"a": あ
|
3
|
-
"i": い
|
4
|
-
"u": う
|
5
|
-
"e": え
|
6
|
-
"o": お
|
7
|
-
"ka": か
|
8
|
-
"ki": き
|
9
|
-
"ku": く
|
10
|
-
"ke": け
|
11
|
-
"ko": こ
|
12
|
-
"ga": が
|
13
|
-
"gi": ぎ
|
14
|
-
"gu": ぐ
|
15
|
-
"ge": げ
|
16
|
-
"go": ご
|
17
|
-
"sa": さ
|
18
|
-
"si": し
|
19
|
-
"shi": し
|
20
|
-
"su": す
|
21
|
-
"se": せ
|
22
|
-
"so": そ
|
23
|
-
"za": ざ
|
24
|
-
"zi": じ
|
25
|
-
"ji": じ
|
26
|
-
"zu": ず
|
27
|
-
"ze": ぜ
|
28
|
-
"zo": ぞ
|
29
|
-
"ta": た
|
30
|
-
"ti": ち
|
31
|
-
"chi": ち
|
32
|
-
"tu": つ
|
33
|
-
"tsu": つ
|
34
|
-
"te": て
|
35
|
-
"to": と
|
36
|
-
"da": だ
|
37
|
-
"di": ぢ
|
38
|
-
"du": づ
|
39
|
-
"de": で
|
40
|
-
"do": ど
|
41
|
-
"na": な
|
42
|
-
"ni": に
|
43
|
-
"nu": ぬ
|
44
|
-
"ne": ね
|
45
|
-
"no": の
|
46
|
-
"ha": は
|
47
|
-
"hi": ひ
|
48
|
-
"hu": ふ
|
49
|
-
"fu": ふ
|
50
|
-
"he": へ
|
51
|
-
"ho": ほ
|
52
|
-
"ba": ば
|
53
|
-
"bi": び
|
54
|
-
"bu": ぶ
|
55
|
-
"be": べ
|
56
|
-
"bo": ぼ
|
57
|
-
"pa": ぱ
|
58
|
-
"pi": ぴ
|
59
|
-
"pu": ぷ
|
60
|
-
"pe": ぺ
|
61
|
-
"po": ぽ
|
62
|
-
"ma": ま
|
63
|
-
"mi": み
|
64
|
-
"mu": む
|
65
|
-
"me": め
|
66
|
-
"mo": も
|
67
|
-
"ya": や
|
68
|
-
"yu": ゆ
|
69
|
-
"yo": よ
|
70
|
-
"ra": ら
|
71
|
-
"ri": り
|
72
|
-
"ru": る
|
73
|
-
"re": れ
|
74
|
-
"ro": ろ
|
75
|
-
"wa": わ
|
76
|
-
"wo": を
|
77
|
-
"n": ん
|
78
|
-
"nn": ん
|
79
|
-
"xa": ぁ
|
80
|
-
"la": ぁ
|
81
|
-
"xi": ぃ
|
82
|
-
"li": ぃ
|
83
|
-
"xu": ぅ
|
84
|
-
"lu": ぅ
|
85
|
-
"xe": ぇ
|
86
|
-
"le": ぇ
|
87
|
-
"xo": ぉ
|
88
|
-
"lo": ぉ
|
89
|
-
"kya": きゃ
|
90
|
-
"kyu": きゅ
|
91
|
-
"kyo": きょ
|
92
|
-
"gya": ぎゃ
|
93
|
-
"gyu": ぎゅ
|
94
|
-
"gyo": ぎょ
|
95
|
-
"zya": じゃ
|
96
|
-
"sya": しゃ
|
97
|
-
"sha": しゃ
|
98
|
-
"syu": しゅ
|
99
|
-
"shu": しゅ
|
100
|
-
"syo": しょ
|
101
|
-
"sho": しょ
|
102
|
-
"ja": じゃ
|
103
|
-
"zyu": じゅ
|
104
|
-
"ju": じゅ
|
105
|
-
"zyo": じょ
|
106
|
-
"jo": じょ
|
107
|
-
"tya": ちゃ
|
108
|
-
"cha": ちゃ
|
109
|
-
"tyu": ちゅ
|
110
|
-
"chu": ちゅ
|
111
|
-
"tyo": ちょ
|
112
|
-
"cho": ちょ
|
113
|
-
"dya": ぢゃ
|
114
|
-
"dyu": ぢゅ
|
115
|
-
"dyo": ぢょ
|
116
|
-
"nya": にゃ
|
117
|
-
"nyu": にゅ
|
118
|
-
"nyo": にょ
|
119
|
-
"hya": ひゃ
|
120
|
-
"hyu": ひゅ
|
121
|
-
"hyo": ひょ
|
122
|
-
"bya": びゃ
|
123
|
-
"byu": びゅ
|
124
|
-
"byo": びょ
|
125
|
-
"pya": ぴゃ
|
126
|
-
"pyu": ぴゅ
|
127
|
-
"pyo": ぴょ
|
128
|
-
"mya": みゃ
|
129
|
-
"myu": みゅ
|
130
|
-
"myo": みょ
|
131
|
-
"xya": ゃ
|
132
|
-
"lya": ゃ
|
133
|
-
"xyu": ゅ
|
134
|
-
"lyu": ゅ
|
135
|
-
"xyo": ょ
|
136
|
-
"lyo": ょ
|
137
|
-
"rya": りゃ
|
138
|
-
"ryu": りゅ
|
139
|
-
"ryo": りょ
|
140
|
-
"xwa": ゎ
|
141
|
-
"lwa": ゎ
|
142
|
-
consonant: [k, g, s, j, t, c, d, n, h, f, b, p, m, y, r, w, x, l]
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require "trie"
|
3
|
-
require "yaml"
|
4
|
-
|
5
|
-
module Tataki
|
6
|
-
module Converter
|
7
|
-
class Roman < Base
|
8
|
-
SOKUON = "っ"
|
9
|
-
|
10
|
-
def initialize
|
11
|
-
@trie = Trie.new
|
12
|
-
roman_file = File.expand_path("../../../../data/roman.yml", __FILE__)
|
13
|
-
roman_data = YAML.load_file(roman_file)
|
14
|
-
roman_data["table"].each do |roman, kana|
|
15
|
-
@trie.insert(roman, kana)
|
16
|
-
end
|
17
|
-
@consonant = roman_data["consonant"]
|
18
|
-
@trie.freeze
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_kana(sentence)
|
22
|
-
_to_kana(sentence.downcase, "", "", @trie)
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
|
27
|
-
def _to_kana(sentence, kana, prefix, trie, through_alphabet = true)
|
28
|
-
return if trie.empty?
|
29
|
-
return kana if sentence.empty?
|
30
|
-
|
31
|
-
next_ch = sentence[0]
|
32
|
-
next_sentence = sentence[1..-1]
|
33
|
-
next_trie = trie.find_prefix(next_ch)
|
34
|
-
next_set = next_trie.find([])
|
35
|
-
if next_set.size > 0 && next_set.size == next_trie.size
|
36
|
-
return _to_kana(next_sentence, kana + next_set.values.first, "", @trie)
|
37
|
-
end
|
38
|
-
|
39
|
-
if next_sentence.empty?
|
40
|
-
if next_set.size > 0
|
41
|
-
return kana + prefix + next_set.values.first
|
42
|
-
else
|
43
|
-
return kana + prefix + next_ch
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
next_kana = _to_kana(next_sentence, kana, prefix + next_ch, next_trie, false)
|
48
|
-
|
49
|
-
if next_kana
|
50
|
-
return next_kana
|
51
|
-
end
|
52
|
-
|
53
|
-
if next_set.size > 0
|
54
|
-
return _to_kana(next_sentence, kana + next_set.values.first, "", @trie)
|
55
|
-
elsif @consonant.include?(next_ch) && next_sentence.start_with?(next_ch)
|
56
|
-
return _to_kana(next_sentence, kana + SOKUON, "", @trie)
|
57
|
-
elsif through_alphabet
|
58
|
-
return _to_kana(next_sentence, kana + prefix + next_ch, "", @trie)
|
59
|
-
else
|
60
|
-
return nil
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
Tataki::CONVERTERS << Converter::Roman
|
67
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require "spec_helper"
|
3
|
-
|
4
|
-
describe Tataki::Converter::Roman do
|
5
|
-
let(:converter) { Tataki::Converter::Roman.new }
|
6
|
-
|
7
|
-
describe ".to_kana" do
|
8
|
-
shared_examples "converts_kana" do |sentence, kana|
|
9
|
-
it "converts #{sentence.inspect} to #{kana.inspect}" do
|
10
|
-
expect(converter.to_kana(sentence)).to eq(kana)
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
include_examples "converts_kana", "", ""
|
15
|
-
include_examples "converts_kana", "hoge", "ほげ"
|
16
|
-
include_examples "converts_kana", "hogelog", "ほげぉg"
|
17
|
-
include_examples "converts_kana", "hogge", "ほっげ"
|
18
|
-
include_examples "converts_kana", "hogs", "ほgs"
|
19
|
-
include_examples "converts_kana", "nanka", "なんか"
|
20
|
-
include_examples "converts_kana", "nannnan", "なんなん"
|
21
|
-
include_examples "converts_kana", "nannnann", "なんなん"
|
22
|
-
include_examples "converts_kana", "nannnannsei", "なんなんせい"
|
23
|
-
include_examples "converts_kana", "kukkingu", "くっきんぐ"
|
24
|
-
include_examples "converts_kana", "kukkingu papa", "くっきんぐ ぱぱ"
|
25
|
-
include_examples "converts_kana", "toukyoutokkyokyokakyoku", "とうきょうとっきょきょかきょく"
|
26
|
-
|
27
|
-
include_examples "converts_kana", "kku", "っく"
|
28
|
-
include_examples "converts_kana", ",,", ",,"
|
29
|
-
end
|
30
|
-
end
|