tataki 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/benchmark/001-M_jinmei.rb +17 -0
- data/lib/tataki/converters/skk_jisyo.rb +54 -57
- data/lib/tataki/version.rb +1 -1
- data/spec/spec_helper.rb +2 -1
- data/spec/tataki/converters/combine_spec.rb +0 -9
- data/spec/tataki/converters/skk_jisyo_spec.rb +10 -1
- data/spec/tataki_spec.rb +2 -1
- data/tataki.gemspec +0 -1
- metadata +2 -19
- data/data/roman.yml +0 -142
- data/lib/tataki/converters/roman.rb +0 -67
- data/spec/tataki/converters/roman_spec.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c75759d9be482c52fc40c1b59d543fa709877f9d
|
4
|
+
data.tar.gz: d2f4bc1514cd2eb3d4a8b57286509b2e91442db3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b1cc2f50a0302cb959b6414d56a8c3e565286b9284ec07ecf328caefb6735a23d1aa83f09e1abdf5be208c5811a0b5936cb422beb192119209678a2c45ef599
|
7
|
+
data.tar.gz: 315cf59f199dfa3532293ae19252083ce58a2e305ea0ee3421bebd1fb66f99c0d89c75ea3df93943e7fb3f0060a732c1c4519dbb10aeb76d6313a6d3b5743224
|
data/README.md
CHANGED
@@ -36,11 +36,12 @@ require "tataki/base"
|
|
36
36
|
alphabet_converter = Tataki::Converter::Alphabet.new
|
37
37
|
alphabet_converter.to_kana("abcde") # => "えーびーしーでぃーいー"
|
38
38
|
|
39
|
-
roman_alphabet_converter = Tataki::Converter::Combine.new(Tataki::Converter::Roman.new, Tataki::Converter::Alphabet.new)
|
40
|
-
roman_alphabet_converter.to_kana("robottotaisennf") # => "ろぼっとたいせんえふ"
|
41
|
-
|
42
39
|
skk_converter = Tataki::Converter::SkkJisyo.new
|
43
40
|
skk_converter.to_kana("研究者") # => "けんきゅうしゃ"
|
41
|
+
|
42
|
+
alphabet_skk_converter = Tataki::Converter::Combine.new(Tataki::Converter::Alphabet.new, Tataki::Converter::SkkJisyo.new)
|
43
|
+
alphabet_skk_converter.to_kana("X線研究者") # => "えっくすせんけんきゅうしゃ"
|
44
|
+
|
44
45
|
```
|
45
46
|
|
46
47
|
## TODO
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "benchmark"
|
2
|
+
|
3
|
+
N = 1000
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
6
|
+
|
7
|
+
require 'tataki/base'
|
8
|
+
converter = Tataki::Converter::SkkJisyo.new(%w[M jinmei])
|
9
|
+
|
10
|
+
source = "かな漢字変換" * 100
|
11
|
+
|
12
|
+
puts Benchmark::CAPTION
|
13
|
+
puts Benchmark.measure {
|
14
|
+
N.times do
|
15
|
+
converter.to_kana(source)
|
16
|
+
end
|
17
|
+
}
|
@@ -2,7 +2,6 @@
|
|
2
2
|
require "yaml"
|
3
3
|
require "time"
|
4
4
|
require "skk/jisyo"
|
5
|
-
require "trie"
|
6
5
|
|
7
6
|
module Tataki
|
8
7
|
module Converter
|
@@ -12,30 +11,34 @@ module Tataki
|
|
12
11
|
|
13
12
|
def initialize(jisyo_types = DEFAULT_JISYO_SUFFIXES)
|
14
13
|
@jisyo_paths = jisyo_types.map{|suffix| Skk::Jisyo.path(suffix) }
|
15
|
-
@
|
14
|
+
@table_cache_path = table_cache_path(jisyo_types.join("_"))
|
16
15
|
|
17
16
|
config_file = File.expand_path(DEFAULT_CONFIG_PATH, __FILE__)
|
18
17
|
config_data = YAML.load_file(config_file)
|
19
18
|
@roman_data = config_data["roman_table"]
|
20
19
|
@ignore_kana = config_data["ignore_kana"]
|
21
|
-
|
20
|
+
tables = setup_jisyo
|
21
|
+
@match_table = tables[0].freeze
|
22
|
+
@okurigana_table = tables[1].freeze
|
22
23
|
end
|
23
24
|
|
24
25
|
def setup_jisyo
|
25
|
-
if File.exist?(@
|
26
|
-
|
26
|
+
if File.exist?(@table_cache_path)
|
27
|
+
tables = Marshal.load(File.read(@table_cache_path))
|
27
28
|
else
|
28
|
-
|
29
|
+
match_table = {}
|
30
|
+
okurigana_table = {}
|
29
31
|
@jisyo_paths.each do |jisyo_path|
|
30
|
-
add_jisyo(
|
32
|
+
add_jisyo(match_table, okurigana_table, jisyo_path)
|
31
33
|
end
|
32
|
-
|
33
|
-
File.
|
34
|
+
tables = [match_table, okurigana_table]
|
35
|
+
File.binwrite(@table_cache_path, Marshal.dump(tables))
|
36
|
+
File.write("#{@table_cache_path}.timestamp", Time.now.to_s)
|
34
37
|
end
|
35
|
-
|
38
|
+
tables
|
36
39
|
end
|
37
40
|
|
38
|
-
def add_jisyo(
|
41
|
+
def add_jisyo(match_table, okurigana_table, jisyo_path)
|
39
42
|
File.open(jisyo_path, "rb:euc-jp") do |jisyo_file|
|
40
43
|
jisyo_file.each_line do |line|
|
41
44
|
next if line.empty? || line[0] == ";" || line.include?("#")
|
@@ -44,8 +47,14 @@ module Tataki
|
|
44
47
|
kana.gsub!(/[^ぁ-んa-z]/, "")
|
45
48
|
next if kana.empty? || !(kana =~ /^[ぁ-ん]+[a-z]?/) || @ignore_kana.include?(kana)
|
46
49
|
kanji_part.gsub!(/^\/|;.+|\/$/, "")
|
50
|
+
|
51
|
+
table = kana =~ /^(.+)([a-z])$/ ? okurigana_table : match_table
|
47
52
|
kanji_part.split("/").each do |kanji|
|
48
|
-
|
53
|
+
kanji_prefix = kanji[0]
|
54
|
+
table_entry = table[kanji_prefix]
|
55
|
+
table[kanji_prefix] = table_entry = [] unless table_entry
|
56
|
+
table_entry.push($2 ? [kanji, $1, $2] : [kanji, kana])
|
57
|
+
table_entry.sort_by!{|entry| - (entry[0].size) }
|
49
58
|
end
|
50
59
|
end
|
51
60
|
end
|
@@ -55,8 +64,8 @@ module Tataki
|
|
55
64
|
File.expand_path("../../../../data/jisyo", __FILE__)
|
56
65
|
end
|
57
66
|
|
58
|
-
def
|
59
|
-
File.join(jisyo_path, "SKK-JISYO.#{name}.
|
67
|
+
def table_cache_path(name)
|
68
|
+
File.join(jisyo_path, "SKK-JISYO.#{name}.table.cache")
|
60
69
|
end
|
61
70
|
|
62
71
|
def jisyo_timestamp(path)
|
@@ -64,61 +73,49 @@ module Tataki
|
|
64
73
|
end
|
65
74
|
|
66
75
|
def to_kana(sentence)
|
67
|
-
_to_kana(sentence, ""
|
76
|
+
_to_kana(sentence, "")
|
68
77
|
end
|
69
78
|
|
70
79
|
private
|
71
80
|
|
72
|
-
def _to_kana(sentence, kana
|
73
|
-
return if trie.empty?
|
81
|
+
def _to_kana(sentence, kana)
|
74
82
|
return kana if sentence.empty?
|
75
83
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
next_set_values = next_set.values
|
83
|
-
okurigana = find_okurigana(next_set_values, next_sentence)
|
84
|
-
next_set_values.reject!{|value| value =~ /[a-z]/ }
|
85
|
-
if okurigana
|
86
|
-
return _to_kana(next_sentence, kana + okurigana, "", @trie)
|
87
|
-
elsif next_set_values.size > 0 && next_set_values.size == next_trie_values.size
|
88
|
-
return _to_kana(next_sentence, kana + next_set_values.sample, "", @trie)
|
89
|
-
end
|
90
|
-
|
91
|
-
if next_sentence.empty?
|
92
|
-
if next_set_values.size > 0
|
93
|
-
return kana + next_set_values.sample
|
94
|
-
elsif through_alphabet
|
95
|
-
return kana + prefix + next_ch
|
96
|
-
end
|
84
|
+
table_entry = find_okurigana_entry(sentence) || find_match_entry(sentence)
|
85
|
+
if table_entry
|
86
|
+
next_kanji = table_entry[0]
|
87
|
+
next_kana = table_entry[1]
|
88
|
+
next_sentence = sentence[next_kanji.size .. -1]
|
89
|
+
return _to_kana(next_sentence, kana + next_kana)
|
97
90
|
end
|
98
91
|
|
99
|
-
|
100
|
-
|
101
|
-
if next_kana
|
102
|
-
return next_kana
|
103
|
-
end
|
92
|
+
return _to_kana(sentence[1 .. -1], kana + sentence[0])
|
93
|
+
end
|
104
94
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
95
|
+
def find_okurigana_entry(sentence)
|
96
|
+
entries = @okurigana_table[sentence[0]]
|
97
|
+
return unless entries
|
98
|
+
|
99
|
+
entries.each do |entry|
|
100
|
+
kanji, yomi, alphabet = *entry
|
101
|
+
next unless sentence.start_with?(kanji)
|
102
|
+
next_ch = sentence[kanji.size]
|
103
|
+
okurigana_candidates = @roman_data[alphabet]
|
104
|
+
next unless okurigana_candidates
|
105
|
+
okurigana_candidates.each do |okurigana|
|
106
|
+
return entry if okurigana == next_ch
|
107
|
+
end
|
111
108
|
end
|
109
|
+
nil
|
112
110
|
end
|
113
111
|
|
114
|
-
def
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
end
|
112
|
+
def find_match_entry(sentence)
|
113
|
+
entries = @match_table[sentence[0]]
|
114
|
+
return unless entries
|
115
|
+
|
116
|
+
entries.each do |entry|
|
117
|
+
kanji, yomi = *entry
|
118
|
+
return entry if sentence.start_with?(kanji)
|
122
119
|
end
|
123
120
|
nil
|
124
121
|
end
|
data/lib/tataki/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -3,7 +3,6 @@ require "spec_helper"
|
|
3
3
|
|
4
4
|
describe Tataki::Converter::Combine do
|
5
5
|
let(:skk_converter) { Tataki::Converter::SkkJisyo.new }
|
6
|
-
let(:roman_converter) { Tataki::Converter::Roman.new }
|
7
6
|
let(:alphabet_converter) { Tataki::Converter::Alphabet.new }
|
8
7
|
|
9
8
|
describe ".to_kana" do
|
@@ -13,14 +12,6 @@ describe Tataki::Converter::Combine do
|
|
13
12
|
end
|
14
13
|
end
|
15
14
|
|
16
|
-
context "when roman + alphabet" do
|
17
|
-
let(:converter) do
|
18
|
-
Tataki::Converter::Combine.new(roman_converter, alphabet_converter)
|
19
|
-
end
|
20
|
-
|
21
|
-
include_examples "converts_kana", "robottotaisennf", "ろぼっとたいせんえふ"
|
22
|
-
end
|
23
|
-
|
24
15
|
context "when skk-jisyo + alphabet" do
|
25
16
|
let(:converter) do
|
26
17
|
Tataki::Converter::Combine.new(skk_converter, alphabet_converter)
|
@@ -2,7 +2,6 @@
|
|
2
2
|
require "spec_helper"
|
3
3
|
|
4
4
|
describe Tataki::Converter::SkkJisyo do
|
5
|
-
|
6
5
|
describe ".to_kana" do
|
7
6
|
shared_examples "converts_kana" do |sentence, kana|
|
8
7
|
it "converts #{sentence.inspect} to #{kana.inspect}" do
|
@@ -29,5 +28,15 @@ describe Tataki::Converter::SkkJisyo do
|
|
29
28
|
include_examples "converts_kana", "漢字", "漢字"
|
30
29
|
include_examples "converts_kana", "半澤直樹", "はんざわなおき"
|
31
30
|
end
|
31
|
+
|
32
|
+
context "with M, jinmei jisyo" do
|
33
|
+
let(:converter) { Tataki::Converter::SkkJisyo.new(%w[M jinmei]) }
|
34
|
+
|
35
|
+
include_examples "converts_kana", "", ""
|
36
|
+
include_examples "converts_kana", "漢字", "かんじ"
|
37
|
+
include_examples "converts_kana", "半澤直樹", "はんざわなおき"
|
38
|
+
include_examples "converts_kana", "半澤直樹倍返し", "はんざわなおきばいかえし"
|
39
|
+
include_examples "converts_kana", "半澤直樹、銀行を買う", "はんざわなおき、ぎんこうをかう"
|
40
|
+
end
|
32
41
|
end
|
33
42
|
end
|
data/spec/tataki_spec.rb
CHANGED
@@ -9,7 +9,6 @@ describe Tataki do
|
|
9
9
|
describe ".converters" do
|
10
10
|
it "returns converters" do
|
11
11
|
expect(Tataki.converters).to match_array([
|
12
|
-
Tataki::Converter::Roman,
|
13
12
|
Tataki::Converter::Alphabet,
|
14
13
|
Tataki::Converter::Combine,
|
15
14
|
Tataki::Converter::SkkJisyo,
|
@@ -18,6 +17,8 @@ describe Tataki do
|
|
18
17
|
end
|
19
18
|
|
20
19
|
describe "String.to_kana" do
|
20
|
+
before { require "tataki" }
|
21
|
+
|
21
22
|
it "converts to kana" do
|
22
23
|
expect("X線研究者".to_kana).to eq("えっくすせんけんきゅうしゃ")
|
23
24
|
end
|
data/tataki.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tataki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hogelog
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.0.5
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: trie
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - '>='
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - '>='
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,22 +123,20 @@ files:
|
|
137
123
|
- LICENSE.txt
|
138
124
|
- README.md
|
139
125
|
- Rakefile
|
126
|
+
- benchmark/001-M_jinmei.rb
|
140
127
|
- data/alphabet.yml
|
141
128
|
- data/jisyo/.gitignore
|
142
|
-
- data/roman.yml
|
143
129
|
- data/skk-jisyo.yml
|
144
130
|
- lib/tataki.rb
|
145
131
|
- lib/tataki/base.rb
|
146
132
|
- lib/tataki/converters.rb
|
147
133
|
- lib/tataki/converters/alphabet.rb
|
148
134
|
- lib/tataki/converters/combine.rb
|
149
|
-
- lib/tataki/converters/roman.rb
|
150
135
|
- lib/tataki/converters/skk_jisyo.rb
|
151
136
|
- lib/tataki/version.rb
|
152
137
|
- spec/spec_helper.rb
|
153
138
|
- spec/tataki/converters/alphabet_spec.rb
|
154
139
|
- spec/tataki/converters/combine_spec.rb
|
155
|
-
- spec/tataki/converters/roman_spec.rb
|
156
140
|
- spec/tataki/converters/skk_jisyo_spec.rb
|
157
141
|
- spec/tataki_spec.rb
|
158
142
|
- tataki.gemspec
|
@@ -184,7 +168,6 @@ test_files:
|
|
184
168
|
- spec/spec_helper.rb
|
185
169
|
- spec/tataki/converters/alphabet_spec.rb
|
186
170
|
- spec/tataki/converters/combine_spec.rb
|
187
|
-
- spec/tataki/converters/roman_spec.rb
|
188
171
|
- spec/tataki/converters/skk_jisyo_spec.rb
|
189
172
|
- spec/tataki_spec.rb
|
190
173
|
has_rdoc:
|
data/data/roman.yml
DELETED
@@ -1,142 +0,0 @@
|
|
1
|
-
table:
|
2
|
-
"a": あ
|
3
|
-
"i": い
|
4
|
-
"u": う
|
5
|
-
"e": え
|
6
|
-
"o": お
|
7
|
-
"ka": か
|
8
|
-
"ki": き
|
9
|
-
"ku": く
|
10
|
-
"ke": け
|
11
|
-
"ko": こ
|
12
|
-
"ga": が
|
13
|
-
"gi": ぎ
|
14
|
-
"gu": ぐ
|
15
|
-
"ge": げ
|
16
|
-
"go": ご
|
17
|
-
"sa": さ
|
18
|
-
"si": し
|
19
|
-
"shi": し
|
20
|
-
"su": す
|
21
|
-
"se": せ
|
22
|
-
"so": そ
|
23
|
-
"za": ざ
|
24
|
-
"zi": じ
|
25
|
-
"ji": じ
|
26
|
-
"zu": ず
|
27
|
-
"ze": ぜ
|
28
|
-
"zo": ぞ
|
29
|
-
"ta": た
|
30
|
-
"ti": ち
|
31
|
-
"chi": ち
|
32
|
-
"tu": つ
|
33
|
-
"tsu": つ
|
34
|
-
"te": て
|
35
|
-
"to": と
|
36
|
-
"da": だ
|
37
|
-
"di": ぢ
|
38
|
-
"du": づ
|
39
|
-
"de": で
|
40
|
-
"do": ど
|
41
|
-
"na": な
|
42
|
-
"ni": に
|
43
|
-
"nu": ぬ
|
44
|
-
"ne": ね
|
45
|
-
"no": の
|
46
|
-
"ha": は
|
47
|
-
"hi": ひ
|
48
|
-
"hu": ふ
|
49
|
-
"fu": ふ
|
50
|
-
"he": へ
|
51
|
-
"ho": ほ
|
52
|
-
"ba": ば
|
53
|
-
"bi": び
|
54
|
-
"bu": ぶ
|
55
|
-
"be": べ
|
56
|
-
"bo": ぼ
|
57
|
-
"pa": ぱ
|
58
|
-
"pi": ぴ
|
59
|
-
"pu": ぷ
|
60
|
-
"pe": ぺ
|
61
|
-
"po": ぽ
|
62
|
-
"ma": ま
|
63
|
-
"mi": み
|
64
|
-
"mu": む
|
65
|
-
"me": め
|
66
|
-
"mo": も
|
67
|
-
"ya": や
|
68
|
-
"yu": ゆ
|
69
|
-
"yo": よ
|
70
|
-
"ra": ら
|
71
|
-
"ri": り
|
72
|
-
"ru": る
|
73
|
-
"re": れ
|
74
|
-
"ro": ろ
|
75
|
-
"wa": わ
|
76
|
-
"wo": を
|
77
|
-
"n": ん
|
78
|
-
"nn": ん
|
79
|
-
"xa": ぁ
|
80
|
-
"la": ぁ
|
81
|
-
"xi": ぃ
|
82
|
-
"li": ぃ
|
83
|
-
"xu": ぅ
|
84
|
-
"lu": ぅ
|
85
|
-
"xe": ぇ
|
86
|
-
"le": ぇ
|
87
|
-
"xo": ぉ
|
88
|
-
"lo": ぉ
|
89
|
-
"kya": きゃ
|
90
|
-
"kyu": きゅ
|
91
|
-
"kyo": きょ
|
92
|
-
"gya": ぎゃ
|
93
|
-
"gyu": ぎゅ
|
94
|
-
"gyo": ぎょ
|
95
|
-
"zya": じゃ
|
96
|
-
"sya": しゃ
|
97
|
-
"sha": しゃ
|
98
|
-
"syu": しゅ
|
99
|
-
"shu": しゅ
|
100
|
-
"syo": しょ
|
101
|
-
"sho": しょ
|
102
|
-
"ja": じゃ
|
103
|
-
"zyu": じゅ
|
104
|
-
"ju": じゅ
|
105
|
-
"zyo": じょ
|
106
|
-
"jo": じょ
|
107
|
-
"tya": ちゃ
|
108
|
-
"cha": ちゃ
|
109
|
-
"tyu": ちゅ
|
110
|
-
"chu": ちゅ
|
111
|
-
"tyo": ちょ
|
112
|
-
"cho": ちょ
|
113
|
-
"dya": ぢゃ
|
114
|
-
"dyu": ぢゅ
|
115
|
-
"dyo": ぢょ
|
116
|
-
"nya": にゃ
|
117
|
-
"nyu": にゅ
|
118
|
-
"nyo": にょ
|
119
|
-
"hya": ひゃ
|
120
|
-
"hyu": ひゅ
|
121
|
-
"hyo": ひょ
|
122
|
-
"bya": びゃ
|
123
|
-
"byu": びゅ
|
124
|
-
"byo": びょ
|
125
|
-
"pya": ぴゃ
|
126
|
-
"pyu": ぴゅ
|
127
|
-
"pyo": ぴょ
|
128
|
-
"mya": みゃ
|
129
|
-
"myu": みゅ
|
130
|
-
"myo": みょ
|
131
|
-
"xya": ゃ
|
132
|
-
"lya": ゃ
|
133
|
-
"xyu": ゅ
|
134
|
-
"lyu": ゅ
|
135
|
-
"xyo": ょ
|
136
|
-
"lyo": ょ
|
137
|
-
"rya": りゃ
|
138
|
-
"ryu": りゅ
|
139
|
-
"ryo": りょ
|
140
|
-
"xwa": ゎ
|
141
|
-
"lwa": ゎ
|
142
|
-
consonant: [k, g, s, j, t, c, d, n, h, f, b, p, m, y, r, w, x, l]
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require "trie"
|
3
|
-
require "yaml"
|
4
|
-
|
5
|
-
module Tataki
|
6
|
-
module Converter
|
7
|
-
class Roman < Base
|
8
|
-
SOKUON = "っ"
|
9
|
-
|
10
|
-
def initialize
|
11
|
-
@trie = Trie.new
|
12
|
-
roman_file = File.expand_path("../../../../data/roman.yml", __FILE__)
|
13
|
-
roman_data = YAML.load_file(roman_file)
|
14
|
-
roman_data["table"].each do |roman, kana|
|
15
|
-
@trie.insert(roman, kana)
|
16
|
-
end
|
17
|
-
@consonant = roman_data["consonant"]
|
18
|
-
@trie.freeze
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_kana(sentence)
|
22
|
-
_to_kana(sentence.downcase, "", "", @trie)
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
|
27
|
-
def _to_kana(sentence, kana, prefix, trie, through_alphabet = true)
|
28
|
-
return if trie.empty?
|
29
|
-
return kana if sentence.empty?
|
30
|
-
|
31
|
-
next_ch = sentence[0]
|
32
|
-
next_sentence = sentence[1..-1]
|
33
|
-
next_trie = trie.find_prefix(next_ch)
|
34
|
-
next_set = next_trie.find([])
|
35
|
-
if next_set.size > 0 && next_set.size == next_trie.size
|
36
|
-
return _to_kana(next_sentence, kana + next_set.values.first, "", @trie)
|
37
|
-
end
|
38
|
-
|
39
|
-
if next_sentence.empty?
|
40
|
-
if next_set.size > 0
|
41
|
-
return kana + prefix + next_set.values.first
|
42
|
-
else
|
43
|
-
return kana + prefix + next_ch
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
next_kana = _to_kana(next_sentence, kana, prefix + next_ch, next_trie, false)
|
48
|
-
|
49
|
-
if next_kana
|
50
|
-
return next_kana
|
51
|
-
end
|
52
|
-
|
53
|
-
if next_set.size > 0
|
54
|
-
return _to_kana(next_sentence, kana + next_set.values.first, "", @trie)
|
55
|
-
elsif @consonant.include?(next_ch) && next_sentence.start_with?(next_ch)
|
56
|
-
return _to_kana(next_sentence, kana + SOKUON, "", @trie)
|
57
|
-
elsif through_alphabet
|
58
|
-
return _to_kana(next_sentence, kana + prefix + next_ch, "", @trie)
|
59
|
-
else
|
60
|
-
return nil
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
Tataki::CONVERTERS << Converter::Roman
|
67
|
-
end
|
@@ -1,30 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require "spec_helper"
|
3
|
-
|
4
|
-
describe Tataki::Converter::Roman do
|
5
|
-
let(:converter) { Tataki::Converter::Roman.new }
|
6
|
-
|
7
|
-
describe ".to_kana" do
|
8
|
-
shared_examples "converts_kana" do |sentence, kana|
|
9
|
-
it "converts #{sentence.inspect} to #{kana.inspect}" do
|
10
|
-
expect(converter.to_kana(sentence)).to eq(kana)
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
include_examples "converts_kana", "", ""
|
15
|
-
include_examples "converts_kana", "hoge", "ほげ"
|
16
|
-
include_examples "converts_kana", "hogelog", "ほげぉg"
|
17
|
-
include_examples "converts_kana", "hogge", "ほっげ"
|
18
|
-
include_examples "converts_kana", "hogs", "ほgs"
|
19
|
-
include_examples "converts_kana", "nanka", "なんか"
|
20
|
-
include_examples "converts_kana", "nannnan", "なんなん"
|
21
|
-
include_examples "converts_kana", "nannnann", "なんなん"
|
22
|
-
include_examples "converts_kana", "nannnannsei", "なんなんせい"
|
23
|
-
include_examples "converts_kana", "kukkingu", "くっきんぐ"
|
24
|
-
include_examples "converts_kana", "kukkingu papa", "くっきんぐ ぱぱ"
|
25
|
-
include_examples "converts_kana", "toukyoutokkyokyokakyoku", "とうきょうとっきょきょかきょく"
|
26
|
-
|
27
|
-
include_examples "converts_kana", "kku", "っく"
|
28
|
-
include_examples "converts_kana", ",,", ",,"
|
29
|
-
end
|
30
|
-
end
|