furigana 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/furigana/reader.rb +8 -21
- data/lib/furigana/version.rb +1 -1
- metadata +1 -1
data/lib/furigana/reader.rb
CHANGED
@@ -3,55 +3,42 @@ module Furigana
|
|
3
3
|
class Reader
|
4
4
|
def reading(text)
|
5
5
|
Mecab.tokenize(text).inject([]) do |list, token|
|
6
|
-
with_furigana = add_furigana(
|
7
|
-
list
|
6
|
+
with_furigana = add_furigana(token)
|
7
|
+
list += with_furigana if with_furigana
|
8
8
|
list
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def yomi_to_hiragana(token)
|
15
|
-
token[:reading] = choose_reading(token[:surface_form], token[:reading])
|
16
|
-
token
|
17
|
-
end
|
18
|
-
|
19
14
|
def k2h(k)
|
20
15
|
return nil if k.nil?
|
21
16
|
NKF.nkf("-h1 -w", k)
|
22
17
|
end
|
23
18
|
|
24
|
-
def kana?(str)
|
25
|
-
/^[ぁ-んァ-ンー]+$/.match(str)
|
26
|
-
end
|
27
|
-
|
28
|
-
def choose_reading(surface_form, reading)
|
29
|
-
!kana?(surface_form) ? k2h(reading) : surface_form
|
30
|
-
end
|
31
|
-
|
32
19
|
def sdiff(first, second)
|
33
20
|
Diff::LCS.sdiff(first, second)
|
34
21
|
end
|
35
22
|
|
36
23
|
def diff_token_surface_form_and_reading(token)
|
37
|
-
sdiff(token[:surface_form], token[:reading])
|
24
|
+
sdiff(k2h(token[:surface_form]), k2h(token[:reading]))
|
38
25
|
end
|
39
26
|
|
40
27
|
def add_furigana(token)
|
41
28
|
states = { kanji_and_yomi: '!', yomi: '+', kana: '=' }
|
42
29
|
kanji, yomi = 0, 1
|
43
30
|
|
44
|
-
list =
|
31
|
+
list = []
|
45
32
|
on_kanji = false
|
46
33
|
diff_token_surface_form_and_reading(token).each do |part|
|
47
34
|
case part.action
|
48
35
|
when states[:kanji_and_yomi]
|
49
|
-
list
|
50
|
-
list[kanji] += part.old_element
|
51
|
-
list[yomi] += part.new_element
|
36
|
+
list.push ['',''] unless on_kanji
|
37
|
+
list.last[kanji] += part.old_element
|
38
|
+
list.last[yomi] += part.new_element
|
52
39
|
on_kanji = true
|
53
40
|
when states[:yomi]
|
54
|
-
list[yomi] += part.new_element
|
41
|
+
list.last[yomi] += part.new_element
|
55
42
|
when states[:kana]
|
56
43
|
on_kanji = false
|
57
44
|
end
|
data/lib/furigana/version.rb
CHANGED