utanone 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/utanone/uta.rb +15 -9
- data/lib/utanone/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 803e7b863bce0ef8254b5e37e6faa866b583ae49facb72ec40d4629eaf88cb57
|
4
|
+
data.tar.gz: 4a01df2d0ed0d1ac5d24024d5b8fe8e879f694fb1f0e0dcede8302b8ca7e693e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ce626ab61cf3c16055ca30047020ac66cfa70ce30e19b118cd691c6a0feb1752e848190ae8a0274288ed701f03e1383de5f5c5ff3d09b1481bce3638f63e45f9
|
7
|
+
data.tar.gz: 5c87aff2a79445f38f807fa610b5cb376d5503ab48104678b161316733508d55224db90a223af96d7ee9123d0c90d721a851d54997f34a20df575aa754cb72ef
|
data/CHANGELOG.md
CHANGED
data/lib/utanone/uta.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'nkf'
|
3
4
|
require 'natto'
|
4
5
|
module Utanone
|
5
6
|
class Uta
|
@@ -32,35 +33,36 @@ module Utanone
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def correct(correct_yomigana:) # rubocop:disable Metrics/AbcSize
|
35
|
-
|
36
|
+
converted_correct_yomigana = convert_kana(correct_yomigana)
|
37
|
+
return self if yomigana == converted_correct_yomigana
|
36
38
|
|
37
39
|
# 訂正したよみがなで再作成したUtaインスタンスを作成するので、一旦コピーする
|
38
40
|
corrected_uta = Uta.new(@original_str)
|
39
41
|
|
40
42
|
corrected_uta.parsed_morphemes.each_with_index do |morpheme, i|
|
41
43
|
# 形態素ごとによみがなの修正が必要であれば修正する
|
42
|
-
if
|
44
|
+
if converted_correct_yomigana[0, morpheme[:ruby].size] == morpheme[:ruby]
|
43
45
|
# よみがなが一致したらそのまま処理を続行する
|
44
46
|
# 比較したよみがな部分は訂正済みよみがなから削除する
|
45
|
-
|
47
|
+
converted_correct_yomigana.slice!(0, morpheme[:ruby].size)
|
46
48
|
next
|
47
49
|
else
|
48
50
|
# よみがなが不一致なら修正する
|
49
51
|
next_morpheme = corrected_uta.parsed_morphemes[i + 1]
|
50
52
|
if next_morpheme
|
51
53
|
# 修正済みよみがなから次の形態素に一致する箇所を探すことで修正したい形態素のよみがなを取得する
|
52
|
-
next_morpheme_start =
|
54
|
+
next_morpheme_start = converted_correct_yomigana.index(next_morpheme[:ruby])
|
53
55
|
|
54
56
|
# 一致箇所がなければ修正ができないものとして処理を中断する(よみがな不一致が連続すると修正できない)
|
55
57
|
# TODO: 再帰を使って連続したよみがな不一致も修正できないか
|
56
58
|
break unless next_morpheme_start
|
57
59
|
|
58
60
|
# 取得できた場合は修正する
|
59
|
-
morpheme[:ruby] =
|
60
|
-
|
61
|
+
morpheme[:ruby] = converted_correct_yomigana[0, next_morpheme_start]
|
62
|
+
converted_correct_yomigana.slice!(0, morpheme[:ruby].size)
|
61
63
|
else
|
62
64
|
# 最後の形態素だった時
|
63
|
-
morpheme[:ruby] =
|
65
|
+
morpheme[:ruby] = converted_correct_yomigana
|
64
66
|
end
|
65
67
|
end
|
66
68
|
end
|
@@ -70,7 +72,7 @@ module Utanone
|
|
70
72
|
private
|
71
73
|
|
72
74
|
def parse_to_hash(str, ref_uta)
|
73
|
-
parsed_str_enum = natto.enum_parse(
|
75
|
+
parsed_str_enum = natto.enum_parse(convert_number(str))
|
74
76
|
|
75
77
|
parsed_str_enum.each_with_object([]) do |result, array|
|
76
78
|
next if result.is_eos?
|
@@ -91,11 +93,15 @@ module Utanone
|
|
91
93
|
raise Utanone::ParseError
|
92
94
|
end
|
93
95
|
|
94
|
-
def
|
96
|
+
def convert_number(str)
|
95
97
|
# 半角数字を全角数字にしないと読みが取れないので変換する
|
96
98
|
str.tr('0-9a-zA-Z', '０-９ａ-ｚＡ-Ｚ')
|
97
99
|
end
|
98
100
|
|
101
|
+
def convert_kana(str)
|
102
|
+
NKF.nkf('--katakana -w', str)
|
103
|
+
end
|
104
|
+
|
99
105
|
def separated_element(result)
|
100
106
|
# 形態素
|
101
107
|
word = result.surface
|
data/lib/utanone/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: utanone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuriko1211
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-09
|
11
|
+
date: 2021-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mecab
|