amakanize 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/CHANGELOG.md +7 -0
- data/lib/amakanize/author_name.rb +15 -173
- data/lib/amakanize/filters/base_filter.rb +11 -0
- data/lib/amakanize/filters/html_unescape_filter.rb +14 -0
- data/lib/amakanize/filters/normalization_filter.rb +183 -0
- data/lib/amakanize/filters/parentheses_deletion_filter.rb +12 -0
- data/lib/amakanize/filters/role_name_deletion_filter.rb +18 -0
- data/lib/amakanize/filters/strip_filter.rb +12 -0
- data/lib/amakanize/filters/trailing_payload_deletion_filter.rb +18 -0
- data/lib/amakanize/version.rb +1 -1
- data/lib/amakanize.rb +7 -0
- metadata +8 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8d34cba3811141a46de4311a4842da44b06fbf3
|
4
|
+
data.tar.gz: b717fcc757fead1743d1fe644a96f50243a15bdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 113dd0b8d000dadb813814604d02f2db94d37f087ba2288ccdcae009d14b631ed0ccc9cc354b1aebe4af0daa5c244d1e8c703436e67af8a72f741b55856c9fad
|
7
|
+
data.tar.gz: 167decb78e0f2628df666e3a1f906198a8974d098e28f498dc2642fea43ef2d842ed9dbd3239bce5818d26ac129bbd1ac5755d8771eca9e45d0715d8463833f4
|
data/.rspec
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,173 +1,18 @@
|
|
1
|
-
require "active_support"
|
2
|
-
|
3
1
|
module Amakanize
|
4
2
|
class AuthorName
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
➓
|
19
|
-
⑩
|
20
|
-
⓫
|
21
|
-
⑪
|
22
|
-
⓬
|
23
|
-
⑫
|
24
|
-
⑬
|
25
|
-
⓭
|
26
|
-
⑭
|
27
|
-
⓮
|
28
|
-
⑮
|
29
|
-
⓯
|
30
|
-
⑯
|
31
|
-
⓰
|
32
|
-
⓱
|
33
|
-
⑰
|
34
|
-
⓲
|
35
|
-
⑱
|
36
|
-
⑲
|
37
|
-
⓳
|
38
|
-
❷
|
39
|
-
⓶
|
40
|
-
②
|
41
|
-
➁
|
42
|
-
➋
|
43
|
-
➁
|
44
|
-
⓴
|
45
|
-
⑳
|
46
|
-
㉑
|
47
|
-
㉒
|
48
|
-
㉓
|
49
|
-
㉔
|
50
|
-
㉕
|
51
|
-
㉖
|
52
|
-
㉗
|
53
|
-
㉘
|
54
|
-
㉙
|
55
|
-
➌
|
56
|
-
➂
|
57
|
-
⓷
|
58
|
-
❸
|
59
|
-
③
|
60
|
-
➂
|
61
|
-
㉚
|
62
|
-
㉛
|
63
|
-
㉜
|
64
|
-
㉝
|
65
|
-
㉞
|
66
|
-
㉟
|
67
|
-
㊱
|
68
|
-
㊲
|
69
|
-
㊳
|
70
|
-
㊴
|
71
|
-
➃
|
72
|
-
➍
|
73
|
-
⓸
|
74
|
-
④
|
75
|
-
❹
|
76
|
-
➃
|
77
|
-
㊵
|
78
|
-
㊶
|
79
|
-
㊷
|
80
|
-
㊸
|
81
|
-
㊹
|
82
|
-
㊺
|
83
|
-
㊻
|
84
|
-
㊼
|
85
|
-
㊽
|
86
|
-
㊾
|
87
|
-
⑤
|
88
|
-
⓹
|
89
|
-
➄
|
90
|
-
➄
|
91
|
-
❺
|
92
|
-
➎
|
93
|
-
㊿
|
94
|
-
⑥
|
95
|
-
➅
|
96
|
-
❻
|
97
|
-
➏
|
98
|
-
⓺
|
99
|
-
➅
|
100
|
-
➆
|
101
|
-
⑦
|
102
|
-
➐
|
103
|
-
⓻
|
104
|
-
➆
|
105
|
-
❼
|
106
|
-
⑧
|
107
|
-
➇
|
108
|
-
➇
|
109
|
-
❽
|
110
|
-
⓼
|
111
|
-
➑
|
112
|
-
❾
|
113
|
-
⑨
|
114
|
-
➈
|
115
|
-
⓽
|
116
|
-
➒
|
117
|
-
➈
|
118
|
-
ⓐ
|
119
|
-
Ⓐ
|
120
|
-
ⓑ
|
121
|
-
Ⓑ
|
122
|
-
ⓒ
|
123
|
-
Ⓒ
|
124
|
-
ⓓ
|
125
|
-
Ⓓ
|
126
|
-
ⓔ
|
127
|
-
Ⓔ
|
128
|
-
ⓕ
|
129
|
-
Ⓕ
|
130
|
-
ⓖ
|
131
|
-
Ⓖ
|
132
|
-
ⓗ
|
133
|
-
Ⓗ
|
134
|
-
ⓘ
|
135
|
-
Ⓘ
|
136
|
-
ⓙ
|
137
|
-
Ⓙ
|
138
|
-
ⓚ
|
139
|
-
Ⓚ
|
140
|
-
ⓛ
|
141
|
-
Ⓛ
|
142
|
-
ⓜ
|
143
|
-
Ⓜ
|
144
|
-
ⓝ
|
145
|
-
Ⓝ
|
146
|
-
ⓞ
|
147
|
-
Ⓞ
|
148
|
-
ⓟ
|
149
|
-
Ⓟ
|
150
|
-
ⓠ
|
151
|
-
Ⓠ
|
152
|
-
ⓡ
|
153
|
-
Ⓡ
|
154
|
-
ⓢ
|
155
|
-
Ⓢ
|
156
|
-
ⓣ
|
157
|
-
Ⓣ
|
158
|
-
ⓤ
|
159
|
-
Ⓤ
|
160
|
-
ⓥ
|
161
|
-
Ⓥ
|
162
|
-
ⓦ
|
163
|
-
Ⓦ
|
164
|
-
ⓧ
|
165
|
-
Ⓧ
|
166
|
-
ⓨ
|
167
|
-
Ⓨ
|
168
|
-
ⓩ
|
169
|
-
Ⓩ
|
170
|
-
)
|
3
|
+
class << self
|
4
|
+
# @return [Array<Amakanize::Filters::BaseFilter>]
|
5
|
+
def filters
|
6
|
+
@filters ||= [
|
7
|
+
::Amakanize::Filters::HtmlUnescapeFilter.new,
|
8
|
+
::Amakanize::Filters::NormalizationFilter.new,
|
9
|
+
::Amakanize::Filters::ParenthesesDeletionFilter.new,
|
10
|
+
::Amakanize::Filters::RoleNameDeletionFilter.new,
|
11
|
+
::Amakanize::Filters::StripFilter.new,
|
12
|
+
::Amakanize::Filters::TrailingPayloadDeletionFilter.new,
|
13
|
+
]
|
14
|
+
end
|
15
|
+
end
|
171
16
|
|
172
17
|
# @param raw [String]
|
173
18
|
def initialize(raw)
|
@@ -176,11 +21,8 @@ module Amakanize
|
|
176
21
|
|
177
22
|
# @note Override
|
178
23
|
def to_s
|
179
|
-
|
180
|
-
|
181
|
-
half_size_alphanumerics = ::ActiveSupport::Multibyte::Unicode.normalize(matched_string)
|
182
|
-
"(#{half_size_alphanumerics})"
|
183
|
-
end
|
24
|
+
self.class.filters.inject(@raw) do |result, filter|
|
25
|
+
filter.call(result)
|
184
26
|
end
|
185
27
|
end
|
186
28
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "cgi"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class HtmlUnescapeFilter < BaseFilter
|
6
|
+
# @note Override
|
7
|
+
# @param string [String] e.g. `"&lt;ハノカゲ&gt;"`
|
8
|
+
# @return [String] e.g. `"<ハノカゲ>"`
|
9
|
+
def call(string)
|
10
|
+
::CGI.unescapeHTML(string)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require "active_support"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class NormalizationFilter < BaseFilter
|
6
|
+
ENCLOSED_ALPHANUMERICS_COLLECTION = %w(
|
7
|
+
⓿
|
8
|
+
⓪
|
9
|
+
➀
|
10
|
+
➊
|
11
|
+
➀
|
12
|
+
❶
|
13
|
+
①
|
14
|
+
⓵
|
15
|
+
⓾
|
16
|
+
➉
|
17
|
+
❿
|
18
|
+
➉
|
19
|
+
➓
|
20
|
+
⑩
|
21
|
+
⓫
|
22
|
+
⑪
|
23
|
+
⓬
|
24
|
+
⑫
|
25
|
+
⑬
|
26
|
+
⓭
|
27
|
+
⑭
|
28
|
+
⓮
|
29
|
+
⑮
|
30
|
+
⓯
|
31
|
+
⑯
|
32
|
+
⓰
|
33
|
+
⓱
|
34
|
+
⑰
|
35
|
+
⓲
|
36
|
+
⑱
|
37
|
+
⑲
|
38
|
+
⓳
|
39
|
+
❷
|
40
|
+
⓶
|
41
|
+
②
|
42
|
+
➁
|
43
|
+
➋
|
44
|
+
➁
|
45
|
+
⓴
|
46
|
+
⑳
|
47
|
+
㉑
|
48
|
+
㉒
|
49
|
+
㉓
|
50
|
+
㉔
|
51
|
+
㉕
|
52
|
+
㉖
|
53
|
+
㉗
|
54
|
+
㉘
|
55
|
+
㉙
|
56
|
+
➌
|
57
|
+
➂
|
58
|
+
⓷
|
59
|
+
❸
|
60
|
+
③
|
61
|
+
➂
|
62
|
+
㉚
|
63
|
+
㉛
|
64
|
+
㉜
|
65
|
+
㉝
|
66
|
+
㉞
|
67
|
+
㉟
|
68
|
+
㊱
|
69
|
+
㊲
|
70
|
+
㊳
|
71
|
+
㊴
|
72
|
+
➃
|
73
|
+
➍
|
74
|
+
⓸
|
75
|
+
④
|
76
|
+
❹
|
77
|
+
➃
|
78
|
+
㊵
|
79
|
+
㊶
|
80
|
+
㊷
|
81
|
+
㊸
|
82
|
+
㊹
|
83
|
+
㊺
|
84
|
+
㊻
|
85
|
+
㊼
|
86
|
+
㊽
|
87
|
+
㊾
|
88
|
+
⑤
|
89
|
+
⓹
|
90
|
+
➄
|
91
|
+
➄
|
92
|
+
❺
|
93
|
+
➎
|
94
|
+
㊿
|
95
|
+
⑥
|
96
|
+
➅
|
97
|
+
❻
|
98
|
+
➏
|
99
|
+
⓺
|
100
|
+
➅
|
101
|
+
➆
|
102
|
+
⑦
|
103
|
+
➐
|
104
|
+
⓻
|
105
|
+
➆
|
106
|
+
❼
|
107
|
+
⑧
|
108
|
+
➇
|
109
|
+
➇
|
110
|
+
❽
|
111
|
+
⓼
|
112
|
+
➑
|
113
|
+
❾
|
114
|
+
⑨
|
115
|
+
➈
|
116
|
+
⓽
|
117
|
+
➒
|
118
|
+
➈
|
119
|
+
ⓐ
|
120
|
+
Ⓐ
|
121
|
+
ⓑ
|
122
|
+
Ⓑ
|
123
|
+
ⓒ
|
124
|
+
Ⓒ
|
125
|
+
ⓓ
|
126
|
+
Ⓓ
|
127
|
+
ⓔ
|
128
|
+
Ⓔ
|
129
|
+
ⓕ
|
130
|
+
Ⓕ
|
131
|
+
ⓖ
|
132
|
+
Ⓖ
|
133
|
+
ⓗ
|
134
|
+
Ⓗ
|
135
|
+
ⓘ
|
136
|
+
Ⓘ
|
137
|
+
ⓙ
|
138
|
+
Ⓙ
|
139
|
+
ⓚ
|
140
|
+
Ⓚ
|
141
|
+
ⓛ
|
142
|
+
Ⓛ
|
143
|
+
ⓜ
|
144
|
+
Ⓜ
|
145
|
+
ⓝ
|
146
|
+
Ⓝ
|
147
|
+
ⓞ
|
148
|
+
Ⓞ
|
149
|
+
ⓟ
|
150
|
+
Ⓟ
|
151
|
+
ⓠ
|
152
|
+
Ⓠ
|
153
|
+
ⓡ
|
154
|
+
Ⓡ
|
155
|
+
ⓢ
|
156
|
+
Ⓢ
|
157
|
+
ⓣ
|
158
|
+
Ⓣ
|
159
|
+
ⓤ
|
160
|
+
Ⓤ
|
161
|
+
ⓥ
|
162
|
+
Ⓥ
|
163
|
+
ⓦ
|
164
|
+
Ⓦ
|
165
|
+
ⓧ
|
166
|
+
Ⓧ
|
167
|
+
ⓨ
|
168
|
+
Ⓨ
|
169
|
+
ⓩ
|
170
|
+
Ⓩ
|
171
|
+
)
|
172
|
+
|
173
|
+
# @note Override
|
174
|
+
def call(string)
|
175
|
+
ENCLOSED_ALPHANUMERICS_COLLECTION.inject(string) do |result, enclosed_alphanumerics|
|
176
|
+
result.gsub(enclosed_alphanumerics) do |matched_string|
|
177
|
+
::ActiveSupport::Multibyte::Unicode.normalize(matched_string)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class ParenthesesDeletionFilter < BaseFilter
|
4
|
+
# @note Override
|
5
|
+
# @param string [String] e.g. `"ぽんかん(8)"`
|
6
|
+
# @return [String] e.g. `"ぽんかん8"`
|
7
|
+
def call(string)
|
8
|
+
string.gsub(/\((\d+)\)/, '\1')
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class RoleNameDeletionFilter < BaseFilter
|
4
|
+
ROLE_NAMES = %w(
|
5
|
+
原作
|
6
|
+
原案
|
7
|
+
漫画
|
8
|
+
)
|
9
|
+
|
10
|
+
# @note Override
|
11
|
+
# @param string [String] e.g. `"漫画:ハノカゲ"`
|
12
|
+
# @return [String] e.g. `"ハノカゲ"`
|
13
|
+
def call(string)
|
14
|
+
string.gsub(%r<\A#{::Regexp.union(ROLE_NAMES)}[:/]>, "")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class StripFilter < BaseFilter
|
4
|
+
# @note Override
|
5
|
+
# @param string [String] e.g. `" ハノカゲ "`
|
6
|
+
# @return [String] e.g. `"ハノカゲ"`
|
7
|
+
def call(string)
|
8
|
+
string.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class TrailingPayloadDeletionFilter < BaseFilter
|
4
|
+
PAYLOADS = %w(
|
5
|
+
その他
|
6
|
+
ほか
|
7
|
+
他
|
8
|
+
)
|
9
|
+
|
10
|
+
# @note Override
|
11
|
+
# @param string [String] e.g. `"ハノカゲ ほか"`
|
12
|
+
# @return [String] e.g. `"ハノカゲ"`
|
13
|
+
def call(string)
|
14
|
+
string.gsub(/[[:space:]]+#{::Regexp.union(PAYLOADS)}\z/, "")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/amakanize/version.rb
CHANGED
data/lib/amakanize.rb
CHANGED
@@ -1,3 +1,10 @@
|
|
1
1
|
require "amakanize/author_name"
|
2
|
+
require "amakanize/filters/base_filter"
|
3
|
+
require "amakanize/filters/html_unescape_filter"
|
4
|
+
require "amakanize/filters/normalization_filter"
|
5
|
+
require "amakanize/filters/parentheses_deletion_filter"
|
6
|
+
require "amakanize/filters/role_name_deletion_filter"
|
7
|
+
require "amakanize/filters/strip_filter"
|
8
|
+
require "amakanize/filters/trailing_payload_deletion_filter"
|
2
9
|
require "amakanize/series_name"
|
3
10
|
require "amakanize/version"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amakanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- r7kamura
|
@@ -86,6 +86,13 @@ files:
|
|
86
86
|
- bin/setup
|
87
87
|
- lib/amakanize.rb
|
88
88
|
- lib/amakanize/author_name.rb
|
89
|
+
- lib/amakanize/filters/base_filter.rb
|
90
|
+
- lib/amakanize/filters/html_unescape_filter.rb
|
91
|
+
- lib/amakanize/filters/normalization_filter.rb
|
92
|
+
- lib/amakanize/filters/parentheses_deletion_filter.rb
|
93
|
+
- lib/amakanize/filters/role_name_deletion_filter.rb
|
94
|
+
- lib/amakanize/filters/strip_filter.rb
|
95
|
+
- lib/amakanize/filters/trailing_payload_deletion_filter.rb
|
89
96
|
- lib/amakanize/series_name.rb
|
90
97
|
- lib/amakanize/version.rb
|
91
98
|
homepage: https://github.com/amakan/amakanize
|