amakanize 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/CHANGELOG.md +7 -0
- data/lib/amakanize/author_name.rb +15 -173
- data/lib/amakanize/filters/base_filter.rb +11 -0
- data/lib/amakanize/filters/html_unescape_filter.rb +14 -0
- data/lib/amakanize/filters/normalization_filter.rb +183 -0
- data/lib/amakanize/filters/parentheses_deletion_filter.rb +12 -0
- data/lib/amakanize/filters/role_name_deletion_filter.rb +18 -0
- data/lib/amakanize/filters/strip_filter.rb +12 -0
- data/lib/amakanize/filters/trailing_payload_deletion_filter.rb +18 -0
- data/lib/amakanize/version.rb +1 -1
- data/lib/amakanize.rb +7 -0
- metadata +8 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8d34cba3811141a46de4311a4842da44b06fbf3
|
4
|
+
data.tar.gz: b717fcc757fead1743d1fe644a96f50243a15bdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 113dd0b8d000dadb813814604d02f2db94d37f087ba2288ccdcae009d14b631ed0ccc9cc354b1aebe4af0daa5c244d1e8c703436e67af8a72f741b55856c9fad
|
7
|
+
data.tar.gz: 167decb78e0f2628df666e3a1f906198a8974d098e28f498dc2642fea43ef2d842ed9dbd3239bce5818d26ac129bbd1ac5755d8771eca9e45d0715d8463833f4
|
data/.rspec
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,173 +1,18 @@
|
|
1
|
-
require "active_support"
|
2
|
-
|
3
1
|
module Amakanize
|
4
2
|
class AuthorName
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
➓
|
19
|
-
⑩
|
20
|
-
⓫
|
21
|
-
⑪
|
22
|
-
⓬
|
23
|
-
⑫
|
24
|
-
⑬
|
25
|
-
⓭
|
26
|
-
⑭
|
27
|
-
⓮
|
28
|
-
⑮
|
29
|
-
⓯
|
30
|
-
⑯
|
31
|
-
⓰
|
32
|
-
⓱
|
33
|
-
⑰
|
34
|
-
⓲
|
35
|
-
⑱
|
36
|
-
⑲
|
37
|
-
⓳
|
38
|
-
❷
|
39
|
-
⓶
|
40
|
-
②
|
41
|
-
➁
|
42
|
-
➋
|
43
|
-
➁
|
44
|
-
⓴
|
45
|
-
⑳
|
46
|
-
㉑
|
47
|
-
㉒
|
48
|
-
㉓
|
49
|
-
㉔
|
50
|
-
㉕
|
51
|
-
㉖
|
52
|
-
㉗
|
53
|
-
㉘
|
54
|
-
㉙
|
55
|
-
➌
|
56
|
-
➂
|
57
|
-
⓷
|
58
|
-
❸
|
59
|
-
③
|
60
|
-
➂
|
61
|
-
㉚
|
62
|
-
㉛
|
63
|
-
㉜
|
64
|
-
㉝
|
65
|
-
㉞
|
66
|
-
㉟
|
67
|
-
㊱
|
68
|
-
㊲
|
69
|
-
㊳
|
70
|
-
㊴
|
71
|
-
➃
|
72
|
-
➍
|
73
|
-
⓸
|
74
|
-
④
|
75
|
-
❹
|
76
|
-
➃
|
77
|
-
㊵
|
78
|
-
㊶
|
79
|
-
㊷
|
80
|
-
㊸
|
81
|
-
㊹
|
82
|
-
㊺
|
83
|
-
㊻
|
84
|
-
㊼
|
85
|
-
㊽
|
86
|
-
㊾
|
87
|
-
⑤
|
88
|
-
⓹
|
89
|
-
➄
|
90
|
-
➄
|
91
|
-
❺
|
92
|
-
➎
|
93
|
-
㊿
|
94
|
-
⑥
|
95
|
-
➅
|
96
|
-
❻
|
97
|
-
➏
|
98
|
-
⓺
|
99
|
-
➅
|
100
|
-
➆
|
101
|
-
⑦
|
102
|
-
➐
|
103
|
-
⓻
|
104
|
-
➆
|
105
|
-
❼
|
106
|
-
⑧
|
107
|
-
➇
|
108
|
-
➇
|
109
|
-
❽
|
110
|
-
⓼
|
111
|
-
➑
|
112
|
-
❾
|
113
|
-
⑨
|
114
|
-
➈
|
115
|
-
⓽
|
116
|
-
➒
|
117
|
-
➈
|
118
|
-
ⓐ
|
119
|
-
Ⓐ
|
120
|
-
ⓑ
|
121
|
-
Ⓑ
|
122
|
-
ⓒ
|
123
|
-
Ⓒ
|
124
|
-
ⓓ
|
125
|
-
Ⓓ
|
126
|
-
ⓔ
|
127
|
-
Ⓔ
|
128
|
-
ⓕ
|
129
|
-
Ⓕ
|
130
|
-
ⓖ
|
131
|
-
Ⓖ
|
132
|
-
ⓗ
|
133
|
-
Ⓗ
|
134
|
-
ⓘ
|
135
|
-
Ⓘ
|
136
|
-
ⓙ
|
137
|
-
Ⓙ
|
138
|
-
ⓚ
|
139
|
-
Ⓚ
|
140
|
-
ⓛ
|
141
|
-
Ⓛ
|
142
|
-
ⓜ
|
143
|
-
Ⓜ
|
144
|
-
ⓝ
|
145
|
-
Ⓝ
|
146
|
-
ⓞ
|
147
|
-
Ⓞ
|
148
|
-
ⓟ
|
149
|
-
Ⓟ
|
150
|
-
ⓠ
|
151
|
-
Ⓠ
|
152
|
-
ⓡ
|
153
|
-
Ⓡ
|
154
|
-
ⓢ
|
155
|
-
Ⓢ
|
156
|
-
ⓣ
|
157
|
-
Ⓣ
|
158
|
-
ⓤ
|
159
|
-
Ⓤ
|
160
|
-
ⓥ
|
161
|
-
Ⓥ
|
162
|
-
ⓦ
|
163
|
-
Ⓦ
|
164
|
-
ⓧ
|
165
|
-
Ⓧ
|
166
|
-
ⓨ
|
167
|
-
Ⓨ
|
168
|
-
ⓩ
|
169
|
-
Ⓩ
|
170
|
-
)
|
3
|
+
class << self
|
4
|
+
# @return [Array<Amakanize::Filters::BaseFilter>]
|
5
|
+
def filters
|
6
|
+
@filters ||= [
|
7
|
+
::Amakanize::Filters::HtmlUnescapeFilter.new,
|
8
|
+
::Amakanize::Filters::NormalizationFilter.new,
|
9
|
+
::Amakanize::Filters::ParenthesesDeletionFilter.new,
|
10
|
+
::Amakanize::Filters::RoleNameDeletionFilter.new,
|
11
|
+
::Amakanize::Filters::StripFilter.new,
|
12
|
+
::Amakanize::Filters::TrailingPayloadDeletionFilter.new,
|
13
|
+
]
|
14
|
+
end
|
15
|
+
end
|
171
16
|
|
172
17
|
# @param raw [String]
|
173
18
|
def initialize(raw)
|
@@ -176,11 +21,8 @@ module Amakanize
|
|
176
21
|
|
177
22
|
# @note Override
|
178
23
|
def to_s
|
179
|
-
|
180
|
-
|
181
|
-
half_size_alphanumerics = ::ActiveSupport::Multibyte::Unicode.normalize(matched_string)
|
182
|
-
"(#{half_size_alphanumerics})"
|
183
|
-
end
|
24
|
+
self.class.filters.inject(@raw) do |result, filter|
|
25
|
+
filter.call(result)
|
184
26
|
end
|
185
27
|
end
|
186
28
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "cgi"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class HtmlUnescapeFilter < BaseFilter
|
6
|
+
# @note Override
|
7
|
+
# @param string [String] e.g. `"<ハノカゲ>"`
|
8
|
+
# @return [String] e.g. `"ハノカゲ"`
|
9
|
+
def call(string)
|
10
|
+
::CGI.unescapeHTML(string)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require "active_support"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class NormalizationFilter < BaseFilter
|
6
|
+
ENCLOSED_ALPHANUMERICS_COLLECTION = %w(
|
7
|
+
⓿
|
8
|
+
⓪
|
9
|
+
➀
|
10
|
+
➊
|
11
|
+
➀
|
12
|
+
❶
|
13
|
+
①
|
14
|
+
⓵
|
15
|
+
⓾
|
16
|
+
➉
|
17
|
+
❿
|
18
|
+
➉
|
19
|
+
➓
|
20
|
+
⑩
|
21
|
+
⓫
|
22
|
+
⑪
|
23
|
+
⓬
|
24
|
+
⑫
|
25
|
+
⑬
|
26
|
+
⓭
|
27
|
+
⑭
|
28
|
+
⓮
|
29
|
+
⑮
|
30
|
+
⓯
|
31
|
+
⑯
|
32
|
+
⓰
|
33
|
+
⓱
|
34
|
+
⑰
|
35
|
+
⓲
|
36
|
+
⑱
|
37
|
+
⑲
|
38
|
+
⓳
|
39
|
+
❷
|
40
|
+
⓶
|
41
|
+
②
|
42
|
+
➁
|
43
|
+
➋
|
44
|
+
➁
|
45
|
+
⓴
|
46
|
+
⑳
|
47
|
+
㉑
|
48
|
+
㉒
|
49
|
+
㉓
|
50
|
+
㉔
|
51
|
+
㉕
|
52
|
+
㉖
|
53
|
+
㉗
|
54
|
+
㉘
|
55
|
+
㉙
|
56
|
+
➌
|
57
|
+
➂
|
58
|
+
⓷
|
59
|
+
❸
|
60
|
+
③
|
61
|
+
➂
|
62
|
+
㉚
|
63
|
+
㉛
|
64
|
+
㉜
|
65
|
+
㉝
|
66
|
+
㉞
|
67
|
+
㉟
|
68
|
+
㊱
|
69
|
+
㊲
|
70
|
+
㊳
|
71
|
+
㊴
|
72
|
+
➃
|
73
|
+
➍
|
74
|
+
⓸
|
75
|
+
④
|
76
|
+
❹
|
77
|
+
➃
|
78
|
+
㊵
|
79
|
+
㊶
|
80
|
+
㊷
|
81
|
+
㊸
|
82
|
+
㊹
|
83
|
+
㊺
|
84
|
+
㊻
|
85
|
+
㊼
|
86
|
+
㊽
|
87
|
+
㊾
|
88
|
+
⑤
|
89
|
+
⓹
|
90
|
+
➄
|
91
|
+
➄
|
92
|
+
❺
|
93
|
+
➎
|
94
|
+
㊿
|
95
|
+
⑥
|
96
|
+
➅
|
97
|
+
❻
|
98
|
+
➏
|
99
|
+
⓺
|
100
|
+
➅
|
101
|
+
➆
|
102
|
+
⑦
|
103
|
+
➐
|
104
|
+
⓻
|
105
|
+
➆
|
106
|
+
❼
|
107
|
+
⑧
|
108
|
+
➇
|
109
|
+
➇
|
110
|
+
❽
|
111
|
+
⓼
|
112
|
+
➑
|
113
|
+
❾
|
114
|
+
⑨
|
115
|
+
➈
|
116
|
+
⓽
|
117
|
+
➒
|
118
|
+
➈
|
119
|
+
ⓐ
|
120
|
+
Ⓐ
|
121
|
+
ⓑ
|
122
|
+
Ⓑ
|
123
|
+
ⓒ
|
124
|
+
Ⓒ
|
125
|
+
ⓓ
|
126
|
+
Ⓓ
|
127
|
+
ⓔ
|
128
|
+
Ⓔ
|
129
|
+
ⓕ
|
130
|
+
Ⓕ
|
131
|
+
ⓖ
|
132
|
+
Ⓖ
|
133
|
+
ⓗ
|
134
|
+
Ⓗ
|
135
|
+
ⓘ
|
136
|
+
Ⓘ
|
137
|
+
ⓙ
|
138
|
+
Ⓙ
|
139
|
+
ⓚ
|
140
|
+
Ⓚ
|
141
|
+
ⓛ
|
142
|
+
Ⓛ
|
143
|
+
ⓜ
|
144
|
+
Ⓜ
|
145
|
+
ⓝ
|
146
|
+
Ⓝ
|
147
|
+
ⓞ
|
148
|
+
Ⓞ
|
149
|
+
ⓟ
|
150
|
+
Ⓟ
|
151
|
+
ⓠ
|
152
|
+
Ⓠ
|
153
|
+
ⓡ
|
154
|
+
Ⓡ
|
155
|
+
ⓢ
|
156
|
+
Ⓢ
|
157
|
+
ⓣ
|
158
|
+
Ⓣ
|
159
|
+
ⓤ
|
160
|
+
Ⓤ
|
161
|
+
ⓥ
|
162
|
+
Ⓥ
|
163
|
+
ⓦ
|
164
|
+
Ⓦ
|
165
|
+
ⓧ
|
166
|
+
Ⓧ
|
167
|
+
ⓨ
|
168
|
+
Ⓨ
|
169
|
+
ⓩ
|
170
|
+
Ⓩ
|
171
|
+
)
|
172
|
+
|
173
|
+
# @note Override
|
174
|
+
def call(string)
|
175
|
+
ENCLOSED_ALPHANUMERICS_COLLECTION.inject(string) do |result, enclosed_alphanumerics|
|
176
|
+
result.gsub(enclosed_alphanumerics) do |matched_string|
|
177
|
+
::ActiveSupport::Multibyte::Unicode.normalize(matched_string)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class ParenthesesDeletionFilter < BaseFilter
|
4
|
+
# @note Override
|
5
|
+
# @param string [String] e.g. `"ぽんかん(8)"`
|
6
|
+
# @return [String] e.g. `"ぽんかん8"`
|
7
|
+
def call(string)
|
8
|
+
string.gsub(/\((\d+)\)/, '\1')
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class RoleNameDeletionFilter < BaseFilter
|
4
|
+
ROLE_NAMES = %w(
|
5
|
+
原作
|
6
|
+
原案
|
7
|
+
漫画
|
8
|
+
)
|
9
|
+
|
10
|
+
# @note Override
|
11
|
+
# @param string [String] e.g. `"漫画:ハノカゲ"`
|
12
|
+
# @return [String] e.g. `"ハノカゲ"`
|
13
|
+
def call(string)
|
14
|
+
string.gsub(%r<\A#{::Regexp.union(ROLE_NAMES)}[:/]>, "")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class StripFilter < BaseFilter
|
4
|
+
# @note Override
|
5
|
+
# @param string [String] e.g. `" ハノカゲ "`
|
6
|
+
# @return [String] e.g. `"ハノカゲ"`
|
7
|
+
def call(string)
|
8
|
+
string.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class TrailingPayloadDeletionFilter < BaseFilter
|
4
|
+
PAYLOADS = %w(
|
5
|
+
その他
|
6
|
+
ほか
|
7
|
+
他
|
8
|
+
)
|
9
|
+
|
10
|
+
# @note Override
|
11
|
+
# @param string [String] e.g. `"ハノカゲ ほか"`
|
12
|
+
# @return [String] e.g. `"ハノカゲ"`
|
13
|
+
def call(string)
|
14
|
+
string.gsub(/[[:space:]]+#{::Regexp.union(PAYLOADS)}\z/, "")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/amakanize/version.rb
CHANGED
data/lib/amakanize.rb
CHANGED
@@ -1,3 +1,10 @@
|
|
1
1
|
require "amakanize/author_name"
|
2
|
+
require "amakanize/filters/base_filter"
|
3
|
+
require "amakanize/filters/html_unescape_filter"
|
4
|
+
require "amakanize/filters/normalization_filter"
|
5
|
+
require "amakanize/filters/parentheses_deletion_filter"
|
6
|
+
require "amakanize/filters/role_name_deletion_filter"
|
7
|
+
require "amakanize/filters/strip_filter"
|
8
|
+
require "amakanize/filters/trailing_payload_deletion_filter"
|
2
9
|
require "amakanize/series_name"
|
3
10
|
require "amakanize/version"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amakanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- r7kamura
|
@@ -86,6 +86,13 @@ files:
|
|
86
86
|
- bin/setup
|
87
87
|
- lib/amakanize.rb
|
88
88
|
- lib/amakanize/author_name.rb
|
89
|
+
- lib/amakanize/filters/base_filter.rb
|
90
|
+
- lib/amakanize/filters/html_unescape_filter.rb
|
91
|
+
- lib/amakanize/filters/normalization_filter.rb
|
92
|
+
- lib/amakanize/filters/parentheses_deletion_filter.rb
|
93
|
+
- lib/amakanize/filters/role_name_deletion_filter.rb
|
94
|
+
- lib/amakanize/filters/strip_filter.rb
|
95
|
+
- lib/amakanize/filters/trailing_payload_deletion_filter.rb
|
89
96
|
- lib/amakanize/series_name.rb
|
90
97
|
- lib/amakanize/version.rb
|
91
98
|
homepage: https://github.com/amakan/amakanize
|