amakanize 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -36
- data/README.md +35 -8
- data/lib/amakanize.rb +23 -1
- data/lib/amakanize/author_name.rb +2 -0
- data/lib/amakanize/author_names.rb +2 -0
- data/lib/amakanize/book_position.rb +33 -0
- data/lib/amakanize/filters/book_position_at_end_detection_filter.rb +29 -0
- data/lib/amakanize/filters/book_position_clearing_filter.rb +18 -0
- data/lib/amakanize/filters/book_position_in_bracket_detection_filter.rb +29 -0
- data/lib/amakanize/filters/book_position_in_words_detection_filter.rb +26 -0
- data/lib/amakanize/filters/book_position_number_canonicalization_filter.rb +18 -0
- data/lib/amakanize/filters/brackets_normalization_filter.rb +3 -1
- data/lib/amakanize/filters/continuous_spaces_normalization_filter.rb +3 -1
- data/lib/amakanize/filters/dash_between_alphabets_normalization_filter.rb +3 -1
- data/lib/amakanize/filters/html_unescape_filter.rb +2 -1
- data/lib/amakanize/filters/hyphen_minus_normalization_filter.rb +3 -1
- data/lib/amakanize/filters/normalization_filter.rb +2 -1
- data/lib/amakanize/filters/obvious_volume_number_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/parentheses_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/role_name_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/space_between_exclamation_and_bracket_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/space_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/spaces_between_exclamations_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/strip_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_author_name_payload_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_dash_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_double_parentheses_singlization_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_parentheses_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_series_name_payload_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/trailing_volume_number_deletion_filter.rb +3 -1
- data/lib/amakanize/filters/video_position_detection_filter.rb +3 -1
- data/lib/amakanize/filters/video_position_number_conversion_filter.rb +112 -110
- data/lib/amakanize/series_name.rb +2 -0
- data/lib/amakanize/version.rb +1 -1
- data/lib/amakanize/video_position.rb +2 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74e0311ca23b89b645e0629caa871060fd4f2796
|
4
|
+
data.tar.gz: fe9047de846deaccabf42d2b6cc9eba7d809d7c4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 898701f616daac7e977173f3794a51ed11d63ad7558303626255badaa6d72fe04bbfcb5f9a34f8199936e242ca715a93f1b46c7aebf628149a069beea80fa7bf
|
7
|
+
data.tar.gz: d776540d85acc6bce04afa16c4bacde2480c346dcf6f0267333bbaaf9a8ce3f2f6d6cad001c48c20cd60193e5cb139463b10086ec14d864af588a69cd1354db4
|
data/CHANGELOG.md
CHANGED
@@ -1,154 +1,159 @@
|
|
1
|
-
##
|
1
|
+
## v0.6.1
|
2
|
+
|
3
|
+
- Add `Amakanize::BookPosition`
|
4
|
+
- Reject empty author names on `Amakanize::AuthorNames`
|
5
|
+
|
6
|
+
## v0.6.0
|
2
7
|
|
3
8
|
- Add `Amakanize::VideoPosition`
|
4
9
|
- Rename author name from "r7kamura" to "Ryo Nakamura"
|
5
10
|
- Set required Ruby version to 2.2.2 or higher for ActiveSupport
|
6
11
|
|
7
|
-
##
|
12
|
+
## v0.5.3
|
8
13
|
|
9
14
|
- Support weekly magazine format in series detection
|
10
15
|
|
11
|
-
##
|
16
|
+
## v0.5.2
|
12
17
|
|
13
18
|
- Support nested parentheses
|
14
19
|
|
15
|
-
##
|
20
|
+
## v0.5.1
|
16
21
|
|
17
22
|
- Detect アフタヌーン from アフタヌーン 2016年 05 月号
|
18
23
|
|
19
|
-
##
|
24
|
+
## v0.5.0
|
20
25
|
|
21
26
|
- Remove volume number only one time
|
22
27
|
- Normalize continuous spaces
|
23
28
|
|
24
|
-
##
|
29
|
+
## v0.4.9
|
25
30
|
|
26
31
|
- Fix a bug in the brackets normalization logic
|
27
32
|
|
28
|
-
##
|
33
|
+
## v0.4.8
|
29
34
|
|
30
35
|
- Treat カバーデザイン as role name
|
31
36
|
|
32
|
-
##
|
37
|
+
## v0.4.7
|
33
38
|
|
34
39
|
- Support "Buso Renkin, Vol. 5"
|
35
40
|
|
36
|
-
##
|
41
|
+
## v0.4.6
|
37
42
|
|
38
43
|
- Treat (キャラクター原案)ハラカズヒロ as ハラカズヒロ
|
39
44
|
- Normalize 「x! 」 into 「x!」
|
40
45
|
|
41
|
-
##
|
46
|
+
## v0.4.5
|
42
47
|
|
43
48
|
- Support 2016年04月号 for magazines
|
44
49
|
|
45
|
-
##
|
50
|
+
## v0.4.4
|
46
51
|
|
47
52
|
- Support (前) (後) (上) (中) (下)
|
48
53
|
|
49
|
-
##
|
54
|
+
## v0.4.3
|
50
55
|
|
51
56
|
- Support (原作・イラスト)XXX
|
52
57
|
- Support イラスト:XXX
|
53
58
|
|
54
|
-
##
|
59
|
+
## v0.4.2
|
55
60
|
|
56
61
|
- Support #5
|
57
62
|
|
58
|
-
##
|
63
|
+
## v0.4.1
|
59
64
|
|
60
65
|
- Support Vol.3
|
61
66
|
- Support 3 通常版
|
62
67
|
- Support 7.5
|
63
68
|
|
64
|
-
##
|
69
|
+
## v0.4.0
|
65
70
|
|
66
71
|
- Add spaces around brackets except for ending
|
67
72
|
- Treat `―XXX―` as brackets (e.g. To LOVEる―とらぶる―)
|
68
73
|
|
69
|
-
##
|
74
|
+
## v0.3.0
|
70
75
|
|
71
76
|
- Normalize all brackets into round brackets (parentheses)
|
72
77
|
- Removes only trailing brackets
|
73
78
|
|
74
|
-
##
|
79
|
+
## v0.2.2
|
75
80
|
|
76
81
|
- Support volume N
|
77
82
|
|
78
|
-
##
|
83
|
+
## v0.2.1
|
79
84
|
|
80
85
|
- Normalize angle brackets
|
81
86
|
- Remove trailing LvN and Lv.N as volume number
|
82
87
|
- Fix volume number detection pattern
|
83
88
|
|
84
|
-
##
|
89
|
+
## v0.2.0
|
85
90
|
|
86
91
|
- Fix author name separator
|
87
92
|
|
88
|
-
##
|
93
|
+
## v0.1.12
|
89
94
|
|
90
95
|
- Support 第N版
|
91
96
|
|
92
|
-
##
|
97
|
+
## v0.1.11
|
93
98
|
|
94
99
|
- Improve volume detection for アニウッド大通り
|
95
100
|
|
96
|
-
##
|
101
|
+
## v0.1.10
|
97
102
|
|
98
103
|
- Detect obvious volume number in product title
|
99
104
|
|
100
|
-
##
|
105
|
+
## v0.1.9
|
101
106
|
|
102
107
|
- Normalize hyphen with hyphen/minus
|
103
108
|
|
104
|
-
##
|
109
|
+
## v0.1.8
|
105
110
|
|
106
111
|
- Normalize hyphen between alphabets in series name
|
107
112
|
|
108
|
-
##
|
113
|
+
## v0.1.7
|
109
114
|
|
110
115
|
- Remove trailing surrounding hyphens and payload on series name
|
111
116
|
|
112
|
-
##
|
117
|
+
## v0.1.6
|
113
118
|
|
114
119
|
- Remove trailing dash and payload on series name
|
115
120
|
|
116
|
-
##
|
121
|
+
## v0.1.5
|
117
122
|
|
118
123
|
- Remove spaces between exclamations for 「ばくおん! !」
|
119
124
|
|
120
|
-
##
|
125
|
+
## v0.1.4
|
121
126
|
|
122
127
|
- Add more author name separators
|
123
128
|
- Remove suffix role name
|
124
129
|
|
125
|
-
##
|
130
|
+
## v0.1.3
|
126
131
|
|
127
132
|
- Remove trailing 原作・原案・漫画 of author name
|
128
133
|
|
129
|
-
##
|
134
|
+
## v0.1.2
|
130
135
|
|
131
136
|
- Add AuthorNames class
|
132
137
|
|
133
|
-
##
|
138
|
+
## v0.1.1
|
134
139
|
|
135
140
|
- Support x and X as Roman numerals
|
136
141
|
|
137
|
-
##
|
142
|
+
## v0.1.0
|
138
143
|
|
139
144
|
- Support roman numerals on series name
|
140
145
|
- Support 第N巻 on series name
|
141
146
|
- Normalize series name
|
142
147
|
|
143
|
-
##
|
148
|
+
## v0.0.4
|
144
149
|
|
145
150
|
- Normalize all characters
|
146
151
|
|
147
|
-
##
|
152
|
+
## v0.0.3
|
148
153
|
|
149
154
|
- Remove space from author name
|
150
155
|
|
151
|
-
##
|
156
|
+
## v0.0.2
|
152
157
|
|
153
158
|
- Remove parentheses around numerals in author name
|
154
159
|
- Remove role name from author name
|
@@ -156,6 +161,6 @@
|
|
156
161
|
- Strip author name
|
157
162
|
- Unescape HTML in author name
|
158
163
|
|
159
|
-
##
|
164
|
+
## v0.0.1
|
160
165
|
|
161
166
|
- 1st release :tada:
|
data/README.md
CHANGED
@@ -4,11 +4,7 @@
|
|
4
4
|
[![Gem Version](https://badge.fury.io/rb/amakanize.svg)](https://rubygems.org/gems/amakanize)
|
5
5
|
[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](http://www.rubydoc.info/github/amakan/amakanize)
|
6
6
|
|
7
|
-
Utilities to canonicalize
|
8
|
-
|
9
|
-
## Requirements
|
10
|
-
|
11
|
-
- Ruby 2.2.2 or higher
|
7
|
+
Utilities to canonicalize names for [amakan.net](https://amakan.net/).
|
12
8
|
|
13
9
|
## Installation
|
14
10
|
|
@@ -20,30 +16,61 @@ gem "amakanize"
|
|
20
16
|
|
21
17
|
And then execute:
|
22
18
|
|
23
|
-
```
|
19
|
+
```bash
|
24
20
|
bundle
|
25
21
|
```
|
26
22
|
|
27
23
|
Or manually install:
|
28
24
|
|
29
|
-
```
|
25
|
+
```bash
|
30
26
|
gem install amakanize
|
31
27
|
```
|
32
28
|
|
33
29
|
## Usage
|
34
30
|
|
35
|
-
|
31
|
+
Require `"amakanize"` before using the classes below.
|
32
|
+
|
33
|
+
```ruby
|
36
34
|
require "amakanize"
|
35
|
+
```
|
36
|
+
|
37
|
+
### Amakanize::AuthorName
|
37
38
|
|
39
|
+
```ruby
|
38
40
|
Amakanize::AuthorName.new("ぽんかん(8)").to_s #=> "ぽんかん8"
|
39
41
|
Amakanize::AuthorName.new("ぽんかん8").to_s #=> "ぽんかん8"
|
40
42
|
Amakanize::AuthorName.new("ぽんかん⑧").to_s #=> "ぽんかん8"
|
43
|
+
```
|
44
|
+
|
45
|
+
### Amakanize::AuthorNames
|
41
46
|
|
47
|
+
```ruby
|
42
48
|
Amakanize::AuthorNames.new("ぽんかん⑧,渡 航").map(&:to_s) #=> ["ぽんかん8", "渡航"]
|
43
49
|
Amakanize::AuthorNames.new("ぽんかん⑧,渡 航").map(&:to_s) #=> ["ぽんかん8", "渡航"]
|
44
50
|
Amakanize::AuthorNames.new("ぽんかん⑧、渡 航").map(&:to_s) #=> ["ぽんかん8", "渡航"]
|
51
|
+
```
|
45
52
|
|
53
|
+
### Amakanize::BookPosition
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
Amakanize::BookPosition.new("Fate/Grand Order 電撃コミックアンソロジー (2) (電撃コミックスNEXT)").to_s #=> "2"
|
57
|
+
Amakanize::BookPosition.new("お迎えです。 6 (花とゆめCOMICS)").to_s #=> "6"
|
58
|
+
Amakanize::BookPosition.new("こいつら100%伝説 上 (集英社文庫 お 34-5)").to_s #=> "上"
|
59
|
+
Amakanize::BookPosition.new("THE BEST STAGE ガールフレンド(♪) ~Side MOMOKO~ (電撃コミックスEX)").to_s #=> ""
|
60
|
+
```
|
61
|
+
|
62
|
+
### Amakanize::SeriesName
|
63
|
+
|
64
|
+
```ruby
|
46
65
|
Amakanize::SeriesName.new("やはり俺の青春ラブコメはまちがっている。4").to_s #=> "やはり俺の青春ラブコメはまちがっている。"
|
47
66
|
Amakanize::SeriesName.new("ラブライブ! School idol diary ~星空凛~").to_s #=> "ラブライブ! School idol diary"
|
48
67
|
Amakanize::SeriesName.new("僕だけがいない街 (1) (カドカワコミックス・エース)").to_s #=> "僕だけがいない街"
|
49
68
|
```
|
69
|
+
|
70
|
+
### Amakanize::VideoPosition
|
71
|
+
|
72
|
+
```ruby
|
73
|
+
Amakanize::VideoPosition.new("第1話 ソードアート・オンラインII").to_s #=> "1"
|
74
|
+
Amakanize::VideoPosition.new("デジモンアドベンチャー tri. 第2章「決意」").to_s #=> "2"
|
75
|
+
Amakanize::VideoPosition.new("楽園追放-Expelled from Paradise-").to_s #=> ""
|
76
|
+
```
|
data/lib/amakanize.rb
CHANGED
@@ -22,6 +22,22 @@ module Amakanize
|
|
22
22
|
|後
|
23
23
|
/x
|
24
24
|
|
25
|
+
PATTERN_OF_PREFIX_OF_BOOK_POSITION = /
|
26
|
+
\#\s*
|
27
|
+
|episode\.?\s*
|
28
|
+
|lv\.?\s*
|
29
|
+
|level\.?\s*
|
30
|
+
|vol(?:ume)?\.?\s*
|
31
|
+
|巻(?:の|ノ|之)?\s*
|
32
|
+
|第\s*
|
33
|
+
/ix
|
34
|
+
|
35
|
+
PATTERN_OF_SUFFIX_OF_BOOK_POSITION = /
|
36
|
+
\s*話
|
37
|
+
|\s*巻
|
38
|
+
|\s*版
|
39
|
+
/x
|
40
|
+
|
25
41
|
PATTERN_OF_VOLUME_PREFIX = /
|
26
42
|
episode\.?\s*
|
27
43
|
|\#
|
@@ -34,10 +50,16 @@ module Amakanize
|
|
34
50
|
/x
|
35
51
|
end
|
36
52
|
|
37
|
-
require "amakanize/filterable"
|
38
53
|
require "amakanize/author_name"
|
39
54
|
require "amakanize/author_names"
|
55
|
+
require "amakanize/book_position"
|
56
|
+
require "amakanize/filterable"
|
40
57
|
require "amakanize/filters/base_filter"
|
58
|
+
require "amakanize/filters/book_position_at_end_detection_filter"
|
59
|
+
require "amakanize/filters/book_position_clearing_filter"
|
60
|
+
require "amakanize/filters/book_position_in_bracket_detection_filter"
|
61
|
+
require "amakanize/filters/book_position_in_words_detection_filter"
|
62
|
+
require "amakanize/filters/book_position_number_canonicalization_filter"
|
41
63
|
require "amakanize/filters/brackets_normalization_filter"
|
42
64
|
require "amakanize/filters/continuous_spaces_normalization_filter"
|
43
65
|
require "amakanize/filters/dash_between_alphabets_normalization_filter"
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "amakanize/filterable"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
class BookPosition
|
5
|
+
include ::Amakanize::Filterable
|
6
|
+
|
7
|
+
class << self
|
8
|
+
# @return [Array<Class>]
|
9
|
+
def filter_classes
|
10
|
+
@filter_classes ||= [
|
11
|
+
::Amakanize::Filters::HtmlUnescapeFilter,
|
12
|
+
::Amakanize::Filters::NormalizationFilter,
|
13
|
+
::Amakanize::Filters::ContinuousSpacesNormalizationFilter,
|
14
|
+
::Amakanize::Filters::HyphenMinusNormalizationFilter,
|
15
|
+
::Amakanize::Filters::DashBetweenAlhabetsNormalizationFilter,
|
16
|
+
::Amakanize::Filters::BracketsNormalizationFilter,
|
17
|
+
::Amakanize::Filters::SpacesBetweenExclamationsDeletionFilter,
|
18
|
+
::Amakanize::Filters::SpaceBetweenExclamationAndBracketDeletionFilter,
|
19
|
+
::Amakanize::Filters::BookPositionInBracketDetectionFilter,
|
20
|
+
::Amakanize::Filters::TrailingDoubleParenthesesSinglizationFilter,
|
21
|
+
::Amakanize::Filters::TrailingParenthesesDeletionFilter,
|
22
|
+
::Amakanize::Filters::TrailingDashDeletionFilter,
|
23
|
+
::Amakanize::Filters::TrailingSeriesNamePayloadDeletionFilter,
|
24
|
+
::Amakanize::Filters::BookPositionInWordsDetectionFilter,
|
25
|
+
::Amakanize::Filters::BookPositionAtEndDetectionFilter,
|
26
|
+
::Amakanize::Filters::BookPositionClearingFilter,
|
27
|
+
::Amakanize::Filters::VideoPositionNumberConversionFilter,
|
28
|
+
::Amakanize::Filters::BookPositionNumberCanonicalizationFilter,
|
29
|
+
]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class BookPositionAtEndDetectionFilter < ::Amakanize::Filters::BaseFilter
|
6
|
+
PATTERN = /
|
7
|
+
(?:#{::Amakanize::PATTERN_OF_PREFIX_OF_BOOK_POSITION})?
|
8
|
+
(#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS})
|
9
|
+
(?:#{::Amakanize::PATTERN_OF_SUFFIX_OF_BOOK_POSITION})?
|
10
|
+
\s*
|
11
|
+
\z
|
12
|
+
/x
|
13
|
+
|
14
|
+
# @note Override
|
15
|
+
def call(context:, output:)
|
16
|
+
unless context[:position_detected]
|
17
|
+
if position = output[PATTERN, 1]
|
18
|
+
context[:position_detected] = true
|
19
|
+
output = position
|
20
|
+
end
|
21
|
+
end
|
22
|
+
{
|
23
|
+
context: context,
|
24
|
+
output: output,
|
25
|
+
}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class BookPositionClearingFilter < ::Amakanize::Filters::BaseFilter
|
6
|
+
# @note Override
|
7
|
+
def call(context:, output:)
|
8
|
+
unless context[:position_detected]
|
9
|
+
output = ""
|
10
|
+
end
|
11
|
+
{
|
12
|
+
context: context,
|
13
|
+
output: output,
|
14
|
+
}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class BookPositionInBracketDetectionFilter < ::Amakanize::Filters::BaseFilter
|
6
|
+
PATTERN = /
|
7
|
+
\(
|
8
|
+
(?:#{::Amakanize::PATTERN_OF_PREFIX_OF_BOOK_POSITION})?
|
9
|
+
(#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS})
|
10
|
+
(?:#{::Amakanize::PATTERN_OF_SUFFIX_OF_BOOK_POSITION})?
|
11
|
+
\)
|
12
|
+
/x
|
13
|
+
|
14
|
+
# @note Override
|
15
|
+
def call(context:, output:)
|
16
|
+
unless context[:position_detected]
|
17
|
+
if position = output[PATTERN, 1]
|
18
|
+
context[:position_detected] = true
|
19
|
+
output = position
|
20
|
+
end
|
21
|
+
end
|
22
|
+
{
|
23
|
+
context: context,
|
24
|
+
output: output,
|
25
|
+
}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class BookPositionInWordsDetectionFilter < ::Amakanize::Filters::BaseFilter
|
6
|
+
PATTERN = /
|
7
|
+
(?:#{::Amakanize::PATTERN_OF_PREFIX_OF_BOOK_POSITION})(#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS})
|
8
|
+
|(#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS})(?:#{::Amakanize::PATTERN_OF_SUFFIX_OF_BOOK_POSITION})
|
9
|
+
/x
|
10
|
+
|
11
|
+
# @note Override
|
12
|
+
def call(context:, output:)
|
13
|
+
unless context[:position_detected]
|
14
|
+
if position = output[PATTERN, 1]
|
15
|
+
context[:position_detected] = true
|
16
|
+
output = position
|
17
|
+
end
|
18
|
+
end
|
19
|
+
{
|
20
|
+
context: context,
|
21
|
+
output: output,
|
22
|
+
}
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
3
|
+
module Amakanize
|
4
|
+
module Filters
|
5
|
+
class BookPositionNumberCanonicalizationFilter < ::Amakanize::Filters::BaseFilter
|
6
|
+
# @note Override
|
7
|
+
def call(context:, output:)
|
8
|
+
if context[:position_detected]
|
9
|
+
output = output.gsub(/\A0+([1-9]+)/, '\1')
|
10
|
+
end
|
11
|
+
{
|
12
|
+
context: context,
|
13
|
+
output: output,
|
14
|
+
}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class ContinuousSpacesNormalizationFilter < BaseFilter
|
5
|
+
class ContinuousSpacesNormalizationFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"ウメハラ FIGHTING GAMERS!"`
|
6
8
|
# @return [Hash] e.g. `"ウメハラ FIGHTING GAMERS!"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class DashBetweenAlhabetsNormalizationFilter < BaseFilter
|
5
|
+
class DashBetweenAlhabetsNormalizationFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"D.Grayーman"`
|
6
8
|
# @return [Hash] e.g. `"D.Gray-man"`
|
@@ -1,8 +1,9 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
1
2
|
require "cgi"
|
2
3
|
|
3
4
|
module Amakanize
|
4
5
|
module Filters
|
5
|
-
class HtmlUnescapeFilter < BaseFilter
|
6
|
+
class HtmlUnescapeFilter < ::Amakanize::Filters::BaseFilter
|
6
7
|
# @note Override
|
7
8
|
# @param output [String] e.g. `"<ハノカゲ>"`
|
8
9
|
# @return [Hash] e.g. `"ハノカゲ"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class HyphenMinusNormalizationFilter < BaseFilter
|
5
|
+
class HyphenMinusNormalizationFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @note Replace U+2010 (hyphen) with U+002D (hyphen/minus)
|
6
8
|
# @param output [String] e.g. `"D.Gray‐man"`
|
@@ -1,8 +1,9 @@
|
|
1
1
|
require "active_support"
|
2
|
+
require "amakanize/filters/base_filter"
|
2
3
|
|
3
4
|
module Amakanize
|
4
5
|
module Filters
|
5
|
-
class NormalizationFilter < BaseFilter
|
6
|
+
class NormalizationFilter < ::Amakanize::Filters::BaseFilter
|
6
7
|
# @note Override
|
7
8
|
# @param output [String] e.g. `"ぽんかん(8)"`, `"ぽんかん⑧"`
|
8
9
|
# @return [Hash] e.g. `"ぽんかん(8)"`, `"ぽんかん8"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class ObviousVolumeNumberDeletionFilter < BaseFilter
|
5
|
+
class ObviousVolumeNumberDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
PATTERN = Regexp.union(
|
5
7
|
/\s*#{::Amakanize::PATTERN_OF_VOLUME_PREFIX}?#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版).*/,
|
6
8
|
/\s+#{::Amakanize::PATTERN_OF_VOLUME_PREFIX}?#{::Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版)?:\s+.*/,
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class ParenthesesDeletionFilter < BaseFilter
|
5
|
+
class ParenthesesDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"ぽんかん(8)"`
|
6
8
|
# @return [Hash] e.g. `"ぽんかん8"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class SpaceBetweenExclamationAndBracketDeletionFilter < BaseFilter
|
5
|
+
class SpaceBetweenExclamationAndBracketDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"まおゆう魔王勇者 「この我のものとなれ、勇者よ」「断る! 」"`
|
6
8
|
# @return [Hash] e.g. `"まおゆう魔王勇者 「この我のものとなれ、勇者よ」「断る!」"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class SpaceDeletionFilter < BaseFilter
|
5
|
+
class SpaceDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"渡 航"`
|
6
8
|
# @return [Hash] e.g. `"渡航"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class SpacesBetweenExclamationsDeletionFilter < BaseFilter
|
5
|
+
class SpacesBetweenExclamationsDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"ばくおん! !"`
|
6
8
|
# @return [Hash] e.g. `"ばくおん!!"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class TrailingDoubleParenthesesSinglizationFilter < BaseFilter
|
5
|
+
class TrailingDoubleParenthesesSinglizationFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"ヒナまつり 11 (ヒナまつり) (ビームコミックス(ハルタ) )"`
|
6
8
|
# @return [Hash] e.g. `"ヒナまつり 11 (ヒナまつり) (ビームコミックスハルタ )"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class TrailingParenthesesDeletionFilter < BaseFilter
|
5
|
+
class TrailingParenthesesDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"魔法使いの嫁 通常版 4 (BLADE COMICS)"`
|
6
8
|
# @return [Hash] e.g. `"魔法使いの嫁 通常版 4"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class TrailingVolumeNumberDeletionFilter < BaseFilter
|
5
|
+
class TrailingVolumeNumberDeletionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"やはり俺の青春ラブコメはまちがっている。4"`, `"ネトゲの嫁は女の子じゃないと思った? Lv.2"`
|
6
8
|
# @return [Hash] e.g. `"やはり俺の青春ラブコメはまちがっている。"`, `"ネトゲの嫁は女の子じゃないと思った?"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class VideoPositionDetectionFilter < BaseFilter
|
5
|
+
class VideoPositionDetectionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
# @note Override
|
5
7
|
# @param output [String] e.g. `"第1話「でじこだにょ」/第2話「ぷちこと一緒かにょ?」"`
|
6
8
|
# @return [Hash] e.g. `"1"`
|
@@ -1,6 +1,8 @@
|
|
1
|
+
require "amakanize/filters/base_filter"
|
2
|
+
|
1
3
|
module Amakanize
|
2
4
|
module Filters
|
3
|
-
class VideoPositionNumberConversionFilter < BaseFilter
|
5
|
+
class VideoPositionNumberConversionFilter < ::Amakanize::Filters::BaseFilter
|
4
6
|
TABLE = {
|
5
7
|
/i/i => 1,
|
6
8
|
/ii/i => 2,
|
@@ -32,115 +34,115 @@ module Amakanize
|
|
32
34
|
/xxviii/i => 28,
|
33
35
|
/xxix/i => 29,
|
34
36
|
/xxx/i => 30,
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
37
|
+
"一" => 1,
|
38
|
+
"二" => 2,
|
39
|
+
"三" => 3,
|
40
|
+
"四" => 4,
|
41
|
+
"五" => 5,
|
42
|
+
"六" => 6,
|
43
|
+
"七" => 7,
|
44
|
+
"八" => 8,
|
45
|
+
"九" => 9,
|
46
|
+
"十" => 10,
|
47
|
+
"十一" => 11,
|
48
|
+
"十二" => 12,
|
49
|
+
"十三" => 13,
|
50
|
+
"十四" => 14,
|
51
|
+
"十五" => 15,
|
52
|
+
"十六" => 16,
|
53
|
+
"十七" => 17,
|
54
|
+
"十八" => 18,
|
55
|
+
"十九" => 19,
|
56
|
+
"二十" => 20,
|
57
|
+
"二十一" => 21,
|
58
|
+
"二十二" => 22,
|
59
|
+
"二十三" => 23,
|
60
|
+
"二十四" => 24,
|
61
|
+
"二十五" => 25,
|
62
|
+
"二十六" => 26,
|
63
|
+
"二十七" => 27,
|
64
|
+
"二十八" => 28,
|
65
|
+
"二十九" => 29,
|
66
|
+
"三十" => 30,
|
67
|
+
"三十一" => 31,
|
68
|
+
"三十二" => 32,
|
69
|
+
"三十三" => 33,
|
70
|
+
"三十四" => 34,
|
71
|
+
"三十五" => 35,
|
72
|
+
"三十六" => 36,
|
73
|
+
"三十七" => 37,
|
74
|
+
"三十八" => 38,
|
75
|
+
"三十九" => 39,
|
76
|
+
"四十" => 40,
|
77
|
+
"四十一" => 41,
|
78
|
+
"四十二" => 42,
|
79
|
+
"四十三" => 43,
|
80
|
+
"四十四" => 44,
|
81
|
+
"四十五" => 45,
|
82
|
+
"四十六" => 46,
|
83
|
+
"四十七" => 47,
|
84
|
+
"四十八" => 48,
|
85
|
+
"四十九" => 49,
|
86
|
+
"五十" => 50,
|
87
|
+
"五十一" => 51,
|
88
|
+
"五十二" => 52,
|
89
|
+
"五十三" => 53,
|
90
|
+
"五十四" => 54,
|
91
|
+
"五十五" => 55,
|
92
|
+
"五十六" => 56,
|
93
|
+
"五十七" => 57,
|
94
|
+
"五十八" => 58,
|
95
|
+
"五十九" => 59,
|
96
|
+
"六十" => 60,
|
97
|
+
"六十一" => 61,
|
98
|
+
"六十二" => 62,
|
99
|
+
"六十三" => 63,
|
100
|
+
"六十四" => 64,
|
101
|
+
"六十五" => 65,
|
102
|
+
"六十六" => 66,
|
103
|
+
"六十七" => 67,
|
104
|
+
"六十八" => 68,
|
105
|
+
"六十九" => 69,
|
106
|
+
"七十" => 70,
|
107
|
+
"七十一" => 71,
|
108
|
+
"七十二" => 72,
|
109
|
+
"七十三" => 73,
|
110
|
+
"七十四" => 74,
|
111
|
+
"七十五" => 75,
|
112
|
+
"七十六" => 76,
|
113
|
+
"七十七" => 77,
|
114
|
+
"七十八" => 78,
|
115
|
+
"七十九" => 79,
|
116
|
+
"八十" => 80,
|
117
|
+
"八十一" => 81,
|
118
|
+
"八十二" => 82,
|
119
|
+
"八十三" => 83,
|
120
|
+
"八十四" => 84,
|
121
|
+
"八十五" => 85,
|
122
|
+
"八十六" => 86,
|
123
|
+
"八十七" => 87,
|
124
|
+
"八十八" => 88,
|
125
|
+
"八十九" => 89,
|
126
|
+
"九十" => 90,
|
127
|
+
"九十一" => 91,
|
128
|
+
"九十二" => 92,
|
129
|
+
"九十三" => 93,
|
130
|
+
"九十四" => 94,
|
131
|
+
"九十五" => 95,
|
132
|
+
"九十六" => 96,
|
133
|
+
"九十七" => 97,
|
134
|
+
"九十八" => 98,
|
135
|
+
"九十九" => 99,
|
136
|
+
"①" => 1,
|
137
|
+
"②" => 2,
|
138
|
+
"③" => 3,
|
139
|
+
"④" => 4,
|
140
|
+
"⑤" => 5,
|
141
|
+
"⑥" => 6,
|
142
|
+
"⑦" => 7,
|
143
|
+
"⑧" => 8,
|
144
|
+
"⑨" => 9,
|
145
|
+
"⑩" => 10,
|
144
146
|
}
|
145
147
|
|
146
148
|
# @note Override
|
data/lib/amakanize/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amakanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryo Nakamura
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -99,8 +99,14 @@ files:
|
|
99
99
|
- lib/amakanize.rb
|
100
100
|
- lib/amakanize/author_name.rb
|
101
101
|
- lib/amakanize/author_names.rb
|
102
|
+
- lib/amakanize/book_position.rb
|
102
103
|
- lib/amakanize/filterable.rb
|
103
104
|
- lib/amakanize/filters/base_filter.rb
|
105
|
+
- lib/amakanize/filters/book_position_at_end_detection_filter.rb
|
106
|
+
- lib/amakanize/filters/book_position_clearing_filter.rb
|
107
|
+
- lib/amakanize/filters/book_position_in_bracket_detection_filter.rb
|
108
|
+
- lib/amakanize/filters/book_position_in_words_detection_filter.rb
|
109
|
+
- lib/amakanize/filters/book_position_number_canonicalization_filter.rb
|
104
110
|
- lib/amakanize/filters/brackets_normalization_filter.rb
|
105
111
|
- lib/amakanize/filters/continuous_spaces_normalization_filter.rb
|
106
112
|
- lib/amakanize/filters/dash_between_alphabets_normalization_filter.rb
|