amakanize 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/amakanize/filters/brackets_normalization_filter.rb +36 -0
- data/lib/amakanize/filters/obvious_volume_number_deletion_filter.rb +3 -2
- data/lib/amakanize/filters/trailing_parentheses_deletion_filter.rb +1 -1
- data/lib/amakanize/filters/trailing_volume_number_deletion_filter.rb +1 -1
- data/lib/amakanize/series_name.rb +1 -2
- data/lib/amakanize/version.rb +1 -1
- data/lib/amakanize.rb +12 -2
- metadata +2 -3
- data/lib/amakanize/filters/angle_brackets_after_word_normalization_filter.rb +0 -12
- data/lib/amakanize/filters/trailing_surrounding_hyphens_deletion_filter.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 216c25e59b46b9bb55f7065cf24b913d96fbb8d8
|
4
|
+
data.tar.gz: ba5a3770af33f8c18f2e6efcbd7c647c6efa4190
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4e580b8ebaf2f4f7823b3d3eee8b991812fefa6e1ceaaff50c74e8a5a644f145bc7d6d8100fd06c41c9893079363b95f714cfb41d4b461011a8292469e4c19f
|
7
|
+
data.tar.gz: 661d3e2c5f5ef17bf5c88db04e9f63fa3004505f50a93411c55a79381932d8f5c096e3bae5782a6cf0d4fd482a0a4fd93c73e597aec18aa3f4d2ad976ae1ac33
|
data/CHANGELOG.md
CHANGED
@@ -0,0 +1,36 @@
|
|
1
|
+
module Amakanize
|
2
|
+
module Filters
|
3
|
+
class BracketsNormalizationFilter < BaseFilter
|
4
|
+
PAIRS = %w|
|
5
|
+
‹ ›
|
6
|
+
‾ ‾
|
7
|
+
- -
|
8
|
+
〜 〜
|
9
|
+
« »
|
10
|
+
( )
|
11
|
+
[ ]
|
12
|
+
{ }
|
13
|
+
{ }
|
14
|
+
〈 〉
|
15
|
+
《 》
|
16
|
+
【 】
|
17
|
+
〔 〕
|
18
|
+
〘 〙
|
19
|
+
〚 〛
|
20
|
+
\[ \]
|
21
|
+
< >
|
22
|
+
< >
|
23
|
+
~ ~
|
24
|
+
|.each_slice(2)
|
25
|
+
|
26
|
+
# @note Override
|
27
|
+
# @param string [String] e.g. `"IS〈インフィニット・ストラトス〉 1 (オーバーラップ文庫)"`
|
28
|
+
# @return [String] e.g. `"IS(インフィニット・ストラトス) 1 (オーバーラップ文庫)"`
|
29
|
+
def call(string)
|
30
|
+
PAIRS.each_with_object(string) do |(open, close), result|
|
31
|
+
result.gsub!(/#{open}(.+?)#{close}/, '(\1)')
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -5,8 +5,9 @@ module Amakanize
|
|
5
5
|
# @param string [String] e.g. `"刀語 第十一話 毒刀・鍍"`, `"アニウッド大通り 1: アニメ監督一家物語"`
|
6
6
|
# @return [String] e.g. `"刀語"`, `"アニウッド大通り"`
|
7
7
|
def call(string)
|
8
|
-
string.gsub(/\s
|
9
|
-
.gsub(/\s
|
8
|
+
string.gsub(/\s*#{PATTERN_OF_VOLUME_PREFIX}?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版).*/, "")
|
9
|
+
.gsub(/\s+#{PATTERN_OF_VOLUME_PREFIX}?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版)?:\s+.*/, "")
|
10
|
+
.gsub(/\s*\(#{PATTERN_OF_VOLUME_PREFIX}?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}\).*/, "")
|
10
11
|
end
|
11
12
|
end
|
12
13
|
end
|
@@ -5,7 +5,7 @@ module Amakanize
|
|
5
5
|
# @param string [String] e.g. `"魔法使いの嫁 通常版 4 (BLADE COMICS)"`
|
6
6
|
# @return [String] e.g. `"魔法使いの嫁 通常版 4"`
|
7
7
|
def call(string)
|
8
|
-
string.
|
8
|
+
string.sub(/\s*\([^\(]+\)\z/, "")
|
9
9
|
end
|
10
10
|
end
|
11
11
|
end
|
@@ -5,7 +5,7 @@ module Amakanize
|
|
5
5
|
# @param string [String] e.g. `"やはり俺の青春ラブコメはまちがっている。4"`, `"ネトゲの嫁は女の子じゃないと思った? Lv.2"`
|
6
6
|
# @return [String] e.g. `"やはり俺の青春ラブコメはまちがっている。"`, `"ネトゲの嫁は女の子じゃないと思った?"`
|
7
7
|
def call(string)
|
8
|
-
string.gsub(/\s
|
8
|
+
string.gsub(/\s*#{PATTERN_OF_VOLUME_PREFIX}?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版)?(?:\s*\(.*?\))?\z/, "")
|
9
9
|
end
|
10
10
|
end
|
11
11
|
end
|
@@ -8,11 +8,10 @@ module Amakanize
|
|
8
8
|
::Amakanize::Filters::NormalizationFilter.new,
|
9
9
|
::Amakanize::Filters::HyphenMinusNormalizationFilter.new,
|
10
10
|
::Amakanize::Filters::DashBetweenAlhabetsNormalizationFilter.new,
|
11
|
-
::Amakanize::Filters::
|
11
|
+
::Amakanize::Filters::BracketsNormalizationFilter.new,
|
12
12
|
::Amakanize::Filters::SpacesBetweenExclamationsDeletionFilter.new,
|
13
13
|
::Amakanize::Filters::ObviousVolumeNumberDeletionFilter.new,
|
14
14
|
::Amakanize::Filters::TrailingParenthesesDeletionFilter.new,
|
15
|
-
::Amakanize::Filters::TrailingSurroundingHyphensDeletionFilter.new,
|
16
15
|
::Amakanize::Filters::TrailingDashDeletionFilter.new,
|
17
16
|
::Amakanize::Filters::TrailingVolumeNumberDeletionFilter.new,
|
18
17
|
::Amakanize::Filters::TrailingSeriesNamePayloadDeletionFilter.new,
|
data/lib/amakanize/version.rb
CHANGED
data/lib/amakanize.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "amakanize/author_name"
|
2
2
|
require "amakanize/author_names"
|
3
3
|
require "amakanize/filters/base_filter"
|
4
|
-
require "amakanize/filters/
|
4
|
+
require "amakanize/filters/brackets_normalization_filter"
|
5
5
|
require "amakanize/filters/dash_between_alphabets_normalization_filter"
|
6
6
|
require "amakanize/filters/html_unescape_filter"
|
7
7
|
require "amakanize/filters/hyphen_minus_normalization_filter"
|
@@ -16,7 +16,6 @@ require "amakanize/filters/trailing_author_name_payload_deletion_filter"
|
|
16
16
|
require "amakanize/filters/trailing_dash_deletion_filter"
|
17
17
|
require "amakanize/filters/trailing_parentheses_deletion_filter"
|
18
18
|
require "amakanize/filters/trailing_series_name_payload_deletion_filter"
|
19
|
-
require "amakanize/filters/trailing_surrounding_hyphens_deletion_filter"
|
20
19
|
require "amakanize/filters/trailing_volume_number_deletion_filter"
|
21
20
|
require "amakanize/series_name"
|
22
21
|
require "amakanize/version"
|
@@ -39,4 +38,15 @@ module Amakanize
|
|
39
38
|
|⑩
|
40
39
|
|[〇一二三四五六七八九十百千万零壱弍参肆伍陸漆捌玖壹貳參拾佰仟萬]+
|
41
40
|
/x
|
41
|
+
|
42
|
+
PATTERN_OF_VOLUME_PREFIX = /
|
43
|
+
episode\.?\s*
|
44
|
+
|Episode\.?\s*
|
45
|
+
|EPISODE\.?\s*
|
46
|
+
|Lv\.?\s*
|
47
|
+
|volume\.?\s*
|
48
|
+
|Volume\.?\s*
|
49
|
+
|VOLUME\.?\s*
|
50
|
+
|第
|
51
|
+
/x
|
42
52
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: amakanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- r7kamura
|
@@ -87,8 +87,8 @@ files:
|
|
87
87
|
- lib/amakanize.rb
|
88
88
|
- lib/amakanize/author_name.rb
|
89
89
|
- lib/amakanize/author_names.rb
|
90
|
-
- lib/amakanize/filters/angle_brackets_after_word_normalization_filter.rb
|
91
90
|
- lib/amakanize/filters/base_filter.rb
|
91
|
+
- lib/amakanize/filters/brackets_normalization_filter.rb
|
92
92
|
- lib/amakanize/filters/dash_between_alphabets_normalization_filter.rb
|
93
93
|
- lib/amakanize/filters/html_unescape_filter.rb
|
94
94
|
- lib/amakanize/filters/hyphen_minus_normalization_filter.rb
|
@@ -103,7 +103,6 @@ files:
|
|
103
103
|
- lib/amakanize/filters/trailing_dash_deletion_filter.rb
|
104
104
|
- lib/amakanize/filters/trailing_parentheses_deletion_filter.rb
|
105
105
|
- lib/amakanize/filters/trailing_series_name_payload_deletion_filter.rb
|
106
|
-
- lib/amakanize/filters/trailing_surrounding_hyphens_deletion_filter.rb
|
107
106
|
- lib/amakanize/filters/trailing_volume_number_deletion_filter.rb
|
108
107
|
- lib/amakanize/series_name.rb
|
109
108
|
- lib/amakanize/version.rb
|
@@ -1,12 +0,0 @@
|
|
1
|
-
module Amakanize
|
2
|
-
module Filters
|
3
|
-
class AngleBracketsAfterWordNormalizationFilter < BaseFilter
|
4
|
-
# @note Override
|
5
|
-
# @param string [String] e.g. `"IS〈インフィニット・ストラトス〉 1 (オーバーラップ文庫)"`
|
6
|
-
# @return [String] e.g. `"IS<インフィニット・ストラトス> 1 (オーバーラップ文庫)"`
|
7
|
-
def call(string)
|
8
|
-
string.gsub(/([[:alnum:]])[〈《](.+?)[〉》]/, '\1<\2>')
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|
@@ -1,12 +0,0 @@
|
|
1
|
-
module Amakanize
|
2
|
-
module Filters
|
3
|
-
class TrailingSurroundingHyphensDeletionFilter < BaseFilter
|
4
|
-
# @note Override
|
5
|
-
# @param string [String] e.g. `"艦隊これくしょん -艦これ- 島風 つむじ風の少女"`
|
6
|
-
# @return [String] e.g. `"艦隊これくしょん"`
|
7
|
-
def call(string)
|
8
|
-
string.gsub(/\s*-.+-.*/, "")
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|