amakanize 0.2.0 → 0.2.1

Sign up to get free protection for your applications and access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29a3fe6104923c2dce5236cdd426331fac81ef9f
4
- data.tar.gz: 48c1308766b0e8530b3b4653cdbc1778cd04f96d
3
+ metadata.gz: cd42f278e55fdace156f91282b117c2f32075cd8
4
+ data.tar.gz: 697300a04b3bc7a54ef611dad793bd1dd6bed448
5
5
  SHA512:
6
- metadata.gz: 3de8f1cc840ae35402f9d726bb6a81452ece1becf040fb78ec28f688208c51005f6adc35c4b253016d5292392a555551ccbe61947c69fa0ebbff3b81a261a49f
7
- data.tar.gz: d38495ab3280abd4f4c6eaaa3e2731be07d8496467015273fa0b6117f0a93cdb81198e4a835ef5817712a23310166d379db8c5a122f8361d52d1d357b98c68ea
6
+ metadata.gz: 792a8ec7d0e7715a81409a7ae847c5f4313bae35de67c3bb50bcfa3f854572793d96bb6984f9efdf9feaa68c7a89b9de3d48c1169f3cb1eec5baf94af0a8329d
7
+ data.tar.gz: 0aca318813fa1f33c5e459424ce30b88c1234d8eb86e0502911cd5807a6cd81fff4c89ce10a6360d215885d4578b2a7a9130be103070215bb89a020f5721eaf0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.1
2
+ - Normalize angle brackets
3
+ - Remove trailing LvN and Lv.N as volume number
4
+ - Fix volume number detection pattern
5
+
1
6
  ## 0.2.0
2
7
  - Fix author name separator
3
8
 
@@ -0,0 +1,12 @@
1
+ module Amakanize
2
+ module Filters
3
+ class AngleBracketsAfterWordNormalizationFilter < BaseFilter
4
+ # @note Override
5
+ # @param string [String] e.g. `"IS〈インフィニット・ストラトス〉 1 (オーバーラップ文庫)"`
6
+ # @return [String] e.g. `"IS<インフィニット・ストラトス> 1 (オーバーラップ文庫)"`
7
+ def call(string)
8
+ string.gsub(/([[:alnum:]])[〈《](.+?)[〉》]/, '\1<\2>')
9
+ end
10
+ end
11
+ end
12
+ end
@@ -5,8 +5,8 @@ module Amakanize
5
5
  # @param string [String] e.g. `"刀語 第十一話 毒刀・鍍"`, `"アニウッド大通り 1: アニメ監督一家物語"`
6
6
  # @return [String] e.g. `"刀語"`, `"アニウッド大通り"`
7
7
  def call(string)
8
- string.gsub(/\s*第?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTER}+(?:話|巻|版).*/, "")
9
- .gsub(/\s+第?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTER}+(?:話|巻|版)?:\s+.*/, "")
8
+ string.gsub(/\s*第?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版).*/, "")
9
+ .gsub(/\s+第?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版)?:\s+.*/, "")
10
10
  end
11
11
  end
12
12
  end
@@ -2,10 +2,10 @@ module Amakanize
2
2
  module Filters
3
3
  class TrailingVolumeNumberDeletionFilter < BaseFilter
4
4
  # @note Override
5
- # @param string [String] e.g. `"やはり俺の青春ラブコメはまちがっている。4"`
6
- # @return [String] e.g. `"やはり俺の青春ラブコメはまちがっている。"`
5
+ # @param string [String] e.g. `"やはり俺の青春ラブコメはまちがっている。4"`, `"ネトゲの嫁は女の子じゃないと思った? Lv.2"`
6
+ # @return [String] e.g. `"やはり俺の青春ラブコメはまちがっている。"`, `"ネトゲの嫁は女の子じゃないと思った?"`
7
7
  def call(string)
8
- string.gsub(/\s*第?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTER}+(?:話|巻|版)?\z/, "")
8
+ string.gsub(/\s*(?:第|Lv\.?)?#{Amakanize::PATTERN_OF_NUMERIC_CHARACTERS}(?:話|巻|版)?\z/, "")
9
9
  end
10
10
  end
11
11
  end
@@ -8,6 +8,7 @@ module Amakanize
8
8
  ::Amakanize::Filters::NormalizationFilter.new,
9
9
  ::Amakanize::Filters::HyphenMinusNormalizationFilter.new,
10
10
  ::Amakanize::Filters::DashBetweenAlhabetsNormalizationFilter.new,
11
+ ::Amakanize::Filters::AngleBracketsAfterWordNormalizationFilter.new,
11
12
  ::Amakanize::Filters::SpacesBetweenExclamationsDeletionFilter.new,
12
13
  ::Amakanize::Filters::ObviousVolumeNumberDeletionFilter.new,
13
14
  ::Amakanize::Filters::TrailingParenthesesDeletionFilter.new,
@@ -1,3 +1,3 @@
1
1
  module Amakanize
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
data/lib/amakanize.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "amakanize/author_name"
2
2
  require "amakanize/author_names"
3
3
  require "amakanize/filters/base_filter"
4
+ require "amakanize/filters/angle_brackets_after_word_normalization_filter"
4
5
  require "amakanize/filters/dash_between_alphabets_normalization_filter"
5
6
  require "amakanize/filters/html_unescape_filter"
6
7
  require "amakanize/filters/hyphen_minus_normalization_filter"
@@ -21,5 +22,21 @@ require "amakanize/series_name"
21
22
  require "amakanize/version"
22
23
 
23
24
  module Amakanize
24
- PATTERN_OF_NUMERIC_CHARACTER = /[\diIvVxX1-9①②③④⑤⑥⑦⑧⑨⑩〇一二三四五六七八九十百千万零壱弍参肆伍陸漆捌玖壹貳參拾佰仟萬]/
25
+ PATTERN_OF_NUMERIC_CHARACTERS = /
26
+ \d+
27
+ |[ivx]+
28
+ |[IVX]+
29
+ |[0-9]+
30
+ |①
31
+ |②
32
+ |③
33
+ |④
34
+ |⑤
35
+ |⑥
36
+ |⑦
37
+ |⑧
38
+ |⑨
39
+ |⑩
40
+ |[〇一二三四五六七八九十百千万零壱弍参肆伍陸漆捌玖壹貳參拾佰仟萬]+
41
+ /x
25
42
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: amakanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - r7kamura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-08-21 00:00:00.000000000 Z
11
+ date: 2016-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -87,6 +87,7 @@ files:
87
87
  - lib/amakanize.rb
88
88
  - lib/amakanize/author_name.rb
89
89
  - lib/amakanize/author_names.rb
90
+ - lib/amakanize/filters/angle_brackets_after_word_normalization_filter.rb
90
91
  - lib/amakanize/filters/base_filter.rb
91
92
  - lib/amakanize/filters/dash_between_alphabets_normalization_filter.rb
92
93
  - lib/amakanize/filters/html_unescape_filter.rb