panchira 1.1.0 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1809ad9bff82b513e43a58b5270cf67232ccec9bb440b549b0faacfa24d1c9b5
4
- data.tar.gz: 7948df0b4ef085a2f37c6ae7c30801da7bc6f5a997dff74e108d51e4c5f4204e
3
+ metadata.gz: 4b6ab5f09a68a55d7fa54720b6ed5544605c5b4fa25ff839583058ac3af5611b
4
+ data.tar.gz: 22814f2437ac515c59d22a634c4709212eb639735a831157b7a64375559c1cf1
5
5
  SHA512:
6
- metadata.gz: 0cac7d53c293e5426db620e2fe7eb358b00e4bd7a5e1d0d05a743933f90056c1416391139b193a967a1a675c3bf6863ae691a99f893d4956cd49fd5d0316773b
7
- data.tar.gz: 013ea3d18b91410392a8d1727197746ad6b210dac2898c44f68e81f67b5361ebf034bb5f24f594cd78177b00f862478dc4ebdba7022875a60351f32fc3b5826c
6
+ metadata.gz: a2a42f6023db6ce19d076079044c6fd88e49870478590d89ad25810dd3b72e72439627b416bd3d439d78ab8eaa72683746617fe565f441e69a758811e4b50023
7
+ data.tar.gz: 6f4573bf646ea21d9d5887ee67a1e95e275e6b1de6d152d4c096c7b8a4e70fa29409143a8a00af0c32f6a1e023f4b0a5c84a30d50fd789dc5f6822cb161cd0ed
@@ -21,10 +21,7 @@ jobs:
21
21
  steps:
22
22
  - uses: actions/checkout@v2
23
23
  - name: Set up Ruby
24
- # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
25
- # change this to (see https://github.com/ruby/setup-ruby#versioning):
26
- # uses: ruby/setup-ruby@v1
27
- uses: ruby/setup-ruby@ec106b438a1ff6ff109590de34ddc62c540232e0
24
+ uses: ruby/setup-ruby@v1
28
25
  with:
29
26
  ruby-version: 2.6
30
27
  - name: Install dependencies
data/.rubocop.yml ADDED
@@ -0,0 +1,64 @@
1
+ inherit_from: .rubocop_todo.yml
2
+
3
+ require:
4
+ - rubocop-minitest
5
+
6
+ AllCops:
7
+ TargetRubyVersion: 2.6
8
+ NewCops: enable
9
+ Exclude:
10
+ - bin/*
11
+ - vendor/bundle/**/*
12
+ SuggestExtensions: false
13
+
14
+ Layout/FirstHashElementIndentation:
15
+ EnforcedStyle: consistent
16
+
17
+ Layout/IndentationConsistency:
18
+ EnforcedStyle: indented_internal_methods
19
+
20
+ Layout/MultilineMethodCallIndentation:
21
+ EnforcedStyle: indented
22
+
23
+ Layout/SpaceInsideHashLiteralBraces:
24
+ EnforcedStyle: no_space
25
+
26
+ Lint/AssignmentInCondition:
27
+ Enabled: false
28
+
29
+ Lint/MissingSuper:
30
+ Exclude:
31
+ - lib/panchira/resolvers/*
32
+
33
+ Style/AsciiComments:
34
+ Enabled: false
35
+
36
+ Style/Documentation:
37
+ Enabled: false
38
+
39
+ Style/EmptyMethod:
40
+ EnforcedStyle: expanded
41
+
42
+ Style/GuardClause:
43
+ Enabled: false
44
+
45
+ Style/IfInsideElse:
46
+ Enabled: false
47
+
48
+ Style/SoleNestedConditional:
49
+ Enabled: false
50
+
51
+ Style/IfUnlessModifier:
52
+ Enabled: false
53
+
54
+ Style/NumericPredicate:
55
+ Enabled: false
56
+
57
+ Style/ParallelAssignment:
58
+ Enabled: false
59
+
60
+ Style/WordArray:
61
+ EnforcedStyle: brackets
62
+
63
+ Style/SymbolArray:
64
+ EnforcedStyle: brackets
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,45 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2021-01-03 15:08:31 UTC using RuboCop version 1.7.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ Lint/MixedRegexpCaptureTypes:
11
+ Exclude:
12
+ - 'lib/panchira/resolvers/nijie_resolver.rb'
13
+
14
+ # Offense count: 1
15
+ # Configuration parameters: AllowedMethods.
16
+ # AllowedMethods: present?, blank?, presence, try, try!, in?
17
+ Lint/SafeNavigationChain:
18
+ Exclude:
19
+ - 'lib/panchira/resolvers/melonbooks_resolver.rb'
20
+
21
+ # Offense count: 8
22
+ # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
23
+ Metrics/AbcSize:
24
+ Max: 40
25
+
26
+ # Offense count: 1
27
+ # Configuration parameters: IgnoredMethods.
28
+ Metrics/CyclomaticComplexity:
29
+ Max: 8
30
+
31
+ # Offense count: 7
32
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
33
+ Metrics/MethodLength:
34
+ Max: 21
35
+
36
+ # Offense count: 11
37
+ Minitest/MultipleAssertions:
38
+ Max: 15
39
+
40
+ # Offense count: 2
41
+ # Cop supports --auto-correct.
42
+ # Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
43
+ # URISchemes: http, https
44
+ Layout/LineLength:
45
+ Max: 132
data/CHANGELOG.md CHANGED
@@ -4,6 +4,42 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.3.2 - 2021-05-23
8
+ ### Fixed
9
+ - Fixed an issue where Fanza Resolver was retrieving incorrect canonical URLs from meta tags.
10
+ - Fixed an issue where Narou Resolver retrieved wrong descriptions from meta tags.
11
+
12
+ ### Changed
13
+ - Updated dependencies.
14
+
15
+ ## 1.3.1 - 2021-02-17
16
+ ### Added
17
+ - Added support for Fanza Video.
18
+
19
+ ## 1.3.0 - 2021-02-06
20
+ ### Added
21
+ - Added support for multiple authors. PanchiraResult#authors now returns an array of authors.
22
+ - PanchiraResult now returns a resolver name used in the process (PanchiraResult#resolver).
23
+
24
+ ### Fixed
25
+ - Fixed an issue where fetching DLSite pages with multiple authors was not working.
26
+ - Fixed a slight issue in MelonbooksResolver.
27
+
28
+ ## 1.2.0 - 2020-10-31
29
+ ### Added
30
+ - You can now fetch author and circle name in resolvers (Resolver#fetch_author, Resolver#fetch_circle).
31
+
32
+ ### Changed
33
+ - Resolver#fetch_title returns the title of the content (not the original title of the page).
34
+
35
+ ## 1.1.1 - 2020-08-09
36
+ ### Added
37
+ - Added support for Fanza Doujin.
38
+ - Added support for description in Fanza Book.
39
+
40
+ ### Fixed
41
+ - Fixed an issue where fetching images was not working in Fanza Book.
42
+
7
43
  ## 1.1.0 - 2020-08-06
8
44
  ### Added
9
45
  - Added support for Fanza Books.
@@ -43,6 +79,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
43
79
  ### Added
44
80
  - Released Panchira gem. At this time we can parse only 5 websites.
45
81
 
82
+ [1.3.1]: https://github.com/nuita/panchira/releases/tag/v1.3.1
83
+ [1.3.0]: https://github.com/nuita/panchira/releases/tag/v1.3.0
84
+ [1.2.0]: https://github.com/nuita/panchira/releases/tag/v1.2.0
46
85
  [1.1.0]: https://github.com/nuita/panchira/releases/tag/v1.1.0
47
86
  [1.0.0]: https://github.com/nuita/panchira/releases/tag/v1.0.0
48
87
  [0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
data/Gemfile CHANGED
@@ -1,4 +1,6 @@
1
- source "https://rubygems.org"
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in panchira.gemspec
4
6
  gemspec
data/Gemfile.lock CHANGED
@@ -1,19 +1,41 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.1.0)
4
+ panchira (1.3.2)
5
5
  fastimage (~> 2.1.7)
6
- nokogiri (~> 1.10.9)
6
+ nokogiri (>= 1.10.9, < 1.12.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
+ ast (2.4.2)
11
12
  fastimage (2.1.7)
12
- mini_portile2 (2.4.0)
13
- minitest (5.14.0)
14
- nokogiri (1.10.9)
15
- mini_portile2 (~> 2.4.0)
13
+ minitest (5.14.4)
14
+ nokogiri (1.11.5-x86_64-darwin)
15
+ racc (~> 1.4)
16
+ parallel (1.20.1)
17
+ parser (3.0.1.1)
18
+ ast (~> 2.4.1)
19
+ racc (1.5.2)
20
+ rainbow (3.0.0)
16
21
  rake (12.3.3)
22
+ regexp_parser (2.1.1)
23
+ rexml (3.2.5)
24
+ rubocop (1.15.0)
25
+ parallel (~> 1.10)
26
+ parser (>= 3.0.0.0)
27
+ rainbow (>= 2.2.2, < 4.0)
28
+ regexp_parser (>= 1.8, < 3.0)
29
+ rexml
30
+ rubocop-ast (>= 1.5.0, < 2.0)
31
+ ruby-progressbar (~> 1.7)
32
+ unicode-display_width (>= 1.4.0, < 3.0)
33
+ rubocop-ast (1.5.0)
34
+ parser (>= 3.0.1.1)
35
+ rubocop-minitest (0.12.1)
36
+ rubocop (>= 0.90, < 2.0)
37
+ ruby-progressbar (1.11.0)
38
+ unicode-display_width (2.0.0)
17
39
 
18
40
  PLATFORMS
19
41
  ruby
@@ -23,6 +45,8 @@ DEPENDENCIES
23
45
  minitest (~> 5.0)
24
46
  panchira!
25
47
  rake (~> 12.3.3)
48
+ rubocop (~> 1.7)
49
+ rubocop-minitest (~> 0.10)
26
50
 
27
51
  BUNDLED WITH
28
- 2.0.2
52
+ 2.1.4
data/README.md CHANGED
@@ -39,7 +39,7 @@ Or install it yourself as:
39
39
  ```
40
40
  > Panchira.fetch("https://www.pixiv.net/artworks/61711172")
41
41
 
42
- => #<Panchira::PanchiraResult:0x00007fb95d2c53f8 @canonical_url="https://pixiv.net/member_illust.php?mode=medium&illust_id=61711172", @title="#輿水幸子 すずしい顔で締め切りを破る幸子 - むらためのイラスト - pixiv", @description="(UTF16の)Pietで実行すると「すずしい」と出力する幸子(5色+白Pietカラーゴルフ)。解説記事は http://chy72.hatenablog.com/entry/2016/12/24/1", @image=#<Panchira::PanchiraImage:0x00007fb95f126ea0 @url="https://pixiv.cat/61711172.jpg", @width=810, @height=500>, @tags=["輿水幸子", "Piet", "プログラミング"]>
42
+ => #<Panchira::PanchiraResult:0x00007ff15890e948 @canonical_url="https://pixiv.net/member_illust.php?mode=medium&illust_id=61711172", @title="すずしい顔で締め切りを破る幸子", @description="(UTF16の)Pietで実行すると「すずしい」と出力する幸子(5色+白Pietカラーゴルフ)。解説記事は http://chy72.hatenablog.com/entry/2016/12/24/1", @image=#<Panchira::PanchiraImage:0x00007ff15931fc48 @url="https://pixiv.cat/61711172.jpg", @width=810, @height=500>, @tags=["輿水幸子", "Piet", "プログラミング"], @authors=["むらため"], @circle=nil, @resolver="Panchira::PixivResolver">
43
43
  ```
44
44
 
45
45
  In most situations you would call `Panchira#fetch`. It is a singleton method that takes a URI and returns an instance of `PanchiraResult`, which is a simple class that stores the website's information, such as title, description and so on.
data/Rakefile CHANGED
@@ -1,10 +1,12 @@
1
- require "bundler/gem_tasks"
2
- require "rake/testtask"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rake/testtask'
3
5
 
4
6
  Rake::TestTask.new(:test) do |t|
5
- t.libs << "test"
6
- t.libs << "lib"
7
- t.test_files = FileList["test/**/*_test.rb"]
7
+ t.libs << 'test'
8
+ t.libs << 'lib'
9
+ t.test_files = FileList['test/**/*_test.rb']
8
10
  end
9
11
 
10
- task :default => :test
12
+ task default: :test
data/lib/panchira.rb CHANGED
@@ -11,7 +11,7 @@ require_relative 'panchira/resolvers/resolver'
11
11
  require_relative 'panchira/extensions'
12
12
 
13
13
  project_root = File.dirname(File.absolute_path(__FILE__))
14
- Dir.glob(project_root + '/panchira/resolvers/*_resolver.rb').sort.each { |file| require file }
14
+ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file| require file }
15
15
 
16
16
  # register fallback ImageResolver at the end. (resolver is selected by registration order)
17
17
  ::Panchira::Extensions.register(Panchira::ImageResolver)
@@ -29,12 +29,12 @@ module Panchira
29
29
 
30
30
  private
31
31
 
32
- def select_resolver(url)
33
- Panchira::Extensions.resolvers.each do |resolver|
34
- return resolver if resolver.applicable?(url)
35
- end
32
+ def select_resolver(url)
33
+ Panchira::Extensions.resolvers.each do |resolver|
34
+ return resolver if resolver.applicable?(url)
35
+ end
36
36
 
37
- Panchira::Resolver
38
- end
37
+ Panchira::Resolver
38
+ end
39
39
  end
40
40
  end
@@ -8,6 +8,14 @@ module Panchira
8
8
 
9
9
  # Result class for Panchira.fetch.
10
10
  class PanchiraResult
11
- attr_accessor :canonical_url, :title, :description, :image, :tags
11
+ attr_accessor :canonical_url, :title, :description, :image, :tags, :authors, :circle, :resolver
12
+
13
+ def author
14
+ authors&.join(' ')
15
+ end
16
+
17
+ def author=(value)
18
+ self.authors = [value] if value
19
+ end
12
20
  end
13
21
  end
@@ -6,13 +6,41 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
- def parse_image_url
10
- @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
11
- end
9
+ # DLSiteのタイトルの[]に含まれている値はtitleタグだとサークル名 or 出版社名だが、
10
+ # Panchiraが優先するog:titleではサークル名 or 著者名 となる。
11
+ # 取得に際しては、以下の3パターンを考慮する必要があるため、titleタグとtableの解析が必要となる:
12
+ # 1) 同人系の一部, 特に音声など。タイトル[サークル名]. 本文中に著者・作者の記載なし
13
+ # 2) 同人系の一部, 特に一部の同人誌など。タイトル[サークル名]. 本文中に「作者」の記載あり
14
+ # 3) 商業系。タイトル[著者名] サークル名なし
15
+ # 込み入った実装になってしまったため、parse自体をいじる必要があるかも
16
+ def parse_title
17
+ @title_md = super.match(/(.+) \[(.+)\] \|.+/)
18
+ @title_md[1]
19
+ end
12
20
 
13
- def parse_tags
14
- @page.css('.main_genre').children.children.map(&:text)
15
- end
21
+ def parse_authors
22
+ @page.css('table[id*="work_"] tr').each do |tr|
23
+ next unless tr.css('th').text =~ /(作|著)者/
24
+
25
+ return @authors = tr.css('td > a').map do |node|
26
+ node.text.strip
27
+ end
28
+ end
29
+
30
+ @authors = nil
31
+ end
32
+
33
+ def parse_circle
34
+ @title_md[2] if @authors&.slice(0..2)&.join(' ') != @title_md[2]
35
+ end
36
+
37
+ def parse_image_url
38
+ @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/sam/, 'main')
39
+ end
40
+
41
+ def parse_tags
42
+ @page.css('.main_genre').children.children.map(&:text)
43
+ end
16
44
  end
17
45
 
18
46
  ::Panchira::Extensions.register(Panchira::DlsiteResolver)
@@ -4,20 +4,75 @@ require 'net/https'
4
4
 
5
5
  module Panchira
6
6
  module Fanza
7
- class FanzaBookResolver < Resolver
8
- URL_REGEXP = %r{book\.dmm\.co\.jp\/}.freeze
7
+ FANZA_COOKIE = 'age_check_done=1;'
9
8
 
9
+ class FanzaResolver < Resolver
10
10
  private
11
11
 
12
- def parse_image
13
- @page.css('.m-imgDetailProductPack/@src').first.to_s
14
- end
12
+ def cookie
13
+ ::Panchira::Fanza::FANZA_COOKIE
14
+ end
15
+ end
16
+
17
+ class FanzaBookResolver < FanzaResolver
18
+ URL_REGEXP = %r{book\.dmm\.co\.jp/}.freeze
19
+
20
+ private
21
+
22
+ def parse_author
23
+ @page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
24
+ end
25
+
26
+ def parse_image_url
27
+ @page.css('.m-imgDetailProductPack/@src').first.to_s
28
+ end
29
+
30
+ def parse_tags
31
+ @page.css('.m-boxDetailProductInfo__list__description__item > a').map(&:text)
32
+ end
33
+
34
+ def parse_description
35
+ @page.css('.m-boxDetailProduct__info__story').first&.text.to_s.gsub(/[\n\t]/, '')
36
+ end
37
+ end
38
+
39
+ class FanzaDoujinResolver < FanzaResolver
40
+ URL_REGEXP = %r{dmm\.co\.jp/dc/doujin/}.freeze
41
+
42
+ private
43
+
44
+ # canonical urlに別サービス(FANZA GAMES)のURLが設定されていることがあるため、
45
+ # 別サービスの場合はとりあえず元URLを設定する
46
+ def parse_canonical_url
47
+ @url
48
+ end
49
+
50
+ def parse_circle
51
+ @page.css('a.circleName__txt').first.text
52
+ end
53
+
54
+ def parse_tags
55
+ @page.css('.genreTag__item').map { |t| t.text.strip }
56
+ end
57
+ end
58
+
59
+ class FanzaVideoResolver < FanzaResolver
60
+ URL_REGEXP = %r{www.dmm.co.jp/digital/}.freeze
61
+
62
+ private
63
+
64
+ def parse_title
65
+ # og:titleは文字数制限で短く切られてる
66
+ @page.title.match(/(.+)- \S+ - FANZA動画/)[1]&.strip || super
67
+ end
15
68
 
16
- def cookie
17
- 'age_check_done=1;'
18
- end
69
+ def parse_image_url
70
+ super.sub(/(pr|ps).jpg$/, 'pl.jpg')
71
+ end
19
72
  end
20
73
  end
21
74
 
22
75
  ::Panchira::Extensions.register(Panchira::Fanza::FanzaBookResolver)
76
+ ::Panchira::Extensions.register(Panchira::Fanza::FanzaDoujinResolver)
77
+ ::Panchira::Extensions.register(Panchira::Fanza::FanzaVideoResolver)
23
78
  end
@@ -2,13 +2,14 @@
2
2
 
3
3
  module Panchira
4
4
  class ImageResolver < Resolver
5
- URL_REGEXP = %r{\.(png|gif|jpg|jpeg|webp)$}.freeze
5
+ URL_REGEXP = /\.(png|gif|jpg|jpeg|webp)$/.freeze
6
6
 
7
7
  def fetch
8
8
  result = PanchiraResult.new
9
9
  result.canonical_url = @url
10
10
  result.image = PanchiraImage.new
11
11
  result.image.url = @url
12
+ result.resolver = parse_resolver
12
13
  result
13
14
  end
14
15
  end
@@ -16,31 +16,30 @@ module Panchira
16
16
 
17
17
  private
18
18
 
19
- def parse_title
20
- comic_title = @json['content']['data']['title']
21
- "#{comic_title} | Komiflo"
22
- end
23
-
24
- def parse_image_url
25
- 'https://t.komiflo.com/564_mobile_large_3x/' + @json['content']['named_imgs']['cover']['filename']
26
- end
27
-
28
- def parse_description
29
- author = @json['content']['attributes']['artists']['children'][0]['data']['name']
30
-
31
- parent = @json['content']['parents'][0]['data']['title']
32
- description = '著: ' + author if author
33
- description + " / #{parent}" if parent
34
- end
35
-
36
- def parse_canonical_url
37
- id = @url.slice(%r{komiflo\.com(?:/#!)?/comics/(\d+)}, 1)
38
- 'https://komiflo.com/comics/' + id
39
- end
40
-
41
- def parse_tags
42
- @json['content']['attributes']['tags']['children'].map { |content| content['data']['name'] }
43
- end
19
+ def parse_title
20
+ @json['content']['data']['title']
21
+ end
22
+
23
+ def parse_image_url
24
+ "https://t.komiflo.com/564_mobile_large_3x/#{@json['content']['named_imgs']['cover']['filename']}"
25
+ end
26
+
27
+ def parse_author
28
+ @json['content']['attributes']['artists']['children'][0]['data']['name']
29
+ end
30
+
31
+ def parse_description
32
+ @json['content']['parents'][0]['data']['title']
33
+ end
34
+
35
+ def parse_canonical_url
36
+ id = @url.slice(%r{komiflo\.com(?:/#!)?/comics/(\d+)}, 1)
37
+ "https://komiflo.com/comics/#{id}"
38
+ end
39
+
40
+ def parse_tags
41
+ @json['content']['attributes']['tags']['children'].map { |content| content['data']['name'] }
42
+ end
44
43
  end
45
44
 
46
45
  ::Panchira::Extensions.register(Panchira::KomifloResolver)
@@ -4,31 +4,65 @@ module Panchira
4
4
  class MelonbooksResolver < Resolver
5
5
  URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
6
6
 
7
- private
7
+ def fetch
8
+ result = PanchiraResult.new
9
+
10
+ @page = fetch_page(@url)
11
+ result.canonical_url = parse_canonical_url
12
+
13
+ @page = fetch_page(result.canonical_url) if @url != result.canonical_url
8
14
 
9
- def parse_canonical_url
10
- product_id = @url.slice(URL_REGEXP, 1)
11
- 'https://www.melonbooks.co.jp/detail/detail.php?product_id=' + product_id + '&adult_view=1'
15
+ result.title, result.author, result.circle = parse_table
16
+ result.description = parse_description
17
+ result.image = parse_image
18
+ result.tags = parse_tags
19
+ result.resolver = parse_resolver
20
+
21
+ result
12
22
  end
13
23
 
14
- def parse_description
15
- # スタッフの紹介文でidが分岐
16
- special_description = @page.xpath('//div[@id="special_description"]//p/text()')
17
- if special_description.any?
18
- special_description.first.to_s
19
- else
20
- description = @page.xpath('//div[@id="description"]//p/text()')
21
- description.first.to_s
24
+ private
25
+
26
+ def parse_table
27
+ title, author, circle = nil, nil, nil
28
+
29
+ @page.css('#description > table.stripe > tr').each do |tr|
30
+ case tr.css('th').text
31
+ when 'タイトル'
32
+ title = tr.css('td').text.strip
33
+ when 'サークル名'
34
+ circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)&.first
35
+ when '作家名'
36
+ author = tr.css('td > a').text.strip
37
+ end
38
+ end
39
+
40
+ [title, author, circle]
22
41
  end
23
- end
24
42
 
25
- def parse_image_url
26
- @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/&c=1/, '')
27
- end
43
+ def parse_canonical_url
44
+ product_id = @url.slice(URL_REGEXP, 1)
45
+ "https://www.melonbooks.co.jp/detail/detail.php?product_id=#{product_id}&adult_view=1"
46
+ end
28
47
 
29
- def parse_tags
30
- @page.css('#related_tags .clearfix').children.children.map(&:text)
31
- end
48
+ def parse_description
49
+ # スタッフの紹介文でidが分岐
50
+ special_description = @page.xpath('//div[@id="special_description"]//p/text()')
51
+ if special_description.any?
52
+ special_description.first.to_s
53
+ else
54
+ description = @page.xpath('//div[@id="description"]//p/text()')
55
+ description.first.to_s
56
+ end
57
+ end
58
+
59
+ def parse_image_url
60
+ @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/&c=1/, '')
61
+ end
62
+
63
+ def parse_tags
64
+ @page.css('#related_tags .clearfix').children.children.map(&:text)
65
+ end
32
66
  end
33
67
 
34
68
  ::Panchira::Extensions.register(Panchira::MelonbooksResolver)
@@ -6,35 +6,62 @@ module Panchira
6
6
  module Narou
7
7
  class Novel18Resolver < Resolver
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
- ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}.freeze
9
+ ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
+
11
+ def initialize(url)
12
+ super(url)
13
+
14
+ if id = @url.match(ID_REGEXP)[:id]
15
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
16
+ end
17
+ end
10
18
 
11
19
  def fetch_page(uri)
12
20
  u = URI.parse(uri)
13
21
  http = Net::HTTP.new(u.host, u.port)
14
22
  http.use_ssl = u.port == 443
15
- res = http.get u.request_uri, { 'cookie' => 'over18=yes;' }
23
+ res = http.get u.request_uri, {'cookie' => 'over18=yes;'}
16
24
 
17
25
  Nokogiri::HTML.parse(res.body, uri)
18
26
  end
19
27
 
20
- def parse_tags
21
- id = @url.match(ID_REGEXP)[:id]
22
- return [] unless id
28
+ def parse_description
29
+ @desc&.xpath('//*[@id="noveltable1"]/tr/td')&.first&.text&.strip
30
+ end
23
31
 
24
- desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
25
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ') # つらい。
32
+ def parse_author
33
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
34
+ end
35
+
36
+ def parse_tags
37
+ # つらい。
38
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.split(' ')
26
39
  end
27
40
  end
41
+
28
42
  class NcodeResolver < Resolver
29
- URL_REGEXP = %r{ncode\.syosetu\.com}.freeze
30
- ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}.freeze
43
+ URL_REGEXP = /ncode\.syosetu\.com/.freeze
44
+ ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
31
45
 
32
- def parse_tags
33
- id = @url.match(ID_REGEXP)[:id]
34
- return [] unless id
46
+ def initialize(url)
47
+ super(url)
48
+
49
+ if id = @url.match(ID_REGEXP)[:id]
50
+ @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
51
+ end
52
+ end
53
+
54
+ def parse_description
55
+ @desc&.xpath('//*[@id="noveltable1"]/tr/td')&.first&.text&.strip
56
+ end
57
+
58
+ def parse_author
59
+ @desc&.xpath('//*[@id="noveltable1"]/tr[2]/td')&.text&.strip
60
+ end
35
61
 
36
- desc = fetch_page("https://ncode.syosetu.com/novelview/infotop/ncode/#{id}/")
37
- desc.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('') # めっちゃつらい。
62
+ def parse_tags
63
+ # めっちゃつらい。
64
+ @desc&.xpath('//*[@id="noveltable1"]/tr[3]')&.text&.split("\n\n\n")&.dig(1)&.delete("\u00A0")&.split(' ')&.grep_v('')
38
65
  end
39
66
  end
40
67
  end
@@ -6,28 +6,43 @@ module Panchira
6
6
 
7
7
  private
8
8
 
9
- def parse_canonical_url
10
- @url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
11
- end
9
+ def parse_title
10
+ full_title = super
11
+ @md = full_title.match(/\A(?<title>.+) \| (?<author>.+)\z/)
12
12
 
13
- def parse_image_url
14
- str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s
13
+ @md[:title]
14
+ end
15
+
16
+ def parse_author
17
+ @md[:author]
18
+ end
19
+
20
+ def parse_description
21
+ @page.css('p.illust_description')&.first&.text&.strip
22
+ end
15
23
 
16
- if s = str.match(%r{https://pic.nijie.(net|info)/(?<servername>\d+)/[^/]+/nijie_picture/(?<imagename>[^"]+)})
17
- # 動画は容量大きすぎるし取らない
18
- if s[:imagename] =~ /(jpg|png)/
19
- 'https://pic.nijie.net/' + s[:servername] + '/nijie_picture/' + s[:imagename]
24
+ def parse_canonical_url
25
+ @url.sub(/sp.nijie/, 'nijie').sub(/view_popup/, 'view')
26
+ end
27
+
28
+ def parse_image_url
29
+ str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s
30
+
31
+ if s = str.match(%r{https://pic.nijie.(net|info)/(?<servername>\d+)/[^/]+/nijie_picture/(?<imagename>[^"]+)})
32
+ # 動画は容量大きすぎるし取らない
33
+ if s[:imagename] =~ /(jpg|png)/
34
+ "https://pic.nijie.net/#{s[:servername]}/nijie_picture/#{s[:imagename]}"
35
+ else
36
+ s[0]
37
+ end
20
38
  else
21
- s[0]
39
+ @page.css('//meta[property="og:image"]/@content').first.to_s
22
40
  end
23
- else
24
- @page.css('//meta[property="og:image"]/@content').first.to_s
25
41
  end
26
- end
27
42
 
28
- def parse_tags
29
- @page.css('#view-tag span.tag_name').map(&:text)
30
- end
43
+ def parse_tags
44
+ @page.css('#view-tag span.tag_name').map(&:text)
45
+ end
31
46
  end
32
47
 
33
48
  ::Panchira::Extensions.register(Panchira::NijieResolver)
@@ -14,26 +14,34 @@ module Panchira
14
14
 
15
15
  private
16
16
 
17
- def parse_canonical_url
18
- 'https://pixiv.net/member_illust.php?mode=medium&illust_id=' + @illust_id
19
- end
17
+ def parse_title
18
+ @json['body']['title']
19
+ end
20
20
 
21
- def parse_image_url
22
- proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
21
+ def parse_author
22
+ @json['body']['userName']
23
+ end
23
24
 
24
- case Net::HTTP.get_response(URI.parse(proxy_url))
25
- when Net::HTTPNotFound
26
- proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
25
+ def parse_canonical_url
26
+ "https://pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
27
27
  end
28
28
 
29
- proxy_url
30
- rescue StandardError
31
- @page.css('//meta[property="og:image"]/@content').first.to_s
32
- end
29
+ def parse_image_url
30
+ proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
33
31
 
34
- def parse_tags
35
- @json['body']['tags']['tags'].map{|content| content['tag']}
36
- end
32
+ case Net::HTTP.get_response(URI.parse(proxy_url))
33
+ when Net::HTTPNotFound
34
+ proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
35
+ end
36
+
37
+ proxy_url
38
+ rescue StandardError
39
+ @page.css('//meta[property="og:image"]/@content').first.to_s
40
+ end
41
+
42
+ def parse_tags
43
+ @json['body']['tags']['tags'].map { |content| content['tag'] }
44
+ end
37
45
  end
38
46
 
39
47
  ::Panchira::Extensions.register(Panchira::PixivResolver)
@@ -29,6 +29,13 @@ module Panchira
29
29
  result.description = parse_description
30
30
  result.image = parse_image
31
31
  result.tags = parse_tags
32
+ if respond_to?(:parse_authors, true)
33
+ result.authors = parse_authors
34
+ else
35
+ result.author = parse_author
36
+ end
37
+ result.circle = parse_circle
38
+ result.resolver = parse_resolver
32
39
 
33
40
  result
34
41
  end
@@ -43,75 +50,87 @@ module Panchira
43
50
 
44
51
  private
45
52
 
46
- def fetch_page(url)
47
- read_options = {
48
- 'User-Agent' => user_agent,
49
- 'Cookie' => cookie
50
- }
53
+ def fetch_page(url)
54
+ read_options = {
55
+ 'User-Agent' => user_agent,
56
+ 'Cookie' => cookie
57
+ }
51
58
 
52
- raw_page = URI.parse(url).read(read_options)
53
- charset = raw_page.charset
54
- Nokogiri::HTML.parse(raw_page, url, charset)
55
- end
59
+ raw_page = URI.parse(url).read(read_options)
60
+ charset = raw_page.charset
61
+ Nokogiri::HTML.parse(raw_page, url, charset)
62
+ end
56
63
 
57
- def parse_canonical_url
58
- history = []
64
+ def parse_canonical_url
65
+ history = []
59
66
 
60
- # fetch page and refresh canonical_url until canonical_url converges.
61
- loop do
62
- url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
67
+ # fetch page and refresh canonical_url until canonical_url converges.
68
+ loop do
69
+ url_in_res = @page.css('//link[rel="canonical"]/@href').to_s
63
70
 
64
- if url_in_res.empty?
65
- return history.last || @url
66
- else
67
- if history.include?(url_in_res) || history.length > 5
68
- return url_in_res
71
+ if url_in_res.empty?
72
+ return history.last || @url
69
73
  else
70
- history.push(url_in_res)
71
- @page = fetch_page(url_in_res)
74
+ if history.include?(url_in_res) || history.length > 5
75
+ return url_in_res
76
+ else
77
+ history.push(url_in_res)
78
+ @page = fetch_page(url_in_res)
79
+ end
72
80
  end
73
81
  end
74
82
  end
75
- end
76
83
 
77
- def parse_title
78
- if @page.css('//meta[property="og:title"]/@content').empty?
79
- @page.title.to_s
80
- else
81
- @page.css('//meta[property="og:title"]/@content').to_s
84
+ def parse_title
85
+ if @page.css('//meta[property="og:title"]/@content').empty?
86
+ @page.title.to_s
87
+ else
88
+ @page.css('//meta[property="og:title"]/@content').to_s
89
+ end
82
90
  end
83
- end
84
91
 
85
- def parse_description
86
- if @page.css('//meta[property="og:description"]/@content').empty?
87
- @page.css('//meta[name$="description"]/@content').to_s
88
- else
89
- @page.css('//meta[property="og:description"]/@content').to_s
92
+ def parse_description
93
+ if @page.css('//meta[property="og:description"]/@content').empty?
94
+ @page.css('//meta[name$="description"]/@content').to_s
95
+ else
96
+ @page.css('//meta[property="og:description"]/@content').to_s
97
+ end
90
98
  end
91
- end
92
99
 
93
- def parse_image
94
- image = PanchiraImage.new
95
- image.url = parse_image_url
96
- image.width, image.height = FastImage.size(image.url)
100
+ def parse_image
101
+ image = PanchiraImage.new
102
+ image.url = parse_image_url
103
+ image.width, image.height = FastImage.size(image.url)
97
104
 
98
- image
99
- end
105
+ image
106
+ end
100
107
 
101
- def parse_image_url
102
- @page.css('//meta[property="og:image"]/@content').first.to_s
103
- end
108
+ def parse_image_url
109
+ @page.css('//meta[property="og:image"]/@content').first.to_s
110
+ end
104
111
 
105
- def parse_tags
106
- []
107
- end
112
+ def parse_tags
113
+ []
114
+ end
108
115
 
109
- def cookie
110
- ''
111
- end
116
+ def cookie
117
+ ''
118
+ end
112
119
 
113
- def user_agent
114
- "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
115
- end
120
+ def parse_author
121
+ @page.css('//meta[name="author"]/@content').first.to_s
122
+ end
123
+
124
+ def parse_circle
125
+ nil
126
+ end
127
+
128
+ def parse_resolver
129
+ self.class.to_s
130
+ end
131
+
132
+ def user_agent
133
+ "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
134
+ end
116
135
  end
117
136
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.1.0'
4
+ VERSION = '1.3.2'
5
5
  end
data/panchira.gemspec CHANGED
@@ -1,38 +1,44 @@
1
- lib = File.expand_path("lib", __dir__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
2
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
- require "panchira/version"
5
+ require 'panchira/version'
4
6
 
5
7
  Gem::Specification.new do |spec|
6
- spec.name = "panchira"
8
+ spec.name = 'panchira'
7
9
  spec.version = Panchira::VERSION
8
- spec.authors = ["kyp"]
9
- spec.email = ["kyp@kmc.gr.jp"]
10
+ spec.authors = ['kyp']
11
+ spec.email = ['kyp@kmc.gr.jp']
10
12
 
11
- spec.summary = "A parser for hentai websites"
13
+ spec.summary = 'A parser for hentai websites'
12
14
  spec.description = <<-TEXT
13
15
  Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
14
16
  If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
15
17
  TEXT
16
- spec.homepage = "https://github.com/nuita/panchira"
17
- spec.license = "MIT"
18
+ spec.homepage = 'https://github.com/nuita/panchira'
19
+ spec.license = 'MIT'
18
20
 
19
- spec.metadata["homepage_uri"] = spec.homepage
20
- spec.metadata["source_code_uri"] = "https://github.com/nuita/panchira"
21
- spec.metadata["changelog_uri"] = "https://github.com/nuita/panchira/blob/master/CHANGELOG.md"
21
+ spec.metadata['homepage_uri'] = spec.homepage
22
+ spec.metadata['source_code_uri'] = 'https://github.com/nuita/panchira'
23
+ spec.metadata['changelog_uri'] = 'https://github.com/nuita/panchira/blob/master/CHANGELOG.md'
22
24
 
23
25
  # Specify which files should be added to the gem when it is released.
24
26
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
25
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
27
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
26
28
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
27
29
  end
28
- spec.bindir = "exe"
30
+ spec.bindir = 'exe'
29
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
- spec.require_paths = ["lib"]
32
+ spec.require_paths = ['lib']
33
+
34
+ spec.required_ruby_version = '>= 2.6'
31
35
 
32
- spec.add_development_dependency "bundler", "~> 2.0"
33
- spec.add_development_dependency "rake", "~> 12.3.3"
34
- spec.add_development_dependency "minitest", "~> 5.0"
36
+ spec.add_development_dependency 'bundler', '~> 2.0'
37
+ spec.add_development_dependency 'minitest', '~> 5.0'
38
+ spec.add_development_dependency 'rake', '~> 12.3.3'
39
+ spec.add_development_dependency 'rubocop', '~> 1.7'
40
+ spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
35
41
 
36
- spec.add_dependency "nokogiri", "~> 1.10.9"
37
- spec.add_dependency "fastimage", "~> 2.1.7"
42
+ spec.add_dependency 'fastimage', '~> 2.1.7'
43
+ spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.12.0'
38
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-06 00:00:00.000000000 Z
11
+ date: 2021-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rake
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -39,33 +53,33 @@ dependencies:
39
53
  - !ruby/object:Gem::Version
40
54
  version: 12.3.3
41
55
  - !ruby/object:Gem::Dependency
42
- name: minitest
56
+ name: rubocop
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '5.0'
61
+ version: '1.7'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '5.0'
68
+ version: '1.7'
55
69
  - !ruby/object:Gem::Dependency
56
- name: nokogiri
70
+ name: rubocop-minitest
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 1.10.9
62
- type: :runtime
75
+ version: '0.10'
76
+ type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 1.10.9
82
+ version: '0.10'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: fastimage
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +94,26 @@ dependencies:
80
94
  - - "~>"
81
95
  - !ruby/object:Gem::Version
82
96
  version: 2.1.7
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: 1.10.9
104
+ - - "<"
105
+ - !ruby/object:Gem::Version
106
+ version: 1.12.0
107
+ type: :runtime
108
+ prerelease: false
109
+ version_requirements: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: 1.10.9
114
+ - - "<"
115
+ - !ruby/object:Gem::Version
116
+ version: 1.12.0
83
117
  description: |2
84
118
  Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
85
119
  If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -91,6 +125,8 @@ extra_rdoc_files: []
91
125
  files:
92
126
  - ".github/workflows/ruby.yml"
93
127
  - ".gitignore"
128
+ - ".rubocop.yml"
129
+ - ".rubocop_todo.yml"
94
130
  - CHANGELOG.md
95
131
  - Gemfile
96
132
  - Gemfile.lock
@@ -120,7 +156,7 @@ metadata:
120
156
  homepage_uri: https://github.com/nuita/panchira
121
157
  source_code_uri: https://github.com/nuita/panchira
122
158
  changelog_uri: https://github.com/nuita/panchira/blob/master/CHANGELOG.md
123
- post_install_message:
159
+ post_install_message:
124
160
  rdoc_options: []
125
161
  require_paths:
126
162
  - lib
@@ -128,15 +164,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
128
164
  requirements:
129
165
  - - ">="
130
166
  - !ruby/object:Gem::Version
131
- version: '0'
167
+ version: '2.6'
132
168
  required_rubygems_version: !ruby/object:Gem::Requirement
133
169
  requirements:
134
170
  - - ">="
135
171
  - !ruby/object:Gem::Version
136
172
  version: '0'
137
173
  requirements: []
138
- rubygems_version: 3.0.3
139
- signing_key:
174
+ rubygems_version: 3.1.4
175
+ signing_key:
140
176
  specification_version: 4
141
177
  summary: A parser for hentai websites
142
178
  test_files: []