panchira 1.5.3 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0a6e502368c14a15073925a7c717646a8e59ff1fbb17240aba4abdedd93fec7
4
- data.tar.gz: 42557986e00b95619e845c79b20a15f17370fbf72b8bf163f98e0a1660d2d244
3
+ metadata.gz: 3c9dcae2b8945a7deca6a15ec2effb1c41b3839209d8ce50bc1ef0969e26f12a
4
+ data.tar.gz: ac932d71e0a592afce2dc5075a47ece87e27f8adf79fe2d00c1bfb62ecedb822
5
5
  SHA512:
6
- metadata.gz: 7f500d879e81fda3ac9c3869b16293b1591da1ac60b149e679fa35be797524a63e01696995f9e4d46f3e0854f213dc276dc84427cbb4bd0086504b931e9ac3f0
7
- data.tar.gz: 120c8d8c4d86b3a6dda82d14b9cc63a9e9854d930b835fb46681c2392cabce0807ee67332fe2aaafea4f7afd4f626001842a53a8aa8cdb853bb2c7ff6c1773c2
6
+ metadata.gz: dd3ddc556f55e3cc00f529ce5ea278d69225876bfd77b8e874224667ea5f37d072276a050fcaa35cb903d2d7ca13a4111f3f23d59af2a786e781dc7761686f21
7
+ data.tar.gz: 309a2b4a742794d55c9d4d98fdd9c46499181370cf08c116724815e32e4d9e54b81e08b4ea5cfec891425c8cf3bfcfdb0f60c55d9020c022270e86af421414bd
data/.rubocop.yml CHANGED
@@ -37,6 +37,10 @@ Lint/MissingSuper:
37
37
  Lint/SymbolConversion:
38
38
  EnforcedStyle: consistent
39
39
 
40
+ Metrics/BlockLength:
41
+ Exclude:
42
+ - panchira.gemspec
43
+
40
44
  Style/AsciiComments:
41
45
  Enabled: false
42
46
 
@@ -64,6 +68,10 @@ Style/NumericPredicate:
64
68
  Style/ParallelAssignment:
65
69
  Enabled: false
66
70
 
71
+ Style/RegexpLiteral:
72
+ EnforcedStyle: slashes
73
+ AllowInnerSlashes: true
74
+
67
75
  Style/WordArray:
68
76
  EnforcedStyle: brackets
69
77
 
data/CHANGELOG.md CHANGED
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.6.0 - 2023-01-05
8
+ ### Added
9
+ - Added support for non-Japanese DLsite URLs.
10
+
11
+ ### Fixed
12
+ - Fixed an issue where the FANZA resolver doesn't work due to renewal of the website.
13
+ - Fixed an issue where the Melonbooks resolver doesn't work due to renewal of the website.
14
+
7
15
  ## 1.5.3 - 2022-06-04
8
16
  ### Fixed
9
17
  - Fixed an issue where Pixiv resolver can't retrieve image urls for manga.
@@ -134,17 +142,3 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
134
142
  ## 0.1.0 - 2020-05-13 [YANKED]
135
143
  ### Added
136
144
  - Released Panchira gem. At this time we can parse only 5 websites.
137
-
138
- [1.3.5]: https://github.com/nuita/panchira/releases/tag/v1.3.5
139
- [1.3.4]: https://github.com/nuita/panchira/releases/tag/v1.3.4
140
- [1.3.3]: https://github.com/nuita/panchira/releases/tag/v1.3.3
141
- [1.3.2]: https://github.com/nuita/panchira/releases/tag/v1.3.2
142
- [1.3.1]: https://github.com/nuita/panchira/releases/tag/v1.3.1
143
- [1.3.0]: https://github.com/nuita/panchira/releases/tag/v1.3.0
144
- [1.2.0]: https://github.com/nuita/panchira/releases/tag/v1.2.0
145
- [1.1.0]: https://github.com/nuita/panchira/releases/tag/v1.1.0
146
- [1.0.0]: https://github.com/nuita/panchira/releases/tag/v1.0.0
147
- [0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
148
- [0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
149
- [0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
150
- [0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.5.3)
4
+ panchira (1.6.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (>= 1.10.9, < 1.14.0)
7
7
 
@@ -10,34 +10,36 @@ GEM
10
10
  specs:
11
11
  ast (2.4.2)
12
12
  fastimage (2.1.7)
13
- mini_portile2 (2.8.0)
14
- minitest (5.15.0)
15
- nokogiri (1.13.6)
13
+ json (2.6.3)
14
+ mini_portile2 (2.8.1)
15
+ minitest (5.17.0)
16
+ nokogiri (1.13.10)
16
17
  mini_portile2 (~> 2.8.0)
17
18
  racc (~> 1.4)
18
19
  parallel (1.22.1)
19
- parser (3.1.2.0)
20
+ parser (3.2.0.0)
20
21
  ast (~> 2.4.1)
21
- racc (1.6.0)
22
+ racc (1.6.2)
22
23
  rainbow (3.1.1)
23
24
  rake (12.3.3)
24
- regexp_parser (2.5.0)
25
+ regexp_parser (2.6.1)
25
26
  rexml (3.2.5)
26
- rubocop (1.30.0)
27
+ rubocop (1.42.0)
28
+ json (~> 2.3)
27
29
  parallel (~> 1.10)
28
- parser (>= 3.1.0.0)
30
+ parser (>= 3.1.2.1)
29
31
  rainbow (>= 2.2.2, < 4.0)
30
32
  regexp_parser (>= 1.8, < 3.0)
31
33
  rexml (>= 3.2.5, < 4.0)
32
- rubocop-ast (>= 1.18.0, < 2.0)
34
+ rubocop-ast (>= 1.24.1, < 2.0)
33
35
  ruby-progressbar (~> 1.7)
34
36
  unicode-display_width (>= 1.4.0, < 3.0)
35
- rubocop-ast (1.18.0)
37
+ rubocop-ast (1.24.1)
36
38
  parser (>= 3.1.1.0)
37
- rubocop-minitest (0.20.0)
39
+ rubocop-minitest (0.25.1)
38
40
  rubocop (>= 0.90, < 2.0)
39
41
  ruby-progressbar (1.11.0)
40
- unicode-display_width (2.1.0)
42
+ unicode-display_width (2.4.2)
41
43
 
42
44
  PLATFORMS
43
45
  ruby
@@ -52,4 +54,4 @@ DEPENDENCIES
52
54
  rubocop-minitest (~> 0.10)
53
55
 
54
56
  BUNDLED WITH
55
- 2.1.4
57
+ 2.4.2
@@ -51,6 +51,10 @@ module Panchira
51
51
  end
52
52
  end
53
53
  end
54
+
55
+ def parse_canonical_url
56
+ super[/^.+\/product_id\/[^\/]+/]
57
+ end
54
58
  end
55
59
 
56
60
  ::Panchira::Extensions.register(Panchira::DlsiteResolver)
@@ -15,29 +15,38 @@ module Panchira
15
15
  end
16
16
 
17
17
  class FanzaBookResolver < FanzaResolver
18
- URL_REGEXP = %r{book\.dmm\.co\.jp/}.freeze
18
+ URL_REGEXP = /book\.dmm\.co\.jp\//.freeze
19
19
 
20
20
  private
21
21
 
22
- def parse_author
23
- @page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
22
+ def after_fetch
23
+ text = @page.css('//script[type="application/ld+json"]').first.text
24
+ @schema = JSON.parse(text)
25
+ end
26
+
27
+ def parse_title
28
+ @schema['name']
29
+ end
30
+
31
+ def parse_authors
32
+ @schema['subjectOf']['author']['name']
24
33
  end
25
34
 
26
35
  def parse_image_url
27
- @page.css('.m-imgDetailProductPack/@src').first.to_s
36
+ @schema['image'].sub('ps.', 'pl.')
28
37
  end
29
38
 
30
39
  def parse_tags
31
- @page.css('.m-boxDetailProductInfo__list__description__item > a').map(&:text)
40
+ @schema['subjectOf']['genre']
32
41
  end
33
42
 
34
43
  def parse_description
35
- @page.css('.m-boxDetailProduct__info__story').first&.text.to_s.gsub(/[\n\t]/, '')
44
+ @schema['description']
36
45
  end
37
46
  end
38
47
 
39
48
  class FanzaDoujinResolver < FanzaResolver
40
- URL_REGEXP = %r{dmm\.co\.jp/dc/doujin/}.freeze
49
+ URL_REGEXP = /dmm\.co\.jp\/dc\/doujin\//.freeze
41
50
 
42
51
  private
43
52
 
@@ -57,7 +66,7 @@ module Panchira
57
66
  end
58
67
 
59
68
  class FanzaVideoResolver < FanzaResolver
60
- URL_REGEXP = %r{www.dmm.co.jp/digital/}.freeze
69
+ URL_REGEXP = /www.dmm.co.jp\/digital\//.freeze
61
70
 
62
71
  private
63
72
 
@@ -5,13 +5,14 @@ module Panchira
5
5
  URL_REGEXP = /(www|ecchi)\.iwara\.tv\//.freeze
6
6
 
7
7
  private
8
+
8
9
  def parse_title
9
10
  super.split(' | ')[0]
10
11
  end
11
12
 
12
13
  def parse_image_url
13
- url = @page.at_css('#video-player')&.attributes['poster']&.value
14
- 'https:' + url if url
14
+ url = @page.at_css('#video-player')&.attributes&.fetch('poster')&.value
15
+ "https:#{url}" if url
15
16
  end
16
17
 
17
18
  def parse_author
@@ -4,7 +4,7 @@ module Panchira
4
4
  # KomifloResolver resolves Komiflo.
5
5
  # Komiflo has its API server, so we can utilize it.
6
6
  class KomifloResolver < Resolver
7
- URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
7
+ URL_REGEXP = /komiflo\.com(?:\/#!)?\/comics\/(\d+)/.freeze
8
8
 
9
9
  def initialize(url, options = nil)
10
10
  super(url, options)
@@ -33,7 +33,7 @@ module Panchira
33
33
  end
34
34
 
35
35
  def parse_canonical_url
36
- id = @url.slice(%r{komiflo\.com(?:/#!)?/comics/(\d+)}, 1)
36
+ id = @url.slice(/komiflo\.com(?:\/#!)?\/comics\/(\d+)/, 1)
37
37
  "https://komiflo.com/comics/#{id}"
38
38
  end
39
39
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Panchira
4
4
  class MelonbooksResolver < Resolver
5
- URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
5
+ URL_REGEXP = /melonbooks.co.jp\/detail\/detail.php\?product_id=(\d+)/.freeze
6
6
 
7
7
  def fetch
8
8
  result = PanchiraResult.new
@@ -12,11 +12,12 @@ module Panchira
12
12
 
13
13
  @page = fetch_page(result.canonical_url) if @url != result.canonical_url
14
14
 
15
- result.title, result.author, result.circle = parse_table
15
+ result.title = parse_title
16
+ result.author, result.circle = parse_table
16
17
  result.description = parse_description
17
18
  result.image = parse_image
18
- result.tags = parse_tags
19
19
  result.resolver = parse_resolver
20
+ result.tags = parse_tags
20
21
 
21
22
  result
22
23
  end
@@ -24,12 +25,10 @@ module Panchira
24
25
  private
25
26
 
26
27
  def parse_table
27
- title, author, circle = nil, nil, nil
28
+ author, circle = nil, nil
28
29
 
29
- @page.css('#description > table.stripe > tr').each do |tr|
30
+ @page.css('div.table-wrapper > table > tr').each do |tr|
30
31
  case tr.css('th').text
31
- when 'タイトル'
32
- title = tr.css('td').text.strip
33
32
  when 'サークル名'
34
33
  circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)&.first
35
34
  when '作家名'
@@ -37,7 +36,11 @@ module Panchira
37
36
  end
38
37
  end
39
38
 
40
- [title, author, circle]
39
+ [author, circle]
40
+ end
41
+
42
+ def parse_title
43
+ @page.xpath('//h1[@class="page-header"]//text()').text
41
44
  end
42
45
 
43
46
  def parse_canonical_url
@@ -46,14 +49,7 @@ module Panchira
46
49
  end
47
50
 
48
51
  def parse_description
49
- # スタッフの紹介文でidが分岐
50
- special_description = @page.xpath('//div[@id="special_description"]//p/text()')
51
- if special_description.any?
52
- special_description.first.to_s
53
- else
54
- description = @page.xpath('//div[@id="description"]//p/text()')
55
- description.first.to_s
56
- end
52
+ @page.css('div.item-detail > div > p').first.text.strip
57
53
  end
58
54
 
59
55
  def parse_image_url
@@ -64,7 +60,7 @@ module Panchira
64
60
  end
65
61
 
66
62
  def parse_tags
67
- @page.css('#related_tags .clearfix').children.children.map(&:text)
63
+ @page.css('div.item-detail2 > p > a').map { |m| m.text.sub('#', '') }
68
64
  end
69
65
  end
70
66
 
@@ -5,7 +5,7 @@ require 'net/https'
5
5
  module Panchira
6
6
  module Narou
7
7
  class Novel18Resolver < Resolver
8
- URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
8
+ URL_REGEXP = /novel18\.syosetu\.com\//.freeze
9
9
  ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
10
 
11
11
  def initialize(url, options = nil)
@@ -33,7 +33,7 @@ module Panchira
33
33
  return @page.css('//meta[property="og:image"]/@content').first.to_s
34
34
  end
35
35
 
36
- if md = thumbnail_url.match(%r{pic.nijie.net/\w+(?<resolution>/\w+/)nijie.+\.(?<format>png|jpg|jpeg)})
36
+ if md = thumbnail_url.match(/pic.nijie.net\/\w+(?<resolution>\/\w+\/)nijie.+\.(?<format>png|jpg|jpeg)/)
37
37
  thumbnail_url.sub(md[:resolution], '/')
38
38
  else
39
39
  thumbnail_url
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Panchira
4
4
  class PixivResolver < Resolver
5
- URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
5
+ URL_REGEXP = /pixiv\.net\/.*(member_illust.php?.*illust_id=|artworks\/)(\d+)/.freeze
6
6
 
7
7
  def initialize(url, options = nil)
8
8
  super(url, options)
@@ -43,7 +43,7 @@ module Panchira
43
43
 
44
44
  proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
45
45
 
46
- case res = Net::HTTP.get_response(URI.parse(proxy_url))
46
+ case Net::HTTP.get_response(URI.parse(proxy_url))
47
47
  when Net::HTTPMovedPermanently
48
48
  # 301が返された場合、locationで渡されたURIにホストが含まれず扱いづらいため決め打ちする
49
49
  proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
@@ -60,7 +60,7 @@ module Panchira
60
60
  end
61
61
 
62
62
  class PixivNovelResolver < Resolver
63
- URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
63
+ URL_REGEXP = /pixiv\.net\/novel\/show.php\?id=(\d+)/.freeze
64
64
 
65
65
  def initialize(url, options = nil)
66
66
  super(url, options)
@@ -26,6 +26,8 @@ module Panchira
26
26
 
27
27
  @page = fetch_page(result.canonical_url) if @url != result.canonical_url
28
28
 
29
+ after_fetch if respond_to?(:after_fetch, true)
30
+
29
31
  result.title = parse_title
30
32
  result.description = parse_description
31
33
  result.image = parse_image
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
 
3
5
  module Panchira
4
6
  class TwitterResolver < Resolver
5
- URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
7
+ URL_REGEXP = /twitter.com\/(\w+)\/status\/(\d+)/.freeze
6
8
 
7
9
  def initialize(url, options = nil)
8
10
  super(url, options)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.5.3'
4
+ VERSION = '1.6.0'
5
5
  end
data/panchira.gemspec CHANGED
@@ -25,10 +25,10 @@ Gem::Specification.new do |spec|
25
25
  # Specify which files should be added to the gem when it is released.
26
26
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
27
27
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
28
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
28
+ `git ls-files -z`.split("\x0").reject { |f| f.match(/^(test|spec|features)\//) }
29
29
  end
30
30
  spec.bindir = 'exe'
31
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
+ spec.executables = spec.files.grep(/^exe\//) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
34
  spec.required_ruby_version = '>= 2.7'
@@ -41,4 +41,5 @@ Gem::Specification.new do |spec|
41
41
 
42
42
  spec.add_dependency 'fastimage', '~> 2.1.7'
43
43
  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
44
+ spec.metadata['rubygems_mfa_required'] = 'true'
44
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.3
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-06-03 00:00:00.000000000 Z
11
+ date: 2023-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -158,6 +158,7 @@ metadata:
158
158
  homepage_uri: https://github.com/nuita/panchira
159
159
  source_code_uri: https://github.com/nuita/panchira
160
160
  changelog_uri: https://github.com/nuita/panchira/blob/master/CHANGELOG.md
161
+ rubygems_mfa_required: 'true'
161
162
  post_install_message:
162
163
  rdoc_options: []
163
164
  require_paths:
@@ -173,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
173
174
  - !ruby/object:Gem::Version
174
175
  version: '0'
175
176
  requirements: []
176
- rubygems_version: 3.1.4
177
+ rubygems_version: 3.3.4
177
178
  signing_key:
178
179
  specification_version: 4
179
180
  summary: A parser for hentai websites