panchira 1.5.2 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db34e8033acf822616172b330fc61ffe2ee5a1c9dfe46bc1737257717aeff4c0
4
- data.tar.gz: 5ec893680ef7e04b2f85d16b3458ee9f6b2db76ac6c01544a088d280a574e98c
3
+ metadata.gz: 3c9dcae2b8945a7deca6a15ec2effb1c41b3839209d8ce50bc1ef0969e26f12a
4
+ data.tar.gz: ac932d71e0a592afce2dc5075a47ece87e27f8adf79fe2d00c1bfb62ecedb822
5
5
  SHA512:
6
- metadata.gz: 71b2d7707d78b21004acdca984f1869cc81a9e9169bee9239b13261a79002ed859a5a87c8aa08350a89d37ce734abb47637c909aa01b5dab171eb871ad27d9e0
7
- data.tar.gz: fbe9744acbbdbd13376e2a6bdaf1c4430c20861163b39f6bb7ed26436c8fd6c64876e329c0c9aae5c129a59257d7c06eac2508b7c1283bc27d405f0c2e836b0e
6
+ metadata.gz: dd3ddc556f55e3cc00f529ce5ea278d69225876bfd77b8e874224667ea5f37d072276a050fcaa35cb903d2d7ca13a4111f3f23d59af2a786e781dc7761686f21
7
+ data.tar.gz: 309a2b4a742794d55c9d4d98fdd9c46499181370cf08c116724815e32e4d9e54b81e08b4ea5cfec891425c8cf3bfcfdb0f60c55d9020c022270e86af421414bd
data/.rubocop.yml CHANGED
@@ -37,6 +37,10 @@ Lint/MissingSuper:
37
37
  Lint/SymbolConversion:
38
38
  EnforcedStyle: consistent
39
39
 
40
+ Metrics/BlockLength:
41
+ Exclude:
42
+ - panchira.gemspec
43
+
40
44
  Style/AsciiComments:
41
45
  Enabled: false
42
46
 
@@ -64,6 +68,10 @@ Style/NumericPredicate:
64
68
  Style/ParallelAssignment:
65
69
  Enabled: false
66
70
 
71
+ Style/RegexpLiteral:
72
+ EnforcedStyle: slashes
73
+ AllowInnerSlashes: true
74
+
67
75
  Style/WordArray:
68
76
  EnforcedStyle: brackets
69
77
 
data/CHANGELOG.md CHANGED
@@ -4,6 +4,21 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.6.0 - 2023-01-05
8
+ ### Added
9
+ - Added support for non-Japanese DLsite URLs.
10
+
11
+ ### Fixed
12
+ - Fixed an issue where the FANZA resolver didn't work due to a renewal of the website.
13
+ - Fixed an issue where the Melonbooks resolver didn't work due to a renewal of the website.
14
+
15
+ ## 1.5.3 - 2022-06-04
16
+ ### Fixed
17
+ - Fixed an issue where the Pixiv resolver couldn't retrieve image URLs for manga.
18
+
19
+ ### Changed
20
+ - DLsite resolver now returns descriptions without platform PR sentences.
21
+
7
22
  ## 1.5.2 - 2022-03-20
8
23
  ### Fixed
9
24
  - Fixed an issue where Pixiv resolver can't retrieve not-proxied image scales.
@@ -127,17 +142,3 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
127
142
  ## 0.1.0 - 2020-05-13 [YANKED]
128
143
  ### Added
129
144
  - Released Panchira gem. At this time we can parse only 5 websites.
130
-
131
- [1.3.5]: https://github.com/nuita/panchira/releases/tag/v1.3.5
132
- [1.3.4]: https://github.com/nuita/panchira/releases/tag/v1.3.4
133
- [1.3.3]: https://github.com/nuita/panchira/releases/tag/v1.3.3
134
- [1.3.2]: https://github.com/nuita/panchira/releases/tag/v1.3.2
135
- [1.3.1]: https://github.com/nuita/panchira/releases/tag/v1.3.1
136
- [1.3.0]: https://github.com/nuita/panchira/releases/tag/v1.3.0
137
- [1.2.0]: https://github.com/nuita/panchira/releases/tag/v1.2.0
138
- [1.1.0]: https://github.com/nuita/panchira/releases/tag/v1.1.0
139
- [1.0.0]: https://github.com/nuita/panchira/releases/tag/v1.0.0
140
- [0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
141
- [0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
142
- [0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
143
- [0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.5.2)
4
+ panchira (1.6.0)
5
5
  fastimage (~> 2.1.7)
6
6
  nokogiri (>= 1.10.9, < 1.14.0)
7
7
 
@@ -10,35 +10,40 @@ GEM
10
10
  specs:
11
11
  ast (2.4.2)
12
12
  fastimage (2.1.7)
13
- minitest (5.15.0)
14
- nokogiri (1.13.3-x86_64-darwin)
13
+ json (2.6.3)
14
+ mini_portile2 (2.8.1)
15
+ minitest (5.17.0)
16
+ nokogiri (1.13.10)
17
+ mini_portile2 (~> 2.8.0)
15
18
  racc (~> 1.4)
16
- parallel (1.21.0)
17
- parser (3.1.1.0)
19
+ parallel (1.22.1)
20
+ parser (3.2.0.0)
18
21
  ast (~> 2.4.1)
19
- racc (1.6.0)
22
+ racc (1.6.2)
20
23
  rainbow (3.1.1)
21
24
  rake (12.3.3)
22
- regexp_parser (2.2.1)
25
+ regexp_parser (2.6.1)
23
26
  rexml (3.2.5)
24
- rubocop (1.25.1)
27
+ rubocop (1.42.0)
28
+ json (~> 2.3)
25
29
  parallel (~> 1.10)
26
- parser (>= 3.1.0.0)
30
+ parser (>= 3.1.2.1)
27
31
  rainbow (>= 2.2.2, < 4.0)
28
32
  regexp_parser (>= 1.8, < 3.0)
29
- rexml
30
- rubocop-ast (>= 1.15.1, < 2.0)
33
+ rexml (>= 3.2.5, < 4.0)
34
+ rubocop-ast (>= 1.24.1, < 2.0)
31
35
  ruby-progressbar (~> 1.7)
32
36
  unicode-display_width (>= 1.4.0, < 3.0)
33
- rubocop-ast (1.16.0)
37
+ rubocop-ast (1.24.1)
34
38
  parser (>= 3.1.1.0)
35
- rubocop-minitest (0.17.2)
39
+ rubocop-minitest (0.25.1)
36
40
  rubocop (>= 0.90, < 2.0)
37
41
  ruby-progressbar (1.11.0)
38
- unicode-display_width (2.1.0)
42
+ unicode-display_width (2.4.2)
39
43
 
40
44
  PLATFORMS
41
45
  ruby
46
+ x86_64-linux
42
47
 
43
48
  DEPENDENCIES
44
49
  bundler (~> 2.0)
@@ -49,4 +54,4 @@ DEPENDENCIES
49
54
  rubocop-minitest (~> 0.10)
50
55
 
51
56
  BUNDLED WITH
52
- 2.1.4
57
+ 2.4.2
@@ -18,6 +18,10 @@ module Panchira
18
18
  @title_md[1]
19
19
  end
20
20
 
21
+ def parse_description
22
+ super.split('「DLsite').first
23
+ end
24
+
21
25
  def parse_authors
22
26
  @page.css('table[id*="work_"] tr').each do |tr|
23
27
  next unless tr.css('th').text =~ /(作|著)者/
@@ -47,6 +51,10 @@ module Panchira
47
51
  end
48
52
  end
49
53
  end
54
+
55
+ def parse_canonical_url
56
+ super[/^.+\/product_id\/[^\/]+/]
57
+ end
50
58
  end
51
59
 
52
60
  ::Panchira::Extensions.register(Panchira::DlsiteResolver)
@@ -15,29 +15,38 @@ module Panchira
15
15
  end
16
16
 
17
17
  class FanzaBookResolver < FanzaResolver
18
- URL_REGEXP = %r{book\.dmm\.co\.jp/}.freeze
18
+ URL_REGEXP = /book\.dmm\.co\.jp\//.freeze
19
19
 
20
20
  private
21
21
 
22
- def parse_author
23
- @page.css('.m-boxDetailProductInfoMainList__description__list__item > a').first&.text.to_s
22
+ def after_fetch
23
+ text = @page.css('//script[type="application/ld+json"]').first.text
24
+ @schema = JSON.parse(text)
25
+ end
26
+
27
+ def parse_title
28
+ @schema['name']
29
+ end
30
+
31
+ def parse_authors
32
+ @schema['subjectOf']['author']['name']
24
33
  end
25
34
 
26
35
  def parse_image_url
27
- @page.css('.m-imgDetailProductPack/@src').first.to_s
36
+ @schema['image'].sub('ps.', 'pl.')
28
37
  end
29
38
 
30
39
  def parse_tags
31
- @page.css('.m-boxDetailProductInfo__list__description__item > a').map(&:text)
40
+ @schema['subjectOf']['genre']
32
41
  end
33
42
 
34
43
  def parse_description
35
- @page.css('.m-boxDetailProduct__info__story').first&.text.to_s.gsub(/[\n\t]/, '')
44
+ @schema['description']
36
45
  end
37
46
  end
38
47
 
39
48
  class FanzaDoujinResolver < FanzaResolver
40
- URL_REGEXP = %r{dmm\.co\.jp/dc/doujin/}.freeze
49
+ URL_REGEXP = /dmm\.co\.jp\/dc\/doujin\//.freeze
41
50
 
42
51
  private
43
52
 
@@ -57,7 +66,7 @@ module Panchira
57
66
  end
58
67
 
59
68
  class FanzaVideoResolver < FanzaResolver
60
- URL_REGEXP = %r{www.dmm.co.jp/digital/}.freeze
69
+ URL_REGEXP = /www.dmm.co.jp\/digital\//.freeze
61
70
 
62
71
  private
63
72
 
@@ -5,13 +5,14 @@ module Panchira
5
5
  URL_REGEXP = /(www|ecchi)\.iwara\.tv\//.freeze
6
6
 
7
7
  private
8
+
8
9
  def parse_title
9
10
  super.split(' | ')[0]
10
11
  end
11
12
 
12
13
  def parse_image_url
13
- url = @page.at_css('#video-player')&.attributes['poster']&.value
14
- 'https:' + url if url
14
+ url = @page.at_css('#video-player')&.attributes&.fetch('poster')&.value
15
+ "https:#{url}" if url
15
16
  end
16
17
 
17
18
  def parse_author
@@ -4,7 +4,7 @@ module Panchira
4
4
  # KomifloResolver resolves Komiflo.
5
5
  # Komiflo has its API server, so we can utilize it.
6
6
  class KomifloResolver < Resolver
7
- URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
7
+ URL_REGEXP = /komiflo\.com(?:\/#!)?\/comics\/(\d+)/.freeze
8
8
 
9
9
  def initialize(url, options = nil)
10
10
  super(url, options)
@@ -33,7 +33,7 @@ module Panchira
33
33
  end
34
34
 
35
35
  def parse_canonical_url
36
- id = @url.slice(%r{komiflo\.com(?:/#!)?/comics/(\d+)}, 1)
36
+ id = @url.slice(/komiflo\.com(?:\/#!)?\/comics\/(\d+)/, 1)
37
37
  "https://komiflo.com/comics/#{id}"
38
38
  end
39
39
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Panchira
4
4
  class MelonbooksResolver < Resolver
5
- URL_REGEXP = %r{melonbooks.co.jp/detail/detail.php\?product_id=(\d+)}.freeze
5
+ URL_REGEXP = /melonbooks.co.jp\/detail\/detail.php\?product_id=(\d+)/.freeze
6
6
 
7
7
  def fetch
8
8
  result = PanchiraResult.new
@@ -12,11 +12,12 @@ module Panchira
12
12
 
13
13
  @page = fetch_page(result.canonical_url) if @url != result.canonical_url
14
14
 
15
- result.title, result.author, result.circle = parse_table
15
+ result.title = parse_title
16
+ result.author, result.circle = parse_table
16
17
  result.description = parse_description
17
18
  result.image = parse_image
18
- result.tags = parse_tags
19
19
  result.resolver = parse_resolver
20
+ result.tags = parse_tags
20
21
 
21
22
  result
22
23
  end
@@ -24,12 +25,10 @@ module Panchira
24
25
  private
25
26
 
26
27
  def parse_table
27
- title, author, circle = nil, nil, nil
28
+ author, circle = nil, nil
28
29
 
29
- @page.css('#description > table.stripe > tr').each do |tr|
30
+ @page.css('div.table-wrapper > table > tr').each do |tr|
30
31
  case tr.css('th').text
31
- when 'タイトル'
32
- title = tr.css('td').text.strip
33
32
  when 'サークル名'
34
33
  circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)&.first
35
34
  when '作家名'
@@ -37,7 +36,11 @@ module Panchira
37
36
  end
38
37
  end
39
38
 
40
- [title, author, circle]
39
+ [author, circle]
40
+ end
41
+
42
+ def parse_title
43
+ @page.xpath('//h1[@class="page-header"]//text()').text
41
44
  end
42
45
 
43
46
  def parse_canonical_url
@@ -46,14 +49,7 @@ module Panchira
46
49
  end
47
50
 
48
51
  def parse_description
49
- # スタッフの紹介文でidが分岐
50
- special_description = @page.xpath('//div[@id="special_description"]//p/text()')
51
- if special_description.any?
52
- special_description.first.to_s
53
- else
54
- description = @page.xpath('//div[@id="description"]//p/text()')
55
- description.first.to_s
56
- end
52
+ @page.css('div.item-detail > div > p').first.text.strip
57
53
  end
58
54
 
59
55
  def parse_image_url
@@ -64,7 +60,7 @@ module Panchira
64
60
  end
65
61
 
66
62
  def parse_tags
67
- @page.css('#related_tags .clearfix').children.children.map(&:text)
63
+ @page.css('div.item-detail2 > p > a').map { |m| m.text.sub('#', '') }
68
64
  end
69
65
  end
70
66
 
@@ -5,7 +5,7 @@ require 'net/https'
5
5
  module Panchira
6
6
  module Narou
7
7
  class Novel18Resolver < Resolver
8
- URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
8
+ URL_REGEXP = /novel18\.syosetu\.com\//.freeze
9
9
  ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
10
 
11
11
  def initialize(url, options = nil)
@@ -33,7 +33,7 @@ module Panchira
33
33
  return @page.css('//meta[property="og:image"]/@content').first.to_s
34
34
  end
35
35
 
36
- if md = thumbnail_url.match(%r{pic.nijie.net/\w+(?<resolution>/\w+/)nijie.+\.(?<format>png|jpg|jpeg)})
36
+ if md = thumbnail_url.match(/pic.nijie.net\/\w+(?<resolution>\/\w+\/)nijie.+\.(?<format>png|jpg|jpeg)/)
37
37
  thumbnail_url.sub(md[:resolution], '/')
38
38
  else
39
39
  thumbnail_url
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Panchira
4
4
  class PixivResolver < Resolver
5
- URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
5
+ URL_REGEXP = /pixiv\.net\/.*(member_illust.php?.*illust_id=|artworks\/)(\d+)/.freeze
6
6
 
7
7
  def initialize(url, options = nil)
8
8
  super(url, options)
@@ -44,7 +44,8 @@ module Panchira
44
44
  proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
45
45
 
46
46
  case Net::HTTP.get_response(URI.parse(proxy_url))
47
- when Net::HTTPNotFound
47
+ when Net::HTTPMovedPermanently
48
+ # 301が返された場合、locationで渡されたURIにホストが含まれず扱いづらいため決め打ちする
48
49
  proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
49
50
  end
50
51
 
@@ -59,7 +60,7 @@ module Panchira
59
60
  end
60
61
 
61
62
  class PixivNovelResolver < Resolver
62
- URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
63
+ URL_REGEXP = /pixiv\.net\/novel\/show.php\?id=(\d+)/.freeze
63
64
 
64
65
  def initialize(url, options = nil)
65
66
  super(url, options)
@@ -26,6 +26,8 @@ module Panchira
26
26
 
27
27
  @page = fetch_page(result.canonical_url) if @url != result.canonical_url
28
28
 
29
+ after_fetch if respond_to?(:after_fetch, true)
30
+
29
31
  result.title = parse_title
30
32
  result.description = parse_description
31
33
  result.image = parse_image
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'uri'
2
4
 
3
5
  module Panchira
4
6
  class TwitterResolver < Resolver
5
- URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
7
+ URL_REGEXP = /twitter.com\/(\w+)\/status\/(\d+)/.freeze
6
8
 
7
9
  def initialize(url, options = nil)
8
10
  super(url, options)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.5.2'
4
+ VERSION = '1.6.0'
5
5
  end
data/panchira.gemspec CHANGED
@@ -25,10 +25,10 @@ Gem::Specification.new do |spec|
25
25
  # Specify which files should be added to the gem when it is released.
26
26
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
27
27
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
28
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
28
+ `git ls-files -z`.split("\x0").reject { |f| f.match(/^(test|spec|features)\//) }
29
29
  end
30
30
  spec.bindir = 'exe'
31
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
+ spec.executables = spec.files.grep(/^exe\//) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
34
  spec.required_ruby_version = '>= 2.7'
@@ -41,4 +41,5 @@ Gem::Specification.new do |spec|
41
41
 
42
42
  spec.add_dependency 'fastimage', '~> 2.1.7'
43
43
  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
44
+ spec.metadata['rubygems_mfa_required'] = 'true'
44
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.2
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-20 00:00:00.000000000 Z
11
+ date: 2023-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -158,6 +158,7 @@ metadata:
158
158
  homepage_uri: https://github.com/nuita/panchira
159
159
  source_code_uri: https://github.com/nuita/panchira
160
160
  changelog_uri: https://github.com/nuita/panchira/blob/master/CHANGELOG.md
161
+ rubygems_mfa_required: 'true'
161
162
  post_install_message:
162
163
  rdoc_options: []
163
164
  require_paths:
@@ -173,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
173
174
  - !ruby/object:Gem::Version
174
175
  version: '0'
175
176
  requirements: []
176
- rubygems_version: 3.1.4
177
+ rubygems_version: 3.3.4
177
178
  signing_key:
178
179
  specification_version: 4
179
180
  summary: A parser for hentai websites