panchira 1.5.3 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +12 -14
- data/Gemfile.lock +16 -14
- data/lib/panchira/resolvers/dlsite_resolver.rb +4 -0
- data/lib/panchira/resolvers/fanza_resolver.rb +17 -8
- data/lib/panchira/resolvers/iwara_resolver.rb +3 -2
- data/lib/panchira/resolvers/komiflo_resolver.rb +2 -2
- data/lib/panchira/resolvers/melonbooks_resolver.rb +13 -17
- data/lib/panchira/resolvers/narou_resolver.rb +1 -1
- data/lib/panchira/resolvers/nijie_resolver.rb +1 -1
- data/lib/panchira/resolvers/pixiv_resolver.rb +11 -4
- data/lib/panchira/resolvers/resolver.rb +2 -0
- data/lib/panchira/resolvers/twitter_resolver.rb +3 -1
- data/lib/panchira/version.rb +1 -1
- data/panchira.gemspec +3 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b02593817c5d22160cbfa4d728ced289a5e01e99c23dcbd718f1298fc6c62ef
|
4
|
+
data.tar.gz: 7d13c5e4a2aecf3dfa884eecb9216df09351d4094d9d8505544835c0227b880b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 820aedc33d0a4b2c6d57906116f14d2fad503f5f6cee2a43e255555c36740a9d382371e08acf30c9a590cd4ad2096335de0e58e762228f3b419d92ee1d0ca18f
|
7
|
+
data.tar.gz: cffff5906df054ed8c444780840560fb7694811302d2ab0887ff2664a5f835507adfc48f9b09bf102e040749db43f5f7e9eb59b011b199ea5cacf7c18313434a
|
data/.rubocop.yml
CHANGED
@@ -37,6 +37,10 @@ Lint/MissingSuper:
|
|
37
37
|
Lint/SymbolConversion:
|
38
38
|
EnforcedStyle: consistent
|
39
39
|
|
40
|
+
Metrics/BlockLength:
|
41
|
+
Exclude:
|
42
|
+
- panchira.gemspec
|
43
|
+
|
40
44
|
Style/AsciiComments:
|
41
45
|
Enabled: false
|
42
46
|
|
@@ -64,6 +68,10 @@ Style/NumericPredicate:
|
|
64
68
|
Style/ParallelAssignment:
|
65
69
|
Enabled: false
|
66
70
|
|
71
|
+
Style/RegexpLiteral:
|
72
|
+
EnforcedStyle: slashes
|
73
|
+
AllowInnerSlashes: true
|
74
|
+
|
67
75
|
Style/WordArray:
|
68
76
|
EnforcedStyle: brackets
|
69
77
|
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 1.6.1 - 2023-01-17
|
8
|
+
### Added
|
9
|
+
- Pixiv resolver now retrieves AI-generated information as a tag.
|
10
|
+
|
11
|
+
## 1.6.0 - 2023-01-05
|
12
|
+
### Added
|
13
|
+
- Added support for non-Japanese DLsite URLs.
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
- Fixed an issue where the FANZA resolver didn't work due to a renewal of the website.
|
17
|
+
- Fixed an issue where the Melonbooks resolver didn't work due to a renewal of the website.
|
18
|
+
|
7
19
|
## 1.5.3 - 2022-06-04
|
8
20
|
### Fixed
|
9
21
|
- Fixed an issue where the Pixiv resolver couldn't retrieve image URLs for manga.
|
@@ -134,17 +146,3 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|
134
146
|
## 0.1.0 - 2020-05-13 [YANKED]
|
135
147
|
### Added
|
136
148
|
- Released Panchira gem. At this time we can parse only 5 websites.
|
137
|
-
|
138
|
-
[1.3.5]: https://github.com/nuita/panchira/releases/tag/v1.3.5
|
139
|
-
[1.3.4]: https://github.com/nuita/panchira/releases/tag/v1.3.4
|
140
|
-
[1.3.3]: https://github.com/nuita/panchira/releases/tag/v1.3.3
|
141
|
-
[1.3.2]: https://github.com/nuita/panchira/releases/tag/v1.3.2
|
142
|
-
[1.3.1]: https://github.com/nuita/panchira/releases/tag/v1.3.1
|
143
|
-
[1.3.0]: https://github.com/nuita/panchira/releases/tag/v1.3.0
|
144
|
-
[1.2.0]: https://github.com/nuita/panchira/releases/tag/v1.2.0
|
145
|
-
[1.1.0]: https://github.com/nuita/panchira/releases/tag/v1.1.0
|
146
|
-
[1.0.0]: https://github.com/nuita/panchira/releases/tag/v1.0.0
|
147
|
-
[0.3.0]: https://github.com/nuita/panchira/releases/tag/v0.3.0
|
148
|
-
[0.2.0]: https://github.com/nuita/panchira/releases/tag/v0.2.0
|
149
|
-
[0.1.1]: https://github.com/nuita/panchira/releases/tag/v0.1.1
|
150
|
-
[0.1.0]: https://github.com/nuita/panchira/releases/tag/v0.1.0
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
panchira (1.5.3)
|
4
|
+
panchira (1.6.1)
|
5
5
|
fastimage (~> 2.1.7)
|
6
6
|
nokogiri (>= 1.10.9, < 1.14.0)
|
7
7
|
|
@@ -10,34 +10,36 @@ GEM
|
|
10
10
|
specs:
|
11
11
|
ast (2.4.2)
|
12
12
|
fastimage (2.1.7)
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
json (2.6.3)
|
14
|
+
mini_portile2 (2.8.1)
|
15
|
+
minitest (5.17.0)
|
16
|
+
nokogiri (1.13.10)
|
16
17
|
mini_portile2 (~> 2.8.0)
|
17
18
|
racc (~> 1.4)
|
18
19
|
parallel (1.22.1)
|
19
|
-
parser (3.
|
20
|
+
parser (3.2.0.0)
|
20
21
|
ast (~> 2.4.1)
|
21
|
-
racc (1.6.
|
22
|
+
racc (1.6.2)
|
22
23
|
rainbow (3.1.1)
|
23
24
|
rake (12.3.3)
|
24
|
-
regexp_parser (2.
|
25
|
+
regexp_parser (2.6.1)
|
25
26
|
rexml (3.2.5)
|
26
|
-
rubocop (1.
|
27
|
+
rubocop (1.42.0)
|
28
|
+
json (~> 2.3)
|
27
29
|
parallel (~> 1.10)
|
28
|
-
parser (>= 3.1.
|
30
|
+
parser (>= 3.1.2.1)
|
29
31
|
rainbow (>= 2.2.2, < 4.0)
|
30
32
|
regexp_parser (>= 1.8, < 3.0)
|
31
33
|
rexml (>= 3.2.5, < 4.0)
|
32
|
-
rubocop-ast (>= 1.
|
34
|
+
rubocop-ast (>= 1.24.1, < 2.0)
|
33
35
|
ruby-progressbar (~> 1.7)
|
34
36
|
unicode-display_width (>= 1.4.0, < 3.0)
|
35
|
-
rubocop-ast (1.
|
37
|
+
rubocop-ast (1.24.1)
|
36
38
|
parser (>= 3.1.1.0)
|
37
|
-
rubocop-minitest (0.
|
39
|
+
rubocop-minitest (0.25.1)
|
38
40
|
rubocop (>= 0.90, < 2.0)
|
39
41
|
ruby-progressbar (1.11.0)
|
40
|
-
unicode-display_width (2.
|
42
|
+
unicode-display_width (2.4.2)
|
41
43
|
|
42
44
|
PLATFORMS
|
43
45
|
ruby
|
@@ -52,4 +54,4 @@ DEPENDENCIES
|
|
52
54
|
rubocop-minitest (~> 0.10)
|
53
55
|
|
54
56
|
BUNDLED WITH
|
55
|
-
2.
|
57
|
+
2.4.2
|
@@ -15,29 +15,38 @@ module Panchira
|
|
15
15
|
end
|
16
16
|
|
17
17
|
class FanzaBookResolver < FanzaResolver
|
18
|
-
URL_REGEXP =
|
18
|
+
URL_REGEXP = /book\.dmm\.co\.jp\//.freeze
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
|
-
def
|
23
|
-
@page.css('
|
22
|
+
def after_fetch
|
23
|
+
text = @page.css('//script[type="application/ld+json"]').first.text
|
24
|
+
@schema = JSON.parse(text)
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_title
|
28
|
+
@schema['name']
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_authors
|
32
|
+
@schema['subjectOf']['author']['name']
|
24
33
|
end
|
25
34
|
|
26
35
|
def parse_image_url
|
27
|
-
@
|
36
|
+
@schema['image'].sub('ps.', 'pl.')
|
28
37
|
end
|
29
38
|
|
30
39
|
def parse_tags
|
31
|
-
@
|
40
|
+
@schema['subjectOf']['genre']
|
32
41
|
end
|
33
42
|
|
34
43
|
def parse_description
|
35
|
-
@
|
44
|
+
@schema['description']
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
39
48
|
class FanzaDoujinResolver < FanzaResolver
|
40
|
-
URL_REGEXP =
|
49
|
+
URL_REGEXP = /dmm\.co\.jp\/dc\/doujin\//.freeze
|
41
50
|
|
42
51
|
private
|
43
52
|
|
@@ -57,7 +66,7 @@ module Panchira
|
|
57
66
|
end
|
58
67
|
|
59
68
|
class FanzaVideoResolver < FanzaResolver
|
60
|
-
URL_REGEXP =
|
69
|
+
URL_REGEXP = /www.dmm.co.jp\/digital\//.freeze
|
61
70
|
|
62
71
|
private
|
63
72
|
|
@@ -5,13 +5,14 @@ module Panchira
|
|
5
5
|
URL_REGEXP = /(www|ecchi)\.iwara\.tv\//.freeze
|
6
6
|
|
7
7
|
private
|
8
|
+
|
8
9
|
def parse_title
|
9
10
|
super.split(' | ')[0]
|
10
11
|
end
|
11
12
|
|
12
13
|
def parse_image_url
|
13
|
-
url = @page.at_css('#video-player')&.attributes
|
14
|
-
|
14
|
+
url = @page.at_css('#video-player')&.attributes&.fetch('poster')&.value
|
15
|
+
"https:#{url}" if url
|
15
16
|
end
|
16
17
|
|
17
18
|
def parse_author
|
@@ -4,7 +4,7 @@ module Panchira
|
|
4
4
|
# KomifloResolver resolves Komiflo.
|
5
5
|
# Komiflo has its API server, so we can utilize it.
|
6
6
|
class KomifloResolver < Resolver
|
7
|
-
URL_REGEXP =
|
7
|
+
URL_REGEXP = /komiflo\.com(?:\/#!)?\/comics\/(\d+)/.freeze
|
8
8
|
|
9
9
|
def initialize(url, options = nil)
|
10
10
|
super(url, options)
|
@@ -33,7 +33,7 @@ module Panchira
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def parse_canonical_url
|
36
|
-
id = @url.slice(
|
36
|
+
id = @url.slice(/komiflo\.com(?:\/#!)?\/comics\/(\d+)/, 1)
|
37
37
|
"https://komiflo.com/comics/#{id}"
|
38
38
|
end
|
39
39
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class MelonbooksResolver < Resolver
|
5
|
-
URL_REGEXP =
|
5
|
+
URL_REGEXP = /melonbooks.co.jp\/detail\/detail.php\?product_id=(\d+)/.freeze
|
6
6
|
|
7
7
|
def fetch
|
8
8
|
result = PanchiraResult.new
|
@@ -12,11 +12,12 @@ module Panchira
|
|
12
12
|
|
13
13
|
@page = fetch_page(result.canonical_url) if @url != result.canonical_url
|
14
14
|
|
15
|
-
result.title
|
15
|
+
result.title = parse_title
|
16
|
+
result.author, result.circle = parse_table
|
16
17
|
result.description = parse_description
|
17
18
|
result.image = parse_image
|
18
|
-
result.tags = parse_tags
|
19
19
|
result.resolver = parse_resolver
|
20
|
+
result.tags = parse_tags
|
20
21
|
|
21
22
|
result
|
22
23
|
end
|
@@ -24,12 +25,10 @@ module Panchira
|
|
24
25
|
private
|
25
26
|
|
26
27
|
def parse_table
|
27
|
-
|
28
|
+
author, circle = nil, nil
|
28
29
|
|
29
|
-
@page.css('
|
30
|
+
@page.css('div.table-wrapper > table > tr').each do |tr|
|
30
31
|
case tr.css('th').text
|
31
|
-
when 'タイトル'
|
32
|
-
title = tr.css('td').text.strip
|
33
32
|
when 'サークル名'
|
34
33
|
circle = tr.css('td > a').text.match(/^(.+)\W\(作品数:/)&.values_at(1)&.first
|
35
34
|
when '作家名'
|
@@ -37,7 +36,11 @@ module Panchira
|
|
37
36
|
end
|
38
37
|
end
|
39
38
|
|
40
|
-
[
|
39
|
+
[author, circle]
|
40
|
+
end
|
41
|
+
|
42
|
+
def parse_title
|
43
|
+
@page.xpath('//h1[@class="page-header"]//text()').text
|
41
44
|
end
|
42
45
|
|
43
46
|
def parse_canonical_url
|
@@ -46,14 +49,7 @@ module Panchira
|
|
46
49
|
end
|
47
50
|
|
48
51
|
def parse_description
|
49
|
-
|
50
|
-
special_description = @page.xpath('//div[@id="special_description"]//p/text()')
|
51
|
-
if special_description.any?
|
52
|
-
special_description.first.to_s
|
53
|
-
else
|
54
|
-
description = @page.xpath('//div[@id="description"]//p/text()')
|
55
|
-
description.first.to_s
|
56
|
-
end
|
52
|
+
@page.css('div.item-detail > div > p').first.text.strip
|
57
53
|
end
|
58
54
|
|
59
55
|
def parse_image_url
|
@@ -64,7 +60,7 @@ module Panchira
|
|
64
60
|
end
|
65
61
|
|
66
62
|
def parse_tags
|
67
|
-
@page.css('
|
63
|
+
@page.css('div.item-detail2 > p > a').map { |m| m.text.sub('#', '') }
|
68
64
|
end
|
69
65
|
end
|
70
66
|
|
@@ -5,7 +5,7 @@ require 'net/https'
|
|
5
5
|
module Panchira
|
6
6
|
module Narou
|
7
7
|
class Novel18Resolver < Resolver
|
8
|
-
URL_REGEXP =
|
8
|
+
URL_REGEXP = /novel18\.syosetu\.com\//.freeze
|
9
9
|
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
10
10
|
|
11
11
|
def initialize(url, options = nil)
|
@@ -33,7 +33,7 @@ module Panchira
|
|
33
33
|
return @page.css('//meta[property="og:image"]/@content').first.to_s
|
34
34
|
end
|
35
35
|
|
36
|
-
if md = thumbnail_url.match(
|
36
|
+
if md = thumbnail_url.match(/pic.nijie.net\/\w+(?<resolution>\/\w+\/)nijie.+\.(?<format>png|jpg|jpeg)/)
|
37
37
|
thumbnail_url.sub(md[:resolution], '/')
|
38
38
|
else
|
39
39
|
thumbnail_url
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module Panchira
|
4
4
|
class PixivResolver < Resolver
|
5
|
-
URL_REGEXP =
|
5
|
+
URL_REGEXP = /pixiv\.net\/.*(member_illust.php?.*illust_id=|artworks\/)(\d+)/.freeze
|
6
6
|
|
7
7
|
def initialize(url, options = nil)
|
8
8
|
super(url, options)
|
@@ -43,7 +43,7 @@ module Panchira
|
|
43
43
|
|
44
44
|
proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
|
45
45
|
|
46
|
-
case
|
46
|
+
case Net::HTTP.get_response(URI.parse(proxy_url))
|
47
47
|
when Net::HTTPMovedPermanently
|
48
48
|
# 301が返された場合、locationで渡されたURIにホストが含まれず扱いづらいため決め打ちする
|
49
49
|
proxy_url = "https://pixiv.cat/#{@illust_id}-1.jpg"
|
@@ -55,12 +55,19 @@ module Panchira
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def parse_tags
|
58
|
-
@json['body']['tags']['tags'].map { |content| content['tag'] }
|
58
|
+
tags = @json['body']['tags']['tags'].map { |content| content['tag'] }
|
59
|
+
tags.unshift('AI生成') if parse_ai
|
60
|
+
|
61
|
+
tags
|
62
|
+
end
|
63
|
+
|
64
|
+
def parse_ai
|
65
|
+
@json['body']['aiType'] == 2
|
59
66
|
end
|
60
67
|
end
|
61
68
|
|
62
69
|
class PixivNovelResolver < Resolver
|
63
|
-
URL_REGEXP =
|
70
|
+
URL_REGEXP = /pixiv\.net\/novel\/show.php\?id=(\d+)/.freeze
|
64
71
|
|
65
72
|
def initialize(url, options = nil)
|
66
73
|
super(url, options)
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'uri'
|
2
4
|
|
3
5
|
module Panchira
|
4
6
|
class TwitterResolver < Resolver
|
5
|
-
URL_REGEXP =
|
7
|
+
URL_REGEXP = /twitter.com\/(\w+)\/status\/(\d+)/.freeze
|
6
8
|
|
7
9
|
def initialize(url, options = nil)
|
8
10
|
super(url, options)
|
data/lib/panchira/version.rb
CHANGED
data/panchira.gemspec
CHANGED
@@ -25,10 +25,10 @@ Gem::Specification.new do |spec|
|
|
25
25
|
# Specify which files should be added to the gem when it is released.
|
26
26
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
27
27
|
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
28
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(
|
28
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(/^(test|spec|features)\//) }
|
29
29
|
end
|
30
30
|
spec.bindir = 'exe'
|
31
|
-
spec.executables = spec.files.grep(
|
31
|
+
spec.executables = spec.files.grep(/^exe\//) { |f| File.basename(f) }
|
32
32
|
spec.require_paths = ['lib']
|
33
33
|
|
34
34
|
spec.required_ruby_version = '>= 2.7'
|
@@ -41,4 +41,5 @@ Gem::Specification.new do |spec|
|
|
41
41
|
|
42
42
|
spec.add_dependency 'fastimage', '~> 2.1.7'
|
43
43
|
spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
|
44
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
44
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.3
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -158,6 +158,7 @@ metadata:
|
|
158
158
|
homepage_uri: https://github.com/nuita/panchira
|
159
159
|
source_code_uri: https://github.com/nuita/panchira
|
160
160
|
changelog_uri: https://github.com/nuita/panchira/blob/master/CHANGELOG.md
|
161
|
+
rubygems_mfa_required: 'true'
|
161
162
|
post_install_message:
|
162
163
|
rdoc_options: []
|
163
164
|
require_paths:
|
@@ -173,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
174
|
- !ruby/object:Gem::Version
|
174
175
|
version: '0'
|
175
176
|
requirements: []
|
176
|
-
rubygems_version: 3.
|
177
|
+
rubygems_version: 3.3.4
|
177
178
|
signing_key:
|
178
179
|
specification_version: 4
|
179
180
|
summary: A parser for hentai websites
|