panchira 1.4.0 → 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -0
- data/.rubocop.yml +7 -0
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +15 -15
- data/README.md +18 -0
- data/lib/panchira/resolvers/komiflo_resolver.rb +2 -2
- data/lib/panchira/resolvers/narou_resolver.rb +4 -4
- data/lib/panchira/resolvers/pixiv_resolver.rb +18 -4
- data/lib/panchira/resolvers/resolver.rb +4 -2
- data/lib/panchira/resolvers/twitter_resolver.rb +85 -5
- data/lib/panchira/version.rb +1 -1
- data/lib/panchira.rb +2 -2
- data/panchira.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db34e8033acf822616172b330fc61ffe2ee5a1c9dfe46bc1737257717aeff4c0
|
4
|
+
data.tar.gz: 5ec893680ef7e04b2f85d16b3458ee9f6b2db76ac6c01544a088d280a574e98c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71b2d7707d78b21004acdca984f1869cc81a9e9169bee9239b13261a79002ed859a5a87c8aa08350a89d37ce734abb47637c909aa01b5dab171eb871ad27d9e0
|
7
|
+
data.tar.gz: fbe9744acbbdbd13376e2a6bdaf1c4430c20861163b39f6bb7ed26436c8fd6c64876e329c0c9aae5c129a59257d7c06eac2508b7c1283bc27d405f0c2e836b0e
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
CHANGED
@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
|
|
17
17
|
Layout/IndentationConsistency:
|
18
18
|
EnforcedStyle: indented_internal_methods
|
19
19
|
|
20
|
+
Layout/MultilineAssignmentLayout:
|
21
|
+
EnforcedStyle: same_line
|
22
|
+
SupportedTypes: ["block"]
|
23
|
+
|
20
24
|
Layout/MultilineMethodCallIndentation:
|
21
25
|
EnforcedStyle: indented
|
22
26
|
|
@@ -30,6 +34,9 @@ Lint/MissingSuper:
|
|
30
34
|
Exclude:
|
31
35
|
- lib/panchira/resolvers/*
|
32
36
|
|
37
|
+
Lint/SymbolConversion:
|
38
|
+
EnforcedStyle: consistent
|
39
|
+
|
33
40
|
Style/AsciiComments:
|
34
41
|
Enabled: false
|
35
42
|
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/).
|
6
6
|
|
7
|
+
## 1.5.2 - 2022-03-20
|
8
|
+
### Fixed
|
9
|
+
- Fixed an issue where the Pixiv resolver couldn't retrieve the dimensions of non-proxied images.
|
10
|
+
|
11
|
+
## 1.5.1 - 2022-03-20
|
12
|
+
### Added
|
13
|
+
- Pixiv resolver can now fetch image URIs that are not proxied.
|
14
|
+
|
15
|
+
## 1.5.0 - 2022-03-01
|
16
|
+
### Changed
|
17
|
+
- You can now set options in Panchira::fetch and Resolver's constructors.
|
18
|
+
- Twitter resolver can now fetch data from the API (requires a bearer token).
|
19
|
+
- Max execution time is now set to 10 seconds.
|
20
|
+
|
7
21
|
## 1.4.0 - 2022-01-10
|
8
22
|
### Added
|
9
23
|
- Added support for non-Japanese pixiv URLs.
|
data/Gemfile.lock
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
panchira (1.4.0)
|
4
|
+
panchira (1.5.2)
|
5
5
|
fastimage (~> 2.1.7)
|
6
|
-
nokogiri (>= 1.10.9, < 1.
|
6
|
+
nokogiri (>= 1.10.9, < 1.14.0)
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
11
|
ast (2.4.2)
|
12
12
|
fastimage (2.1.7)
|
13
|
-
minitest (5.
|
14
|
-
nokogiri (1.
|
13
|
+
minitest (5.15.0)
|
14
|
+
nokogiri (1.13.3-x86_64-darwin)
|
15
15
|
racc (~> 1.4)
|
16
|
-
parallel (1.
|
17
|
-
parser (3.
|
16
|
+
parallel (1.21.0)
|
17
|
+
parser (3.1.1.0)
|
18
18
|
ast (~> 2.4.1)
|
19
19
|
racc (1.6.0)
|
20
|
-
rainbow (3.
|
20
|
+
rainbow (3.1.1)
|
21
21
|
rake (12.3.3)
|
22
|
-
regexp_parser (2.
|
22
|
+
regexp_parser (2.2.1)
|
23
23
|
rexml (3.2.5)
|
24
|
-
rubocop (1.
|
24
|
+
rubocop (1.25.1)
|
25
25
|
parallel (~> 1.10)
|
26
|
-
parser (>= 3.
|
26
|
+
parser (>= 3.1.0.0)
|
27
27
|
rainbow (>= 2.2.2, < 4.0)
|
28
28
|
regexp_parser (>= 1.8, < 3.0)
|
29
29
|
rexml
|
30
|
-
rubocop-ast (>= 1.
|
30
|
+
rubocop-ast (>= 1.15.1, < 2.0)
|
31
31
|
ruby-progressbar (~> 1.7)
|
32
32
|
unicode-display_width (>= 1.4.0, < 3.0)
|
33
|
-
rubocop-ast (1.
|
34
|
-
parser (>= 3.
|
35
|
-
rubocop-minitest (0.
|
33
|
+
rubocop-ast (1.16.0)
|
34
|
+
parser (>= 3.1.1.0)
|
35
|
+
rubocop-minitest (0.17.2)
|
36
36
|
rubocop (>= 0.90, < 2.0)
|
37
37
|
ruby-progressbar (1.11.0)
|
38
|
-
unicode-display_width (2.
|
38
|
+
unicode-display_width (2.1.0)
|
39
39
|
|
40
40
|
PLATFORMS
|
41
41
|
ruby
|
data/README.md
CHANGED
@@ -46,6 +46,24 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
|
|
46
46
|
|
47
47
|
Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering it to Panchira. See `Panchira::Extensions` documentation in source code for further details.
|
48
48
|
|
49
|
+
### About Twitter API
|
50
|
+
|
51
|
+
Due to a recent change in Twitter, it's getting really hard to fetch tweet data by scraping. To solve this problem, Panchira can now use the official Twitter API.
|
52
|
+
|
53
|
+
To use the Twitter API instead of normal scraping, pass your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will just fall back to simple scraping.
|
54
|
+
|
55
|
+
```
|
56
|
+
> Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
|
57
|
+
```
|
58
|
+
|
59
|
+
### About Pixiv proxy
|
60
|
+
|
61
|
+
By default, Panchira returns a link to [Pixiv.cat](https://pixiv.cat/) as the image URI, but you can change this behavior by setting the `fetch_raw_image_url` option. To access the non-proxied URI on pximg.net, you have to set the Referer to `https://app-api.pixiv.net/` in the HTTP request header.
|
62
|
+
|
63
|
+
```
|
64
|
+
> Panchira.fetch("https://pixiv.net/artworks/12345678", {pixiv: {fetch_raw_image_url: true}})
|
65
|
+
```
|
66
|
+
|
49
67
|
## Development
|
50
68
|
|
51
69
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -6,8 +6,8 @@ module Panchira
|
|
6
6
|
class KomifloResolver < Resolver
|
7
7
|
URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
|
8
8
|
|
9
|
-
def initialize(url)
|
10
|
-
|
9
|
+
def initialize(url, options = nil)
|
10
|
+
super(url, options)
|
11
11
|
|
12
12
|
@id = url.slice(URL_REGEXP, 1)
|
13
13
|
raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)
|
@@ -8,8 +8,8 @@ module Panchira
|
|
8
8
|
URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
|
9
9
|
ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
|
10
10
|
|
11
|
-
def initialize(url)
|
12
|
-
super(url)
|
11
|
+
def initialize(url, options = nil)
|
12
|
+
super(url, options)
|
13
13
|
|
14
14
|
if id = @url.match(ID_REGEXP)[:id]
|
15
15
|
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
@@ -48,8 +48,8 @@ module Panchira
|
|
48
48
|
URL_REGEXP = /ncode\.syosetu\.com/.freeze
|
49
49
|
ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
|
50
50
|
|
51
|
-
def initialize(url)
|
52
|
-
super(url)
|
51
|
+
def initialize(url, options = nil)
|
52
|
+
super(url, options)
|
53
53
|
|
54
54
|
if id = @url.match(ID_REGEXP)[:id]
|
55
55
|
@desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
|
@@ -4,12 +4,14 @@ module Panchira
|
|
4
4
|
class PixivResolver < Resolver
|
5
5
|
URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
|
6
6
|
|
7
|
-
def initialize(url)
|
8
|
-
super(url)
|
7
|
+
def initialize(url, options = nil)
|
8
|
+
super(url, options)
|
9
9
|
@illust_id = url.slice(URL_REGEXP, 2)
|
10
10
|
|
11
11
|
raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
|
12
12
|
@json = JSON.parse(raw_json)
|
13
|
+
|
14
|
+
@fetch_raw_image_url = options&.dig(:pixiv, :fetch_raw_image_url)
|
13
15
|
end
|
14
16
|
|
15
17
|
private
|
@@ -26,7 +28,19 @@ module Panchira
|
|
26
28
|
"https://pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
|
27
29
|
end
|
28
30
|
|
31
|
+
def parse_image
|
32
|
+
image = PanchiraImage.new
|
33
|
+
image.url = parse_image_url
|
34
|
+
image.width, image.height = FastImage.size(image.url, http_header: {'Referer' => 'https://app-api.pixiv.net/'})
|
35
|
+
|
36
|
+
image
|
37
|
+
end
|
38
|
+
|
29
39
|
def parse_image_url
|
40
|
+
if @fetch_raw_image_url
|
41
|
+
return @json['body']['urls']['original']
|
42
|
+
end
|
43
|
+
|
30
44
|
proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
|
31
45
|
|
32
46
|
case Net::HTTP.get_response(URI.parse(proxy_url))
|
@@ -47,8 +61,8 @@ module Panchira
|
|
47
61
|
class PixivNovelResolver < Resolver
|
48
62
|
URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
|
49
63
|
|
50
|
-
def initialize(url)
|
51
|
-
super(url)
|
64
|
+
def initialize(url, options = nil)
|
65
|
+
super(url, options)
|
52
66
|
@novel_id = url.slice(URL_REGEXP, 1)
|
53
67
|
|
54
68
|
raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)
|
@@ -11,8 +11,9 @@ module Panchira
|
|
11
11
|
# You must override this in subclasses to limit which urls to resolve.
|
12
12
|
URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
13
13
|
|
14
|
-
def initialize(url)
|
14
|
+
def initialize(url, options = nil)
|
15
15
|
@url = url
|
16
|
+
@options = options
|
16
17
|
end
|
17
18
|
|
18
19
|
# This function is called right after this Resolver instance is made.
|
@@ -53,7 +54,8 @@ module Panchira
|
|
53
54
|
def fetch_page(url)
|
54
55
|
read_options = {
|
55
56
|
'User-Agent' => user_agent,
|
56
|
-
'Cookie' => cookie
|
57
|
+
'Cookie' => cookie,
|
58
|
+
:read_timeout => 10
|
57
59
|
}
|
58
60
|
|
59
61
|
raw_page = URI.parse(url).read(read_options)
|
@@ -1,22 +1,102 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
1
3
|
module Panchira
|
2
4
|
class TwitterResolver < Resolver
|
3
|
-
URL_REGEXP =
|
5
|
+
URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
|
6
|
+
|
7
|
+
def initialize(url, options = nil)
|
8
|
+
super(url, options)
|
9
|
+
@screen_name = @url.slice(URL_REGEXP, 1)
|
10
|
+
@id = @url.slice(URL_REGEXP, 2)
|
11
|
+
|
12
|
+
@bearer_token = options&.dig(:twitter, :bearer_token)
|
13
|
+
|
14
|
+
@author = nil
|
15
|
+
@response = nil
|
16
|
+
end
|
17
|
+
|
18
|
+
def fetch
|
19
|
+
return super unless @bearer_token
|
20
|
+
|
21
|
+
@response = fetch_api if @bearer_token
|
22
|
+
|
23
|
+
result = PanchiraResult.new
|
24
|
+
|
25
|
+
result.canonical_url = parse_canonical_url
|
26
|
+
result.title = parse_title
|
27
|
+
result.description = parse_description
|
28
|
+
result.image = parse_image
|
29
|
+
result.tags = parse_tags
|
30
|
+
result.author = parse_author
|
31
|
+
result.resolver = parse_resolver
|
32
|
+
|
33
|
+
result
|
34
|
+
end
|
4
35
|
|
5
36
|
private
|
37
|
+
|
38
|
+
def fetch_api
|
39
|
+
uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
|
40
|
+
uri.query = URI.encode_www_form({
|
41
|
+
'expansions': 'attachments.media_keys,author_id',
|
42
|
+
'media.fields': 'preview_image_url,type,url',
|
43
|
+
'user.fields': 'name,username',
|
44
|
+
'tweet.fields': 'entities'
|
45
|
+
})
|
46
|
+
|
47
|
+
raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
|
48
|
+
JSON.parse(raw_json)
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse_canonical_url
|
52
|
+
# Twitter returns false canonical url when the account is set as sensitive.
|
53
|
+
"https://twitter.com/#{@screen_name}/status/#{@id}"
|
54
|
+
end
|
55
|
+
|
6
56
|
def parse_title
|
7
|
-
@title =
|
57
|
+
@title = if @response
|
58
|
+
@author = @response['includes']['users'][0]['name']
|
59
|
+
"#{@author} on Twitter"
|
60
|
+
else
|
61
|
+
super
|
62
|
+
end
|
8
63
|
end
|
9
64
|
|
10
65
|
def parse_author
|
11
|
-
@title.match(/\A(.+) on Twitter\z/)[1]
|
66
|
+
@author || @title.match(/\A(.+) on Twitter\z/)[1]
|
67
|
+
rescue StandardError
|
68
|
+
nil
|
12
69
|
end
|
13
70
|
|
14
71
|
def parse_description
|
15
|
-
@
|
72
|
+
if @response
|
73
|
+
@response['data']['text']
|
74
|
+
else
|
75
|
+
@description = super.gsub(/\A“|”\z/, '')
|
76
|
+
end
|
16
77
|
end
|
17
78
|
|
18
79
|
def parse_tags
|
19
|
-
@
|
80
|
+
if @response
|
81
|
+
@response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
|
82
|
+
else
|
83
|
+
@description.scan(/[##]([^##\s]+)/).map(&:first)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def parse_image_url
|
88
|
+
return super unless @response
|
89
|
+
|
90
|
+
first_media = @response.dig('includes', 'media')&.first
|
91
|
+
|
92
|
+
return unless first_media
|
93
|
+
|
94
|
+
case first_media['type']
|
95
|
+
when 'photo'
|
96
|
+
first_media['url']
|
97
|
+
when 'video'
|
98
|
+
first_media['preview_image_url']
|
99
|
+
end
|
20
100
|
end
|
21
101
|
end
|
22
102
|
|
data/lib/panchira/version.rb
CHANGED
data/lib/panchira.rb
CHANGED
@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
|
|
21
21
|
module Panchira
|
22
22
|
class << self
|
23
23
|
# Return a PanchiraResult that contains the attributes of given url.
|
24
|
-
def fetch(url)
|
24
|
+
def fetch(url, options = nil)
|
25
25
|
resolver = select_resolver(url)
|
26
26
|
|
27
|
-
resolver.new(url).fetch
|
27
|
+
resolver.new(url, options).fetch
|
28
28
|
end
|
29
29
|
|
30
30
|
private
|
data/panchira.gemspec
CHANGED
@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
|
|
40
40
|
spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
|
41
41
|
|
42
42
|
spec.add_dependency 'fastimage', '~> 2.1.7'
|
43
|
-
spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.
|
43
|
+
spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
|
44
44
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: panchira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kyp
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -103,7 +103,7 @@ dependencies:
|
|
103
103
|
version: 1.10.9
|
104
104
|
- - "<"
|
105
105
|
- !ruby/object:Gem::Version
|
106
|
-
version: 1.
|
106
|
+
version: 1.14.0
|
107
107
|
type: :runtime
|
108
108
|
prerelease: false
|
109
109
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -113,7 +113,7 @@ dependencies:
|
|
113
113
|
version: 1.10.9
|
114
114
|
- - "<"
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: 1.
|
116
|
+
version: 1.14.0
|
117
117
|
description: |2
|
118
118
|
Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
|
119
119
|
If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
|
@@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
173
|
- !ruby/object:Gem::Version
|
174
174
|
version: '0'
|
175
175
|
requirements: []
|
176
|
-
rubygems_version: 3.
|
176
|
+
rubygems_version: 3.1.4
|
177
177
|
signing_key:
|
178
178
|
specification_version: 4
|
179
179
|
summary: A parser for hentai websites
|