panchira 1.3.6 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56602175dff2975d0d6f606c31da48b8cf904c61aa98d8f0e6d940ef4c1d230d
4
- data.tar.gz: b1ceb998bb60dafbb5b992ad156192d6ea2f58fec8ee606aac3d1d88ebe9b1b0
3
+ metadata.gz: 642bed0e6765fbea03fe04681debc2d3c02a269cbc6a7547d68209f9b0b0aad7
4
+ data.tar.gz: 626ca110ca53489b8ec0289a0486a13abbeccc1aae1408bc6d51de058606e425
5
5
  SHA512:
6
- metadata.gz: 5909d32a1231288cc8567ada0a58fb038d1eaf8424ac9ff3b4298dd81b3f95651217ddb3b5b39e24834174abbc7bfd454d083836a0889df2188df511600f7a49
7
- data.tar.gz: 0f8d01c68ceb44f10ef06d274667e8acd254254987056e25624d2a3f58de0e05fc95a07002b3cb7d0d8a207461bf6f0a96c211714fed015eadc3e9673eb17699
6
+ metadata.gz: aa1f9adc654d34794da25aa65c53526fed63c6d5c3528f9c201edf0433d10e802539fa3ca33a674bd3bcbeffd6f94d4502b57e7df0484d2dbd65237b9ffc2710
7
+ data.tar.gz: 645e6c3aafe4f5f2919c61ca4caf42ba045732be2b94a173061be632bf22c7deebe0500145c9f6e4d6d47eccab1d3922887ec6f85cf5af97162bd24a50a4d771
@@ -9,22 +9,27 @@ name: Ruby
9
9
 
10
10
  on:
11
11
  push:
12
- branches: [ master ]
12
+ branches: [master]
13
13
  pull_request:
14
- branches: [ master ]
14
+ branches: [master]
15
15
 
16
16
  jobs:
17
17
  test:
18
-
19
18
  runs-on: ubuntu-18.04
20
-
19
+ strategy:
20
+ fail-fast: false
21
+ matrix:
22
+ ruby: ["2.7", "3.0"]
23
+ name: Ruby ${{ matrix.ruby }}
21
24
  steps:
22
- - uses: actions/checkout@v2
23
- - name: Set up Ruby
24
- uses: ruby/setup-ruby@v1
25
- with:
26
- ruby-version: 2.6
27
- - name: Install dependencies
28
- run: bundle install
29
- - name: Run tests
30
- run: bundle exec rake test
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ uses: ruby/setup-ruby@v1
28
+ with:
29
+ ruby-version: ${{ matrix.ruby }}
30
+ - name: Install dependencies
31
+ run: bundle install
32
+ - name: Run tests
33
+ run: bundle exec rake test
34
+ env:
35
+ TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}
data/.rubocop.yml CHANGED
@@ -4,7 +4,7 @@ require:
4
4
  - rubocop-minitest
5
5
 
6
6
  AllCops:
7
- TargetRubyVersion: 2.6
7
+ TargetRubyVersion: 2.7
8
8
  NewCops: enable
9
9
  Exclude:
10
10
  - bin/*
@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
17
17
  Layout/IndentationConsistency:
18
18
  EnforcedStyle: indented_internal_methods
19
19
 
20
+ Layout/MultilineAssignmentLayout:
21
+ EnforcedStyle: same_line
22
+ SupportedTypes: ["block"]
23
+
20
24
  Layout/MultilineMethodCallIndentation:
21
25
  EnforcedStyle: indented
22
26
 
@@ -27,9 +31,12 @@ Lint/AssignmentInCondition:
27
31
  Enabled: false
28
32
 
29
33
  Lint/MissingSuper:
30
- Exclude:
34
+ Exclude:
31
35
  - lib/panchira/resolvers/*
32
36
 
37
+ Lint/SymbolConversion:
38
+ EnforcedStyle: consistent
39
+
33
40
  Style/AsciiComments:
34
41
  Enabled: false
35
42
 
data/CHANGELOG.md CHANGED
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.5.1 - 2022-03-20
8
+ ### Added
9
+ - Pixiv resolver can now fetch image URIs that are not proxied.
10
+
11
+ ## 1.5.0 - 2022-03-01
12
+ ### Changed
13
+ - You can now set options in Panchira::fetch and Resolver's constructors.
14
+ - Twitter resolver can now fetch data from the API (requires a bearer token).
15
+ - Max execution time is now set to 10 seconds.
16
+
17
+ ## 1.4.0 - 2022-01-10
18
+ ### Added
19
+ - Added support for non-Japanese pixiv URLs.
20
+
21
+ ### Fixed
22
+ - Fixed an issue where Nijie Resolver failed to fetch image.
23
+
24
+ ### Changed
25
+ - Dropped support for Ruby 2.6.
26
+
7
27
  ## 1.3.6 - 2021-10-19
8
28
  ### Added
9
29
  - Added support for Twitter.
data/Gemfile.lock CHANGED
@@ -1,41 +1,41 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.3.6)
4
+ panchira (1.5.1)
5
5
  fastimage (~> 2.1.7)
6
- nokogiri (>= 1.10.9, < 1.13.0)
6
+ nokogiri (>= 1.10.9, < 1.14.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ast (2.4.2)
12
12
  fastimage (2.1.7)
13
- minitest (5.14.4)
14
- nokogiri (1.11.7-x86_64-darwin)
13
+ minitest (5.15.0)
14
+ nokogiri (1.13.3-x86_64-darwin)
15
15
  racc (~> 1.4)
16
- parallel (1.20.1)
17
- parser (3.0.1.1)
16
+ parallel (1.21.0)
17
+ parser (3.1.1.0)
18
18
  ast (~> 2.4.1)
19
- racc (1.5.2)
20
- rainbow (3.0.0)
19
+ racc (1.6.0)
20
+ rainbow (3.1.1)
21
21
  rake (12.3.3)
22
- regexp_parser (2.1.1)
22
+ regexp_parser (2.2.1)
23
23
  rexml (3.2.5)
24
- rubocop (1.15.0)
24
+ rubocop (1.25.1)
25
25
  parallel (~> 1.10)
26
- parser (>= 3.0.0.0)
26
+ parser (>= 3.1.0.0)
27
27
  rainbow (>= 2.2.2, < 4.0)
28
28
  regexp_parser (>= 1.8, < 3.0)
29
29
  rexml
30
- rubocop-ast (>= 1.5.0, < 2.0)
30
+ rubocop-ast (>= 1.15.1, < 2.0)
31
31
  ruby-progressbar (~> 1.7)
32
32
  unicode-display_width (>= 1.4.0, < 3.0)
33
- rubocop-ast (1.5.0)
34
- parser (>= 3.0.1.1)
35
- rubocop-minitest (0.12.1)
33
+ rubocop-ast (1.16.0)
34
+ parser (>= 3.1.1.0)
35
+ rubocop-minitest (0.17.2)
36
36
  rubocop (>= 0.90, < 2.0)
37
37
  ruby-progressbar (1.11.0)
38
- unicode-display_width (2.0.0)
38
+ unicode-display_width (2.1.0)
39
39
 
40
40
  PLATFORMS
41
41
  ruby
data/README.md CHANGED
@@ -46,6 +46,24 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
46
46
 
47
47
  Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering it to Panchira. See `Panchira::Extensions` documentation in source code for further details.
48
48
 
49
+ ### About Twitter API
50
+
51
+ Due to a recent change on Twitter, it has become really hard to fetch tweet data by scraping. To solve this problem, Panchira can now use Twitter's official API.
52
+
53
+ To use the Twitter API instead of normal scraping, please set your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will just fall back to simple scraping.
54
+
55
+ ```
56
+ > Panchira.fetch("https://twitter.com/example/status/1234567890", options: {twitter: {bearer_token: 'ABC...123'}})
57
+ ```
58
+
59
+ ### About Pixiv proxy
60
+
61
+ By default, Panchira returns a link to [Pixiv.cat](https://pixiv.cat/) as an image URI, but you can change this behavior by setting `fetch_raw_image_url` as an option. To access the non-proxied URI (on pximg.net), you have to set the Referer header to `https://app-api.pixiv.net/` in your HTTP request.
62
+
63
+ ```
64
+ > Panchira.fetch("https://pixiv.net/artworks/12345678", options: {pixiv: {fetch_raw_image_url: true}})
65
+ ```
66
+
49
67
  ## Development
50
68
 
51
69
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -6,8 +6,8 @@ module Panchira
6
6
  class KomifloResolver < Resolver
7
7
  URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
8
8
 
9
- def initialize(url)
10
- @url = url
9
+ def initialize(url, options = nil)
10
+ super(url, options)
11
11
 
12
12
  @id = url.slice(URL_REGEXP, 1)
13
13
  raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)
@@ -8,8 +8,8 @@ module Panchira
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
9
  ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
10
 
11
- def initialize(url)
12
- super(url)
11
+ def initialize(url, options = nil)
12
+ super(url, options)
13
13
 
14
14
  if id = @url.match(ID_REGEXP)[:id]
15
15
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
48
48
  URL_REGEXP = /ncode\.syosetu\.com/.freeze
49
49
  ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
50
50
 
51
- def initialize(url)
52
- super(url)
51
+ def initialize(url, options = nil)
52
+ super(url, options)
53
53
 
54
54
  if id = @url.match(ID_REGEXP)[:id]
55
55
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -26,17 +26,17 @@ module Panchira
26
26
  end
27
27
 
28
28
  def parse_image_url
29
- str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s
30
-
31
- if s = str.match(%r{https://pic.nijie.(net|info)/(?<servername>\d+)/[^/]+/nijie_picture/(?<imagename>[^"]+)})
32
- # 動画は容量大きすぎるし取らない
33
- if s[:imagename] =~ /(jpg|png)/
34
- "https://pic.nijie.net/#{s[:servername]}/nijie_picture/#{s[:imagename]}"
35
- else
36
- s[0]
37
- end
29
+ str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s.split.join(' ')
30
+ thumbnail_url = JSON.parse(str)['thumbnailUrl']
31
+
32
+ unless thumbnail_url
33
+ return @page.css('//meta[property="og:image"]/@content').first.to_s
34
+ end
35
+
36
+ if md = thumbnail_url.match(%r{pic.nijie.net/\w+(?<resolution>/\w+/)nijie.+\.(?<format>png|jpg|jpeg)})
37
+ thumbnail_url.sub(md[:resolution], '/')
38
38
  else
39
- @page.css('//meta[property="og:image"]/@content').first.to_s
39
+ thumbnail_url
40
40
  end
41
41
  end
42
42
 
@@ -2,14 +2,16 @@
2
2
 
3
3
  module Panchira
4
4
  class PixivResolver < Resolver
5
- URL_REGEXP = %r{pixiv\.net/(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
5
+ URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
6
6
 
7
- def initialize(url)
8
- super(url)
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
9
  @illust_id = url.slice(URL_REGEXP, 2)
10
10
 
11
11
  raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
12
12
  @json = JSON.parse(raw_json)
13
+
14
+ @fetch_raw_image_url = options&.dig(:pixiv, :fetch_raw_image_url)
13
15
  end
14
16
 
15
17
  private
@@ -27,6 +29,10 @@ module Panchira
27
29
  end
28
30
 
29
31
  def parse_image_url
32
+ if @fetch_raw_image_url
33
+ return @json['body']['urls']['original']
34
+ end
35
+
30
36
  proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
31
37
 
32
38
  case Net::HTTP.get_response(URI.parse(proxy_url))
@@ -47,8 +53,8 @@ module Panchira
47
53
  class PixivNovelResolver < Resolver
48
54
  URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
49
55
 
50
- def initialize(url)
51
- super(url)
56
+ def initialize(url, options = nil)
57
+ super(url, options)
52
58
  @novel_id = url.slice(URL_REGEXP, 1)
53
59
 
54
60
  raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)
@@ -11,8 +11,9 @@ module Panchira
11
11
  # You must override this in subclasses to limit which urls to resolve.
12
12
  URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
13
13
 
14
- def initialize(url)
14
+ def initialize(url, options = nil)
15
15
  @url = url
16
+ @options = options
16
17
  end
17
18
 
18
19
  # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
53
54
  def fetch_page(url)
54
55
  read_options = {
55
56
  'User-Agent' => user_agent,
56
- 'Cookie' => cookie
57
+ 'Cookie' => cookie,
58
+ :read_timeout => 10
57
59
  }
58
60
 
59
61
  raw_page = URI.parse(url).read(read_options)
@@ -1,22 +1,102 @@
1
+ require 'uri'
2
+
1
3
  module Panchira
2
4
  class TwitterResolver < Resolver
3
- URL_REGEXP = /twitter.com\/\w+\/status\/\d+/.freeze
5
+ URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
6
+
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
+ @screen_name = @url.slice(URL_REGEXP, 1)
10
+ @id = @url.slice(URL_REGEXP, 2)
11
+
12
+ @bearer_token = options&.dig(:twitter, :bearer_token)
13
+
14
+ @author = nil
15
+ @response = nil
16
+ end
17
+
18
+ def fetch
19
+ return super unless @bearer_token
20
+
21
+ @response = fetch_api if @bearer_token
22
+
23
+ result = PanchiraResult.new
24
+
25
+ result.canonical_url = parse_canonical_url
26
+ result.title = parse_title
27
+ result.description = parse_description
28
+ result.image = parse_image
29
+ result.tags = parse_tags
30
+ result.author = parse_author
31
+ result.resolver = parse_resolver
32
+
33
+ result
34
+ end
4
35
 
5
36
  private
37
+
38
+ def fetch_api
39
+ uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
40
+ uri.query = URI.encode_www_form({
41
+ 'expansions': 'attachments.media_keys,author_id',
42
+ 'media.fields': 'preview_image_url,type,url',
43
+ 'user.fields': 'name,username',
44
+ 'tweet.fields': 'entities'
45
+ })
46
+
47
+ raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
48
+ JSON.parse(raw_json)
49
+ end
50
+
51
+ def parse_canonical_url
52
+ # Twitter returns false canonical url when the account is set as sensitive.
53
+ "https://twitter.com/#{@screen_name}/status/#{@id}"
54
+ end
55
+
6
56
  def parse_title
7
- @title = super
57
+ @title = if @response
58
+ @author = @response['includes']['users'][0]['name']
59
+ "#{@author} on Twitter"
60
+ else
61
+ super
62
+ end
8
63
  end
9
64
 
10
65
  def parse_author
11
- @title.match(/\A(.+) on Twitter\z/)[1]
66
+ @author || @title.match(/\A(.+) on Twitter\z/)[1]
67
+ rescue StandardError
68
+ nil
12
69
  end
13
70
 
14
71
  def parse_description
15
- @description = super.gsub(/\A“|”\z/, '')
72
+ if @response
73
+ @response['data']['text']
74
+ else
75
+ @description = super.gsub(/\A“|”\z/, '')
76
+ end
16
77
  end
17
78
 
18
79
  def parse_tags
19
- @description.scan(/[##]([^##\s]+)/).map(&:first)
80
+ if @response
81
+ @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
82
+ else
83
+ @description.scan(/[##]([^##\s]+)/).map(&:first)
84
+ end
85
+ end
86
+
87
+ def parse_image_url
88
+ return super unless @response
89
+
90
+ first_media = @response.dig('includes', 'media')&.first
91
+
92
+ return unless first_media
93
+
94
+ case first_media['type']
95
+ when 'photo'
96
+ first_media['url']
97
+ when 'video'
98
+ first_media['preview_image_url']
99
+ end
20
100
  end
21
101
  end
22
102
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.3.6'
4
+ VERSION = '1.5.1'
5
5
  end
data/lib/panchira.rb CHANGED
@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
21
21
  module Panchira
22
22
  class << self
23
23
  # Return a PanchiraResult that contains the attributes of given url.
24
- def fetch(url)
24
+ def fetch(url, options = nil)
25
25
  resolver = select_resolver(url)
26
26
 
27
- resolver.new(url).fetch
27
+ resolver.new(url, options).fetch
28
28
  end
29
29
 
30
30
  private
data/panchira.gemspec CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.required_ruby_version = '>= 2.6'
34
+ spec.required_ruby_version = '>= 2.7'
35
35
 
36
36
  spec.add_development_dependency 'bundler', '~> 2.0'
37
37
  spec.add_development_dependency 'minitest', '~> 5.0'
@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
41
41
 
42
42
  spec.add_dependency 'fastimage', '~> 2.1.7'
43
- spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.13.0'
43
+ spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
44
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-10-19 00:00:00.000000000 Z
11
+ date: 2022-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -103,7 +103,7 @@ dependencies:
103
103
  version: 1.10.9
104
104
  - - "<"
105
105
  - !ruby/object:Gem::Version
106
- version: 1.13.0
106
+ version: 1.14.0
107
107
  type: :runtime
108
108
  prerelease: false
109
109
  version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
113
113
  version: 1.10.9
114
114
  - - "<"
115
115
  - !ruby/object:Gem::Version
116
- version: 1.13.0
116
+ version: 1.14.0
117
117
  description: |2
118
118
  Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
119
119
  If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -166,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
166
166
  requirements:
167
167
  - - ">="
168
168
  - !ruby/object:Gem::Version
169
- version: '2.6'
169
+ version: '2.7'
170
170
  required_rubygems_version: !ruby/object:Gem::Requirement
171
171
  requirements:
172
172
  - - ">="