panchira 1.3.5 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45b393a405624e26e8d6555c1833e0f65e5e93deef13518197ec4b15fc3bff61
4
- data.tar.gz: b770975cbbba8d66cf6acde417ae9d32e78d310f145726ff16b60824a49f99e3
3
+ metadata.gz: 9119f6e4ad4e4a3b551642f7d19a7853805d2c82b65c5fc868b472f265e7169c
4
+ data.tar.gz: 03cbbf38e009cd326b4f40f467de1dcc1d0fb852de362b3dde05ed56eb65c2c2
5
5
  SHA512:
6
- metadata.gz: 9c4ca217b6bd3fa782fa92d36ed1a1974880e2ff1746319cbd4f3e652d947807f74490b937a7f84cd4b1c8c46b0a95bd91a8374c28d575aa224c3c9ce6817385
7
- data.tar.gz: b050fcb7586b81209e0d2f0ac521ab34c4db0ad05362655b3b25ae16c2d226a31b57da50019a195bb092080cfddf5eb1bdf44c80757f3943b1075860182b3add
6
+ metadata.gz: 6ff2fab3b4489ade9e7accb6f10dc2d391aa5ba5ebbbb08f130926df81acd31eeae36d2208b915848e5e1337e71ab2a7cbe300eeeb728ed8669593fc139573f8
7
+ data.tar.gz: df76a29e1af2515d4eee99568426295e02fbcf8b1b6b835932633458f3f287d4ca2460ec1a497a73f2f8a5e66a100acf71aa37e35be3dd778c95ac83f2f808e7
@@ -9,22 +9,27 @@ name: Ruby
9
9
 
10
10
  on:
11
11
  push:
12
- branches: [ master ]
12
+ branches: [master]
13
13
  pull_request:
14
- branches: [ master ]
14
+ branches: [master]
15
15
 
16
16
  jobs:
17
17
  test:
18
-
19
18
  runs-on: ubuntu-18.04
20
-
19
+ strategy:
20
+ fail-fast: false
21
+ matrix:
22
+ ruby: ["2.7", "3.0"]
23
+ name: Ruby ${{ matrix.ruby }}
21
24
  steps:
22
- - uses: actions/checkout@v2
23
- - name: Set up Ruby
24
- uses: ruby/setup-ruby@v1
25
- with:
26
- ruby-version: 2.6
27
- - name: Install dependencies
28
- run: bundle install
29
- - name: Run tests
30
- run: bundle exec rake test
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ uses: ruby/setup-ruby@v1
28
+ with:
29
+ ruby-version: ${{ matrix.ruby }}
30
+ - name: Install dependencies
31
+ run: bundle install
32
+ - name: Run tests
33
+ run: bundle exec rake test
34
+ env:
35
+ TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}
data/.rubocop.yml CHANGED
@@ -4,7 +4,7 @@ require:
4
4
  - rubocop-minitest
5
5
 
6
6
  AllCops:
7
- TargetRubyVersion: 2.6
7
+ TargetRubyVersion: 2.7
8
8
  NewCops: enable
9
9
  Exclude:
10
10
  - bin/*
@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
17
17
  Layout/IndentationConsistency:
18
18
  EnforcedStyle: indented_internal_methods
19
19
 
20
+ Layout/MultilineAssignmentLayout:
21
+ EnforcedStyle: same_line
22
+ SupportedTypes: ["block"]
23
+
20
24
  Layout/MultilineMethodCallIndentation:
21
25
  EnforcedStyle: indented
22
26
 
@@ -27,9 +31,12 @@ Lint/AssignmentInCondition:
27
31
  Enabled: false
28
32
 
29
33
  Lint/MissingSuper:
30
- Exclude:
34
+ Exclude:
31
35
  - lib/panchira/resolvers/*
32
36
 
37
+ Lint/SymbolConversion:
38
+ EnforcedStyle: consistent
39
+
33
40
  Style/AsciiComments:
34
41
  Enabled: false
35
42
 
data/CHANGELOG.md CHANGED
@@ -4,6 +4,29 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.5.0 - 2022-03-01
8
+ ### Changed
9
+ - You can now pass options to `Panchira::fetch` and to Resolver constructors.
10
+ - Twitter resolvers can now fetch data from the API (requires a bearer token).
11
+ - Max execution time is now set to 10 seconds.
12
+
13
+ ## 1.4.0 - 2022-01-10
14
+ ### Added
15
+ - Added support for non-Japanese pixiv URLs.
16
+
17
+ ### Fixed
18
+ - Fixed an issue where Nijie Resolver failed to fetch image.
19
+
20
+ ### Changed
21
+ - Dropped support for Ruby 2.6.
22
+
23
+ ## 1.3.6 - 2021-10-19
24
+ ### Added
25
+ - Added support for Twitter.
26
+
27
+ ### Changed
28
+ - Melonbooks Resolver now returns image URI without resizing queries.
29
+
7
30
  ## 1.3.5 - 2021-09-01
8
31
  ### Added
9
32
  - Added support for Pixiv Novel.
data/Gemfile.lock CHANGED
@@ -1,41 +1,43 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.3.5)
4
+ panchira (1.5.0)
5
5
  fastimage (~> 2.1.7)
6
- nokogiri (>= 1.10.9, < 1.12.0)
6
+ nokogiri (>= 1.10.9, < 1.14.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ast (2.4.2)
12
12
  fastimage (2.1.7)
13
- minitest (5.14.4)
14
- nokogiri (1.11.7-x86_64-darwin)
13
+ mini_portile2 (2.8.0)
14
+ minitest (5.15.0)
15
+ nokogiri (1.13.3)
16
+ mini_portile2 (~> 2.8.0)
15
17
  racc (~> 1.4)
16
- parallel (1.20.1)
17
- parser (3.0.1.1)
18
+ parallel (1.21.0)
19
+ parser (3.1.1.0)
18
20
  ast (~> 2.4.1)
19
- racc (1.5.2)
20
- rainbow (3.0.0)
21
+ racc (1.6.0)
22
+ rainbow (3.1.1)
21
23
  rake (12.3.3)
22
- regexp_parser (2.1.1)
24
+ regexp_parser (2.2.1)
23
25
  rexml (3.2.5)
24
- rubocop (1.15.0)
26
+ rubocop (1.25.1)
25
27
  parallel (~> 1.10)
26
- parser (>= 3.0.0.0)
28
+ parser (>= 3.1.0.0)
27
29
  rainbow (>= 2.2.2, < 4.0)
28
30
  regexp_parser (>= 1.8, < 3.0)
29
31
  rexml
30
- rubocop-ast (>= 1.5.0, < 2.0)
32
+ rubocop-ast (>= 1.15.1, < 2.0)
31
33
  ruby-progressbar (~> 1.7)
32
34
  unicode-display_width (>= 1.4.0, < 3.0)
33
- rubocop-ast (1.5.0)
34
- parser (>= 3.0.1.1)
35
- rubocop-minitest (0.12.1)
35
+ rubocop-ast (1.16.0)
36
+ parser (>= 3.1.1.0)
37
+ rubocop-minitest (0.17.2)
36
38
  rubocop (>= 0.90, < 2.0)
37
39
  ruby-progressbar (1.11.0)
38
- unicode-display_width (2.0.0)
40
+ unicode-display_width (2.1.0)
39
41
 
40
42
  PLATFORMS
41
43
  ruby
data/README.md CHANGED
@@ -46,6 +46,16 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
46
46
 
47
47
  Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering it to Panchira. See `Panchira::Extensions` documentation in source code for further details.
48
48
 
49
+ ### About Twitter API
50
+
51
+ Due to a recent change in Twitter, it's getting really hard to fetch tweet data by scraping. To solve this problem, Panchira can now use the official Twitter API.
52
+
53
+ To use the Twitter API instead of normal scraping, please set your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will just fall back to simple scraping.
54
+
55
+ ```
56
+ > Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
57
+ ```
58
+
49
59
  ## Development
50
60
 
51
61
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -6,8 +6,8 @@ module Panchira
6
6
  class KomifloResolver < Resolver
7
7
  URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
8
8
 
9
- def initialize(url)
10
- @url = url
9
+ def initialize(url, options = nil)
10
+ super(url, options)
11
11
 
12
12
  @id = url.slice(URL_REGEXP, 1)
13
13
  raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)
@@ -57,7 +57,10 @@ module Panchira
57
57
  end
58
58
 
59
59
  def parse_image_url
60
- @page.css('//meta[property="og:image"]/@content').first.to_s.sub(/&c=1/, '')
60
+ url = @page.css('//meta[property="og:image"]/@content').first.to_s
61
+ image = url.match(/resize_image.php\?image=([^&]+)/)[1]
62
+
63
+ "https://melonbooks.akamaized.net/user_data/packages/resize_image.php?image=#{image}"
61
64
  end
62
65
 
63
66
  def parse_tags
@@ -8,8 +8,8 @@ module Panchira
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
9
  ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
10
 
11
- def initialize(url)
12
- super(url)
11
+ def initialize(url, options = nil)
12
+ super(url, options)
13
13
 
14
14
  if id = @url.match(ID_REGEXP)[:id]
15
15
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
48
48
  URL_REGEXP = /ncode\.syosetu\.com/.freeze
49
49
  ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
50
50
 
51
- def initialize(url)
52
- super(url)
51
+ def initialize(url, options = nil)
52
+ super(url, options)
53
53
 
54
54
  if id = @url.match(ID_REGEXP)[:id]
55
55
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -26,17 +26,17 @@ module Panchira
26
26
  end
27
27
 
28
28
  def parse_image_url
29
- str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s
30
-
31
- if s = str.match(%r{https://pic.nijie.(net|info)/(?<servername>\d+)/[^/]+/nijie_picture/(?<imagename>[^"]+)})
32
- # 動画は容量大きすぎるし取らない
33
- if s[:imagename] =~ /(jpg|png)/
34
- "https://pic.nijie.net/#{s[:servername]}/nijie_picture/#{s[:imagename]}"
35
- else
36
- s[0]
37
- end
29
+ str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s.split.join(' ')
30
+ thumbnail_url = JSON.parse(str)['thumbnailUrl']
31
+
32
+ unless thumbnail_url
33
+ return @page.css('//meta[property="og:image"]/@content').first.to_s
34
+ end
35
+
36
+ if md = thumbnail_url.match(%r{pic.nijie.net/\w+(?<resolution>/\w+/)nijie.+\.(?<format>png|jpg|jpeg)})
37
+ thumbnail_url.sub(md[:resolution], '/')
38
38
  else
39
- @page.css('//meta[property="og:image"]/@content').first.to_s
39
+ thumbnail_url
40
40
  end
41
41
  end
42
42
 
@@ -2,10 +2,10 @@
2
2
 
3
3
  module Panchira
4
4
  class PixivResolver < Resolver
5
- URL_REGEXP = %r{pixiv\.net/(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
5
+ URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
6
6
 
7
- def initialize(url)
8
- super(url)
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
9
  @illust_id = url.slice(URL_REGEXP, 2)
10
10
 
11
11
  raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
@@ -47,8 +47,8 @@ module Panchira
47
47
  class PixivNovelResolver < Resolver
48
48
  URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
49
49
 
50
- def initialize(url)
51
- super(url)
50
+ def initialize(url, options = nil)
51
+ super(url, options)
52
52
  @novel_id = url.slice(URL_REGEXP, 1)
53
53
 
54
54
  raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)
@@ -11,8 +11,9 @@ module Panchira
11
11
  # You must override this in subclasses to limit which urls to resolve.
12
12
  URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
13
13
 
14
- def initialize(url)
14
+ def initialize(url, options = nil)
15
15
  @url = url
16
+ @options = options
16
17
  end
17
18
 
18
19
  # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
53
54
  def fetch_page(url)
54
55
  read_options = {
55
56
  'User-Agent' => user_agent,
56
- 'Cookie' => cookie
57
+ 'Cookie' => cookie,
58
+ :read_timeout => 10
57
59
  }
58
60
 
59
61
  raw_page = URI.parse(url).read(read_options)
@@ -0,0 +1,101 @@
1
+ require 'uri'
2
+
3
+ module Panchira
4
+ class TwitterResolver < Resolver
5
+ URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
6
+
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
+ @screen_name = @url.slice(URL_REGEXP, 1)
10
+ @id = @url.slice(URL_REGEXP, 2)
11
+
12
+ @bearer_token = options&.dig(:twitter, :bearer_token)
13
+ end
14
+
15
+ def fetch
16
+ return super unless @bearer_token
17
+
18
+ @response = fetch_api if @bearer_token
19
+
20
+ result = PanchiraResult.new
21
+
22
+ result.canonical_url = parse_canonical_url
23
+ result.title = parse_title
24
+ result.description = parse_description
25
+ result.image = parse_image
26
+ result.tags = parse_tags
27
+ result.author = parse_author
28
+ result.resolver = parse_resolver
29
+
30
+ result
31
+ end
32
+
33
+ private
34
+
35
+ def fetch_api
36
+ uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
37
+ uri.query = URI.encode_www_form({
38
+ 'expansions': 'attachments.media_keys,author_id',
39
+ 'media.fields': 'preview_image_url,type,url',
40
+ 'user.fields': 'name,username',
41
+ 'tweet.fields': 'entities'
42
+ })
43
+
44
+ raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
45
+ JSON.parse(raw_json)
46
+ end
47
+
48
+ def parse_canonical_url
49
+ # Twitter returns false canonical url when the account is set as sensitive.
50
+ "https://twitter.com/#{@screen_name}/status/#{@id}"
51
+ end
52
+
53
+ def parse_title
54
+ @title = if @response
55
+ @author = @response['includes']['users'][0]['name']
56
+ "#{@author} on Twitter"
57
+ else
58
+ super
59
+ end
60
+ end
61
+
62
+ def parse_author
63
+ @author || @title.match(/\A(.+) on Twitter\z/)[1]
64
+ rescue StandardError
65
+ nil
66
+ end
67
+
68
+ def parse_description
69
+ if @response
70
+ @response['data']['text']
71
+ else
72
+ @description = super.gsub(/\A“|”\z/, '')
73
+ end
74
+ end
75
+
76
+ def parse_tags
77
+ if @response
78
+ @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
79
+ else
80
+ @description.scan(/[##]([^##\s]+)/).map(&:first)
81
+ end
82
+ end
83
+
84
+ def parse_image_url
85
+ return super unless @response
86
+
87
+ first_media = @response.dig('includes', 'media')&.first
88
+
89
+ return unless first_media
90
+
91
+ case first_media['type']
92
+ when 'photo'
93
+ first_media['url']
94
+ when 'video'
95
+ first_media['preview_image_url']
96
+ end
97
+ end
98
+ end
99
+
100
+ ::Panchira::Extensions.register(Panchira::TwitterResolver)
101
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.3.5'
4
+ VERSION = '1.5.0'
5
5
  end
data/lib/panchira.rb CHANGED
@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
21
21
  module Panchira
22
22
  class << self
23
23
  # Return a PanchiraResult that contains the attributes of given url.
24
- def fetch(url)
24
+ def fetch(url, options = nil)
25
25
  resolver = select_resolver(url)
26
26
 
27
- resolver.new(url).fetch
27
+ resolver.new(url, options).fetch
28
28
  end
29
29
 
30
30
  private
data/panchira.gemspec CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.required_ruby_version = '>= 2.6'
34
+ spec.required_ruby_version = '>= 2.7'
35
35
 
36
36
  spec.add_development_dependency 'bundler', '~> 2.0'
37
37
  spec.add_development_dependency 'minitest', '~> 5.0'
@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
41
41
 
42
42
  spec.add_dependency 'fastimage', '~> 2.1.7'
43
- spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.12.0'
43
+ spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
44
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-01 00:00:00.000000000 Z
11
+ date: 2022-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -103,7 +103,7 @@ dependencies:
103
103
  version: 1.10.9
104
104
  - - "<"
105
105
  - !ruby/object:Gem::Version
106
- version: 1.12.0
106
+ version: 1.14.0
107
107
  type: :runtime
108
108
  prerelease: false
109
109
  version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
113
113
  version: 1.10.9
114
114
  - - "<"
115
115
  - !ruby/object:Gem::Version
116
- version: 1.12.0
116
+ version: 1.14.0
117
117
  description: |2
118
118
  Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
119
119
  If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -148,6 +148,7 @@ files:
148
148
  - lib/panchira/resolvers/nijie_resolver.rb
149
149
  - lib/panchira/resolvers/pixiv_resolver.rb
150
150
  - lib/panchira/resolvers/resolver.rb
151
+ - lib/panchira/resolvers/twitter_resolver.rb
151
152
  - lib/panchira/version.rb
152
153
  - panchira.gemspec
153
154
  homepage: https://github.com/nuita/panchira
@@ -165,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
165
166
  requirements:
166
167
  - - ">="
167
168
  - !ruby/object:Gem::Version
168
- version: '2.6'
169
+ version: '2.7'
169
170
  required_rubygems_version: !ruby/object:Gem::Requirement
170
171
  requirements:
171
172
  - - ">="