panchira 1.4.0 → 1.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87af8a25ccd6d841b133aaa28ce7853c0111ae0ce4768f287a1f9a8a09eec148
4
- data.tar.gz: 93233eca9a9fd019fb82aafbd1e97630aa242d1645d2e2306fee52d1812aa144
3
+ metadata.gz: db34e8033acf822616172b330fc61ffe2ee5a1c9dfe46bc1737257717aeff4c0
4
+ data.tar.gz: 5ec893680ef7e04b2f85d16b3458ee9f6b2db76ac6c01544a088d280a574e98c
5
5
  SHA512:
6
- metadata.gz: 42e8539356b8c73b8cced17cdaec2452960f17175736a004702c37e7370f407ea995504a07e2b060c53f52629f42cb247e34080d9e54c52a07158fddb5e427c1
7
- data.tar.gz: 4b1746f991ba2353304c96e3297efca0a45ba0e3d9c908f653e72aa6920de3654f2714235d24037de591699287bf0c027b0720d6e2741cf3ef6a6e5ae1add95a
6
+ metadata.gz: 71b2d7707d78b21004acdca984f1869cc81a9e9169bee9239b13261a79002ed859a5a87c8aa08350a89d37ce734abb47637c909aa01b5dab171eb871ad27d9e0
7
+ data.tar.gz: fbe9744acbbdbd13376e2a6bdaf1c4430c20861163b39f6bb7ed26436c8fd6c64876e329c0c9aae5c129a59257d7c06eac2508b7c1283bc27d405f0c2e836b0e
@@ -31,3 +31,5 @@ jobs:
31
31
  run: bundle install
32
32
  - name: Run tests
33
33
  run: bundle exec rake test
34
+ env:
35
+ TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}
data/.rubocop.yml CHANGED
@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
17
17
  Layout/IndentationConsistency:
18
18
  EnforcedStyle: indented_internal_methods
19
19
 
20
+ Layout/MultilineAssignmentLayout:
21
+ EnforcedStyle: same_line
22
+ SupportedTypes: ["block"]
23
+
20
24
  Layout/MultilineMethodCallIndentation:
21
25
  EnforcedStyle: indented
22
26
 
@@ -30,6 +34,9 @@ Lint/MissingSuper:
30
34
  Exclude:
31
35
  - lib/panchira/resolvers/*
32
36
 
37
+ Lint/SymbolConversion:
38
+ EnforcedStyle: consistent
39
+
33
40
  Style/AsciiComments:
34
41
  Enabled: false
35
42
 
data/CHANGELOG.md CHANGED
@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/).
6
6
 
7
+ ## 1.5.2 - 2022-03-20
8
+ ### Fixed
9
+ - Fixed an issue where the Pixiv resolver couldn't retrieve the dimensions of non-proxied images.
10
+
11
+ ## 1.5.1 - 2022-03-20
12
+ ### Added
13
+ - Pixiv resolver can now fetch image URIs that are not proxied.
14
+
15
+ ## 1.5.0 - 2022-03-01
16
+ ### Changed
17
+ - You can now set options in Panchira::fetch and Resolver's constructors.
18
+ - Twitter resolver can now fetch data from the API (requires a bearer token).
19
+ - Max execution time is now set to 10 seconds.
20
+
7
21
  ## 1.4.0 - 2022-01-10
8
22
  ### Added
9
23
  - Added support for non-Japanese pixiv URLs.
data/Gemfile.lock CHANGED
@@ -1,41 +1,41 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- panchira (1.4.0)
4
+ panchira (1.5.2)
5
5
  fastimage (~> 2.1.7)
6
- nokogiri (>= 1.10.9, < 1.13.0)
6
+ nokogiri (>= 1.10.9, < 1.14.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ast (2.4.2)
12
12
  fastimage (2.1.7)
13
- minitest (5.14.4)
14
- nokogiri (1.12.5-x86_64-darwin)
13
+ minitest (5.15.0)
14
+ nokogiri (1.13.3-x86_64-darwin)
15
15
  racc (~> 1.4)
16
- parallel (1.20.1)
17
- parser (3.0.1.1)
16
+ parallel (1.21.0)
17
+ parser (3.1.1.0)
18
18
  ast (~> 2.4.1)
19
19
  racc (1.6.0)
20
- rainbow (3.0.0)
20
+ rainbow (3.1.1)
21
21
  rake (12.3.3)
22
- regexp_parser (2.1.1)
22
+ regexp_parser (2.2.1)
23
23
  rexml (3.2.5)
24
- rubocop (1.15.0)
24
+ rubocop (1.25.1)
25
25
  parallel (~> 1.10)
26
- parser (>= 3.0.0.0)
26
+ parser (>= 3.1.0.0)
27
27
  rainbow (>= 2.2.2, < 4.0)
28
28
  regexp_parser (>= 1.8, < 3.0)
29
29
  rexml
30
- rubocop-ast (>= 1.5.0, < 2.0)
30
+ rubocop-ast (>= 1.15.1, < 2.0)
31
31
  ruby-progressbar (~> 1.7)
32
32
  unicode-display_width (>= 1.4.0, < 3.0)
33
- rubocop-ast (1.5.0)
34
- parser (>= 3.0.1.1)
35
- rubocop-minitest (0.12.1)
33
+ rubocop-ast (1.16.0)
34
+ parser (>= 3.1.1.0)
35
+ rubocop-minitest (0.17.2)
36
36
  rubocop (>= 0.90, < 2.0)
37
37
  ruby-progressbar (1.11.0)
38
- unicode-display_width (2.0.0)
38
+ unicode-display_width (2.1.0)
39
39
 
40
40
  PLATFORMS
41
41
  ruby
data/README.md CHANGED
@@ -46,6 +46,24 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
46
46
 
47
47
  Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering it to Panchira. See `Panchira::Extensions` documentation in source code for further details.
48
48
 
49
+ ### About Twitter API
50
+
51
+ Due to a recent change at Twitter, it has become very hard to fetch tweet data by scraping. To solve this problem, Panchira can now use the official Twitter API.
52
+
53
+ To use the Twitter API instead of normal scraping, pass your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will simply fall back to scraping.
54
+
55
+ ```
56
+ > Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
57
+ ```
58
+
59
+ ### About Pixiv proxy
60
+
61
+ By default, Panchira returns a link to [Pixiv.cat](https://pixiv.cat/) as the image URI, but you can change this behavior by setting the `fetch_raw_image_url` option. To access the non-proxied URI (hosted on pximg.net), you have to set the `Referer` header to `https://app-api.pixiv.net/` in your HTTP request.
62
+
63
+ ```
64
+ > Panchira.fetch("https://pixiv.net/artworks/12345678", {pixiv: {fetch_raw_image_url: true}})
65
+ ```
66
+
49
67
  ## Development
50
68
 
51
69
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -6,8 +6,8 @@ module Panchira
6
6
  class KomifloResolver < Resolver
7
7
  URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
8
8
 
9
- def initialize(url)
10
- @url = url
9
+ def initialize(url, options = nil)
10
+ super(url, options)
11
11
 
12
12
  @id = url.slice(URL_REGEXP, 1)
13
13
  raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)
@@ -8,8 +8,8 @@ module Panchira
8
8
  URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
9
9
  ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
10
10
 
11
- def initialize(url)
12
- super(url)
11
+ def initialize(url, options = nil)
12
+ super(url, options)
13
13
 
14
14
  if id = @url.match(ID_REGEXP)[:id]
15
15
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
48
48
  URL_REGEXP = /ncode\.syosetu\.com/.freeze
49
49
  ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
50
50
 
51
- def initialize(url)
52
- super(url)
51
+ def initialize(url, options = nil)
52
+ super(url, options)
53
53
 
54
54
  if id = @url.match(ID_REGEXP)[:id]
55
55
  @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -4,12 +4,14 @@ module Panchira
4
4
  class PixivResolver < Resolver
5
5
  URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
6
6
 
7
- def initialize(url)
8
- super(url)
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
9
  @illust_id = url.slice(URL_REGEXP, 2)
10
10
 
11
11
  raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
12
12
  @json = JSON.parse(raw_json)
13
+
14
+ @fetch_raw_image_url = options&.dig(:pixiv, :fetch_raw_image_url)
13
15
  end
14
16
 
15
17
  private
@@ -26,7 +28,19 @@ module Panchira
26
28
  "https://pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
27
29
  end
28
30
 
31
+ def parse_image
32
+ image = PanchiraImage.new
33
+ image.url = parse_image_url
34
+ image.width, image.height = FastImage.size(image.url, http_header: {'Referer' => 'https://app-api.pixiv.net/'})
35
+
36
+ image
37
+ end
38
+
29
39
  def parse_image_url
40
+ if @fetch_raw_image_url
41
+ return @json['body']['urls']['original']
42
+ end
43
+
30
44
  proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
31
45
 
32
46
  case Net::HTTP.get_response(URI.parse(proxy_url))
@@ -47,8 +61,8 @@ module Panchira
47
61
  class PixivNovelResolver < Resolver
48
62
  URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
49
63
 
50
- def initialize(url)
51
- super(url)
64
+ def initialize(url, options = nil)
65
+ super(url, options)
52
66
  @novel_id = url.slice(URL_REGEXP, 1)
53
67
 
54
68
  raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)
@@ -11,8 +11,9 @@ module Panchira
11
11
  # You must override this in subclasses to limit which urls to resolve.
12
12
  URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
13
13
 
14
- def initialize(url)
14
+ def initialize(url, options = nil)
15
15
  @url = url
16
+ @options = options
16
17
  end
17
18
 
18
19
  # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
53
54
  def fetch_page(url)
54
55
  read_options = {
55
56
  'User-Agent' => user_agent,
56
- 'Cookie' => cookie
57
+ 'Cookie' => cookie,
58
+ :read_timeout => 10
57
59
  }
58
60
 
59
61
  raw_page = URI.parse(url).read(read_options)
@@ -1,22 +1,102 @@
1
+ require 'uri'
2
+
1
3
  module Panchira
2
4
  class TwitterResolver < Resolver
3
- URL_REGEXP = /twitter.com\/\w+\/status\/\d+/.freeze
5
+ URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
6
+
7
+ def initialize(url, options = nil)
8
+ super(url, options)
9
+ @screen_name = @url.slice(URL_REGEXP, 1)
10
+ @id = @url.slice(URL_REGEXP, 2)
11
+
12
+ @bearer_token = options&.dig(:twitter, :bearer_token)
13
+
14
+ @author = nil
15
+ @response = nil
16
+ end
17
+
18
+ def fetch
19
+ return super unless @bearer_token
20
+
21
+ @response = fetch_api if @bearer_token
22
+
23
+ result = PanchiraResult.new
24
+
25
+ result.canonical_url = parse_canonical_url
26
+ result.title = parse_title
27
+ result.description = parse_description
28
+ result.image = parse_image
29
+ result.tags = parse_tags
30
+ result.author = parse_author
31
+ result.resolver = parse_resolver
32
+
33
+ result
34
+ end
4
35
 
5
36
  private
37
+
38
+ def fetch_api
39
+ uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
40
+ uri.query = URI.encode_www_form({
41
+ 'expansions': 'attachments.media_keys,author_id',
42
+ 'media.fields': 'preview_image_url,type,url',
43
+ 'user.fields': 'name,username',
44
+ 'tweet.fields': 'entities'
45
+ })
46
+
47
+ raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
48
+ JSON.parse(raw_json)
49
+ end
50
+
51
+ def parse_canonical_url
52
+ # Twitter returns false canonical url when the account is set as sensitive.
53
+ "https://twitter.com/#{@screen_name}/status/#{@id}"
54
+ end
55
+
6
56
  def parse_title
7
- @title = super
57
+ @title = if @response
58
+ @author = @response['includes']['users'][0]['name']
59
+ "#{@author} on Twitter"
60
+ else
61
+ super
62
+ end
8
63
  end
9
64
 
10
65
  def parse_author
11
- @title.match(/\A(.+) on Twitter\z/)[1]
66
+ @author || @title.match(/\A(.+) on Twitter\z/)[1]
67
+ rescue StandardError
68
+ nil
12
69
  end
13
70
 
14
71
  def parse_description
15
- @description = super.gsub(/\A“|”\z/, '')
72
+ if @response
73
+ @response['data']['text']
74
+ else
75
+ @description = super.gsub(/\A“|”\z/, '')
76
+ end
16
77
  end
17
78
 
18
79
  def parse_tags
19
- @description.scan(/[##]([^##\s]+)/).map(&:first)
80
+ if @response
81
+ @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
82
+ else
83
+ @description.scan(/[##]([^##\s]+)/).map(&:first)
84
+ end
85
+ end
86
+
87
+ def parse_image_url
88
+ return super unless @response
89
+
90
+ first_media = @response.dig('includes', 'media')&.first
91
+
92
+ return unless first_media
93
+
94
+ case first_media['type']
95
+ when 'photo'
96
+ first_media['url']
97
+ when 'video'
98
+ first_media['preview_image_url']
99
+ end
20
100
  end
21
101
  end
22
102
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Panchira
4
- VERSION = '1.4.0'
4
+ VERSION = '1.5.2'
5
5
  end
data/lib/panchira.rb CHANGED
@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
21
21
  module Panchira
22
22
  class << self
23
23
  # Return a PanchiraResult that contains the attributes of given url.
24
- def fetch(url)
24
+ def fetch(url, options = nil)
25
25
  resolver = select_resolver(url)
26
26
 
27
- resolver.new(url).fetch
27
+ resolver.new(url, options).fetch
28
28
  end
29
29
 
30
30
  private
data/panchira.gemspec CHANGED
@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
40
40
  spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
41
41
 
42
42
  spec.add_dependency 'fastimage', '~> 2.1.7'
43
- spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.13.0'
43
+ spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
44
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: panchira
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kyp
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-10 00:00:00.000000000 Z
11
+ date: 2022-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -103,7 +103,7 @@ dependencies:
103
103
  version: 1.10.9
104
104
  - - "<"
105
105
  - !ruby/object:Gem::Version
106
- version: 1.13.0
106
+ version: 1.14.0
107
107
  type: :runtime
108
108
  prerelease: false
109
109
  version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
113
113
  version: 1.10.9
114
114
  - - "<"
115
115
  - !ruby/object:Gem::Version
116
- version: 1.13.0
116
+ version: 1.14.0
117
117
  description: |2
118
118
  Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
119
119
  If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
173
173
  - !ruby/object:Gem::Version
174
174
  version: '0'
175
175
  requirements: []
176
- rubygems_version: 3.3.4
176
+ rubygems_version: 3.1.4
177
177
  signing_key:
178
178
  specification_version: 4
179
179
  summary: A parser for hentai websites