RubyGems - panchira - Versions diffs - 1.3.6 → 1.5.1 - Mend

panchira 1.3.6 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +18 -13
data/.rubocop.yml +9 -2
data/CHANGELOG.md +20 -0
data/Gemfile.lock +16 -16
data/README.md +18 -0
data/lib/panchira/resolvers/komiflo_resolver.rb +2 -2
data/lib/panchira/resolvers/narou_resolver.rb +4 -4
data/lib/panchira/resolvers/nijie_resolver.rb +10 -10
data/lib/panchira/resolvers/pixiv_resolver.rb +11 -5
data/lib/panchira/resolvers/resolver.rb +4 -2
data/lib/panchira/resolvers/twitter_resolver.rb +85 -5
data/lib/panchira/version.rb +1 -1
data/lib/panchira.rb +2 -2
data/panchira.gemspec +2 -2
metadata +5 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 56602175dff2975d0d6f606c31da48b8cf904c61aa98d8f0e6d940ef4c1d230d
-  data.tar.gz: b1ceb998bb60dafbb5b992ad156192d6ea2f58fec8ee606aac3d1d88ebe9b1b0
+  metadata.gz: 642bed0e6765fbea03fe04681debc2d3c02a269cbc6a7547d68209f9b0b0aad7
+  data.tar.gz: 626ca110ca53489b8ec0289a0486a13abbeccc1aae1408bc6d51de058606e425
 SHA512:
-  metadata.gz: 5909d32a1231288cc8567ada0a58fb038d1eaf8424ac9ff3b4298dd81b3f95651217ddb3b5b39e24834174abbc7bfd454d083836a0889df2188df511600f7a49
-  data.tar.gz: 0f8d01c68ceb44f10ef06d274667e8acd254254987056e25624d2a3f58de0e05fc95a07002b3cb7d0d8a207461bf6f0a96c211714fed015eadc3e9673eb17699
+  metadata.gz: aa1f9adc654d34794da25aa65c53526fed63c6d5c3528f9c201edf0433d10e802539fa3ca33a674bd3bcbeffd6f94d4502b57e7df0484d2dbd65237b9ffc2710
+  data.tar.gz: 645e6c3aafe4f5f2919c61ca4caf42ba045732be2b94a173061be632bf22c7deebe0500145c9f6e4d6d47eccab1d3922887ec6f85cf5af97162bd24a50a4d771

data/.github/workflows/ruby.yml CHANGED Viewed

@@ -9,22 +9,27 @@ name: Ruby
 on:
   push:
-    branches: [ master ]
+    branches: [master]
   pull_request:
-    branches: [ master ]
+    branches: [master]
 jobs:
   test:
     runs-on: ubuntu-18.04
+    strategy:
+      fail-fast: false
+      matrix:
+        ruby: ["2.7", "3.0"]
+    name: Ruby ${{ matrix.ruby }}
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Ruby
-      uses: ruby/setup-ruby@v1
-      with:
-        ruby-version: 2.6
-    - name: Install dependencies
-      run: bundle install
-    - name: Run tests
-      run: bundle exec rake test
+      - uses: actions/checkout@v2
+      - name: Set up Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: Install dependencies
+        run: bundle install
+      - name: Run tests
+        run: bundle exec rake test
+        env:
+          TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}

data/.rubocop.yml CHANGED Viewed

@@ -4,7 +4,7 @@ require:
   - rubocop-minitest
 AllCops:
-  TargetRubyVersion: 2.6
+  TargetRubyVersion: 2.7
   NewCops: enable
   Exclude:
     - bin/*
@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
 Layout/IndentationConsistency:
   EnforcedStyle: indented_internal_methods
+Layout/MultilineAssignmentLayout:
+  EnforcedStyle: same_line
+  SupportedTypes: ["block"]
 Layout/MultilineMethodCallIndentation:
   EnforcedStyle: indented
@@ -27,9 +31,12 @@ Lint/AssignmentInCondition:
   Enabled: false
 Lint/MissingSuper:
-  Exclude:
+  Exclude:
     - lib/panchira/resolvers/*
+Lint/SymbolConversion:
+  EnforcedStyle: consistent
 Style/AsciiComments:
   Enabled: false

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
+## 1.5.1 - 2022-03-20
+### Added
+- Pixiv resolver can now fetch image URIs that are not proxied.
+## 1.5.0 - 2022-03-01
+### Changed
+- You can now pass options to `Panchira::fetch` and to the Resolver constructors.
+- Twitter resolver can now fetch data from the API (requires a bearer token).
+- Max execution time is now set to 10 seconds.
+## 1.4.0 - 2022-01-10
+### Added
+- Added support for non-Japanese pixiv URLs.
+### Fixed
+- Fixed an issue where the Nijie resolver failed to fetch images.
+### Changed
+- Dropped support for Ruby 2.6.
 ## 1.3.6 - 2021-10-19
 ### Added
 - Added support for Twitter.

data/Gemfile.lock CHANGED Viewed

@@ -1,41 +1,41 @@
 PATH
   remote: .
   specs:
-    panchira (1.3.6)
+    panchira (1.5.1)
       fastimage (~> 2.1.7)
-      nokogiri (>= 1.10.9, < 1.13.0)
+      nokogiri (>= 1.10.9, < 1.14.0)
 GEM
   remote: https://rubygems.org/
   specs:
     ast (2.4.2)
     fastimage (2.1.7)
-    minitest (5.14.4)
-    nokogiri (1.11.7-x86_64-darwin)
+    minitest (5.15.0)
+    nokogiri (1.13.3-x86_64-darwin)
       racc (~> 1.4)
-    parallel (1.20.1)
-    parser (3.0.1.1)
+    parallel (1.21.0)
+    parser (3.1.1.0)
       ast (~> 2.4.1)
-    racc (1.5.2)
-    rainbow (3.0.0)
+    racc (1.6.0)
+    rainbow (3.1.1)
     rake (12.3.3)
-    regexp_parser (2.1.1)
+    regexp_parser (2.2.1)
     rexml (3.2.5)
-    rubocop (1.15.0)
+    rubocop (1.25.1)
       parallel (~> 1.10)
-      parser (>= 3.0.0.0)
+      parser (>= 3.1.0.0)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 1.8, < 3.0)
       rexml
-      rubocop-ast (>= 1.5.0, < 2.0)
+      rubocop-ast (>= 1.15.1, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 1.4.0, < 3.0)
-    rubocop-ast (1.5.0)
-      parser (>= 3.0.1.1)
-    rubocop-minitest (0.12.1)
+    rubocop-ast (1.16.0)
+      parser (>= 3.1.1.0)
+    rubocop-minitest (0.17.2)
       rubocop (>= 0.90, < 2.0)
     ruby-progressbar (1.11.0)
-    unicode-display_width (2.0.0)
+    unicode-display_width (2.1.0)
 PLATFORMS
   ruby

data/README.md CHANGED Viewed

@@ -46,6 +46,24 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
 Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering them with Panchira. See the `Panchira::Extensions` documentation in the source code for further details.
+### About Twitter API
+Due to a recent change on Twitter's side, fetching tweet data by scraping has become very difficult. To solve this problem, Panchira can now use the official Twitter API.
+To use the Twitter API instead of normal scraping, pass your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will simply fall back to scraping.
+```
+> Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
+```
+### About Pixiv proxy
+By default, Panchira returns a link to [Pixiv.cat](https://pixiv.cat/) as the image URI, but you can change this behavior by setting the `fetch_raw_image_url` option. To access the non-proxied URI (hosted on pximg.net), you have to set the `Referer` header to `https://app-api.pixiv.net/` in your HTTP request.
+```
+> Panchira.fetch("https://pixiv.net/artworks/12345678", {pixiv: {fetch_raw_image_url: true}})
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/panchira/resolvers/komiflo_resolver.rb CHANGED Viewed

@@ -6,8 +6,8 @@ module Panchira
   class KomifloResolver < Resolver
     URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
-    def initialize(url)
-      @url = url
+    def initialize(url, options = nil)
+      super(url, options)
       @id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/narou_resolver.rb CHANGED Viewed

@@ -8,8 +8,8 @@ module Panchira
       URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
       ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
       URL_REGEXP = /ncode\.syosetu\.com/.freeze
       ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")

data/lib/panchira/resolvers/nijie_resolver.rb CHANGED Viewed

@@ -26,17 +26,17 @@ module Panchira
       end
       def parse_image_url
-        str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s
-        if s = str.match(%r{https://pic.nijie.(net|info)/(?<servername>\d+)/[^/]+/nijie_picture/(?<imagename>[^"]+)})
-          # 動画は容量大きすぎるし取らない
-          if s[:imagename] =~ /(jpg|png)/
-            "https://pic.nijie.net/#{s[:servername]}/nijie_picture/#{s[:imagename]}"
-          else
-            s[0]
-          end
+        str = @page.css('//script[@type="application/ld+json"]/text()').first.to_s.split.join(' ')
+        thumbnail_url = JSON.parse(str)['thumbnailUrl']
+        unless thumbnail_url
+          return @page.css('//meta[property="og:image"]/@content').first.to_s
+        end
+        if md = thumbnail_url.match(%r{pic.nijie.net/\w+(?<resolution>/\w+/)nijie.+\.(?<format>png|jpg|jpeg)})
+          thumbnail_url.sub(md[:resolution], '/')
         else
-          @page.css('//meta[property="og:image"]/@content').first.to_s
+          thumbnail_url
         end
       end

data/lib/panchira/resolvers/pixiv_resolver.rb CHANGED Viewed

@@ -2,14 +2,16 @@
 module Panchira
   class PixivResolver < Resolver
-    URL_REGEXP = %r{pixiv\.net/(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
+    URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @illust_id = url.slice(URL_REGEXP, 2)
       raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
       @json = JSON.parse(raw_json)
+      @fetch_raw_image_url = options&.dig(:pixiv, :fetch_raw_image_url)
     end
     private
@@ -27,6 +29,10 @@ module Panchira
       end
       def parse_image_url
+        if @fetch_raw_image_url
+          return @json['body']['urls']['original']
+        end
         proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
         case Net::HTTP.get_response(URI.parse(proxy_url))
@@ -47,8 +53,8 @@ module Panchira
   class PixivNovelResolver < Resolver
     URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @novel_id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/resolver.rb CHANGED Viewed

@@ -11,8 +11,9 @@ module Panchira
     # You must override this in subclasses to limit which urls to resolve.
     URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
-    def initialize(url)
+    def initialize(url, options = nil)
       @url = url
+      @options = options
     end
     # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
       def fetch_page(url)
         read_options = {
           'User-Agent' => user_agent,
-          'Cookie' => cookie
+          'Cookie' => cookie,
+          :read_timeout => 10
         }
         raw_page = URI.parse(url).read(read_options)

data/lib/panchira/resolvers/twitter_resolver.rb CHANGED Viewed

@@ -1,22 +1,102 @@
+require 'uri'
 module Panchira
   class TwitterResolver < Resolver
-    URL_REGEXP = /twitter.com\/\w+\/status\/\d+/.freeze
+    URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
+    def initialize(url, options = nil)
+      super(url, options)
+      @screen_name = @url.slice(URL_REGEXP, 1)
+      @id = @url.slice(URL_REGEXP, 2)
+      @bearer_token = options&.dig(:twitter, :bearer_token)
+      @author = nil
+      @response = nil
+    end
+    def fetch
+      return super unless @bearer_token
+      @response = fetch_api if @bearer_token
+      result = PanchiraResult.new
+      result.canonical_url = parse_canonical_url
+      result.title = parse_title
+      result.description = parse_description
+      result.image = parse_image
+      result.tags = parse_tags
+      result.author = parse_author
+      result.resolver = parse_resolver
+      result
+    end
     private
+      def fetch_api
+        uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
+        uri.query = URI.encode_www_form({
+          'expansions': 'attachments.media_keys,author_id',
+          'media.fields': 'preview_image_url,type,url',
+          'user.fields': 'name,username',
+          'tweet.fields': 'entities'
+        })
+        raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
+        JSON.parse(raw_json)
+      end
+      def parse_canonical_url
+        # Twitter returns false canonical url when the account is set as sensitive.
+        "https://twitter.com/#{@screen_name}/status/#{@id}"
+      end
       def parse_title
-        @title = super
+        @title = if @response
+                   @author = @response['includes']['users'][0]['name']
+                   "#{@author} on Twitter"
+                 else
+                   super
+                 end
       end
       def parse_author
-        @title.match(/\A(.+) on Twitter\z/)[1]
+        @author || @title.match(/\A(.+) on Twitter\z/)[1]
+      rescue StandardError
+        nil
       end
       def parse_description
-        @description = super.gsub(/\A“|”\z/, '')
+        if @response
+          @response['data']['text']
+        else
+          @description = super.gsub(/\A“|”\z/, '')
+        end
       end
       def parse_tags
-        @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        if @response
+          @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
+        else
+          @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        end
+      end
+      def parse_image_url
+        return super unless @response
+        first_media = @response.dig('includes', 'media')&.first
+        return unless first_media
+        case first_media['type']
+        when 'photo'
+          first_media['url']
+        when 'video'
+          first_media['preview_image_url']
+        end
       end
   end

data/lib/panchira/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Panchira
-  VERSION = '1.3.6'
+  VERSION = '1.5.1'
 end

data/lib/panchira.rb CHANGED Viewed

@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
 module Panchira
   class << self
     # Return a PanchiraResult that contains the attributes of given url.
-    def fetch(url)
+    def fetch(url, options = nil)
       resolver = select_resolver(url)
-      resolver.new(url).fetch
+      resolver.new(url, options).fetch
     end
     private

data/panchira.gemspec CHANGED Viewed

@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
-  spec.required_ruby_version = '>= 2.6'
+  spec.required_ruby_version = '>= 2.7'
   spec.add_development_dependency 'bundler', '~> 2.0'
   spec.add_development_dependency 'minitest', '~> 5.0'
@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
   spec.add_dependency 'fastimage', '~> 2.1.7'
-  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.13.0'
+  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: panchira
 version: !ruby/object:Gem::Version
-  version: 1.3.6
+  version: 1.5.1
 platform: ruby
 authors:
 - kyp
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-10-19 00:00:00.000000000 Z
+date: 2022-03-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -103,7 +103,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
 description: |2
       Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
       If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -166,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '2.6'
+      version: '2.7'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="