RubyGems - panchira - Versions diffs - 1.4.0 → 1.5.2 - Mend

panchira 1.4.0 → 1.5.2

Files changed (15) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +2 -0
data/.rubocop.yml +7 -0
data/CHANGELOG.md +14 -0
data/Gemfile.lock +15 -15
data/README.md +18 -0
data/lib/panchira/resolvers/komiflo_resolver.rb +2 -2
data/lib/panchira/resolvers/narou_resolver.rb +4 -4
data/lib/panchira/resolvers/pixiv_resolver.rb +18 -4
data/lib/panchira/resolvers/resolver.rb +4 -2
data/lib/panchira/resolvers/twitter_resolver.rb +85 -5
data/lib/panchira/version.rb +1 -1
data/lib/panchira.rb +2 -2
data/panchira.gemspec +1 -1
metadata +5 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 87af8a25ccd6d841b133aaa28ce7853c0111ae0ce4768f287a1f9a8a09eec148
-  data.tar.gz: 93233eca9a9fd019fb82aafbd1e97630aa242d1645d2e2306fee52d1812aa144
+  metadata.gz: db34e8033acf822616172b330fc61ffe2ee5a1c9dfe46bc1737257717aeff4c0
+  data.tar.gz: 5ec893680ef7e04b2f85d16b3458ee9f6b2db76ac6c01544a088d280a574e98c
 SHA512:
-  metadata.gz: 42e8539356b8c73b8cced17cdaec2452960f17175736a004702c37e7370f407ea995504a07e2b060c53f52629f42cb247e34080d9e54c52a07158fddb5e427c1
-  data.tar.gz: 4b1746f991ba2353304c96e3297efca0a45ba0e3d9c908f653e72aa6920de3654f2714235d24037de591699287bf0c027b0720d6e2741cf3ef6a6e5ae1add95a
+  metadata.gz: 71b2d7707d78b21004acdca984f1869cc81a9e9169bee9239b13261a79002ed859a5a87c8aa08350a89d37ce734abb47637c909aa01b5dab171eb871ad27d9e0
+  data.tar.gz: fbe9744acbbdbd13376e2a6bdaf1c4430c20861163b39f6bb7ed26436c8fd6c64876e329c0c9aae5c129a59257d7c06eac2508b7c1283bc27d405f0c2e836b0e

data/.github/workflows/ruby.yml CHANGED Viewed

@@ -31,3 +31,5 @@ jobs:
         run: bundle install
       - name: Run tests
         run: bundle exec rake test
+        env:
+          TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}

data/.rubocop.yml CHANGED Viewed

@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
 Layout/IndentationConsistency:
   EnforcedStyle: indented_internal_methods
+Layout/MultilineAssignmentLayout:
+  EnforcedStyle: same_line
+  SupportedTypes: ["block"]
 Layout/MultilineMethodCallIndentation:
   EnforcedStyle: indented
@@ -30,6 +34,9 @@ Lint/MissingSuper:
   Exclude:
     - lib/panchira/resolvers/*
+Lint/SymbolConversion:
+  EnforcedStyle: consistent
 Style/AsciiComments:
   Enabled: false

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
+## 1.5.2 - 2022-03-20
+### Fixed
+- Fixed an issue where the Pixiv resolver couldn't retrieve the size (width and height) of non-proxied images.
+## 1.5.1 - 2022-03-20
+### Added
+- The Pixiv resolver can now fetch non-proxied image URIs.
+## 1.5.0 - 2022-03-01
+### Changed
+- You can now pass options to `Panchira::fetch` and to the Resolver constructors.
+- The Twitter resolver can now fetch data from the Twitter API (requires a bearer token).
+- Max execution time is now set to 10 seconds.
 ## 1.4.0 - 2022-01-10
 ### Added
 - Added support for non-Japanese pixiv URLs.

data/Gemfile.lock CHANGED Viewed

@@ -1,41 +1,41 @@
 PATH
   remote: .
   specs:
-    panchira (1.4.0)
+    panchira (1.5.2)
       fastimage (~> 2.1.7)
-      nokogiri (>= 1.10.9, < 1.13.0)
+      nokogiri (>= 1.10.9, < 1.14.0)
 GEM
   remote: https://rubygems.org/
   specs:
     ast (2.4.2)
     fastimage (2.1.7)
-    minitest (5.14.4)
-    nokogiri (1.12.5-x86_64-darwin)
+    minitest (5.15.0)
+    nokogiri (1.13.3-x86_64-darwin)
       racc (~> 1.4)
-    parallel (1.20.1)
-    parser (3.0.1.1)
+    parallel (1.21.0)
+    parser (3.1.1.0)
       ast (~> 2.4.1)
     racc (1.6.0)
-    rainbow (3.0.0)
+    rainbow (3.1.1)
     rake (12.3.3)
-    regexp_parser (2.1.1)
+    regexp_parser (2.2.1)
     rexml (3.2.5)
-    rubocop (1.15.0)
+    rubocop (1.25.1)
       parallel (~> 1.10)
-      parser (>= 3.0.0.0)
+      parser (>= 3.1.0.0)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 1.8, < 3.0)
       rexml
-      rubocop-ast (>= 1.5.0, < 2.0)
+      rubocop-ast (>= 1.15.1, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 1.4.0, < 3.0)
-    rubocop-ast (1.5.0)
-      parser (>= 3.0.1.1)
-    rubocop-minitest (0.12.1)
+    rubocop-ast (1.16.0)
+      parser (>= 3.1.1.0)
+    rubocop-minitest (0.17.2)
       rubocop (>= 0.90, < 2.0)
     ruby-progressbar (1.11.0)
-    unicode-display_width (2.0.0)
+    unicode-display_width (2.1.0)
 PLATFORMS
   ruby

data/README.md CHANGED Viewed

@@ -46,6 +46,24 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
 Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering it to Panchira. See `Panchira::Extensions` documentation in source code for further details.
+### About Twitter API
+Due to a recent change in Twitter, it's getting really hard to fetch tweet data by scraping. To solve this problem, Panchira can now use the official Twitter API.
+To use the Twitter API instead of normal scraping, pass your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will just fall back to simple scraping.
+```
+> Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
+```
+### About Pixiv proxy
+By default, Panchira returns a link to [Pixiv.cat](https://pixiv.cat/) as the image URI, but you can change this behavior by setting the `fetch_raw_image_url` option. To access the non-proxied URI (hosted on pximg.net), you have to set the `Referer` header to `https://app-api.pixiv.net/` in your HTTP request.
+```
+> Panchira.fetch("https://pixiv.net/artworks/12345678", {pixiv: {fetch_raw_image_url: true}})
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/panchira/resolvers/komiflo_resolver.rb CHANGED Viewed

@@ -6,8 +6,8 @@ module Panchira
   class KomifloResolver < Resolver
     URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
-    def initialize(url)
-      @url = url
+    def initialize(url, options = nil)
+      super(url, options)
       @id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/narou_resolver.rb CHANGED Viewed

@@ -8,8 +8,8 @@ module Panchira
       URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
       ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
       URL_REGEXP = /ncode\.syosetu\.com/.freeze
       ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")

data/lib/panchira/resolvers/pixiv_resolver.rb CHANGED Viewed

@@ -4,12 +4,14 @@ module Panchira
   class PixivResolver < Resolver
     URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @illust_id = url.slice(URL_REGEXP, 2)
       raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
       @json = JSON.parse(raw_json)
+      @fetch_raw_image_url = options&.dig(:pixiv, :fetch_raw_image_url)
     end
     private
@@ -26,7 +28,19 @@ module Panchira
         "https://pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
       end
+      def parse_image
+        image = PanchiraImage.new
+        image.url = parse_image_url
+        image.width, image.height = FastImage.size(image.url, http_header: {'Referer' => 'https://app-api.pixiv.net/'})
+        image
+      end
       def parse_image_url
+        if @fetch_raw_image_url
+          return @json['body']['urls']['original']
+        end
         proxy_url = "https://pixiv.cat/#{@illust_id}.jpg"
         case Net::HTTP.get_response(URI.parse(proxy_url))
@@ -47,8 +61,8 @@ module Panchira
   class PixivNovelResolver < Resolver
     URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @novel_id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/resolver.rb CHANGED Viewed

@@ -11,8 +11,9 @@ module Panchira
     # You must override this in subclasses to limit which urls to resolve.
     URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
-    def initialize(url)
+    def initialize(url, options = nil)
       @url = url
+      @options = options
     end
     # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
       def fetch_page(url)
         read_options = {
           'User-Agent' => user_agent,
-          'Cookie' => cookie
+          'Cookie' => cookie,
+          :read_timeout => 10
         }
         raw_page = URI.parse(url).read(read_options)

data/lib/panchira/resolvers/twitter_resolver.rb CHANGED Viewed

@@ -1,22 +1,102 @@
+require 'uri'
 module Panchira
   class TwitterResolver < Resolver
-    URL_REGEXP = /twitter.com\/\w+\/status\/\d+/.freeze
+    URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
+    def initialize(url, options = nil)
+      super(url, options)
+      @screen_name = @url.slice(URL_REGEXP, 1)
+      @id = @url.slice(URL_REGEXP, 2)
+      @bearer_token = options&.dig(:twitter, :bearer_token)
+      @author = nil
+      @response = nil
+    end
+    def fetch
+      return super unless @bearer_token
+      @response = fetch_api if @bearer_token
+      result = PanchiraResult.new
+      result.canonical_url = parse_canonical_url
+      result.title = parse_title
+      result.description = parse_description
+      result.image = parse_image
+      result.tags = parse_tags
+      result.author = parse_author
+      result.resolver = parse_resolver
+      result
+    end
     private
+      def fetch_api
+        uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
+        uri.query = URI.encode_www_form({
+          'expansions': 'attachments.media_keys,author_id',
+          'media.fields': 'preview_image_url,type,url',
+          'user.fields': 'name,username',
+          'tweet.fields': 'entities'
+        })
+        raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
+        JSON.parse(raw_json)
+      end
+      def parse_canonical_url
+        # Twitter returns false canonical url when the account is set as sensitive.
+        "https://twitter.com/#{@screen_name}/status/#{@id}"
+      end
       def parse_title
-        @title = super
+        @title = if @response
+                   @author = @response['includes']['users'][0]['name']
+                   "#{@author} on Twitter"
+                 else
+                   super
+                 end
       end
       def parse_author
-        @title.match(/\A(.+) on Twitter\z/)[1]
+        @author || @title.match(/\A(.+) on Twitter\z/)[1]
+      rescue StandardError
+        nil
       end
       def parse_description
-        @description = super.gsub(/\A“|”\z/, '')
+        if @response
+          @response['data']['text']
+        else
+          @description = super.gsub(/\A“|”\z/, '')
+        end
       end
       def parse_tags
-        @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        if @response
+          @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
+        else
+          @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        end
+      end
+      def parse_image_url
+        return super unless @response
+        first_media = @response.dig('includes', 'media')&.first
+        return unless first_media
+        case first_media['type']
+        when 'photo'
+          first_media['url']
+        when 'video'
+          first_media['preview_image_url']
+        end
       end
   end

data/lib/panchira/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Panchira
-  VERSION = '1.4.0'
+  VERSION = '1.5.2'
 end

data/lib/panchira.rb CHANGED Viewed

@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
 module Panchira
   class << self
     # Return a PanchiraResult that contains the attributes of given url.
-    def fetch(url)
+    def fetch(url, options = nil)
       resolver = select_resolver(url)
-      resolver.new(url).fetch
+      resolver.new(url, options).fetch
     end
     private

data/panchira.gemspec CHANGED Viewed

@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
   spec.add_dependency 'fastimage', '~> 2.1.7'
-  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.13.0'
+  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: panchira
 version: !ruby/object:Gem::Version
-  version: 1.4.0
+  version: 1.5.2
 platform: ruby
 authors:
 - kyp
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2022-01-10 00:00:00.000000000 Z
+date: 2022-03-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -103,7 +103,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
 description: |2
       Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
       If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.4
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: A parser for hentai websites