RubyGems - panchira - Versions diffs - 1.4.0 → 1.5.0 - Mend

panchira 1.4.0 → 1.5.0

Files changed (15) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +2 -0
data/.rubocop.yml +7 -0
data/CHANGELOG.md +6 -0
data/Gemfile.lock +17 -15
data/README.md +10 -0
data/lib/panchira/resolvers/komiflo_resolver.rb +2 -2
data/lib/panchira/resolvers/narou_resolver.rb +4 -4
data/lib/panchira/resolvers/pixiv_resolver.rb +4 -4
data/lib/panchira/resolvers/resolver.rb +4 -2
data/lib/panchira/resolvers/twitter_resolver.rb +82 -5
data/lib/panchira/version.rb +1 -1
data/lib/panchira.rb +2 -2
data/panchira.gemspec +1 -1
metadata +5 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 87af8a25ccd6d841b133aaa28ce7853c0111ae0ce4768f287a1f9a8a09eec148
-  data.tar.gz: 93233eca9a9fd019fb82aafbd1e97630aa242d1645d2e2306fee52d1812aa144
+  metadata.gz: 9119f6e4ad4e4a3b551642f7d19a7853805d2c82b65c5fc868b472f265e7169c
+  data.tar.gz: 03cbbf38e009cd326b4f40f467de1dcc1d0fb852de362b3dde05ed56eb65c2c2
 SHA512:
-  metadata.gz: 42e8539356b8c73b8cced17cdaec2452960f17175736a004702c37e7370f407ea995504a07e2b060c53f52629f42cb247e34080d9e54c52a07158fddb5e427c1
-  data.tar.gz: 4b1746f991ba2353304c96e3297efca0a45ba0e3d9c908f653e72aa6920de3654f2714235d24037de591699287bf0c027b0720d6e2741cf3ef6a6e5ae1add95a
+  metadata.gz: 6ff2fab3b4489ade9e7accb6f10dc2d391aa5ba5ebbbb08f130926df81acd31eeae36d2208b915848e5e1337e71ab2a7cbe300eeeb728ed8669593fc139573f8
+  data.tar.gz: df76a29e1af2515d4eee99568426295e02fbcf8b1b6b835932633458f3f287d4ca2460ec1a497a73f2f8a5e66a100acf71aa37e35be3dd778c95ac83f2f808e7

data/.github/workflows/ruby.yml CHANGED Viewed

@@ -31,3 +31,5 @@ jobs:
         run: bundle install
       - name: Run tests
         run: bundle exec rake test
+        env:
+          TWITTER_BEARER_TOKEN: ${{ secrets.TWITTER_BEARER_TOKEN }}

data/.rubocop.yml CHANGED Viewed

@@ -17,6 +17,10 @@ Layout/FirstHashElementIndentation:
 Layout/IndentationConsistency:
   EnforcedStyle: indented_internal_methods
+Layout/MultilineAssignmentLayout:
+  EnforcedStyle: same_line
+  SupportedTypes: ["block"]
 Layout/MultilineMethodCallIndentation:
   EnforcedStyle: indented
@@ -30,6 +34,9 @@ Lint/MissingSuper:
   Exclude:
     - lib/panchira/resolvers/*
+Lint/SymbolConversion:
+  EnforcedStyle: consistent
 Style/AsciiComments:
   Enabled: false

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
+## 1.5.0 - 2022-03-01
+### Changed
+- You can now pass options to `Panchira::fetch` and to the Resolver constructors.
+- Twitter resolvers can now fetch data from the Twitter API (requires a bearer token).
+- Max execution time is now set to 10 seconds.
 ## 1.4.0 - 2022-01-10
 ### Added
 - Added support for non-Japanese pixiv URLs.

data/Gemfile.lock CHANGED Viewed

@@ -1,41 +1,43 @@
 PATH
   remote: .
   specs:
-    panchira (1.4.0)
+    panchira (1.5.0)
       fastimage (~> 2.1.7)
-      nokogiri (>= 1.10.9, < 1.13.0)
+      nokogiri (>= 1.10.9, < 1.14.0)
 GEM
   remote: https://rubygems.org/
   specs:
     ast (2.4.2)
     fastimage (2.1.7)
-    minitest (5.14.4)
-    nokogiri (1.12.5-x86_64-darwin)
+    mini_portile2 (2.8.0)
+    minitest (5.15.0)
+    nokogiri (1.13.3)
+      mini_portile2 (~> 2.8.0)
       racc (~> 1.4)
-    parallel (1.20.1)
-    parser (3.0.1.1)
+    parallel (1.21.0)
+    parser (3.1.1.0)
       ast (~> 2.4.1)
     racc (1.6.0)
-    rainbow (3.0.0)
+    rainbow (3.1.1)
     rake (12.3.3)
-    regexp_parser (2.1.1)
+    regexp_parser (2.2.1)
     rexml (3.2.5)
-    rubocop (1.15.0)
+    rubocop (1.25.1)
       parallel (~> 1.10)
-      parser (>= 3.0.0.0)
+      parser (>= 3.1.0.0)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 1.8, < 3.0)
       rexml
-      rubocop-ast (>= 1.5.0, < 2.0)
+      rubocop-ast (>= 1.15.1, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 1.4.0, < 3.0)
-    rubocop-ast (1.5.0)
-      parser (>= 3.0.1.1)
-    rubocop-minitest (0.12.1)
+    rubocop-ast (1.16.0)
+      parser (>= 3.1.1.0)
+    rubocop-minitest (0.17.2)
       rubocop (>= 0.90, < 2.0)
     ruby-progressbar (1.11.0)
-    unicode-display_width (2.0.0)
+    unicode-display_width (2.1.0)
 PLATFORMS
   ruby

data/README.md CHANGED Viewed

@@ -46,6 +46,16 @@ In most situation you would call `Panchira#fetch`. It is a singular method that
 Panchira has a special treatment for each website. `Resolver` classes are where those treatments take place, and you can use your own `Resolver` classes by registering them with Panchira. See `Panchira::Extensions` documentation in source code for further details.
+### About Twitter API
+Due to a recent change in Twitter, it's getting really hard to fetch tweet data by scraping. To solve this problem, Panchira can now use the official Twitter API.
+To use the Twitter API instead of normal scraping, please set your Twitter bearer token as an option to `Panchira::fetch`. If you don't set a token, Panchira will just fall back to simple scraping.
+```
+> Panchira.fetch("https://twitter.com/example/status/1234567890", {twitter: {bearer_token: 'ABC...123'}})
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/panchira/resolvers/komiflo_resolver.rb CHANGED Viewed

@@ -6,8 +6,8 @@ module Panchira
   class KomifloResolver < Resolver
     URL_REGEXP = %r{komiflo\.com(?:/#!)?/comics/(\d+)}.freeze
-    def initialize(url)
-      @url = url
+    def initialize(url, options = nil)
+      super(url, options)
       @id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://api.komiflo.com/content/id/#{@id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/narou_resolver.rb CHANGED Viewed

@@ -8,8 +8,8 @@ module Panchira
       URL_REGEXP = %r{novel18\.syosetu\.com/}.freeze
       ID_REGEXP = %{novel18\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")
@@ -48,8 +48,8 @@ module Panchira
       URL_REGEXP = /ncode\.syosetu\.com/.freeze
       ID_REGEXP = %{ncode\.syosetu\.com/(?<id>[^/]+)}
-      def initialize(url)
-        super(url)
+      def initialize(url, options = nil)
+        super(url, options)
         if id = @url.match(ID_REGEXP)[:id]
           @desc = fetch_page("https://novel18.syosetu.com/novelview/infotop/ncode/#{id}/")

data/lib/panchira/resolvers/pixiv_resolver.rb CHANGED Viewed

@@ -4,8 +4,8 @@ module Panchira
   class PixivResolver < Resolver
     URL_REGEXP = %r{pixiv\.net/.*(member_illust.php?.*illust_id=|artworks/)(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @illust_id = url.slice(URL_REGEXP, 2)
       raw_json = URI.parse("https://www.pixiv.net/ajax/illust/#{@illust_id}").read('User-Agent' => user_agent)
@@ -47,8 +47,8 @@ module Panchira
   class PixivNovelResolver < Resolver
     URL_REGEXP = %r{pixiv\.net/novel/show.php\?id=(\d+)}.freeze
-    def initialize(url)
-      super(url)
+    def initialize(url, options = nil)
+      super(url, options)
       @novel_id = url.slice(URL_REGEXP, 1)
       raw_json = URI.parse("https://www.pixiv.net/ajax/novel/#{@novel_id}").read('User-Agent' => user_agent)

data/lib/panchira/resolvers/resolver.rb CHANGED Viewed

@@ -11,8 +11,9 @@ module Panchira
     # You must override this in subclasses to limit which urls to resolve.
     URL_REGEXP = URI::DEFAULT_PARSER.make_regexp
-    def initialize(url)
+    def initialize(url, options = nil)
       @url = url
+      @options = options
     end
     # This function is called right after this Resolver instance is made.
@@ -53,7 +54,8 @@ module Panchira
       def fetch_page(url)
         read_options = {
           'User-Agent' => user_agent,
-          'Cookie' => cookie
+          'Cookie' => cookie,
+          :read_timeout => 10
         }
         raw_page = URI.parse(url).read(read_options)

data/lib/panchira/resolvers/twitter_resolver.rb CHANGED Viewed

@@ -1,22 +1,99 @@
+require 'uri'
 module Panchira
   class TwitterResolver < Resolver
-    URL_REGEXP = /twitter.com\/\w+\/status\/\d+/.freeze
+    URL_REGEXP = %r{twitter.com/(\w+)/status/(\d+)}.freeze
+    def initialize(url, options = nil)
+      super(url, options)
+      @screen_name = @url.slice(URL_REGEXP, 1)
+      @id = @url.slice(URL_REGEXP, 2)
+      @bearer_token = options&.dig(:twitter, :bearer_token)
+    end
+    def fetch
+      return super unless @bearer_token
+      @response = fetch_api if @bearer_token
+      result = PanchiraResult.new
+      result.canonical_url = parse_canonical_url
+      result.title = parse_title
+      result.description = parse_description
+      result.image = parse_image
+      result.tags = parse_tags
+      result.author = parse_author
+      result.resolver = parse_resolver
+      result
+    end
     private
+      def fetch_api
+        uri = URI.parse("https://api.twitter.com/2/tweets/#{@id}")
+        uri.query = URI.encode_www_form({
+          'expansions': 'attachments.media_keys,author_id',
+          'media.fields': 'preview_image_url,type,url',
+          'user.fields': 'name,username',
+          'tweet.fields': 'entities'
+        })
+        raw_json = uri.read('Authorization' => "Bearer #{@bearer_token}")
+        JSON.parse(raw_json)
+      end
+      def parse_canonical_url
+        # Twitter returns false canonical url when the account is set as sensitive.
+        "https://twitter.com/#{@screen_name}/status/#{@id}"
+      end
       def parse_title
-        @title = super
+        @title = if @response
+                   @author = @response['includes']['users'][0]['name']
+                   "#{@author} on Twitter"
+                 else
+                   super
+                 end
       end
       def parse_author
-        @title.match(/\A(.+) on Twitter\z/)[1]
+        @author || @title.match(/\A(.+) on Twitter\z/)[1]
+      rescue StandardError
+        nil
       end
       def parse_description
-        @description = super.gsub(/\A“|”\z/, '')
+        if @response
+          @response['data']['text']
+        else
+          @description = super.gsub(/\A“|”\z/, '')
+        end
       end
       def parse_tags
-        @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        if @response
+          @response.dig('data', 'entities', 'hashtags')&.map { |obj| obj['tag'] }
+        else
+          @description.scan(/[#＃]([^#＃\s]+)/).map(&:first)
+        end
+      end
+      def parse_image_url
+        return super unless @response
+        first_media = @response.dig('includes', 'media')&.first
+        return unless first_media
+        case first_media['type']
+        when 'photo'
+          first_media['url']
+        when 'video'
+          first_media['preview_image_url']
+        end
       end
   end

data/lib/panchira/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Panchira
-  VERSION = '1.4.0'
+  VERSION = '1.5.0'
 end

data/lib/panchira.rb CHANGED Viewed

@@ -21,10 +21,10 @@ Dir.glob("#{project_root}/panchira/resolvers/*_resolver.rb").sort.each { |file|
 module Panchira
   class << self
     # Return a PanchiraResult that contains the attributes of given url.
-    def fetch(url)
+    def fetch(url, options = nil)
       resolver = select_resolver(url)
-      resolver.new(url).fetch
+      resolver.new(url, options).fetch
     end
     private

data/panchira.gemspec CHANGED Viewed

@@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'rubocop-minitest', '~> 0.10'
   spec.add_dependency 'fastimage', '~> 2.1.7'
-  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.13.0'
+  spec.add_dependency 'nokogiri', '>= 1.10.9', '< 1.14.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: panchira
 version: !ruby/object:Gem::Version
-  version: 1.4.0
+  version: 1.5.0
 platform: ruby
 authors:
 - kyp
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2022-01-10 00:00:00.000000000 Z
+date: 2022-03-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -103,7 +103,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -113,7 +113,7 @@ dependencies:
         version: 1.10.9
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.13.0
+        version: 1.14.0
 description: |2
       Panchira allows you to parse attributes of hentais on some web platforms, such as Pixiv and DLSite.
       If you need card previews on hentai but can't get it with simply parsing metatags, then it is time for Panchira.
@@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.4
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: A parser for hentai websites