RubyGems - title_grabber - Versions diffs - 0.5.0 → 0.5.1 - Mend

title_grabber 0.5.0 → 0.5.1

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '028ecf2bea074495354ac08494517b7f254e8a27d782e2a3ff1eec7bd313474b'
-  data.tar.gz: 20adeb673a80980e1c3e8ee04285451eb0aa4d8e98c4c17cad5b72f9549f04ee
+  metadata.gz: 4e56496e45f91fb324125076d07c4fe3ec57fc9200246f78c78f9a13f3ca3b44
+  data.tar.gz: b1d99e4a53edc27a27b95fd1326159861a1c548af103a5992a60ce7aa83c0d83
 SHA512:
-  metadata.gz: 2a6268347f464325956f4281f0132b9f6c03d945eefab10754fb93387b0bd0db384cac05899238532f42c62869f58e6282e1e8eadf326bde9026b3d65ba97261
-  data.tar.gz: e1388a778ea78d29f9c791db0e6eaaa0fa379ba912f6869a0b90f087c1fb7e366c61b08f47dce25f003abbc8f6909dde3b3c99a1636fdcf1777eb2fcbb61c72e
+  metadata.gz: d132efc517ca28ab5f6c857d5d9267005d1d74f3cbff29032c88b1a40ff60e6a49827f7479b486e8b8a2922cabab447e4370d70fbaa9db963b9a5a00fb1c91a1
+  data.tar.gz: a6d9d08caca7314a1a2c595039a065a8e22ca8b37aecf60303973db4b8939baa0fb62011ae2be201c01b9247ac7c31b7486c6a37343c86836a4c30167d565dd0

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    title_grabber (0.4.1)
+    title_grabber (0.5.0)
       oga (~> 2.15)
 GEM

data/lib/http_helper.rb CHANGED

@@ -21,10 +21,10 @@ module HTTPHelper
     retries = 0
     begin
-      res = Timeout.timeout(read_to) {
-              open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE,
-                        open_timeout: connect_to, read_timeout: read_to)
-            }
+      Timeout.timeout(read_to) {
+        open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE,
+                  open_timeout: connect_to, read_timeout: read_to)
+      }
     rescue => err
       msg = err.message
       if msg =~ REDIR_FORBIDDEN

data/lib/title_grabber.rb CHANGED

@@ -84,7 +84,7 @@ module TitleGrabber
         end
         thr_cnt = [max_threads, queue.size].min
-        threads = 1.upto(thr_cnt).map.with_index { |_, i|
+        1.upto(thr_cnt).map.with_index { |_, i|
           Thread.new(i) do |j|
             Thread.current.name = "Thread ##{i + 1}"
@@ -104,33 +104,9 @@ module TitleGrabber
                       end
                 if doc
-                  tweet_urls = []
-                  TWEET_TXT_SELS.each do |tweet_txt_sel|
-                    tweet_urls.concat(doc.css("#{TWEET_PERMA_LINK_SEL} #{tweet_txt_sel} a").
-                                          map { |a| a[-"href"] })
-                  end
-                  tweet_urls.compact!
-                  tweet_urls.uniq!
-                  tweet_urls.map! do |url|
-                    if url.match?(URL_RE) && (res = open_w_timeout(url, **http_opts))
-                      uri = res.base_uri
-                      uri.host == TWITTER_HOST && !uri.to_s.match?(TWITTER_STATUS_RE) ? nil : uri.to_s
-                    else
-                      url
-                    end
-                  end
-                  tweet_urls.compact!
-                  tweet_urls.map! do |url|
-                    url.start_with?("/") ? URI.join(TWITTER_URL_PREFIX, url).to_s : url
+                  if e_url = parse_end_url_from(doc)
+                    end_url = e_url
                   end
-                  tweet_urls.delete_if { |url|
-                    uri = URI(url)
-                    uri.host == TWITTER_HOST && uri.path.count("/") > 1 &&
-                      !uri.to_s.match?(TWITTER_STATUS_RE)
-                  }
-                  tweet_urls.sort!
-                  end_url = tweet_urls.join(CSV_FIELD_SEP) unless tweet_urls.empty?
                   page_title = doc.at_css('title')&.text || -""
                   clean_up_whitespace(page_title) unless page_title.empty?
@@ -164,6 +140,11 @@ module TitleGrabber
     private
+    def http_opts
+      @http_opts ||= { connect_to: connect_to, read_to: read_to,
+                       max_retries: max_retries }
+    end
     def processed_urls
       @processed_urls ||= begin
                             urls = {}
@@ -186,9 +167,38 @@ module TitleGrabber
                           end
     end
-    def http_opts
-      @http_opts ||= { connect_to: connect_to, read_to: read_to,
-                       max_retries: max_retries }
+    def parse_end_url_from(doc)
+      tweet_urls = []
+      TWEET_TXT_SELS.each do |tweet_txt_sel|
+        tweet_urls.concat(doc.css("#{TWEET_PERMA_LINK_SEL} #{tweet_txt_sel} a").
+                              map { |a| a[-"href"] })
+      end
+      tweet_urls.compact!
+      tweet_urls.uniq!
+      tweet_urls.map! do |url|
+        if url.match?(URL_RE) && (res = open_w_timeout(url, **http_opts))
+          uri = res.base_uri
+          uri.host == TWITTER_HOST && !uri.to_s.match?(TWITTER_STATUS_RE) ? nil : uri.to_s
+        else
+          url
+        end
+      end
+      tweet_urls.compact!
+      tweet_urls.map! do |url|
+        url.start_with?("/") ? URI.join(TWITTER_URL_PREFIX, url).to_s : url
+      end
+      tweet_urls.delete_if { |url|
+        uri = URI(url)
+        uri.host == TWITTER_HOST && uri.path.count("/") > 1 &&
+          !uri.to_s.match?(TWITTER_STATUS_RE)
+      }
+      tweet_urls.sort!
+      tweet_urls.join(CSV_FIELD_SEP) unless tweet_urls.empty?
     end
   end
 end

data/lib/title_grabber/version.rb CHANGED

@@ -1,3 +1,3 @@
 module TitleGrabber
-  VERSION = "0.5.0"
+  VERSION = "0.5.1"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: title_grabber
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.5.1
 platform: ruby
 authors:
 - Cristian Rasch
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-04-18 00:00:00.000000000 Z
+date: 2019-04-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: oga