RubyGems - title_grabber - Versions diffs - 0.4.0 → 0.4.1 - Mend

title_grabber 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 58643e0df803b9315f741db2effdcbd8b3d4e52845b06de27f18dd726f732bb6
-  data.tar.gz: 013fcdb1650a497126e11b62845240489c296d8de7b0dcd9bc95ccbafb288633
+  metadata.gz: 58ceec17ac17673c12eee2cdd5fbef23cdc561b89b5820f652e592a91047a399
+  data.tar.gz: 3f4be7dbff89b096c51fb28839d0bd2f4a85873507ee6600b7d2e481c16ea766
 SHA512:
-  metadata.gz: '09167bbc4fcd61034322ab62ec5ca68ebffcb8920c9f07622e6b69367c76a639c03314101a3588ecd0e53e62b08d180e323114e5c9f70760c8416e4a342b9850'
-  data.tar.gz: d2f053afc4fc465049d8068302e1baf56a76ef1c6230813a7facd03e24207c03bfd104ebe45c7b2ae5dbc821140a59d4d6d3c4137e766258eb4cf19fbe0a92de
+  metadata.gz: 2e87da24fb755d6869b68af2fd201a19e7f9ea6754b1dd26263f58124055847a9594cd186a88c39b2b91f59507f969a35b47bbf4924ca0c35c8f2776e4533836
+  data.tar.gz: '0957bf0dc17b6f02dff00b185ae0fd9ba163ce9610655063dab0ce8e16bd8c761db1a7cd8d283a685a408ed2470bef1ac7bcb42167f995e2857feeeb94e7647c'

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    title_grabber (0.3.7)
+    title_grabber (0.4.0)
       http (~> 4.1)
       oga (~> 2.15)

data/README.md CHANGED Viewed

@@ -23,14 +23,14 @@ Or install it yourself as:
 Just pass it a list of files containing URLs (one per line)
 ```
-title-grabber /abs/path/2/file1.txt rel/path/2/file2.txt
+title-grabber -f /abs/path/2/file1.txt,rel/path/2/file2.txt
 ```
 Data is either recorded to out.csv in the CWD or the file specified using the
 -o/--output argument, e.g.
 ```
-title-grabber -o ~/output.csv /abs/path/2/file1.txt rel/path/2/file2.txt
+title-grabber -o ~/output.csv -f /abs/path/2/file1.txt,rel/path/2/file2.txt
 ```
 See all available CLI switches and env vars

data/exe/title-grabber CHANGED Viewed

@@ -16,6 +16,11 @@ OptionParser.new do |args|
     exit
   end
+  args.on("-f", "--files /abs/f1,rel/f2", Array, "1 or more comma-separated paths to text files containing 1 URL per line ") do |files|
+    arguments[:file_paths] = files.map { |f| Pathname(f).expand_path }.
+                                   select { |f| f.file? && f.exist? }
+  end
   args.on("-o", "--output FILE", "Output file. Defaults to #{TitleGrabber::DEF_OUT_PATH.basename}") do |out|
     arguments[:output] = Pathname(out)
   end
@@ -49,9 +54,9 @@ OptionParser.new do |args|
   end
 end.parse!
-if ARGV.empty?
+if Array(arguments[:file_paths]).empty?
   STDERR.puts "At least 1 input file is required!\n"
   exit(1)
 else
-  TitleGrabber.call(ARGF, arguments)
+  TitleGrabber.call(arguments)
 end

data/lib/title_grabber/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TitleGrabber
-  VERSION = "0.4.0"
+  VERSION = "0.4.1"
 end

data/lib/title_grabber.rb CHANGED Viewed

@@ -33,19 +33,19 @@ module TitleGrabber
   TWITTER_URL_PREFIX = -"https://#{TWITTER_HOST}"
   CSV_FIELD_SEP = -","
-  def self.call(lines, options)
-    MultiThreadedGrabber.new(lines, options).call
+  def self.call(options)
+    MultiThreadedGrabber.new(options).call
   end
   class MultiThreadedGrabber
     include HTTPHelper
     include TextHelper
-    attr_reader :lines, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
+    attr_reader :file_paths, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
                 :max_redirects, :max_retries, :max_threads, :logger
-    def initialize(lines, options)
-      @lines = lines
+    def initialize(options)
+      @file_paths = options[:file_paths]
       @out_path = options.fetch(:output, DEF_OUT_PATH)
       @tmp_path = @out_path.sub_ext(".tmp#{@out_path.extname}")
@@ -72,19 +72,20 @@ module TitleGrabber
       CSV.open(tmp_path, "w", force_quotes: true) do |csv|
         csv << HEADERS
-        lines.each do |line|
-          md = line.match(URL_RE)
-          next unless md
+        file_paths.each do |file_path|
+          file_path.each_line do |line|
+            md = line.match(URL_RE)
+            next unless md
-          url = md.to_s
-          if h = processed_urls[url]
-            csv << [url, h[END_URL_HEAD], h[PAGE_TIT_HEAD], h[ART_TIT_HEAD]]
-            next
-          end
+            url = md.to_s
+            if h = processed_urls[url]
+              csv << [url, h[END_URL_HEAD], h[PAGE_TIT_HEAD], h[ART_TIT_HEAD]]
+              next
+            end
-          queue << url
+            queue << url
+          end
         end
-        lines = nil
         thr_cnt = [max_threads, queue.size].min
         threads = 1.upto(thr_cnt).map.with_index { |_, i|
@@ -116,7 +117,7 @@ module TitleGrabber
                   tweet_urls.compact!
                   tweet_urls.uniq!
                   tweet_urls.map! do |url|
-                    if res = open_w_timeout(url, **http_opts)
+                    if url.match?(URL_RE) && (res = open_w_timeout(url, **http_opts))
                       uri = res.uri
                       uri.host == TWITTER_HOST && !uri.to_s.match?(TWITTER_STATUS_RE) ? nil : uri.to_s
                     else

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: title_grabber
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.4.1
 platform: ruby
 authors:
 - Cristian Rasch
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-04-17 00:00:00.000000000 Z
+date: 2019-04-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: http