RubyGems - title_grabber - Versions diffs - 0.4.0 → 0.4.1 - Mend

title_grabber 0.4.0 → 0.4.1

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 58643e0df803b9315f741db2effdcbd8b3d4e52845b06de27f18dd726f732bb6
-  data.tar.gz: 013fcdb1650a497126e11b62845240489c296d8de7b0dcd9bc95ccbafb288633
+  metadata.gz: 58ceec17ac17673c12eee2cdd5fbef23cdc561b89b5820f652e592a91047a399
+  data.tar.gz: 3f4be7dbff89b096c51fb28839d0bd2f4a85873507ee6600b7d2e481c16ea766
 SHA512:
-  metadata.gz: '09167bbc4fcd61034322ab62ec5ca68ebffcb8920c9f07622e6b69367c76a639c03314101a3588ecd0e53e62b08d180e323114e5c9f70760c8416e4a342b9850'
-  data.tar.gz: d2f053afc4fc465049d8068302e1baf56a76ef1c6230813a7facd03e24207c03bfd104ebe45c7b2ae5dbc821140a59d4d6d3c4137e766258eb4cf19fbe0a92de
+  metadata.gz: 2e87da24fb755d6869b68af2fd201a19e7f9ea6754b1dd26263f58124055847a9594cd186a88c39b2b91f59507f969a35b47bbf4924ca0c35c8f2776e4533836
+  data.tar.gz: '0957bf0dc17b6f02dff00b185ae0fd9ba163ce9610655063dab0ce8e16bd8c761db1a7cd8d283a685a408ed2470bef1ac7bcb42167f995e2857feeeb94e7647c'

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    title_grabber (0.3.7)
+    title_grabber (0.4.0)
       http (~> 4.1)
       oga (~> 2.15)

data/README.md CHANGED Viewed

@@ -23,14 +23,14 @@ Or install it yourself as:
 Just pass it a list of files containing URLs (one per line)
 ```
-title-grabber /abs/path/2/file1.txt rel/path/2/file2.txt
+title-grabber -f /abs/path/2/file1.txt,rel/path/2/file2.txt
 ```
 Data is either recorded to out.csv in the CWD or the file specified using the
 -o/--output argument, e.g.
 ```
-title-grabber -o ~/output.csv /abs/path/2/file1.txt rel/path/2/file2.txt
+title-grabber -o ~/output.csv -f /abs/path/2/file1.txt,rel/path/2/file2.txt
 ```
 See all available CLI switches and env vars

data/exe/title-grabber CHANGED Viewed

@@ -16,6 +16,11 @@ OptionParser.new do |args|
     exit
   end
+  args.on("-f", "--files /abs/f1,rel/f2", Array, "1 or more comma-separated paths to text files containing 1 URL per line ") do |files|
+    arguments[:file_paths] = files.map { |f| Pathname(f).expand_path }.
+                                   select { |f| f.file? && f.exist? }
+  end
   args.on("-o", "--output FILE", "Output file. Defaults to #{TitleGrabber::DEF_OUT_PATH.basename}") do |out|
     arguments[:output] = Pathname(out)
   end
@@ -49,9 +54,9 @@ OptionParser.new do |args|
   end
 end.parse!
-if ARGV.empty?
+if Array(arguments[:file_paths]).empty?
   STDERR.puts "At least 1 input file is required!\n"
   exit(1)
 else
-  TitleGrabber.call(ARGF, arguments)
+  TitleGrabber.call(arguments)
 end

data/lib/title_grabber/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TitleGrabber
-  VERSION = "0.4.0"
+  VERSION = "0.4.1"
 end

data/lib/title_grabber.rb CHANGED Viewed

@@ -33,19 +33,19 @@ module TitleGrabber
   TWITTER_URL_PREFIX = -"https://#{TWITTER_HOST}"
   CSV_FIELD_SEP = -","
-  def self.call(lines, options)
-    MultiThreadedGrabber.new(lines, options).call
+  def self.call(options)
+    MultiThreadedGrabber.new(options).call
   end
   class MultiThreadedGrabber
     include HTTPHelper
     include TextHelper
-    attr_reader :lines, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
+    attr_reader :file_paths, :out_path, :tmp_path, :connect_to, :read_to, :write_to,
                 :max_redirects, :max_retries, :max_threads, :logger
-    def initialize(lines, options)
-      @lines = lines
+    def initialize(options)
+      @file_paths = options[:file_paths]
       @out_path = options.fetch(:output, DEF_OUT_PATH)
       @tmp_path = @out_path.sub_ext(".tmp#{@out_path.extname}")
@@ -72,19 +72,20 @@ module TitleGrabber
       CSV.open(tmp_path, "w", force_quotes: true) do |csv|
         csv << HEADERS
-        lines.each do |line|
-          md = line.match(URL_RE)
-          next unless md
+        file_paths.each do |file_path|
+          file_path.each_line do |line|
+            md = line.match(URL_RE)
+            next unless md
-          url = md.to_s
-          if h = processed_urls[url]
-            csv << [url, h[END_URL_HEAD], h[PAGE_TIT_HEAD], h[ART_TIT_HEAD]]
-            next
-          end
+            url = md.to_s
+            if h = processed_urls[url]
+              csv << [url, h[END_URL_HEAD], h[PAGE_TIT_HEAD], h[ART_TIT_HEAD]]
+              next
+            end
-          queue << url
+            queue << url
+          end
         end
-        lines = nil
         thr_cnt = [max_threads, queue.size].min
         threads = 1.upto(thr_cnt).map.with_index { |_, i|
@@ -116,7 +117,7 @@ module TitleGrabber
                   tweet_urls.compact!
                   tweet_urls.uniq!
                   tweet_urls.map! do |url|
-                    if res = open_w_timeout(url, **http_opts)
+                    if url.match?(URL_RE) && (res = open_w_timeout(url, **http_opts))
                       uri = res.uri
                       uri.host == TWITTER_HOST && !uri.to_s.match?(TWITTER_STATUS_RE) ? nil : uri.to_s
                     else

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: title_grabber
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.4.1
 platform: ruby
 authors:
 - Cristian Rasch
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-04-17 00:00:00.000000000 Z
+date: 2019-04-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: http