RubyGems - stevedore-uploader - Versions diffs - 1.0.14-java → 1.0.15-java - Mend

stevedore-uploader 1.0.14-java → 1.0.15-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 594d704c741c5af52ab9953ab334483b51b6d0ed
-  data.tar.gz: d7fda45ea1116f25fdb0b98537f9d7f2ffab2ab9
+  metadata.gz: 592883a568d09ec2fa6a6155ac06409f96b01386
+  data.tar.gz: 6aaa836860bcdd8cbc432d92ade4bb61616f9a67
 SHA512:
-  metadata.gz: ec87c6dd3fac2105b428a20810c7ddfb8f24010ce28369dab755032b0705ea787e86103a7ad887cd5bdd014614105c95f1f5dae431d11b0624a0b96eabb53dbe
-  data.tar.gz: 2f642f39598fbc5be89fc814387212defbf71794e81c125327fa053b8f487ad3444faa16e447ea943c3d1f2d141d302aae2f8f0e11e0683b42934beb429f4f57
+  metadata.gz: 6993d8f2bf553ec22560f50e95bd5f1415aace8f11c43dd335ae80cf5b955c7fb50afe7b66f98055375f1a49e8442add0a9f65b8e2013ef43bcb5c319aeded74
+  data.tar.gz: 0dde17b5fe9791d24d5342448911f704ad93fedcaf715d040ebb0a323ea84d369f7f91389cc7be6472a3740583936d5b6ad0b3554b4a97c997589b8406c22fa7

data/bin/stevedore CHANGED Viewed

@@ -6,69 +6,67 @@ raise Exception, "You've gotta use Java 1.8; you're on #{java.lang.System.getPro
 require "#{File.expand_path(File.dirname(__FILE__))}/../lib/stevedore-uploader.rb"
-if __FILE__ == $0
-  require 'optparse'
-  require 'ostruct'
-  options = OpenStruct.new
-  options.ocr = true
-  op = OptionParser.new("Usage: stevedore [options] target_(dir_or_csv)") do |opts|
-    opts.on("-hSERVER:PORT", "--host=SERVER:PORT",
-            "The location of the ElasticSearch server") do |host|
-      options.host = host
-    end
-    opts.on("-iNAME", "--index=NAME",
-            "A name to use for the ES index (defaults to using the directory name)") do |index|
-      options.index = index
-    end
-    opts.on("-sPATH", "--s3path=PATH",
-            "The path under your bucket where these files have been uploaded. (defaults to ES index)"
-      ) do |s3path|
-      options.s3path = s3path
-    end
-    opts.on("-bPATH", "--s3bucket=PATH",
-            "The s3 bucket where these files have already been be uploaded (or will be later)."
-      ) do |s3bucket|
-      options.s3bucket = s3bucket
-    end
-    opts.on("--title-column=COLNAME",
-            "If target file is a CSV, which column contains the title of the row. Integer index or string column name."
-      ) do |title_column|
-      options.title_column = title_column
-    end
-    opts.on("--text-column=COLNAME",
-            "If target file is a CSV, which column contains the main, searchable of the row. Integer index or string column name."
-      ) do |text_column|
-      options.text_column = text_column
-    end
-    opts.on("--slice-size=SLICE",
-            "Process documents in batches of SLICE. Default is 100. Lower this if you get timeouts. Raise it to go faster."
-      ) do |slice_size|
-      options.slice_size = slice_size.to_i
-    end
-    opts.on("-o", "--[no-]ocr", "don't attempt to OCR any PDFs, even if they contain no text") do |v|
-      options.ocr = v
-    end
-    opts.on( '-?', '--help', 'Display this screen' ) do
-      puts opts
-      exit
-    end
+require 'optparse'
+require 'ostruct'
+options = OpenStruct.new
+options.ocr = true
+op = OptionParser.new("Usage: stevedore [options] target_(dir_or_csv)") do |opts|
+  opts.on("-hSERVER:PORT", "--host=SERVER:PORT",
+          "The location of the ElasticSearch server") do |host|
+    options.host = host
   end
-  op.parse!
+  opts.on("-iNAME", "--index=NAME",
+          "A name to use for the ES index (defaults to using the directory name)") do |index|
+    options.index = index
+  end
-  # to delete an index: curl -X DELETE localhost:9200/indexname/
-  unless ARGV.length == 1
-    puts op
+  opts.on("-sPATH", "--s3path=PATH",
+          "The path under your bucket where these files have been uploaded. (defaults to ES index)"
+    ) do |s3path|
+    options.s3path = s3path
+  end
+  opts.on("-bPATH", "--s3bucket=PATH",
+          "The s3 bucket where these files have already been be uploaded (or will be later)."
+    ) do |s3bucket|
+    options.s3bucket = s3bucket
+  end
+  opts.on("--title-column=COLNAME",
+          "If target file is a CSV, which column contains the title of the row. Integer index or string column name."
+    ) do |title_column|
+    options.title_column = title_column
+  end
+  opts.on("--text-column=COLNAME",
+          "If target file is a CSV, which column contains the main, searchable of the row. Integer index or string column name."
+    ) do |text_column|
+    options.text_column = text_column
+  end
+  opts.on("--slice-size=SLICE",
+          "Process documents in batches of SLICE. Default is 100. Lower this if you get timeouts. Raise it to go faster."
+    ) do |slice_size|
+    options.slice_size = slice_size.to_i
+  end
+  opts.on("-o", "--[no-]ocr", "don't attempt to OCR any PDFs, even if they contain no text") do |v|
+    options.ocr = v
+  end
+  opts.on( '-?', '--help', 'Display this screen' ) do
+    puts opts
     exit
   end
 end
+op.parse!
+# to delete an index: curl -X DELETE localhost:9200/indexname/
+unless ARGV.length == 1
+  puts op
+  exit
+end
 # you can provide either a path to files locally or
 # an S3 endpoint as s3://int-data-dumps/YOURINDEXNAME
 FOLDER = ARGV.shift
@@ -93,28 +91,22 @@ S3_BASEPATH = "https://#{S3_BUCKET}.s3.amazonaws.com/#{S3_PATH}"
 raise ArgumentError, "specify a destination" unless FOLDER
 raise ArgumentError, "specify the elasticsearch host" unless ES_HOST
-###############################
-# actual stuff
-###############################
-if __FILE__ == $0
-  f = Stevedore::ESUploader.new(ES_HOST, ES_INDEX, S3_BUCKET, S3_BASEPATH)
-  f.should_ocr = options.ocr
-  puts "Will not OCR, per --no-ocr option" unless f.should_ocr
-  f.slice_size = options.slice_size if options.slice_size
-  puts "Slice size set to #{f.slice_size}" if options.slice_size
-  if FOLDER.match(/\.[ct]sv$/)
-    f.do_csv!(FOLDER, File.join(f.s3_basepath, File.basename(FOLDER)), options.title_column, options.text_column)
-  else
-    f.do!(FOLDER)
-  end
-  puts "Finished uploading documents at #{Time.now}"
+f = Stevedore::ESUploader.new(ES_HOST, ES_INDEX, S3_BUCKET, S3_BASEPATH)
+f.should_ocr = options.ocr
+puts "Will not OCR, per --no-ocr option" unless f.should_ocr
+f.slice_size = options.slice_size if options.slice_size
+puts "Slice size set to #{f.slice_size}" if options.slice_size
-  puts "Created Stevedore for #{ES_INDEX}; go check out https://stevedore.newsdev.net/search/#{ES_INDEX} or http://stevedore.adm.prd.newsdev.nytimes.com/search/#{ES_INDEX}"
-  if f.errors.size > 0
-    STDERR.puts "#{f.errors.size} failed documents:"
-    STDERR.puts f.errors.inspect
-    puts "Uploading successful, but with #{f.errors.size} errors."
-  end
+if FOLDER.match(/\.[ct]sv$/)
+  f.do_csv!(FOLDER, File.join(f.s3_basepath, File.basename(FOLDER)), options.title_column, options.text_column)
+else
+  f.do!(FOLDER)
+end
+puts "Finished uploading documents at #{Time.now}"
+puts "Created Stevedore for #{ES_INDEX}; go check out https://stevedore.newsdev.net/search/#{ES_INDEX} or http://stevedore.adm.prd.newsdev.nytimes.com/search/#{ES_INDEX}"
+if f.errors.size > 0
+  STDERR.puts "#{f.errors.size} failed documents:"
+  STDERR.puts f.errors.inspect
+  puts "Uploading successful, but with #{f.errors.size} errors."
 end

data/lib/stevedore-uploader.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'rika'
-require 'jruby-openssl'
+require 'openssl'
 require 'net/https'
 require 'elasticsearch'
 require 'elasticsearch/transport/transport/http/manticore'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: stevedore-uploader
 version: !ruby/object:Gem::Version
-  version: 1.0.14
+  version: 1.0.15
 platform: java
 authors:
 - Jeremy B. Merrill