traject 2.3.0-java → 2.3.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/lib/traject/indexer.rb +0 -1
- data/lib/traject/macros/marc21.rb +4 -0
- data/lib/traject/marc_extractor_spec.rb +1 -1
- data/lib/traject/solr_json_writer.rb +11 -12
- data/lib/traject/thread_pool.rb +14 -12
- data/lib/traject/translation_map.rb +10 -12
- data/lib/traject/util.rb +16 -18
- data/lib/traject/version.rb +1 -1
- data/test/indexer/macros_marc21_test.rb +2 -0
- data/test/marc_extractor_test.rb +2 -2
- data/traject.gemspec +12 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b482969bfad4936f4bab36ebbc6b0a2584f06457
+  data.tar.gz: e58bd672a66565f3dca63e4f3c59ad9eda457625
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ea57b4e0b1fb2050793215097786ae3f12e819ecd8d1616964eeba29be24aa53e10c2b47a4d1fd16be70d0c2e4973e3e30a01bdbf9bc5a28f09060197f446ee4
+  data.tar.gz: dc6ea5a40377aff2b9ca33c9a84deb295c972db31b159da7f3b3526a339c80f3aed5b3a3d01088102a749aa3e69d8c91d53c9e5f5395b2f15f5d755eb9daf9c6
data/.travis.yml
CHANGED
data/lib/traject/indexer.rb
CHANGED
data/lib/traject/macros/marc21.rb
CHANGED
@@ -238,6 +238,10 @@ module Traject::Macros
       # single square bracket characters if they are the start and/or end
       # chars and there are no internal square brackets.
       str = str.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')
+
+      # trim any leading or trailing whitespace
+      str.strip!
+
       return str
     end
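The added str.strip! means Marc21.trim_punctuation now also removes leading and trailing whitespace. A minimal sketch of the resulting behavior, mirroring the assertions added to data/test/indexer/macros_marc21_test.rb later in this diff:

    require 'traject/macros/marc21'

    # behavior after this change, per the new test assertions below
    Traject::Macros::Marc21.trim_punctuation(" one two three.")   # => "one two three"
    Traject::Macros::Marc21.trim_punctuation("one two three...")  # => "one two three..."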
data/lib/traject/marc_extractor_spec.rb
CHANGED
@@ -170,7 +170,7 @@ module Traject
       hash = Hash.new

       # Split the string(s) given on colon
-      spec_strings = spec_string.is_a?(Array) ? spec_string.map { |s| s.split(/\s*:\s*/) }.flatten : spec_string.split(
+      spec_strings = spec_string.is_a?(Array) ? spec_string.map { |s| s.split(/\s*:\s*/) }.flatten : spec_string.split(/\s*:\s*/)

       spec_strings.each do |part|
         if m = DATAFIELD_PATTERN.match(part)
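With the restored regexp argument, a plain (non-array) spec string is again split on colons into individual field specs. A small sketch of parsing such a string, using the same call exercised in data/test/marc_extractor_test.rb below:

    require 'traject'

    parsed = Traject::MarcExtractor::Spec.hash_from_string("245abcdes:810:700|*4|bcd")
    parsed.keys                    # => ["245", "810", "700"]
    parsed["245"].first.subfields  # => ["a", "b", "c", "d", "e", "s"]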
data/lib/traject/solr_json_writer.rb
CHANGED
@@ -1,6 +1,5 @@
 require 'yell'

-require 'traject'
 require 'traject/util'
 require 'traject/qualified_const_get'
 require 'traject/thread_pool'
@@ -28,21 +27,21 @@ require 'concurrent' # for atomic_fixnum
 # My tests indicate that this setting doesn't change overall index speed by a ton.
 #
 # * solr_writer.thread_pool: How many threads to use for the writer. Default is 1.
-#   Likely useful even under MRI since thread will be waiting on Solr for some time.
+#   Likely useful even under MRI since thread will be waiting on Solr for some time.
 #
-# * solr_writer.max_skipped: How many records skipped due to errors before we
-#   bail out with a fatal error? Set to -1 for unlimited skips. Default 0,
-#   raise and abort on a single record that could not be added to Solr.
+# * solr_writer.max_skipped: How many records skipped due to errors before we
+#   bail out with a fatal error? Set to -1 for unlimited skips. Default 0,
+#   raise and abort on a single record that could not be added to Solr.
 #
 # * solr_writer.commit_on_close: Set to true (or "true") if you want to commit at the
 #   end of the indexing run. (Old "solrj_writer.commit_on_close" supported for backwards
 #   compat only.)
 #
 # * solr_writer.commit_timeout: If commit_on_close, how long to wait for Solr before
-#   giving up as a timeout. Default 10 minutes. Solr can be slow.
+#   giving up as a timeout. Default 10 minutes. Solr can be slow.
 #
 # * solr_json_writer.http_client Mainly intended for testing, set your own HTTPClient
-#   or mock object to be used for HTTP.
+#   or mock object to be used for HTTP.


 class Traject::SolrJsonWriter
@@ -85,7 +84,7 @@ class Traject::SolrJsonWriter
     @thread_pool = Traject::ThreadPool.new(@thread_pool_size)

     # old setting solrj_writer supported for backwards compat, as we make
-    # this the new default writer.
+    # this the new default writer.
     @commit_on_close = (settings["solr_writer.commit_on_close"] || settings["solrj_writer.commit_on_close"]).to_s == "true"

     # Figure out where to send updates
@@ -118,12 +117,12 @@ class Traject::SolrJsonWriter
     end

     if exception || resp.status != 200
-      error_message = exception ?
-        Traject::Util.exception_to_log_message(exception) :
+      error_message = exception ?
+        Traject::Util.exception_to_log_message(exception) :
         "Solr response: #{resp.status}: #{resp.body}"

       logger.error "Error in Solr batch add. Will retry documents individually at performance penalty: #{error_message}"
-
+
       batch.each do |c|
         send_single(c)
       end
@@ -138,7 +137,7 @@ class Traject::SolrJsonWriter
     begin
       resp = @http_client.post @solr_update_url, json_package, "Content-type" => "application/json"
     # Catch Timeouts and network errors as skipped records, but otherwise
-    # allow unexpected errors to propagate up.
+    # allow unexpected errors to propagate up.
     rescue HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED => exception
     end
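For reference, the solr_writer.* settings documented in the comments above are normally supplied from a traject configuration file; a minimal sketch (the Solr URL and values here are illustrative placeholders, not defaults):

    # traject configuration sketch; URL and values are placeholders
    settings do
      provide "solr.url",                    "http://localhost:8983/solr/my_core"
      provide "solr_writer.thread_pool",     1        # writer threads
      provide "solr_writer.max_skipped",     -1       # -1 = unlimited skips
      provide "solr_writer.commit_on_close", "true"   # commit when indexing finishes
    end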
data/lib/traject/thread_pool.rb
CHANGED
@@ -13,7 +13,7 @@ module Traject
  # be created, and work sent to the Traject::ThreadPool will just be executed
  # in the caller thread. We call this a nil threadpool. One situation it can be useful
  # is if you are running under MRI, where multi-core parallelism isn't available, so
- # an actual threadpool may not be useful. (Although in some cases a thread pool,
+ # an actual threadpool may not be useful. (Although in some cases a thread pool,
  # especially one with size 1, can be useful in MRI for I/O blocking operations)
  #
  # 3) Use the #maybe_in_threadpool method to send blocks to thread pool for
@@ -40,7 +40,7 @@ module Traject
  # to complete, then return. You can not give any more work to the pool
  # after you do this. By default it'll wait pretty much forever, which should
  # be fine. If you never call shutdown, then queued or in-progress work
- # may be abandoned when the program ends, which would be bad.
+ # may be abandoned when the program ends, which would be bad.
  #
  # 7) We will keep track of total times a block is run in thread pool, and
  # total elapsed (wall) time of running all blocks, so an average_execution_ms
@@ -51,24 +51,26 @@ module Traject
     attr_reader :pool_size, :queue_capacity

     # First arg is pool size, 0 or nil and we'll be a null/no-op pool which executes
-    # work in caller thread.
+    # work in caller thread.
     def initialize(pool_size)
+      @thread_pool = nil # assume we don't have one
+      @exceptions_caught_queue = [] # start off without exceptions
       unless pool_size.nil? || pool_size == 0
-        @pool_size
+        @pool_size = pool_size.to_i
         @queue_capacity = pool_size * 3

-        @thread_pool
-
-
-
-
+        @thread_pool = Concurrent::ThreadPoolExecutor.new(
+          :min_threads => @pool_size,
+          :max_threads => @pool_size,
+          :max_queue => @queue_capacity,
+          :fallback_policy => :caller_runs
         )

         # A thread-safe queue to collect exceptions cross-threads.
         # We really only need to save the first exception, but a queue
         # is a convenient way to store a value concurrency-safely, and
-        # might as well store all of them.
-        @exceptions_caught_queue
+        # might as well store all of them.
+        @exceptions_caught_queue = Queue.new
       end
     end

@@ -133,7 +135,7 @@ module Traject
     # as a non-functioning threadpool -- then this method is just
     # a no-op.
     def raise_collected_exception!
-
+      unless @exceptions_caught_queue.empty?
         e = @exceptions_caught_queue.pop
         raise e
       end
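The rebuilt initializer above creates a fixed-size Concurrent::ThreadPoolExecutor with a bounded queue and a :caller_runs fallback, so work runs on the submitting thread once the queue fills up. A standalone sketch of that configuration (pool size 4 is an arbitrary example, not a traject default):

    require 'concurrent'

    pool_size = 4
    pool = Concurrent::ThreadPoolExecutor.new(
      :min_threads     => pool_size,
      :max_threads     => pool_size,
      :max_queue       => pool_size * 3,   # same queue_capacity rule as above
      :fallback_policy => :caller_runs     # caller runs the block when the queue is full
    )

    pool.post { puts "indexing work happens on a pool thread (or the caller when saturated)" }
    pool.shutdown
    pool.wait_for_termination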
data/lib/traject/translation_map.rb
CHANGED
@@ -1,5 +1,3 @@
-require 'traject'
-
 require 'yaml'
 require 'dot-properties'

@@ -131,13 +129,13 @@ module Traject
         yaml_file = File.join( base, "translation_maps", "#{path}.yaml" )
         prop_file = File.join(base, "translation_maps", "#{path}.properties" )

-        if File.
+        if File.exist? rb_file
           found = eval( File.open(rb_file).read , binding, rb_file )
           break
-        elsif File.
+        elsif File.exist? yaml_file
           found = YAML.load_file(yaml_file)
           break
-        elsif File.
+        elsif File.exist? prop_file
           found = Traject::TranslationMap.read_properties(prop_file)
           break
         end
@@ -231,21 +229,21 @@ module Traject
       array.replace( self.translate_array(array))
     end

-    # Return a new TranslationMap that results from merging argument on top of self.
+    # Return a new TranslationMap that results from merging argument on top of self.
     # Can be useful for taking an existing translation map, but merging a few
-    # overrides on top.
+    # overrides on top.
     #
     #     merged_map = TranslationMap.new(something).merge TranslationMap.new(else)
     #     #...
     #     merged_map.translate_array(something) # etc
     #
-    # If a default is set in the second map, it will merge over the first too.
+    # If a default is set in the second map, it will merge over the first too.
     #
     # You can also pass in a plain hash as an arg, instead of an existing TranslationMap:
     #
     #     TranslationMap.new(something).merge("overridden_key" => "value", "a" => "")
     def merge(other_map)
-      default = other_map.default || self.default
+      default = other_map.default || self.default
       TranslationMap.new(self.to_hash.merge(other_map.to_hash), :default => default)
     end

@@ -258,9 +256,9 @@ module Traject
     protected

     # We use dot-properties gem for reading .properties files,
-    # return a hash.
-    def self.read_properties(file_name)
-      return DotProperties.load(file_name).to_h
+    # return a hash.
+    def self.read_properties(file_name)
+      return DotProperties.load(file_name).to_h
     end

 end
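A minimal sketch of the #merge behavior described in the comments above; the map contents are made up for illustration, and the constructor/lookup usage is assumed from the merge implementation shown:

    require 'traject'

    base      = Traject::TranslationMap.new("eng" => "English", "ger" => "German")
    overrides = Traject::TranslationMap.new({"ger" => "Deutsch"}, :default => "Unknown")

    merged = base.merge(overrides)
    merged["ger"]  # => "Deutsch"
    merged["eng"]  # => "English"
    merged["xxx"]  # => "Unknown"  (default from the second map wins, as documented above)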
data/lib/traject/util.rb
CHANGED
@@ -1,5 +1,3 @@
-require 'traject'
-
 module Traject
   # Just some internal utility methods
   module Util
@@ -27,17 +25,17 @@ module Traject
     end

     # Provide a config source file path, and an exception.
-    #
+    #
     # Returns the line number from the first line in the stack
-    # trace of the exception that matches your file path.
+    # trace of the exception that matches your file path.
     # of the first line in the backtrace matching that file_path.
-    #
-    # Returns `nil` if no suitable backtrace line can be found.
     #
-    #
+    # Returns `nil` if no suitable backtrace line can be found.
+    #
+    # Has special logic to try and grep the info out of a SyntaxError, bah.
     def self.backtrace_lineno_for_config(file_path, exception)
       # For a SyntaxError, we really need to grep it from the
-      # exception message, it really appears to be nowhere else. Ugh.
+      # exception message, it really appears to be nowhere else. Ugh.
       if exception.kind_of? SyntaxError
         if exception.message =~ /:(\d+):/
           return $1.to_i
@@ -45,13 +43,13 @@ module Traject
       end

       # Otherwise we try to fish it out of the backtrace, first
-      # line matching the config file path.
+      # line matching the config file path.

       # exception.backtrace_locations exists in MRI 2.1+, which makes
       # our task a lot easier. But not yet in JRuby 1.7.x, so we got to
-      # handle the old way of having to parse the strings in backtrace too.
-      if ( exception.respond_to?(:backtrace_locations) &&
-           exception.backtrace_locations &&
+      # handle the old way of having to parse the strings in backtrace too.
+      if ( exception.respond_to?(:backtrace_locations) &&
+           exception.backtrace_locations &&
            exception.backtrace_locations.length > 0 )
         location = exception.backtrace_locations.find do |bt|
           bt.path == file_path
@@ -71,19 +69,19 @@ module Traject

     # Extract just the part of the backtrace that is "below"
     # the config file mentioned. If we can't find the config file
-    # in the stack trace, we might return empty array.
+    # in the stack trace, we might return empty array.
     #
     # If the ruby supports Exception#backtrace_locations, the
-    # returned array will actually be of Thread::Backtrace::Location elements.
+    # returned array will actually be of Thread::Backtrace::Location elements.
     def self.backtrace_from_config(file_path, exception)
       filtered_trace = []
       found = false

       # MRI 2.1+ has exception.backtrace_locations which makes
-      # this a lot easier, but JRuby 1.7.x doesn't yet, so we
-      # need to do it both ways.
-      if ( exception.respond_to?(:backtrace_locations) &&
-           exception.backtrace_locations &&
+      # this a lot easier, but JRuby 1.7.x doesn't yet, so we
+      # need to do it both ways.
+      if ( exception.respond_to?(:backtrace_locations) &&
+           exception.backtrace_locations &&
            exception.backtrace_locations.length > 0 )

         exception.backtrace_locations.each do |location|
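The two helpers above exist to point error reporting back at a traject config file. A hedged sketch of calling them directly; the config path is hypothetical:

    require 'traject'

    config_path = "/path/to/traject_config.rb"  # hypothetical config file

    begin
      load config_path
    rescue ScriptError, StandardError => e
      lineno = Traject::Util.backtrace_lineno_for_config(config_path, e)  # Integer or nil
      trace  = Traject::Util.backtrace_from_config(config_path, e)        # possibly empty Array
      warn "error in #{config_path} at line #{lineno}" if lineno
      trace.each { |line| warn line.to_s }
    end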
data/lib/traject/version.rb
CHANGED
data/test/indexer/macros_marc21_test.rb
CHANGED
@@ -118,6 +118,8 @@ describe "Traject::Macros::Marc21"
     assert_equal "one two three", Marc21.trim_punctuation("one two three:")
     assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
     assert_equal "one two three", Marc21.trim_punctuation("one two three.")
+    assert_equal "one two three...", Marc21.trim_punctuation("one two three...")
+    assert_equal "one two three", Marc21.trim_punctuation(" one two three.")

     assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
     assert_equal "one two three", Marc21.trim_punctuation("one two three]")
data/test/marc_extractor_test.rb
CHANGED
@@ -35,7 +35,7 @@ describe "Traject::MarcExtractor" do
   end

   it "parses a mixed bag" do
-    parsed = Traject::MarcExtractor::Spec.hash_from_string("
+    parsed = Traject::MarcExtractor::Spec.hash_from_string("245abcdes:810:700|*4|bcd")
     spec245 = parsed['245'].first
     spec810 = parsed['810'].first
     spec700 = parsed['700'].first
@@ -46,7 +46,7 @@ describe "Traject::MarcExtractor" do
     assert spec245
     assert_nil spec245.indicator1
     assert_nil spec245.indicator2
-    assert_equal %w{a b c d e}, spec245.subfields
+    assert_equal %w{a b c d e s}, spec245.subfields

     #810
     assert spec810
data/traject.gemspec
CHANGED
@@ -4,13 +4,13 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'traject/version'

 Gem::Specification.new do |spec|
-  spec.name
-  spec.version
-  spec.authors
-  spec.email
-  spec.summary
-  spec.homepage
-  spec.license
+  spec.name = "traject"
+  spec.version = Traject::VERSION
+  spec.authors = ["Jonathan Rochkind", "Bill Dueber"]
+  spec.email = ["none@nowhere.org"]
+  spec.summary = %q{Index MARC to Solr; or generally process source records to hash-like structures}
+  spec.homepage = "http://github.com/traject/traject"
+  spec.license = "MIT"

   spec.files = `git ls-files`.split($/)
   spec.executables = ["traject"]
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
   spec.add_dependency "concurrent-ruby", ">= 0.8.0"
   spec.add_dependency "marc", "~> 1.0"

-  spec.add_dependency "hashie", "~> 3.1"
-  spec.add_dependency "slop", ">= 3.4.5", "< 4.0"
-  spec.add_dependency "yell"
-  spec.add_dependency "dot-properties", ">= 0.1.1"
+  spec.add_dependency "hashie", "~> 3.1" # used for Indexer#settings
+  spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
+  spec.add_dependency "yell" # logging
+  spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
   spec.add_dependency "httpclient", "~> 2.5"
   spec.add_dependency 'marc-fastxmlwriter', '~>1.0' # fast marc->xml

@@ -40,8 +40,8 @@ Gem::Specification.new do |spec|
     spec.platform = "ruby"
   end

+  spec.add_development_dependency "bundler", '~> 1.7'

-  spec.add_development_dependency "bundler", "~> 1.7"
   spec.add_development_dependency "rake"
   spec.add_development_dependency "minitest"
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: traject
 version: !ruby/object:Gem::Version
-  version: 2.3.0
+  version: 2.3.1
 platform: java
 authors:
 - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-
+date: 2016-04-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement