traject 2.3.0-java → 2.3.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: da2d58493789ee303af138ff79e09d4ca0bc1b8a
4
- data.tar.gz: cba09265be65f83ea8535d43bfd7b3e367cbd926
3
+ metadata.gz: b482969bfad4936f4bab36ebbc6b0a2584f06457
4
+ data.tar.gz: e58bd672a66565f3dca63e4f3c59ad9eda457625
5
5
  SHA512:
6
- metadata.gz: ce2e6ae0ca6f768df0d8be8212b56127320086fe8bc51ee538d80f1f68e981f6a5a463c5ff4ba6ea2a5d6919d69f613602c7a04d3cb5b393c041595bea23788a
7
- data.tar.gz: 0db68769f207be97a649121300e09081ca4c3881ef45e23c3950c8a054e11c44429767e2f3e1ec7eaee71fa39c3399a59ac1129f60f50e2afed52d890d587b82
6
+ metadata.gz: ea57b4e0b1fb2050793215097786ae3f12e819ecd8d1616964eeba29be24aa53e10c2b47a4d1fd16be70d0c2e4973e3e30a01bdbf9bc5a28f09060197f446ee4
7
+ data.tar.gz: dc6ea5a40377aff2b9ca33c9a84deb295c972db31b159da7f3b3526a339c80f3aed5b3a3d01088102a749aa3e69d8c91d53c9e5f5395b2f15f5d755eb9daf9c6
data/.travis.yml CHANGED
@@ -10,6 +10,7 @@ rvm:
10
10
  - rbx-2
11
11
  before_install:
12
12
  - gem update --system
13
- - gem install bundler
13
+ - gem uninstall bundler
14
+ - gem update bundler
14
15
  jdk:
15
16
  - oraclejdk8
@@ -1,6 +1,5 @@
1
1
  require 'yell'
2
2
 
3
- require 'traject'
4
3
  require 'traject/qualified_const_get'
5
4
  require 'traject/thread_pool'
6
5
 
@@ -238,6 +238,10 @@ module Traject::Macros
238
238
  # single square bracket characters if they are the start and/or end
239
239
  # chars and there are no internal square brackets.
240
240
  str = str.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')
241
+
242
+ # trim any leading or trailing whitespace
243
+ str.strip!
244
+
241
245
  return str
242
246
  end
243
247
 
@@ -170,7 +170,7 @@ module Traject
170
170
  hash = Hash.new
171
171
 
172
172
  # Split the string(s) given on colon
173
- spec_strings = spec_string.is_a?(Array) ? spec_string.map { |s| s.split(/\s*:\s*/) }.flatten : spec_string.split(/s*:\s*/)
173
+ spec_strings = spec_string.is_a?(Array) ? spec_string.map { |s| s.split(/\s*:\s*/) }.flatten : spec_string.split(/\s*:\s*/)
174
174
 
175
175
  spec_strings.each do |part|
176
176
  if m = DATAFIELD_PATTERN.match(part)
@@ -1,6 +1,5 @@
1
1
  require 'yell'
2
2
 
3
- require 'traject'
4
3
  require 'traject/util'
5
4
  require 'traject/qualified_const_get'
6
5
  require 'traject/thread_pool'
@@ -28,21 +27,21 @@ require 'concurrent' # for atomic_fixnum
28
27
  # My tests indicate that this setting doesn't change overall index speed by a ton.
29
28
  #
30
29
  # * solr_writer.thread_pool: How many threads to use for the writer. Default is 1.
31
- # Likely useful even under MRI since thread will be waiting on Solr for some time.
30
+ # Likely useful even under MRI since thread will be waiting on Solr for some time.
32
31
  #
33
- # * solr_writer.max_skipped: How many records skipped due to errors before we
34
- # bail out with a fatal error? Set to -1 for unlimited skips. Default 0,
35
- # raise and abort on a single record that could not be added to Solr.
32
+ # * solr_writer.max_skipped: How many records skipped due to errors before we
33
+ # bail out with a fatal error? Set to -1 for unlimited skips. Default 0,
34
+ # raise and abort on a single record that could not be added to Solr.
36
35
  #
37
36
  # * solr_writer.commit_on_close: Set to true (or "true") if you want to commit at the
38
37
  # end of the indexing run. (Old "solrj_writer.commit_on_close" supported for backwards
39
38
  # compat only.)
40
39
  #
41
40
  # * solr_writer.commit_timeout: If commit_on_close, how long to wait for Solr before
42
- # giving up as a timeout. Default 10 minutes. Solr can be slow.
41
+ # giving up as a timeout. Default 10 minutes. Solr can be slow.
43
42
  #
44
43
  # * solr_json_writer.http_client Mainly intended for testing, set your own HTTPClient
45
- # or mock object to be used for HTTP.
44
+ # or mock object to be used for HTTP.
46
45
 
47
46
 
48
47
  class Traject::SolrJsonWriter
@@ -85,7 +84,7 @@ class Traject::SolrJsonWriter
85
84
  @thread_pool = Traject::ThreadPool.new(@thread_pool_size)
86
85
 
87
86
  # old setting solrj_writer supported for backwards compat, as we make
88
- # this the new default writer.
87
+ # this the new default writer.
89
88
  @commit_on_close = (settings["solr_writer.commit_on_close"] || settings["solrj_writer.commit_on_close"]).to_s == "true"
90
89
 
91
90
  # Figure out where to send updates
@@ -118,12 +117,12 @@ class Traject::SolrJsonWriter
118
117
  end
119
118
 
120
119
  if exception || resp.status != 200
121
- error_message = exception ?
122
- Traject::Util.exception_to_log_message(exception) :
120
+ error_message = exception ?
121
+ Traject::Util.exception_to_log_message(exception) :
123
122
  "Solr response: #{resp.status}: #{resp.body}"
124
123
 
125
124
  logger.error "Error in Solr batch add. Will retry documents individually at performance penalty: #{error_message}"
126
-
125
+
127
126
  batch.each do |c|
128
127
  send_single(c)
129
128
  end
@@ -138,7 +137,7 @@ class Traject::SolrJsonWriter
138
137
  begin
139
138
  resp = @http_client.post @solr_update_url, json_package, "Content-type" => "application/json"
140
139
  # Catch Timeouts and network errors as skipped records, but otherwise
141
- # allow unexpected errors to propagate up.
140
+ # allow unexpected errors to propagate up.
142
141
  rescue HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED => exception
143
142
  end
144
143
 
@@ -13,7 +13,7 @@ module Traject
13
13
  # be created, and work sent to the Traject::ThreadPool will just be executed
14
14
  # in the caller thread. We call this a nil threadpool. One situation it can be useful
15
15
  # is if you are running under MRI, where multi-core parallelism isn't available, so
16
- # an actual threadpool may not be useful. (Although in some cases a thread pool,
16
+ # an actual threadpool may not be useful. (Although in some cases a thread pool,
17
17
  # especially one with size 1, can be useful in MRI for I/O blocking operations)
18
18
  #
19
19
  # 3) Use the #maybe_in_threadpool method to send blocks to thread pool for
@@ -40,7 +40,7 @@ module Traject
40
40
  # to complete, then return. You can not give any more work to the pool
41
41
  # after you do this. By default it'll wait pretty much forever, which should
42
42
  # be fine. If you never call shutdown, then queued or in-progress work
43
- # may be abandoned when the program ends, which would be bad.
43
+ # may be abandoned when the program ends, which would be bad.
44
44
  #
45
45
  # 7) We will keep track of total times a block is run in thread pool, and
46
46
  # total elapsed (wall) time of running all blocks, so an average_execution_ms
@@ -51,24 +51,26 @@ module Traject
51
51
  attr_reader :pool_size, :queue_capacity
52
52
 
53
53
  # First arg is pool size, 0 or nil and we'll be a null/no-op pool which executes
54
- # work in caller thread.
54
+ # work in caller thread.
55
55
  def initialize(pool_size)
56
+ @thread_pool = nil # assume we don't have one
57
+ @exceptions_caught_queue = [] # start off without exceptions
56
58
  unless pool_size.nil? || pool_size == 0
57
- @pool_size = pool_size.to_i
59
+ @pool_size = pool_size.to_i
58
60
  @queue_capacity = pool_size * 3
59
61
 
60
- @thread_pool = Concurrent::ThreadPoolExecutor.new(
61
- :min_threads => @pool_size,
62
- :max_threads => @pool_size,
63
- :max_queue => @queue_capacity,
64
- :fallback_policy => :caller_runs
62
+ @thread_pool = Concurrent::ThreadPoolExecutor.new(
63
+ :min_threads => @pool_size,
64
+ :max_threads => @pool_size,
65
+ :max_queue => @queue_capacity,
66
+ :fallback_policy => :caller_runs
65
67
  )
66
68
 
67
69
  # A thread-safe queue to collect exceptions cross-threads.
68
70
  # We really only need to save the first exception, but a queue
69
71
  # is a convenient way to store a value concurrency-safely, and
70
- # might as well store all of them.
71
- @exceptions_caught_queue = Queue.new
72
+ # might as well store all of them.
73
+ @exceptions_caught_queue = Queue.new
72
74
  end
73
75
  end
74
76
 
@@ -133,7 +135,7 @@ module Traject
133
135
  # as a non-functioning threadpool -- then this method is just
134
136
  # a no-op.
135
137
  def raise_collected_exception!
136
- if @exceptions_caught_queue && (! @exceptions_caught_queue.empty?)
138
+ unless @exceptions_caught_queue.empty?
137
139
  e = @exceptions_caught_queue.pop
138
140
  raise e
139
141
  end
@@ -1,5 +1,3 @@
1
- require 'traject'
2
-
3
1
  require 'yaml'
4
2
  require 'dot-properties'
5
3
 
@@ -131,13 +129,13 @@ module Traject
131
129
  yaml_file = File.join( base, "translation_maps", "#{path}.yaml" )
132
130
  prop_file = File.join(base, "translation_maps", "#{path}.properties" )
133
131
 
134
- if File.exists? rb_file
132
+ if File.exist? rb_file
135
133
  found = eval( File.open(rb_file).read , binding, rb_file )
136
134
  break
137
- elsif File.exists? yaml_file
135
+ elsif File.exist? yaml_file
138
136
  found = YAML.load_file(yaml_file)
139
137
  break
140
- elsif File.exists? prop_file
138
+ elsif File.exist? prop_file
141
139
  found = Traject::TranslationMap.read_properties(prop_file)
142
140
  break
143
141
  end
@@ -231,21 +229,21 @@ module Traject
231
229
  array.replace( self.translate_array(array))
232
230
  end
233
231
 
234
- # Return a new TranslationMap that results from merging argument on top of self.
232
+ # Return a new TranslationMap that results from merging argument on top of self.
235
233
  # Can be useful for taking an existing translation map, but merging a few
236
- # overrides on top.
234
+ # overrides on top.
237
235
  #
238
236
  # merged_map = TranslationMap.new(something).merge TranslationMap.new(else)
239
237
  # #...
240
238
  # merged_map.translate_array(something) # etc
241
239
  #
242
- # If a default is set in the second map, it will merge over the first too.
240
+ # If a default is set in the second map, it will merge over the first too.
243
241
  #
244
242
  # You can also pass in a plain hash as an arg, instead of an existing TranslationMap:
245
243
  #
246
244
  # TranslationMap.new(something).merge("overridden_key" => "value", "a" => "")
247
245
  def merge(other_map)
248
- default = other_map.default || self.default
246
+ default = other_map.default || self.default
249
247
  TranslationMap.new(self.to_hash.merge(other_map.to_hash), :default => default)
250
248
  end
251
249
 
@@ -258,9 +256,9 @@ module Traject
258
256
  protected
259
257
 
260
258
  # We use dot-properties gem for reading .properties files,
261
- # return a hash.
262
- def self.read_properties(file_name)
263
- return DotProperties.load(file_name).to_h
259
+ # return a hash.
260
+ def self.read_properties(file_name)
261
+ return DotProperties.load(file_name).to_h
264
262
  end
265
263
 
266
264
  end
data/lib/traject/util.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'traject'
2
-
3
1
  module Traject
4
2
  # Just some internal utility methods
5
3
  module Util
@@ -27,17 +25,17 @@ module Traject
27
25
  end
28
26
 
29
27
  # Provide a config source file path, and an exception.
30
- #
28
+ #
31
29
  # Returns the line number from the first line in the stack
32
- # trace of the exception that matches your file path.
30
+ # trace of the exception that matches your file path.
33
31
  # of the first line in the backtrace matching that file_path.
34
- #
35
- # Returns `nil` if no suitable backtrace line can be found.
36
32
  #
37
- # Has special logic to try and grep the info out of a SyntaxError, bah.
33
+ # Returns `nil` if no suitable backtrace line can be found.
34
+ #
35
+ # Has special logic to try and grep the info out of a SyntaxError, bah.
38
36
  def self.backtrace_lineno_for_config(file_path, exception)
39
37
  # For a SyntaxError, we really need to grep it from the
40
- # exception message, it really appears to be nowhere else. Ugh.
38
+ # exception message, it really appears to be nowhere else. Ugh.
41
39
  if exception.kind_of? SyntaxError
42
40
  if exception.message =~ /:(\d+):/
43
41
  return $1.to_i
@@ -45,13 +43,13 @@ module Traject
45
43
  end
46
44
 
47
45
  # Otherwise we try to fish it out of the backtrace, first
48
- # line matching the config file path.
46
+ # line matching the config file path.
49
47
 
50
48
  # exception.backtrace_locations exists in MRI 2.1+, which makes
51
49
  # our task a lot easier. But not yet in JRuby 1.7.x, so we got to
52
- # handle the old way of having to parse the strings in backtrace too.
53
- if ( exception.respond_to?(:backtrace_locations) &&
54
- exception.backtrace_locations &&
50
+ # handle the old way of having to parse the strings in backtrace too.
51
+ if ( exception.respond_to?(:backtrace_locations) &&
52
+ exception.backtrace_locations &&
55
53
  exception.backtrace_locations.length > 0 )
56
54
  location = exception.backtrace_locations.find do |bt|
57
55
  bt.path == file_path
@@ -71,19 +69,19 @@ module Traject
71
69
 
72
70
  # Extract just the part of the backtrace that is "below"
73
71
  # the config file mentioned. If we can't find the config file
74
- # in the stack trace, we might return empty array.
72
+ # in the stack trace, we might return empty array.
75
73
  #
76
74
  # If the ruby supports Exception#backtrace_locations, the
77
- # returned array will actually be of Thread::Backtrace::Location elements.
75
+ # returned array will actually be of Thread::Backtrace::Location elements.
78
76
  def self.backtrace_from_config(file_path, exception)
79
77
  filtered_trace = []
80
78
  found = false
81
79
 
82
80
  # MRI 2.1+ has exception.backtrace_locations which makes
83
- # this a lot easier, but JRuby 1.7.x doesn't yet, so we
84
- # need to do it both ways.
85
- if ( exception.respond_to?(:backtrace_locations) &&
86
- exception.backtrace_locations &&
81
+ # this a lot easier, but JRuby 1.7.x doesn't yet, so we
82
+ # need to do it both ways.
83
+ if ( exception.respond_to?(:backtrace_locations) &&
84
+ exception.backtrace_locations &&
87
85
  exception.backtrace_locations.length > 0 )
88
86
 
89
87
  exception.backtrace_locations.each do |location|
@@ -1,3 +1,3 @@
1
1
  module Traject
2
- VERSION = "2.3.0"
2
+ VERSION = "2.3.1"
3
3
  end
@@ -118,6 +118,8 @@ describe "Traject::Macros::Marc21" do
118
118
  assert_equal "one two three", Marc21.trim_punctuation("one two three:")
119
119
  assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
120
120
  assert_equal "one two three", Marc21.trim_punctuation("one two three.")
121
+ assert_equal "one two three...", Marc21.trim_punctuation("one two three...")
122
+ assert_equal "one two three", Marc21.trim_punctuation(" one two three.")
121
123
 
122
124
  assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
123
125
  assert_equal "one two three", Marc21.trim_punctuation("one two three]")
@@ -35,7 +35,7 @@ describe "Traject::MarcExtractor" do
35
35
  end
36
36
 
37
37
  it "parses a mixed bag" do
38
- parsed = Traject::MarcExtractor::Spec.hash_from_string("245abcde:810:700|*4|bcd")
38
+ parsed = Traject::MarcExtractor::Spec.hash_from_string("245abcdes:810:700|*4|bcd")
39
39
  spec245 = parsed['245'].first
40
40
  spec810 = parsed['810'].first
41
41
  spec700 = parsed['700'].first
@@ -46,7 +46,7 @@ describe "Traject::MarcExtractor" do
46
46
  assert spec245
47
47
  assert_nil spec245.indicator1
48
48
  assert_nil spec245.indicator2
49
- assert_equal %w{a b c d e}, spec245.subfields
49
+ assert_equal %w{a b c d e s}, spec245.subfields
50
50
 
51
51
  #810
52
52
  assert spec810
data/traject.gemspec CHANGED
@@ -4,13 +4,13 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'traject/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "traject"
8
- spec.version = Traject::VERSION
9
- spec.authors = ["Jonathan Rochkind", "Bill Dueber"]
10
- spec.email = ["none@nowhere.org"]
11
- spec.summary = %q{Index MARC to Solr; or generally process source records to hash-like structures}
12
- spec.homepage = "http://github.com/traject/traject"
13
- spec.license = "MIT"
7
+ spec.name = "traject"
8
+ spec.version = Traject::VERSION
9
+ spec.authors = ["Jonathan Rochkind", "Bill Dueber"]
10
+ spec.email = ["none@nowhere.org"]
11
+ spec.summary = %q{Index MARC to Solr; or generally process source records to hash-like structures}
12
+ spec.homepage = "http://github.com/traject/traject"
13
+ spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files`.split($/)
16
16
  spec.executables = ["traject"]
@@ -23,10 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.add_dependency "concurrent-ruby", ">= 0.8.0"
24
24
  spec.add_dependency "marc", "~> 1.0"
25
25
 
26
- spec.add_dependency "hashie", "~> 3.1" # used for Indexer#settings
27
- spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
28
- spec.add_dependency "yell" # logging
29
- spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
26
+ spec.add_dependency "hashie", "~> 3.1" # used for Indexer#settings
27
+ spec.add_dependency "slop", ">= 3.4.5", "< 4.0" # command line parsing
28
+ spec.add_dependency "yell" # logging
29
+ spec.add_dependency "dot-properties", ">= 0.1.1" # reading java style .properties
30
30
  spec.add_dependency "httpclient", "~> 2.5"
31
31
  spec.add_dependency 'marc-fastxmlwriter', '~>1.0' # fast marc->xml
32
32
 
@@ -40,8 +40,8 @@ Gem::Specification.new do |spec|
40
40
  spec.platform = "ruby"
41
41
  end
42
42
 
43
+ spec.add_development_dependency "bundler", '~> 1.7'
43
44
 
44
- spec.add_development_dependency "bundler", "~> 1.7"
45
45
  spec.add_development_dependency "rake"
46
46
  spec.add_development_dependency "minitest"
47
47
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0
4
+ version: 2.3.1
5
5
  platform: java
6
6
  authors:
7
7
  - Jonathan Rochkind
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-02-24 00:00:00.000000000 Z
12
+ date: 2016-04-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement