traject 3.0.0 → 3.4.0

This diff shows the changes between publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
@@ -42,11 +42,11 @@ module Traject::Macros
  #
  # * :translation_map => String: translate with named translation map looked up in load
  # path, uses Tranject::TranslationMap.new(translation_map_arg).
- # **Instead**, use `extract_marc(whatever), translation_map(translation_map_arg)
+ # **Instead**, use `extract_marc(whatever), translation_map(translation_map_arg)`
  #
  # * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
  # have shown themselves useful with Marc, using Marc21.trim_punctuation. **Instead**, use
- # `extract_marc(whatever), trim_punctuation
+ # `extract_marc(whatever), trim_punctuation`
  #
  # * :default => String: if otherwise empty, add default value. **Instead**, use `extract_marc(whatever), default("default value")`
  #
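These comments document traject 3.x's move from option hashes to chained transformation macros. As a hedged sketch of the recommended chaining style in an indexing config (field name, tag spec, and map name are illustrative, not from this diff):

    to_field "language_facet",
      extract_marc("008[35-37]"),
      translation_map("marc_languages"),
      default("Unknown")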
@@ -26,19 +26,19 @@ module Traject::Macros
  accumulator.concat list.uniq if list
  end
  end
-
+
  # If a num begins with a known OCLC prefix, return it without the prefix.
  # otherwise nil.
  #
- # Allow (OCoLC) and/or ocn/ocm/on
-
+ # Allow (OCoLC) and/or ocn/ocm/on
+
  OCLCPAT = /
  \A\s*
  (?:(?:\(OCoLC\)) |
  (?:\(OCoLC\))?(?:(?:ocm)|(?:ocn)|(?:on))
  )(\d+)
  /x
-
+
  def self.oclcnum_extract(num)
  if m = OCLCPAT.match(num)
  return m[1]
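A usage sketch for the pattern above (inputs illustrative; the method lives on Traject::Macros::Marc21Semantics):

    Traject::Macros::Marc21Semantics.oclcnum_extract("(OCoLC)ocm12345678")  # => "12345678"
    Traject::Macros::Marc21Semantics.oclcnum_extract("ocn987654321")        # => "987654321"
    Traject::Macros::Marc21Semantics.oclcnum_extract("12345678")            # => nil, no recognized prefix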
@@ -364,13 +364,16 @@ module Traject::Macros
  end
  end
  end
- # Okay, nothing from 008, try 260
+ # Okay, nothing from 008, first try 264, then try 260
  if found_date.nil?
+ v264c = MarcExtractor.cached("264c", :separator => nil).extract(record).first
  v260c = MarcExtractor.cached("260c", :separator => nil).extract(record).first
  # just try to take the first four digits out of there, we're not going to try
  # anything crazy.
- if m = /(\d{4})/.match(v260c)
+ if m = /(\d{4})/.match(v264c)
  found_date = m[1].to_i
+ elsif m = /(\d{4})/.match(v260c)
+ found_date = m[1].to_i
  end
  end

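The fallback simply takes the first run of four digits from the subfield value, so common 264$c/260$c forms work without special-casing (example values illustrative):

    /(\d{4})/.match("c2005.")[1].to_i   # => 2005
    /(\d{4})/.match("[2019]")[1].to_i   # => 2019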
@@ -519,11 +522,11 @@ module Traject::Macros

  # Extracts LCSH-carrying fields, and formatting them
  # as a pre-coordinated LCSH string, for instance suitable for including
- # in a facet.
+ # in a facet.
  #
  # You can supply your own list of fields as a spec, but for significant
  # customization you probably just want to write your own method in
- # terms of the Marc21Semantics.assemble_lcsh method.
+ # terms of the Marc21Semantics.assemble_lcsh method.
  def marc_lcsh_formatted(options = {})
  spec = options[:spec] || "600:610:611:630:648:650:651:654:662"
  subd_separator = options[:subdivison_separator] || " — "
@@ -540,17 +543,17 @@ module Traject::Macros
  end

  # Takes a MARC::Field and formats it into a pre-coordinated LCSH string
- # with subdivision seperators in the right place.
+ # with subdivision seperators in the right place.
  #
  # For 600 fields especially, need to not just join with subdivision seperator
  # to take acount of $a$d$t -- for other fields, might be able to just
- # join subfields, not sure.
+ # join subfields, not sure.
  #
  # WILL strip trailing period from generated string, contrary to some LCSH practice.
  # Our data is inconsistent on whether it has period or not, this was
- # the easiest way to standardize.
+ # the easiest way to standardize.
  #
- # Default subdivision seperator is em-dash with spaces, set to '--' if you want.
+ # Default subdivision seperator is em-dash with spaces, set to '--' if you want.
  #
  # Cite: "Dash (-) that precedes a subdivision in an extended 600 subject heading
  # is not carried in the MARC record. It may be system generated as a display constant
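A hedged usage sketch of the macro documented above, in an indexing config (field name illustrative; note the option key really is spelled :subdivison_separator in this code):

    to_field "subject_facet",
      marc_lcsh_formatted(spec: "600:650:651", subdivison_separator: "--")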
@@ -26,9 +26,15 @@ module Traject
  # Make sure to avoid text content that was all blank, which is "between the children"
  # whitespace.
  result = result.collect do |n|
- n.xpath('.//text()').collect(&:text).tap do |arr|
- arr.reject! { |s| s =~ (/\A\s+\z/) }
- end.join(" ")
+ if n.kind_of?(Nokogiri::XML::Attr)
+ # attribute value
+ n.value
+ else
+ # text from node
+ n.xpath('.//text()').collect(&:text).tap do |arr|
+ arr.reject! { |s| s =~ (/\A\s+\z/) }
+ end.join(" ")
+ end
  end
  else
  # just put all matches in accumulator as Nokogiri::XML::Node's
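With this change, an xpath that selects an attribute node yields the attribute's value instead of an empty text join. A sketch using traject's extract_xpath macro (element and attribute names hypothetical):

    to_field "record_id", extract_xpath("//record/@id")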
@@ -21,6 +21,9 @@ module Traject
  # If you need to use namespaces here, you need to have them registered with
  # `nokogiri.default_namespaces`. If your source docs use namespaces, you DO need
  # to use them in your each_record_xpath.
+ # * nokogiri.strict_mode: if set to `true` or `"true"`, ask Nokogiri to parse in 'strict'
+ # mode, it will raise a `Nokogiri::XML::SyntaxError` if the XML is not well-formed, instead
+ # of trying to take it's best-guess correction. https://nokogiri.org/tutorials/ensuring_well_formed_markup.html
  # * nokogiri_reader.extra_xpath_hooks: Experimental in progress, see below.
  #
  # ## nokogiri_reader.extra_xpath_hooks: For handling nodes outside of your each_record_xpath
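A minimal settings sketch enabling the new option (reader class name as in traject's own docs):

    settings do
      provide "reader_class_name", "Traject::NokogiriReader"
      provide "nokogiri.strict_mode", true
    end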
@@ -87,7 +90,11 @@ module Traject
  end

  def each
- whole_input_doc = Nokogiri::XML.parse(input_stream)
+ config_proc = if settings["nokogiri.strict_mode"]
+ proc { |config| config.strict }
+ end
+
+ whole_input_doc = Nokogiri::XML.parse(input_stream, &config_proc)

  if each_record_xpath
  whole_input_doc.xpath(each_record_xpath, default_namespaces).each do |matching_node|
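The config proc here is standard Nokogiri: when config_proc is nil, parsing keeps Nokogiri's default recovering behavior, while the strict option makes malformed input raise instead of being auto-corrected. In plain Nokogiri terms:

    Nokogiri::XML.parse("<doc><bad></doc>") { |config| config.strict }
    # raises Nokogiri::XML::SyntaxError rather than silently repairing the markup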
@@ -118,35 +125,26 @@ module Traject
  private


- # In MRI Nokogiri, this is as simple as `new_parent_doc.root = node`
+ # We simply do `new_parent_doc.root = node`
  # It seemed maybe safer to dup the node as well as remove the original from the original doc,
  # but I believe this will result in double memory usage, as unlinked nodes aren't GC'd until
  # their doc is. I am hoping this pattern results in less memory usage.
  # https://github.com/sparklemotion/nokogiri/issues/1703
  #
- # However, in JRuby it's a different story, JRuby doesn't properly preserve namespaces
- # when re-parenting a node.
+ # We used to have to do something different in Jruby to work around bug:
  # https://github.com/sparklemotion/nokogiri/issues/1774
  #
- # The nodes within the tree re-parented _know_ they are in the correct namespaces,
- # and xpath queries require that namespace, but the appropriate xmlns attributes
- # aren't included in the serialized XML. This JRuby-specific code seems to get
- # things back to a consistent state.
+ # But as of nokogiri 1.9, that does not work, and is not necessary if we accept
+ # that Jruby nokogiri may put xmlns declerations on different elements than MRI,
+ # although it should be semantically equivalent for a namespace-aware parser.
+ # https://github.com/sparklemotion/nokogiri/issues/1875
+ #
+ # This as a separate method now exists largely as a historical artifact, and for this
+ # documentation.
  def reparent_node_to_root(new_parent_doc, node)
- if Traject::Util.is_jruby?
- original_ns_scopes = node.namespace_scopes
- end

  new_parent_doc.root = node

- if Traject::Util.is_jruby?
- original_ns_scopes.each do |ns|
- if new_parent_doc.at_xpath("//#{ns.prefix}:*", ns.prefix => ns.href)
- new_parent_doc.root.add_namespace(ns.prefix, ns.href)
- end
- end
- end
-
  return new_parent_doc
  end

@@ -115,9 +115,15 @@ module Traject
  # @returns [HTTP::Client] from http.rb gem
  def http_client
  @http_client ||= begin
- # timeout setting on http.rb seems to be a mess.
- # https://github.com/httprb/http/issues/488
- client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
+ client = nil
+
+ if HTTP::VERSION.split(".").first.to_i > 3
+ client = HTTP.timeout(timeout)
+ else
+ # timeout setting on http.rb 3.x are a bit of a mess.
+ # https://github.com/httprb/http/issues/488
+ client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
+ end

  if settings["oai_pmh.try_gzip"]
  client = client.use(:auto_inflate).headers("accept-encoding" => "gzip;q=1.0, identity;q=0.5")
@@ -16,7 +16,30 @@ require 'concurrent' # for atomic_fixnum
  # This should work under both MRI and JRuby, with JRuby getting much
  # better performance due to the threading model.
  #
- # Relevant settings
+ # Solr updates are by default sent with no commit params. This will definitely
+ # maximize your performance, and *especially* for bulk/batch indexing is recommended --
+ # use Solr auto commit in your Solr configuration instead, possibly with `commit_on_close`
+ # setting here.
+ #
+ # However, if you want the writer to send `commitWithin=true`, `commit=true`,
+ # `softCommit=true`, or any other URL parameters valid for Solr update handlers,
+ # you can configure this with `solr_writer.solr_update_args` setting. See:
+ # https://lucene.apache.org/solr/guide/7_0/near-real-time-searching.html#passing-commit-and-commitwithin-parameters-as-part-of-the-url
+ # Eg:
+ #
+ # settings do
+ # provide "solr_writer.solr_update_args", { commitWithin: 1000 }
+ # end
+ #
+ # (That it's a hash makes it infeasible to set/override on command line, if this is
+ # annoying for you let us know)
+ #
+ # `solr_update_args` will apply to batch and individual update requests, but
+ # not to commit sent if `commit_on_close`. You can also instead set
+ # `solr_writer.solr_commit_args` for that (or pass in an arg to #commit if calling
+ # manually)
+ #
+ # ## Relevant settings
  #
  # * solr.url (optional if solr.update_url is set) The URL to the solr core to index into
  #
@@ -35,19 +58,32 @@ require 'concurrent' # for atomic_fixnum
  #
  # * solr_writer.skippable_exceptions: List of classes that will be rescued internal to
  # SolrJsonWriter, and handled with max_skipped logic. Defaults to
- # `[HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED]`
+ # `[HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED, Traject::SolrJsonWriter::BadHttpResponse]`
+ #
+ # * solr_writer.solr_update_args: A _hash_ of query params to send to solr update url.
+ # Will be sent with every update request. Eg `{ softCommit: true }` or `{ commitWithin: 1000 }`.
+ # See also `solr_writer.solr_commit_args`
  #
  # * solr_writer.commit_on_close: Set to true (or "true") if you want to commit at the
  # end of the indexing run. (Old "solrj_writer.commit_on_close" supported for backwards
  # compat only.)
  #
+ # * solr_writer.commit_solr_update_args: A hash of query params to send when committing.
+ # Will be used for automatic `close_on_commit`, as well as any manual calls to #commit.
+ # If set, must include {"commit" => "true"} or { "softCommit" => "true" } if you actually
+ # want commits to happen when SolrJsonWriter tries to commit! But can be used to switch to softCommits
+ # (hard commits default), or specify additional params like optimize etc.
+ #
+ # * solr_writer.http_timeout: Value in seconds, will be set on the httpclient as connect/receive/send
+ # timeout. No way to set them individually at present. Default nil, use HTTPClient defaults
+ # (60 for connect/recieve, 120 for send).
+ #
  # * solr_writer.commit_timeout: If commit_on_close, how long to wait for Solr before
- # giving up as a timeout. Default 10 minutes. Solr can be slow.
+ # giving up as a timeout (http client receive_timeout). Default 10 minutes. Solr can be slow at commits. Overrides solr_writer.timeout
  #
  # * solr_json_writer.http_client Mainly intended for testing, set your own HTTPClient
  # or mock object to be used for HTTP.
-
-
+ #
  class Traject::SolrJsonWriter
  include Traject::QualifiedConstGet

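Pulling the new settings together, a hedged example configuration (URL and values illustrative; note the doc comment above mentions `solr_writer.solr_commit_args`, while the code in this diff reads `solr_writer.commit_solr_update_args`):

    settings do
      provide "solr.url", "http://localhost:8983/solr/my_core"
      provide "solr_writer.http_timeout", 30
      provide "solr_writer.solr_update_args", { commitWithin: 1000 }
      provide "solr_writer.commit_on_close", "true"
      provide "solr_writer.commit_solr_update_args", { commit: "true" }
    end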
@@ -71,7 +107,21 @@ class Traject::SolrJsonWriter
  @max_skipped = nil
  end

- @http_client = @settings["solr_json_writer.http_client"] || HTTPClient.new
+ @http_client = if @settings["solr_json_writer.http_client"]
+ @settings["solr_json_writer.http_client"]
+ else
+ client = HTTPClient.new
+ if @settings["solr_writer.http_timeout"]
+ client.connect_timeout = client.receive_timeout = client.send_timeout = @settings["solr_writer.http_timeout"]
+ end
+
+ if @settings["solr_writer.basic_auth_user"] &&
+ @settings["solr_writer.basic_auth_password"]
+ client.set_auth(@settings["solr.url"], @settings["solr_writer.basic_auth_user"], @settings["solr_writer.basic_auth_password"])
+ end
+
+ client
+ end

  @batch_size = (settings["solr_writer.batch_size"] || DEFAULT_BATCH_SIZE).to_i
  @batch_size = 1 if @batch_size < 1
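The basic-auth branch reads two settings that don't appear in the doc block earlier in this diff; a sketch of using them (credentials illustrative):

    settings do
      provide "solr.url", "http://localhost:8983/solr/my_core"
      provide "solr_writer.basic_auth_user", "solr_user"
      provide "solr_writer.basic_auth_password", "solr_pass"
    end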
@@ -96,6 +146,9 @@ class Traject::SolrJsonWriter
  # Figure out where to send updates
  @solr_update_url = self.determine_solr_update_url

+ @solr_update_args = settings["solr_writer.solr_update_args"]
+ @commit_solr_update_args = settings["solr_writer.commit_solr_update_args"]
+
  logger.info(" #{self.class.name} writing to '#{@solr_update_url}' in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
  end

@@ -123,14 +176,28 @@ class Traject::SolrJsonWriter
  send_batch( Traject::Util.drain_queue(@batched_queue) )
  end

+ # configured update url, with either settings @solr_update_args or passed in
+ # query_params added to it
+ def solr_update_url_with_query(query_params)
+ if query_params
+ @solr_update_url + '?' + URI.encode_www_form(query_params)
+ else
+ @solr_update_url
+ end
+ end
+
  # Send the given batch of contexts. If something goes wrong, send
  # them one at a time.
  # @param [Array<Traject::Indexer::Context>] an array of contexts
  def send_batch(batch)
  return if batch.empty?
+
+ logger.debug("#{self.class.name}: sending batch of #{batch.size} to Solr")
+
  json_package = JSON.generate(batch.map { |c| c.output_hash })
+
  begin
- resp = @http_client.post @solr_update_url, json_package, "Content-type" => "application/json"
+ resp = @http_client.post solr_update_url_with_query(@solr_update_args), json_package, "Content-type" => "application/json"
  rescue StandardError => exception
  end

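The new helper just appends form-encoded params to the configured update URL, e.g. (URL illustrative):

    # with @solr_update_url == "http://localhost:8983/solr/my_core/update/json"
    solr_update_url_with_query(commitWithin: 1000)
    # => "http://localhost:8983/solr/my_core/update/json?commitWithin=1000"
    solr_update_url_with_query(nil)
    # => "http://localhost:8983/solr/my_core/update/json"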
@@ -151,34 +218,71 @@ class Traject::SolrJsonWriter
  # Send a single context to Solr, logging an error if need be
  # @param [Traject::Indexer::Context] c The context whose document you want to send
  def send_single(c)
+ logger.debug("#{self.class.name}: sending single record to Solr: #{c.output_hash}")
+
  json_package = JSON.generate([c.output_hash])
  begin
- resp = @http_client.post @solr_update_url, json_package, "Content-type" => "application/json"
- # Catch Timeouts and network errors as skipped records, but otherwise
- # allow unexpected errors to propagate up.
- rescue *skippable_exceptions => exception
- # no body, local variable exception set above will be used below
- end
+ post_url = solr_update_url_with_query(@solr_update_args)
+ resp = @http_client.post post_url, json_package, "Content-type" => "application/json"

- if exception || resp.status != 200
- if exception
- msg = Traject::Util.exception_to_log_message(exception)
+ unless resp.status == 200
+ raise BadHttpResponse.new("Unexpected HTTP response status #{resp.status} from POST #{post_url}", resp)
+ end
+
+ # Catch Timeouts and network errors -- as well as non-200 http responses --
+ # as skipped records, but otherwise allow unexpected errors to propagate up.
+ rescue *skippable_exceptions => exception
+ msg = if exception.kind_of?(BadHttpResponse)
+ "Solr error response: #{exception.response.status}: #{exception.response.body}"
  else
- msg = "Solr error response: #{resp.status}: #{resp.body}"
+ Traject::Util.exception_to_log_message(exception)
  end
+
  logger.error "Could not add record #{c.record_inspect}: #{msg}"
  logger.debug("\t" + exception.backtrace.join("\n\t")) if exception
  logger.debug(c.source_record.to_s) if c.source_record

  @skipped_record_incrementer.increment
  if @max_skipped and skipped_record_count > @max_skipped
- raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting")
+ # re-raising in rescue means the last encountered error will be available as #cause
+ # on raised exception, a feature in ruby 2.1+.
+ raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting: #{exception.message}")
  end
-
  end
+ end
+

+ # Very beginning of a delete implementation. POSTs a delete request to solr
+ # for id in arg (value of Solr UniqueID field, usually `id` field).
+ #
+ # Right now, does it inline and immediately, no use of background threads or batching.
+ # This could change.
+ #
+ # Right now, if unsuccesful for any reason, will raise immediately out of here.
+ # Could raise any of the `skippable_exceptions` (timeouts, network errors), an
+ # exception will be raised right out of here.
+ #
+ # Will use `solr_writer.solr_update_args` settings.
+ #
+ # There is no built-in way to direct a record to be deleted from an indexing config
+ # file at the moment, this is just a loose method on the writer.
+ def delete(id)
+ logger.debug("#{self.class.name}: Sending delete to Solr for #{id}")
+
+ json_package = {delete: id}
+ resp = @http_client.post solr_update_url_with_query(@solr_update_args), JSON.generate(json_package), "Content-type" => "application/json"
+ if resp.status != 200
+ raise RuntimeError.new("Could not delete #{id.inspect}, http response #{resp.status}: #{resp.body}")
+ end
  end

+ # Send a delete all query.
+ #
+ # This method takes no params and will not automatically commit the deletes.
+ # @example @writer.delete_all!
+ def delete_all!
+ delete(query: "*:*")
+ end

  # Get the logger from the settings, or default to an effectively null logger
  def logger
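A usage sketch for the new delete API (how you obtain the writer depends on your setup; the id is illustrative):

    writer.delete("some-record-id")   # POSTs {"delete": "some-record-id"} to the update URL
    writer.delete_all!                # equivalent to writer.delete(query: "*:*")
    writer.commit                     # deletes are not committed automatically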
@@ -199,14 +303,16 @@ class Traject::SolrJsonWriter
  @thread_pool.maybe_in_thread_pool { send_batch(batch) }
  end

- # Wait for shutdown, and time it.
- logger.debug "#{self.class.name}: Shutting down thread pool, waiting if needed..."
- elapsed = @thread_pool.shutdown_and_wait
- if elapsed > 60
- logger.warn "Waited #{elapsed} seconds for all threads, you may want to increase solr_writer.thread_pool (currently #{@settings["solr_writer.thread_pool"]})"
+ if @thread_pool_size && @thread_pool_size > 0
+ # Wait for shutdown, and time it.
+ logger.debug "#{self.class.name}: Shutting down thread pool, waiting if needed..."
+ elapsed = @thread_pool.shutdown_and_wait
+ if elapsed > 60
+ logger.warn "Waited #{elapsed} seconds for all threads, you may want to increase solr_writer.thread_pool (currently #{@settings["solr_writer.thread_pool"]})"
+ end
+ logger.debug "#{self.class.name}: Thread pool shutdown complete"
+ logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0
  end
- logger.debug "#{self.class.name}: Thread pool shutdown complete"
- logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0

  # check again now that we've waited, there could still be some
  # that didn't show up before.
@@ -220,14 +326,32 @@ class Traject::SolrJsonWriter


  # Send a commit
- def commit
+ #
+ # Called automatially by `close_on_commit` setting, but also can be called manually.
+ #
+ # If settings `solr_writer.commit_solr_update_args` is set, will be used by default.
+ # That setting needs `{ commit: true }` or `{softCommit: true}` if you want it to
+ # actually do a commit!
+ #
+ # Optional query_params argument is the actual args to send, you must be sure
+ # to make it include "commit: true" or "softCommit: true" for it to actually commit!
+ # But you may want to include other params too, like optimize etc. query_param
+ # argument replaces setting `solr_writer.commit_solr_update_args`, they are not merged.
+ #
+ # @param [Hash] query_params optional query params to send to solr update. Default {"commit" => "true"}
+ #
+ # @example @writer.commit
+ # @example @writer.commit(softCommit: true)
+ # @example @writer.commit(commit: true, optimize: true, waitFlush: false)
+ def commit(query_params = nil)
+ query_params ||= @commit_solr_update_args || {"commit" => "true"}
  logger.info "#{self.class.name} sending commit to solr at url #{@solr_update_url}..."

  original_timeout = @http_client.receive_timeout

  @http_client.receive_timeout = (settings["commit_timeout"] || (10 * 60)).to_i

- resp = @http_client.get(@solr_update_url, {"commit" => 'true'})
+ resp = @http_client.get(solr_update_url_with_query(query_params))
  unless resp.status == 200
  raise RuntimeError.new("Could not commit to Solr: #{resp.status} #{resp.body}")
  end
@@ -279,10 +403,24 @@ class Traject::SolrJsonWriter

  class MaxSkippedRecordsExceeded < RuntimeError ; end

+ # Adapted from HTTPClient::BadResponseError.
+ # It's got a #response accessor that will give you the HTTPClient
+ # Response object that had a bad status, although relying on that
+ # would tie you to our HTTPClient implementation that maybe should
+ # be considered an implementation detail, so I dunno.
+ class BadHttpResponse < RuntimeError
+ # HTTP::Message:: a response
+ attr_reader :response
+
+ def initialize(msg, response = nil) # :nodoc:
+ super(msg)
+ @response = response
+ end
+ end


  private

  def skippable_exceptions
- @skippable_exceptions ||= (settings["solr_writer.skippable_exceptions"] || [HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED])
+ @skippable_exceptions ||= (settings["solr_writer.skippable_exceptions"] || [HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED, Traject::SolrJsonWriter::BadHttpResponse])
  end
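Since non-200 responses now surface as Traject::SolrJsonWriter::BadHttpResponse and are skippable by default, anyone overriding solr_writer.skippable_exceptions likely wants to keep it in the list; a hedged sketch:

    settings do
      provide "solr_writer.skippable_exceptions",
        [HTTPClient::TimeoutError, SocketError, Errno::ECONNREFUSED,
         Traject::SolrJsonWriter::BadHttpResponse]
    end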