traject 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/README.md +183 -191
- data/bench/bench.rb +1 -1
- data/doc/batch_execution.md +14 -0
- data/doc/extending.md +14 -12
- data/doc/indexing_rules.md +265 -0
- data/lib/traject/command_line.rb +12 -41
- data/lib/traject/debug_writer.rb +32 -13
- data/lib/traject/indexer.rb +101 -24
- data/lib/traject/indexer/settings.rb +18 -17
- data/lib/traject/json_writer.rb +32 -11
- data/lib/traject/line_writer.rb +6 -6
- data/lib/traject/macros/basic.rb +1 -1
- data/lib/traject/macros/marc21.rb +17 -13
- data/lib/traject/macros/marc21_semantics.rb +27 -25
- data/lib/traject/macros/marc_format_classifier.rb +39 -25
- data/lib/traject/marc4j_reader.rb +36 -22
- data/lib/traject/marc_extractor.rb +79 -75
- data/lib/traject/marc_reader.rb +33 -25
- data/lib/traject/mock_reader.rb +9 -10
- data/lib/traject/ndj_reader.rb +7 -7
- data/lib/traject/null_writer.rb +1 -1
- data/lib/traject/qualified_const_get.rb +12 -2
- data/lib/traject/solrj_writer.rb +61 -52
- data/lib/traject/thread_pool.rb +45 -45
- data/lib/traject/translation_map.rb +59 -27
- data/lib/traject/util.rb +3 -3
- data/lib/traject/version.rb +1 -1
- data/lib/traject/yaml_writer.rb +1 -1
- data/test/debug_writer_test.rb +7 -7
- data/test/indexer/each_record_test.rb +4 -4
- data/test/indexer/macros_marc21_semantics_test.rb +12 -12
- data/test/indexer/macros_marc21_test.rb +10 -10
- data/test/indexer/macros_test.rb +1 -1
- data/test/indexer/map_record_test.rb +6 -6
- data/test/indexer/read_write_test.rb +43 -4
- data/test/indexer/settings_test.rb +2 -2
- data/test/indexer/to_field_test.rb +8 -8
- data/test/marc4j_reader_test.rb +4 -4
- data/test/marc_extractor_test.rb +33 -25
- data/test/marc_format_classifier_test.rb +3 -3
- data/test/marc_reader_test.rb +2 -2
- data/test/test_helper.rb +3 -3
- data/test/test_support/demo_config.rb +52 -48
- data/test/translation_map_test.rb +22 -4
- data/test/translation_maps/bad_ruby.rb +2 -2
- data/test/translation_maps/both_map.rb +1 -1
- data/test/translation_maps/default_literal.rb +1 -1
- data/test/translation_maps/default_passthrough.rb +1 -1
- data/test/translation_maps/ruby_map.rb +1 -1
- metadata +7 -31
- data/doc/macros.md +0 -103
data/lib/traject/marc_reader.rb
CHANGED
@@ -1,35 +1,43 @@
|
|
1
1
|
require 'marc'
|
2
|
-
require 'traject/ndj_reader'
|
2
|
+
require 'traject/ndj_reader'
|
3
3
|
|
4
|
-
#
|
5
|
-
# MARC
|
4
|
+
# `Traject::MarcReader` uses pure ruby marc gem to parse MARC records. It
|
5
|
+
# can read MARC ISO 2709 ('binary'), MARC-XML, and Marc-in-json (newline-delimited-json).
|
6
6
|
#
|
7
|
-
#
|
7
|
+
# MarcReader can not currently read binary MARC in the MARC8 encoding, see
|
8
|
+
# the Traject::Marc4JReader instead.
|
8
9
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# de-serialization.
|
10
|
+
# By default assumes binary MARC encoding, please set marc_source.type setting
|
11
|
+
# for XML or json.
|
12
12
|
#
|
13
|
-
#
|
14
|
-
|
13
|
+
# ## Settings
|
14
|
+
|
15
|
+
# * "marc_source.type": serialization type. default 'binary'
|
16
|
+
# * "binary". standard ISO 2709 "binary" MARC format.
|
17
|
+
# * "xml", MarcXML
|
18
|
+
# * "json" The "marc-in-json" format, encoded as newline-separated
|
19
|
+
# json. (synonym 'ndj'). A simplistic newline-separated json, with no comments
|
20
|
+
# allowed, and no unescaped internal newlines allowed in the json
|
21
|
+
# objects -- we just read line by line, and assume each line is a
|
22
|
+
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
23
|
+
# * "marc_reader.xml_parser": For XML type, which XML parser to tell Marc::Reader
|
24
|
+
# to use. Anything recognized by [Marc::Reader :parser
|
25
|
+
# argument](http://rdoc.info/github/ruby-marc/ruby-marc/MARC/XMLReader).
|
26
|
+
# By default, asks Marc::Reader to take
|
27
|
+
# its best guess as to highest performance available
|
28
|
+
# installed option. Probably best to leave as default.
|
29
|
+
#
|
30
|
+
# ## Example
|
31
|
+
#
|
32
|
+
# In a configuration file:
|
15
33
|
#
|
16
|
-
#
|
17
|
-
# ["marc_source.type"] serialization type. default 'binary'
|
18
|
-
# * "binary". Actual marc.
|
19
|
-
# * "xml", MarcXML
|
20
|
-
# * "json" The "marc-in-json" format, encoded as newline-separated
|
21
|
-
# json. A simplistic newline-separated json, with no comments
|
22
|
-
# allowed, and no unescpaed internal newlines allowed in the json
|
23
|
-
# objects -- we just read line by line, and assume each line is a
|
24
|
-
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
25
|
-
# ["marc_reader.xml_parser"] For XML type, which XML parser to tell Marc::Reader
|
26
|
-
# to use. Anything recognized by Marc::Reader :parser
|
27
|
-
# argument. By default, asks Marc::Reader to take
|
28
|
-
# it's best guess as to highest performance available
|
29
|
-
# installed option.
|
34
|
+
# require 'traject/marc_reader'
|
30
35
|
#
|
36
|
+
# settings do
|
37
|
+
# provide "reader_class_name", "Traject::MarcReader"
|
38
|
+
# provide "marc_source.type", "xml"
|
39
|
+
# end
|
31
40
|
#
|
32
|
-
# Can NOT yet read Marc8, input is always assumed UTF8.
|
33
41
|
class Traject::MarcReader
|
34
42
|
include Enumerable
|
35
43
|
|
@@ -64,4 +72,4 @@ class Traject::MarcReader
|
|
64
72
|
self.internal_reader.each(*args, &block)
|
65
73
|
end
|
66
74
|
|
67
|
-
end
|
75
|
+
end
|
data/lib/traject/mock_reader.rb
CHANGED
@@ -10,15 +10,14 @@ module Traject
|
|
10
10
|
#
|
11
11
|
# Specify in a config file as follows:
|
12
12
|
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# require 'traject/mock_writer'
|
14
|
+
# require 'traject/mock_reader'
|
15
|
+
#
|
16
|
+
# settings do
|
17
|
+
# store "reader_class_name", "Traject::MockReader"
|
18
|
+
# store "writer_class_name", "Traject::MockWriter"
|
19
|
+
# store "mock_reader.limit", 4_000 # default is 10_000
|
20
|
+
# end
|
22
21
|
class MockReader
|
23
22
|
|
24
23
|
attr_accessor :limit
|
@@ -50,7 +49,7 @@ module Traject
|
|
50
49
|
while true
|
51
50
|
json = this_file_iter.next
|
52
51
|
next unless json =~ /\S/
|
53
|
-
records << MARC::Record.new_from_hash(JSON.parse(json))
|
52
|
+
records << MARC::Record.new_from_hash(JSON.parse(json))
|
54
53
|
end
|
55
54
|
rescue StopIteration
|
56
55
|
end
|
data/lib/traject/ndj_reader.rb
CHANGED
@@ -8,7 +8,7 @@ require 'zlib'
|
|
8
8
|
|
9
9
|
class Traject::NDJReader
|
10
10
|
include Enumerable
|
11
|
-
|
11
|
+
|
12
12
|
def initialize(input_stream, settings)
|
13
13
|
@settings = settings
|
14
14
|
@input_stream = input_stream
|
@@ -16,16 +16,16 @@ class Traject::NDJReader
|
|
16
16
|
@input_stream = Zlib::GzipReader.new(@input_stream, :external_encoding => "UTF-8")
|
17
17
|
end
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def logger
|
21
21
|
@logger ||= (settings[:logger] || Yell.new(STDERR, :level => "gt.fatal")) # null logger)
|
22
|
-
end
|
22
|
+
end
|
23
23
|
|
24
24
|
def each
|
25
25
|
unless block_given?
|
26
26
|
return enum_for(:each)
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
@input_stream.each_with_index do |json, i|
|
30
30
|
begin
|
31
31
|
yield MARC::Record.new_from_hash(JSON.parse(json))
|
@@ -34,7 +34,7 @@ class Traject::NDJReader
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
end
|
39
|
-
|
40
|
-
|
39
|
+
|
40
|
+
|
data/lib/traject/null_writer.rb
CHANGED
data/lib/traject/qualified_const_get.rb
CHANGED
@@ -3,7 +3,17 @@
|
|
3
3
|
#
|
4
4
|
# Method to take a string constant name, including :: qualifications, and
|
5
5
|
# look up the actual constant. Looks up relative to current file.
|
6
|
-
#
|
6
|
+
# Respects leading ::. Etc.
|
7
|
+
#
|
8
|
+
# class Something
|
9
|
+
# include Traject::QualifiedConstGet
|
10
|
+
#
|
11
|
+
# def foo
|
12
|
+
# #...
|
13
|
+
# klass = qualified_const_get("Foo::Bar")
|
14
|
+
# #...
|
15
|
+
# end
|
16
|
+
# end
|
7
17
|
module Traject::QualifiedConstGet
|
8
18
|
|
9
19
|
|
@@ -27,4 +37,4 @@ module Traject::QualifiedConstGet
|
|
27
37
|
path.inject(Object) { |ns,name| ns.const_get(name) }
|
28
38
|
end
|
29
39
|
|
30
|
-
end
|
40
|
+
end
|
data/lib/traject/solrj_writer.rb
CHANGED
@@ -1,19 +1,3 @@
|
|
1
|
-
# TODO: THREAD POOL
|
2
|
-
#
|
3
|
-
# 1) Exception handling in threads, what's the right thing to do
|
4
|
-
# 2) General count of failed records in a thread safe way, so we can report
|
5
|
-
# it back from 'close', so process can report it back, and non-zero exit
|
6
|
-
# code can be emited from command-line.
|
7
|
-
# 3) back pressure on thread pool. give it a bounded blocking queue instead,
|
8
|
-
# to make sure thousands of add tasks don't build up, waiting until the end.
|
9
|
-
# or does that even matter? So what if they build up in the queue and only
|
10
|
-
# get taken care of at the end, is that okay? I do emit a warning right now
|
11
|
-
# if it takes more than 60 seconds to process remaining thread pool task queue
|
12
|
-
# at end.
|
13
|
-
# 4) No tests yet that actually test thread pool stuff; additionally, may make
|
14
|
-
# some of the batch tests fail in non-deterministic ways, since batch tests
|
15
|
-
# assume order of add (and our Mock solr server is not thread safe yet!)
|
16
|
-
|
17
1
|
require 'yell'
|
18
2
|
|
19
3
|
require 'traject'
|
@@ -26,7 +10,6 @@ require 'thread' # for Mutex
|
|
26
10
|
|
27
11
|
#
|
28
12
|
# Writes to a Solr using SolrJ, and the SolrJ HttpSolrServer.
|
29
|
-
# (sub-class later for the ConcurrentUpdate server?)
|
30
13
|
#
|
31
14
|
# After you call #close, you can check #skipped_record_count if you want
|
32
15
|
# for an integer count of skipped records.
|
@@ -35,38 +18,64 @@ require 'thread' # for Mutex
|
|
35
18
|
# you may not get a raise immediately after calling #put, you may get it on
|
36
19
|
# a FUTURE #put or #close. You should get it eventually though.
|
37
20
|
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
21
|
+
# ## Settings
|
22
|
+
#
|
23
|
+
# * solr.url: Your solr url (required)
|
24
|
+
#
|
25
|
+
# * solrj_writer.server_class_name: Defaults to "HttpSolrServer". You can specify
|
26
|
+
# another Solr Server sub-class, but it has
|
27
|
+
# to take a one-arg url constructor. Maybe
|
28
|
+
# subclass this writer class and overwrite
|
29
|
+
# instantiate_solr_server! otherwise
|
30
|
+
#
|
31
|
+
# * solrj.jar_dir: Custom directory containing all of the SolrJ jars. All
|
32
|
+
# jars in this dir will be loaded. Otherwise,
|
33
|
+
# we load our own packaged solrj jars. This setting
|
34
|
+
# can't really be used differently in the same app instance,
|
35
|
+
# since jars are loaded globally.
|
36
|
+
#
|
37
|
+
# * solrj_writer.parser_class_name: A String name of a class in package
|
38
|
+
# org.apache.solr.client.solrj.impl,
|
39
|
+
# we'll instantiate one with a zero-arg
|
40
|
+
# constructor, and pass it as an arg to setParser on
|
41
|
+
# the SolrServer instance, if present.
|
42
|
+
# NOTE: For contacting a Solr 1.x server, with the
|
43
|
+
# recent version of SolrJ used by default, set to
|
44
|
+
# "XMLResponseParser"
|
45
|
+
#
|
46
|
+
# * solrj_writer.commit_on_close: If true (or string 'true'), send a commit to solr
|
47
|
+
# at end of #process.
|
48
|
+
#
|
49
|
+
# * solrj_writer.batch_size: If non-nil and more than 1, send documents to
|
50
|
+
# solr in batches of solrj_writer.batch_size. If nil/1,
|
51
|
+
# however, an http transaction with solr will be done
|
52
|
+
# per doc. DEFAULT to 100, which seems to be a sweet spot.
|
53
|
+
#
|
54
|
+
# * solrj_writer.thread_pool: Defaults to 1. A thread pool is used for submitting docs
|
55
|
+
# to solr. Set to 0 or nil to disable threading. Set to 1,
|
56
|
+
# there will still be a single bg thread doing the adds. For
|
57
|
+
# very fast Solr servers and very fast indexing processes, may
|
58
|
+
# make sense to increase this value to throw at Solr as fast as it
|
59
|
+
# can catch.
|
60
|
+
#
|
61
|
+
# ## Example
|
62
|
+
#
|
63
|
+
# settings do
|
64
|
+
# provide "writer_class_name", "Traject::SolrJWriter"
|
65
|
+
#
|
66
|
+
# # This is just regular ruby, so don't be afraid to have conditionals!
|
67
|
+
# # Switch on hostname, for test and production server differences
|
68
|
+
# if Socket.gethostname =~ /devhost/
|
69
|
+
# provide "solr.url", "http://my.dev.machine:9033/catalog"
|
70
|
+
# else
|
71
|
+
# provide "solr.url", "http://my.production.machine:9033/catalog"
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# provide "solrj_writer.parser_class_name", "BinaryResponseParser" # for Solr 4.x
|
75
|
+
# # provide "solrj_writer.parser_class_name", "XMLResponseParser" # For solr 1.x or 3.x
|
76
|
+
#
|
77
|
+
# provide "solrj_writer.commit_on_close", "true"
|
78
|
+
# end
|
70
79
|
class Traject::SolrJWriter
|
71
80
|
# just a tuple of a SolrInputDocument
|
72
81
|
# and a Traject::Indexer::Context it came from
|
@@ -150,7 +159,7 @@ class Traject::SolrJWriter
|
|
150
159
|
|
151
160
|
if settings["solrj_writer.batch_size"].to_i > 1
|
152
161
|
ready_batch = []
|
153
|
-
|
162
|
+
|
154
163
|
batched_queue.add(package)
|
155
164
|
if batched_queue.size >= settings["solrj_writer.batch_size"].to_i
|
156
165
|
batched_queue.drain_to(ready_batch)
|
@@ -164,7 +173,7 @@ class Traject::SolrJWriter
|
|
164
173
|
end
|
165
174
|
end
|
166
175
|
|
167
|
-
@thread_pool.maybe_in_thread_pool { batch_add_document_packages(ready_batch) }
|
176
|
+
@thread_pool.maybe_in_thread_pool { batch_add_document_packages(ready_batch) }
|
168
177
|
end
|
169
178
|
else # non-batched add, add one at a time.
|
170
179
|
@thread_pool.maybe_in_thread_pool { add_one_document_package(package) }
|
@@ -192,7 +201,7 @@ class Traject::SolrJWriter
|
|
192
201
|
# shared state batched_queue in a mutex.
|
193
202
|
def batch_add_document_packages(current_batch)
|
194
203
|
begin
|
195
|
-
a = current_batch.collect {|package| package.solr_document }
|
204
|
+
a = current_batch.collect {|package| package.solr_document }
|
196
205
|
solr_server.add( a )
|
197
206
|
|
198
207
|
$stderr.write "%" if @debug_ascii_progress
|
data/lib/traject/thread_pool.rb
CHANGED
@@ -1,47 +1,47 @@
|
|
1
1
|
module Traject
|
2
2
|
# An abstraction wrapping a threadpool executor in some configuration choices
|
3
|
-
# and other apparatus.
|
3
|
+
# and other apparatus.
|
4
|
+
#
|
5
|
+
# 1) Initialize with chosen pool size -- we create fixed size pools, where
|
6
|
+
# core and max sizes are the same.
|
4
7
|
#
|
5
|
-
# 1) Initialize with chosen pool size -- we create fixed size pools, where
|
6
|
-
# core and max sizes are the same.
|
7
|
-
|
8
8
|
# 2) If initialized with nil for threadcount, no thread pool will actually
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
9
|
+
# be created, and all threadpool-related methods become no-ops. We call this
|
10
|
+
# the nil/null threadpool. A non-nil threadpool requires jruby, but you can
|
11
|
+
# create a null Traject::ThreadPool.new(nil) under MRI without anything
|
12
|
+
# complaining.
|
13
13
|
#
|
14
14
|
# 3) Use the #maybe_in_thread_pool method to send blocks to thread pool for
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
15
|
+
# execution -- if no threadpool configured your block will just be
|
16
|
+
# executed in calling thread. Be careful to not refer to any non-local
|
17
|
+
# variables in the block, unless the variable has an object you can
|
18
|
+
# use thread-safely!
|
19
19
|
#
|
20
20
|
# 4) Thread pools are java.util.concurrent.ThreadPoolExecutor, manually created
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
21
|
+
# with a work queue that will buffer up to (pool_size*3) tasks. If queue is full,
|
22
|
+
# the ThreadPoolExecutor is set up to use the ThreadPoolExecutor.CallerRunsPolicy,
|
23
|
+
# meaning the block will end up executing in caller's own thread. With the kind
|
24
|
+
# of work we're doing, where each unit of work is small and there are many of them--
|
25
|
+
# the CallerRunsPolicy serves as an effective 'back pressure' mechanism to keep
|
26
|
+
# the work queue from getting too large and exhausting memory, when producers are
|
27
|
+
# faster than consumers.
|
28
28
|
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# 5) Any exceptions raised by pool-executed work are captured and accumulated in a thread-safe
|
30
|
+
# manner, and can be re-raised in the thread of your choice by calling
|
31
|
+
# #raise_collected_exception!
|
32
32
|
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
33
|
+
# 6) When you are done with the threadpool, you can and must call
|
34
|
+
# #shutdown_and_wait, which will wait for all current queued work
|
35
|
+
# to complete, then return. You can not give any more work to the pool
|
36
|
+
# after you do this. By default it'll wait pretty much forever, which should
|
37
|
+
# be fine. If you never call shutdown, the pool will keep running forever
|
38
|
+
# and not allow your program to exit!
|
39
39
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
40
|
+
# 7) We will keep track of total times a block is run in thread pool, and
|
41
|
+
# total elapsed (wall) time of running all blocks, so an average_execution_ms
|
42
|
+
# time can be given. #average_execution_ms may be inaccurate if called when
|
43
|
+
# threads are still executing, as it's not entirely thread safe (may get
|
44
|
+
# an off by one as to total iterations)
|
45
45
|
class ThreadPool
|
46
46
|
attr_reader :pool_size, :label, :queue_capacity
|
47
47
|
|
@@ -60,15 +60,15 @@ module Traject
|
|
60
60
|
rejectedExecutionHandler = java.util.concurrent.ThreadPoolExecutor::CallerRunsPolicy.new
|
61
61
|
|
62
62
|
# keepalive times don't matter, we are setting core and max pool to
|
63
|
-
# same thing, fixed size pool.
|
63
|
+
# same thing, fixed size pool.
|
64
64
|
@thread_pool = java.util.concurrent.ThreadPoolExecutor.new(
|
65
|
-
@pool_size, @pool_size, 0, java.util.concurrent.TimeUnit::MILLISECONDS,
|
65
|
+
@pool_size, @pool_size, 0, java.util.concurrent.TimeUnit::MILLISECONDS,
|
66
66
|
blockingQueue, rejectedExecutionHandler)
|
67
67
|
|
68
|
-
# A thread-safe queue to collect exceptions cross-threads.
|
68
|
+
# A thread-safe queue to collect exceptions cross-threads.
|
69
69
|
# We make it small, we really only need to store the first
|
70
70
|
# exception, we don't care too much about others. But we'll
|
71
|
-
# keep the first 20, why not.
|
71
|
+
# keep the first 20, why not.
|
72
72
|
@async_exception_queue = java.util.concurrent.ArrayBlockingQueue.new(20)
|
73
73
|
end
|
74
74
|
end
|
@@ -101,7 +101,7 @@ module Traject
|
|
101
101
|
# # and would be pointing to a different string now!
|
102
102
|
#
|
103
103
|
# Note, that just makes block-local variables, it doesn't
|
104
|
-
# help you with whether a data structure itself is thread safe.
|
104
|
+
# help you with whether a data structure itself is thread safe.
|
105
105
|
def maybe_in_thread_pool(*args)
|
106
106
|
start_t = Time.now
|
107
107
|
|
@@ -121,7 +121,7 @@ module Traject
|
|
121
121
|
|
122
122
|
# Just for monitoring/debugging purposes, we'll return the work queue
|
123
123
|
# used by the threadpool. Don't recommend you do anything with it, as
|
124
|
-
# the original java.util.concurrent docs make the same recommendation.
|
124
|
+
# the original java.util.concurrent docs make the same recommendation.
|
125
125
|
def queue
|
126
126
|
@thread_pool && @thread_pool.queue
|
127
127
|
end
|
@@ -129,20 +129,20 @@ module Traject
|
|
129
129
|
# thread-safe way of storing an exception, to raise
|
130
130
|
# later in a different thread. We don't guarantee
|
131
131
|
# that we can store more than one at a time, only
|
132
|
-
# the first one recorded may be stored.
|
132
|
+
# the first one recorded may be stored.
|
133
133
|
def collect_exception(e)
|
134
134
|
# offer will silently do nothing if the queue is full, that's fine
|
135
|
-
# with us.
|
135
|
+
# with us.
|
136
136
|
@async_exception_queue.offer(e)
|
137
137
|
end
|
138
138
|
|
139
139
|
# If there's a stored collected exception, raise it
|
140
140
|
# again now. Call this to re-raise exceptions caught in
|
141
|
-
# other threads in the thread of your choice.
|
141
|
+
# other threads in the thread of your choice.
|
142
142
|
#
|
143
143
|
# If you call this method on a ThreadPool initialized with nil
|
144
144
|
# as a non-functioning threadpool -- then this method is just
|
145
|
-
# a no-op.
|
145
|
+
# a no-op.
|
146
146
|
def raise_collected_exception!
|
147
147
|
if @async_exception_queue && e = @async_exception_queue.poll
|
148
148
|
raise e
|
@@ -151,7 +151,7 @@ module Traject
|
|
151
151
|
|
152
152
|
# shutdown threadpool, and wait for all work to complete.
|
153
153
|
# this one is also a no-op if you have a null ThreadPool that
|
154
|
-
# doesn't really have a threadpool at all.
|
154
|
+
# doesn't really have a threadpool at all.
|
155
155
|
#
|
156
156
|
# returns elapsed time in seconds it took to shutdown
|
157
157
|
def shutdown_and_wait
|
@@ -168,4 +168,4 @@ module Traject
|
|
168
168
|
end
|
169
169
|
|
170
170
|
end
|
171
|
-
end
|
171
|
+
end
|