traject 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/README.md +183 -191
- data/bench/bench.rb +1 -1
- data/doc/batch_execution.md +14 -0
- data/doc/extending.md +14 -12
- data/doc/indexing_rules.md +265 -0
- data/lib/traject/command_line.rb +12 -41
- data/lib/traject/debug_writer.rb +32 -13
- data/lib/traject/indexer.rb +101 -24
- data/lib/traject/indexer/settings.rb +18 -17
- data/lib/traject/json_writer.rb +32 -11
- data/lib/traject/line_writer.rb +6 -6
- data/lib/traject/macros/basic.rb +1 -1
- data/lib/traject/macros/marc21.rb +17 -13
- data/lib/traject/macros/marc21_semantics.rb +27 -25
- data/lib/traject/macros/marc_format_classifier.rb +39 -25
- data/lib/traject/marc4j_reader.rb +36 -22
- data/lib/traject/marc_extractor.rb +79 -75
- data/lib/traject/marc_reader.rb +33 -25
- data/lib/traject/mock_reader.rb +9 -10
- data/lib/traject/ndj_reader.rb +7 -7
- data/lib/traject/null_writer.rb +1 -1
- data/lib/traject/qualified_const_get.rb +12 -2
- data/lib/traject/solrj_writer.rb +61 -52
- data/lib/traject/thread_pool.rb +45 -45
- data/lib/traject/translation_map.rb +59 -27
- data/lib/traject/util.rb +3 -3
- data/lib/traject/version.rb +1 -1
- data/lib/traject/yaml_writer.rb +1 -1
- data/test/debug_writer_test.rb +7 -7
- data/test/indexer/each_record_test.rb +4 -4
- data/test/indexer/macros_marc21_semantics_test.rb +12 -12
- data/test/indexer/macros_marc21_test.rb +10 -10
- data/test/indexer/macros_test.rb +1 -1
- data/test/indexer/map_record_test.rb +6 -6
- data/test/indexer/read_write_test.rb +43 -4
- data/test/indexer/settings_test.rb +2 -2
- data/test/indexer/to_field_test.rb +8 -8
- data/test/marc4j_reader_test.rb +4 -4
- data/test/marc_extractor_test.rb +33 -25
- data/test/marc_format_classifier_test.rb +3 -3
- data/test/marc_reader_test.rb +2 -2
- data/test/test_helper.rb +3 -3
- data/test/test_support/demo_config.rb +52 -48
- data/test/translation_map_test.rb +22 -4
- data/test/translation_maps/bad_ruby.rb +2 -2
- data/test/translation_maps/both_map.rb +1 -1
- data/test/translation_maps/default_literal.rb +1 -1
- data/test/translation_maps/default_passthrough.rb +1 -1
- data/test/translation_maps/ruby_map.rb +1 -1
- metadata +7 -31
- data/doc/macros.md +0 -103
data/lib/traject/marc_reader.rb
CHANGED
@@ -1,35 +1,43 @@
|
|
1
1
|
require 'marc'
|
2
|
-
require 'traject/ndj_reader'
|
2
|
+
require 'traject/ndj_reader'
|
3
3
|
|
4
|
-
#
|
5
|
-
# MARC
|
4
|
+
# `Traject::MarcReader` uses pure ruby marc gem to parse MARC records. It
|
5
|
+
# can read MARC ISO 2709 ('binary'), MARC-XML, and Marc-in-json (newline-delimited-json).
|
6
6
|
#
|
7
|
-
#
|
7
|
+
# MarcReader can not currently read binary MARC in the MARC8 encoding, see
|
8
|
+
# the Traject::Marc4JReader instead.
|
8
9
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# de-serialization.
|
10
|
+
# By default assumes binary MARC encoding, please set marc_source.type setting
|
11
|
+
# for XML or json.
|
12
12
|
#
|
13
|
-
#
|
14
|
-
|
13
|
+
# ## Settings
|
14
|
+
|
15
|
+
# * "marc_source.type": serialization type. default 'binary'
|
16
|
+
# * "binary". standard ISO 2709 "binary" MARC format.
|
17
|
+
# * "xml", MarcXML
|
18
|
+
# * "json" The "marc-in-json" format, encoded as newline-separated
|
19
|
+
# json. (synonym 'ndj'). A simplistic newline-separated json, with no comments
|
20
|
+
# allowed, and no unescaped internal newlines allowed in the json
|
21
|
+
# objects -- we just read line by line, and assume each line is a
|
22
|
+
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
23
|
+
# * "marc_reader.xml_parser": For XML type, which XML parser to tell Marc::Reader
|
24
|
+
# to use. Anything recognized by [Marc::Reader :parser
|
25
|
+
# argument](http://rdoc.info/github/ruby-marc/ruby-marc/MARC/XMLReader).
|
26
|
+
# By default, asks Marc::Reader to take
|
27
|
+
# its best guess as to highest performance available
|
28
|
+
# installed option. Probably best to leave as default.
|
29
|
+
#
|
30
|
+
# ## Example
|
31
|
+
#
|
32
|
+
# In a configuration file:
|
15
33
|
#
|
16
|
-
#
|
17
|
-
# ["marc_source.type"] serialization type. default 'binary'
|
18
|
-
# * "binary". Actual marc.
|
19
|
-
# * "xml", MarcXML
|
20
|
-
# * "json" The "marc-in-json" format, encoded as newline-separated
|
21
|
-
# json. A simplistic newline-separated json, with no comments
|
22
|
-
# allowed, and no unescpaed internal newlines allowed in the json
|
23
|
-
# objects -- we just read line by line, and assume each line is a
|
24
|
-
# marc-in-json. http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/
|
25
|
-
# ["marc_reader.xml_parser"] For XML type, which XML parser to tell Marc::Reader
|
26
|
-
# to use. Anything recognized by Marc::Reader :parser
|
27
|
-
# argument. By default, asks Marc::Reader to take
|
28
|
-
# it's best guess as to highest performance available
|
29
|
-
# installed option.
|
34
|
+
# require 'traject/marc_reader'
|
30
35
|
#
|
36
|
+
# settings do
|
37
|
+
# provide "reader_class_name", "Traject::MarcReader"
|
38
|
+
# provide "marc_source.type", "xml"
|
39
|
+
# end
|
31
40
|
#
|
32
|
-
# Can NOT yet read Marc8, input is always assumed UTF8.
|
33
41
|
class Traject::MarcReader
|
34
42
|
include Enumerable
|
35
43
|
|
@@ -64,4 +72,4 @@ class Traject::MarcReader
|
|
64
72
|
self.internal_reader.each(*args, &block)
|
65
73
|
end
|
66
74
|
|
67
|
-
end
|
75
|
+
end
|
data/lib/traject/mock_reader.rb
CHANGED
@@ -10,15 +10,14 @@ module Traject
|
|
10
10
|
#
|
11
11
|
# Specify in a config files as follows:
|
12
12
|
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# require 'traject/mock_writer'
|
14
|
+
# require 'traject/mock_reader'
|
15
|
+
#
|
16
|
+
# settings do
|
17
|
+
# store "reader_class_name", "Traject::MockReader"
|
18
|
+
# store "writer_class_name", "Traject::MockWriter"
|
19
|
+
# store "mock_reader.limit", 4_000 # default is 10_000
|
20
|
+
# end
|
22
21
|
class MockReader
|
23
22
|
|
24
23
|
attr_accessor :limit
|
@@ -50,7 +49,7 @@ module Traject
|
|
50
49
|
while true
|
51
50
|
json = this_file_iter.next
|
52
51
|
next unless json =~ /\S/
|
53
|
-
records << MARC::Record.new_from_hash(JSON.parse(json))
|
52
|
+
records << MARC::Record.new_from_hash(JSON.parse(json))
|
54
53
|
end
|
55
54
|
rescue StopIteration
|
56
55
|
end
|
data/lib/traject/ndj_reader.rb
CHANGED
@@ -8,7 +8,7 @@ require 'zlib'
|
|
8
8
|
|
9
9
|
class Traject::NDJReader
|
10
10
|
include Enumerable
|
11
|
-
|
11
|
+
|
12
12
|
def initialize(input_stream, settings)
|
13
13
|
@settings = settings
|
14
14
|
@input_stream = input_stream
|
@@ -16,16 +16,16 @@ class Traject::NDJReader
|
|
16
16
|
@input_stream = Zlib::GzipReader.new(@input_stream, :external_encoding => "UTF-8")
|
17
17
|
end
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def logger
|
21
21
|
@logger ||= (settings[:logger] || Yell.new(STDERR, :level => "gt.fatal")) # null logger)
|
22
|
-
end
|
22
|
+
end
|
23
23
|
|
24
24
|
def each
|
25
25
|
unless block_given?
|
26
26
|
return enum_for(:each)
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
@input_stream.each_with_index do |json, i|
|
30
30
|
begin
|
31
31
|
yield MARC::Record.new_from_hash(JSON.parse(json))
|
@@ -34,7 +34,7 @@ class Traject::NDJReader
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
end
|
39
|
-
|
40
|
-
|
39
|
+
|
40
|
+
|
data/lib/traject/null_writer.rb
CHANGED
@@ -3,7 +3,17 @@
|
|
3
3
|
#
|
4
4
|
# Method to take a string constant name, including :: qualifications, and
|
5
5
|
# look up the actual constant. Looks up relative to current file.
|
6
|
-
#
|
6
|
+
# Respects leading ::. Etc.
|
7
|
+
#
|
8
|
+
# class Something
|
9
|
+
# include Traject::QualifiedConstGet
|
10
|
+
#
|
11
|
+
# def foo
|
12
|
+
# #...
|
13
|
+
# klass = qualified_const_get("Foo::Bar")
|
14
|
+
# #...
|
15
|
+
# end
|
16
|
+
# end
|
7
17
|
module Traject::QualifiedConstGet
|
8
18
|
|
9
19
|
|
@@ -27,4 +37,4 @@ module Traject::QualifiedConstGet
|
|
27
37
|
path.inject(Object) { |ns,name| ns.const_get(name) }
|
28
38
|
end
|
29
39
|
|
30
|
-
end
|
40
|
+
end
|
data/lib/traject/solrj_writer.rb
CHANGED
@@ -1,19 +1,3 @@
|
|
1
|
-
# TODO: THREAD POOL
|
2
|
-
#
|
3
|
-
# 1) Exception handling in threads, what's the right thing to do
|
4
|
-
# 2) General count of failed records in a thread safe way, so we can report
|
5
|
-
# it back from 'close', so process can report it back, and non-zero exit
|
6
|
-
# code can be emited from command-line.
|
7
|
-
# 3) back pressure on thread pool. give it a bounded blocking queue instead,
|
8
|
-
# to make sure thousands of add tasks don't build up, waiting until the end.
|
9
|
-
# or does that even matter? So what if they build up in the queue and only
|
10
|
-
# get taken care of at the end, is that okay? I do emit a warning right now
|
11
|
-
# if it takes more than 60 seconds to process remaining thread pool task queue
|
12
|
-
# at end.
|
13
|
-
# 4) No tests yet that actually test thread pool stuff; additionally, may make
|
14
|
-
# some of the batch tests fail in non-deterministic ways, since batch tests
|
15
|
-
# assume order of add (and our Mock solr server is not thread safe yet!)
|
16
|
-
|
17
1
|
require 'yell'
|
18
2
|
|
19
3
|
require 'traject'
|
@@ -26,7 +10,6 @@ require 'thread' # for Mutex
|
|
26
10
|
|
27
11
|
#
|
28
12
|
# Writes to a Solr using SolrJ, and the SolrJ HttpSolrServer.
|
29
|
-
# (sub-class later for the ConcurrentUpdate server?)
|
30
13
|
#
|
31
14
|
# After you call #close, you can check #skipped_record_count if you want
|
32
15
|
# for an integer count of skipped records.
|
@@ -35,38 +18,64 @@ require 'thread' # for Mutex
|
|
35
18
|
# you may not get a raise immediately after calling #put, you may get it on
|
36
19
|
# a FUTURE #put or #close. You should get it eventually though.
|
37
20
|
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
21
|
+
# ## Settings
|
22
|
+
#
|
23
|
+
# * solr.url: Your solr url (required)
|
24
|
+
#
|
25
|
+
# * solrj_writer.server_class_name: Defaults to "HttpSolrServer". You can specify
|
26
|
+
# another Solr Server sub-class, but it has
|
27
|
+
# to take a one-arg url constructor. Maybe
|
28
|
+
# subclass this writer class and overwrite
|
29
|
+
# instantiate_solr_server! otherwise
|
30
|
+
#
|
31
|
+
# * solrj.jar_dir: Custom directory containing all of the SolrJ jars. All
|
32
|
+
# jars in this dir will be loaded. Otherwise,
|
33
|
+
# we load our own packaged solrj jars. This setting
|
34
|
+
# can't really be used differently in the same app instance,
|
35
|
+
# since jars are loaded globally.
|
36
|
+
#
|
37
|
+
# * solrj_writer.parser_class_name: A String name of a class in package
|
38
|
+
# org.apache.solr.client.solrj.impl,
|
39
|
+
# we'll instantiate one with a zero-arg
|
40
|
+
# constructor, and pass it as an arg to setParser on
|
41
|
+
# the SolrServer instance, if present.
|
42
|
+
# NOTE: For contacting a Solr 1.x server, with the
|
43
|
+
# recent version of SolrJ used by default, set to
|
44
|
+
# "XMLResponseParser"
|
45
|
+
#
|
46
|
+
# * solrj_writer.commit_on_close: If true (or string 'true'), send a commit to solr
|
47
|
+
# at end of #process.
|
48
|
+
#
|
49
|
+
# * solrj_writer.batch_size: If non-nil and more than 1, send documents to
|
50
|
+
# solr in batches of solrj_writer.batch_size. If nil/1,
|
51
|
+
# however, an http transaction with solr will be done
|
52
|
+
# per doc. DEFAULT to 100, which seems to be a sweet spot.
|
53
|
+
#
|
54
|
+
# * solrj_writer.thread_pool: Defaults to 1. A thread pool is used for submitting docs
|
55
|
+
# to solr. Set to 0 or nil to disable threading. Set to 1,
|
56
|
+
# there will still be a single bg thread doing the adds. For
|
57
|
+
# very fast Solr servers and very fast indexing processes, may
|
58
|
+
# make sense to increase this value to throw at Solr as fast as it
|
59
|
+
# can catch.
|
60
|
+
#
|
61
|
+
# ## Example
|
62
|
+
#
|
63
|
+
# settings do
|
64
|
+
# provide "writer_class_name", "Traject::SolrJWriter"
|
65
|
+
#
|
66
|
+
# # This is just regular ruby, so don't be afraid to have conditionals!
|
67
|
+
# # Switch on hostname, for test and production server differences
|
68
|
+
# if Socket.gethostname =~ /devhost/
|
69
|
+
# provide "solr.url", "http://my.dev.machine:9033/catalog"
|
70
|
+
# else
|
71
|
+
# provide "solr.url", "http://my.production.machine:9033/catalog"
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# provide "solrj_writer.parser_class_name", "BinaryResponseParser" # for Solr 4.x
|
75
|
+
# # provide "solrj_writer.parser_class_name", "XMLResponseParser" # For solr 1.x or 3.x
|
76
|
+
#
|
77
|
+
# provide "solrj_writer.commit_on_close", "true"
|
78
|
+
# end
|
70
79
|
class Traject::SolrJWriter
|
71
80
|
# just a tuple of a SolrInputDocument
|
72
81
|
# and a Traject::Indexer::Context it came from
|
@@ -150,7 +159,7 @@ class Traject::SolrJWriter
|
|
150
159
|
|
151
160
|
if settings["solrj_writer.batch_size"].to_i > 1
|
152
161
|
ready_batch = []
|
153
|
-
|
162
|
+
|
154
163
|
batched_queue.add(package)
|
155
164
|
if batched_queue.size >= settings["solrj_writer.batch_size"].to_i
|
156
165
|
batched_queue.drain_to(ready_batch)
|
@@ -164,7 +173,7 @@ class Traject::SolrJWriter
|
|
164
173
|
end
|
165
174
|
end
|
166
175
|
|
167
|
-
@thread_pool.maybe_in_thread_pool { batch_add_document_packages(ready_batch) }
|
176
|
+
@thread_pool.maybe_in_thread_pool { batch_add_document_packages(ready_batch) }
|
168
177
|
end
|
169
178
|
else # non-batched add, add one at a time.
|
170
179
|
@thread_pool.maybe_in_thread_pool { add_one_document_package(package) }
|
@@ -192,7 +201,7 @@ class Traject::SolrJWriter
|
|
192
201
|
# shared state batched_queue in a mutex.
|
193
202
|
def batch_add_document_packages(current_batch)
|
194
203
|
begin
|
195
|
-
a = current_batch.collect {|package| package.solr_document }
|
204
|
+
a = current_batch.collect {|package| package.solr_document }
|
196
205
|
solr_server.add( a )
|
197
206
|
|
198
207
|
$stderr.write "%" if @debug_ascii_progress
|
data/lib/traject/thread_pool.rb
CHANGED
@@ -1,47 +1,47 @@
|
|
1
1
|
module Traject
|
2
2
|
# An abstraction wrapping a threadpool executor in some configuration choices
|
3
|
-
# and other apparatus.
|
3
|
+
# and other apparatus.
|
4
|
+
#
|
5
|
+
# 1) Initialize with chosen pool size -- we create fixed size pools, where
|
6
|
+
# core and max sizes are the same.
|
4
7
|
#
|
5
|
-
# 1) Initialize with chosen pool size -- we create fixed size pools, where
|
6
|
-
# core and max sizes are the same.
|
7
|
-
|
8
8
|
# 2) If initialized with nil for threadcount, no thread pool will actually
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
9
|
+
# be created, and all threadpool-related methods become no-ops. We call this
|
10
|
+
# the nil/null threadpool. A non-nil threadpool requires jruby, but you can
|
11
|
+
# create a null Traject::ThreadPool.new(nil) under MRI without anything
|
12
|
+
# complaining.
|
13
13
|
#
|
14
14
|
# 3) Use the #maybe_in_threadpool method to send blocks to thread pool for
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
15
|
+
# execution -- if no threadpool configured your block will just be
|
16
|
+
# executed in calling thread. Be careful to not refer to any non-local
|
17
|
+
# variables in the block, unless the variable has an object you can
|
18
|
+
# use thread-safely!
|
19
19
|
#
|
20
20
|
# 4) Thread pools are java.util.concurrent.ThreadPoolExecutor, manually created
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
21
|
+
# with a work queue that will buffer up to (pool_size*3) tasks. If queue is full,
|
22
|
+
# the ThreadPoolExecutor is set up to use the ThreadPoolExecutor.CallerRunsPolicy,
|
23
|
+
# meaning the block will end up executing in caller's own thread. With the kind
|
24
|
+
# of work we're doing, where each unit of work is small and there are many of them--
|
25
|
+
# the CallerRunsPolicy serves as an effective 'back pressure' mechanism to keep
|
26
|
+
# the work queue from getting too large and exhausting memory, when producers are
|
27
|
+
# faster than consumers.
|
28
28
|
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# 5) Any exceptions raised by pool-executed work are captured accumulated in a thread-safe
|
30
|
+
# manner, and can be re-raised in the thread of your choice by calling
|
31
|
+
# #raise_collected_exception!
|
32
32
|
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
33
|
+
# 6) When you are done with the threadpool, you can and must call
|
34
|
+
# #shutdown_and_wait, which will wait for all current queued work
|
35
|
+
# to complete, then return. You can not give any more work to the pool
|
36
|
+
# after you do this. By default it'll wait pretty much forever, which should
|
37
|
+
# be fine. If you never call shutdown, the pool will keep running forever
|
38
|
+
# and not allow your program to exit!
|
39
39
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
40
|
+
# 7) We will keep track of total times a block is run in thread pool, and
|
41
|
+
# total elapsed (wall) time of running all blocks, so an average_execution_ms
|
42
|
+
# time can be given. #average_execution_ms may be inaccurate if called when
|
43
|
+
# threads are still executing, as it's not entirely thread safe (may get
|
44
|
+
# an off by one as to total iterations)
|
45
45
|
class ThreadPool
|
46
46
|
attr_reader :pool_size, :label, :queue_capacity
|
47
47
|
|
@@ -60,15 +60,15 @@ module Traject
|
|
60
60
|
rejectedExecutionHandler = java.util.concurrent.ThreadPoolExecutor::CallerRunsPolicy.new
|
61
61
|
|
62
62
|
# keepalive times don't matter, we are setting core and max pool to
|
63
|
-
# same thing, fixed size pool.
|
63
|
+
# same thing, fixed size pool.
|
64
64
|
@thread_pool = java.util.concurrent.ThreadPoolExecutor.new(
|
65
|
-
@pool_size, @pool_size, 0, java.util.concurrent.TimeUnit::MILLISECONDS,
|
65
|
+
@pool_size, @pool_size, 0, java.util.concurrent.TimeUnit::MILLISECONDS,
|
66
66
|
blockingQueue, rejectedExecutionHandler)
|
67
67
|
|
68
|
-
# A thread-safe queue to collect exceptions cross-threads.
|
68
|
+
# A thread-safe queue to collect exceptions cross-threads.
|
69
69
|
# We make it small, we really only need to store the first
|
70
70
|
# exception, we don't care too much about others. But we'll
|
71
|
-
# keep the first 20, why not.
|
71
|
+
# keep the first 20, why not.
|
72
72
|
@async_exception_queue = java.util.concurrent.ArrayBlockingQueue.new(20)
|
73
73
|
end
|
74
74
|
end
|
@@ -101,7 +101,7 @@ module Traject
|
|
101
101
|
# # and would be pointing to a different string now!
|
102
102
|
#
|
103
103
|
# Note, that just makes block-local variables, it doesn't
|
104
|
-
# help you with whether a data structure itself is thread safe.
|
104
|
+
# help you with whether a data structure itself is thread safe.
|
105
105
|
def maybe_in_thread_pool(*args)
|
106
106
|
start_t = Time.now
|
107
107
|
|
@@ -121,7 +121,7 @@ module Traject
|
|
121
121
|
|
122
122
|
# Just for monitoring/debugging purposes, we'll return the work queue
|
123
123
|
# used by the threadpool. Don't recommend you do anything with it, as
|
124
|
-
# the original java.util.concurrent docs make the same recommendation.
|
124
|
+
# the original java.util.concurrent docs make the same recommendation.
|
125
125
|
def queue
|
126
126
|
@thread_pool && @thread_pool.queue
|
127
127
|
end
|
@@ -129,20 +129,20 @@ module Traject
|
|
129
129
|
# thread-safe way of storing an exception, to raise
|
130
130
|
# later in a different thread. We don't guarantee
|
131
131
|
# that we can store more than one at a time, only
|
132
|
-
# the first one recorded may be stored.
|
132
|
+
# the first one recorded may be stored.
|
133
133
|
def collect_exception(e)
|
134
134
|
# offer will silently do nothing if the queue is full, that's fine
|
135
|
-
# with us.
|
135
|
+
# with us.
|
136
136
|
@async_exception_queue.offer(e)
|
137
137
|
end
|
138
138
|
|
139
139
|
# If there's a stored collected exception, raise it
|
140
140
|
# again now. Call this to re-raise exceptions caught in
|
141
|
-
# other threads in the thread of your choice.
|
141
|
+
# other threads in the thread of your choice.
|
142
142
|
#
|
143
143
|
# If you call this method on a ThreadPool initialized with nil
|
144
144
|
# as a non-functioning threadpool -- then this method is just
|
145
|
-
# a no-op.
|
145
|
+
# a no-op.
|
146
146
|
def raise_collected_exception!
|
147
147
|
if @async_exception_queue && e = @async_exception_queue.poll
|
148
148
|
raise e
|
@@ -151,7 +151,7 @@ module Traject
|
|
151
151
|
|
152
152
|
# shutdown threadpool, and wait for all work to complete.
|
153
153
|
# this one is also a no-op if you have a null ThreadPool that
|
154
|
-
# doesn't really have a threadpool at all.
|
154
|
+
# doesn't really have a threadpool at all.
|
155
155
|
#
|
156
156
|
# returns elapsed time in seconds it took to shutdown
|
157
157
|
def shutdown_and_wait
|
@@ -168,4 +168,4 @@ module Traject
|
|
168
168
|
end
|
169
169
|
|
170
170
|
end
|
171
|
-
end
|
171
|
+
end
|