traject 2.3.4 → 3.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -47,6 +47,8 @@ require 'concurrent' # for atomic_fixnum
|
|
47
47
|
class Traject::SolrJsonWriter
|
48
48
|
include Traject::QualifiedConstGet
|
49
49
|
|
50
|
+
URI_REGEXP = URI::Parser.new.make_regexp.freeze
|
51
|
+
|
50
52
|
DEFAULT_MAX_SKIPPED = 0
|
51
53
|
DEFAULT_BATCH_SIZE = 100
|
52
54
|
|
@@ -105,6 +107,18 @@ class Traject::SolrJsonWriter
|
|
105
107
|
end
|
106
108
|
end
|
107
109
|
|
110
|
+
# Not part of standard writer API.
|
111
|
+
#
|
112
|
+
# If we are batching adds, and have some not-yet-written ones queued up --
|
113
|
+
# flush em all to solr.
|
114
|
+
#
|
115
|
+
# This should be thread-safe to call, but the write does take place in
|
116
|
+
# the caller's thread, no threading is done for you here, regardless of setting
|
117
|
+
# of solr_writer.thread_pool
|
118
|
+
def flush
|
119
|
+
send_batch( Traject::Util.drain_queue(@batched_queue) )
|
120
|
+
end
|
121
|
+
|
108
122
|
# Send the given batch of contexts. If something goes wrong, send
|
109
123
|
# them one at a time.
|
110
124
|
# @param [Array<Traject::Indexer::Context>] an array of contexts
|
@@ -147,7 +161,7 @@ class Traject::SolrJsonWriter
|
|
147
161
|
else
|
148
162
|
msg = "Solr error response: #{resp.status}: #{resp.body}"
|
149
163
|
end
|
150
|
-
logger.error "Could not add record #{c.
|
164
|
+
logger.error "Could not add record #{c.record_inspect}: #{msg}"
|
151
165
|
logger.debug(c.source_record.to_s)
|
152
166
|
|
153
167
|
@skipped_record_incrementer.increment
|
@@ -236,7 +250,7 @@ class Traject::SolrJsonWriter
|
|
236
250
|
|
237
251
|
# If we've got a solr.update_url, make sure it's ok
|
238
252
|
def check_solr_update_url(url)
|
239
|
-
unless /^#{
|
253
|
+
unless /^#{URI_REGEXP}$/.match(url)
|
240
254
|
raise ArgumentError.new("#{self.class.name} setting `solr.update_url` doesn't look like a URL: `#{url}`")
|
241
255
|
end
|
242
256
|
url
|
@@ -249,18 +263,11 @@ class Traject::SolrJsonWriter
|
|
249
263
|
end
|
250
264
|
|
251
265
|
# Not a URL? Bail
|
252
|
-
unless /^#{
|
266
|
+
unless /^#{URI_REGEXP}$/.match(url)
|
253
267
|
raise ArgumentError.new("#{self.class.name} setting `solr.url` doesn't look like a URL: `#{url}`")
|
254
268
|
end
|
255
269
|
|
256
|
-
#
|
257
|
-
|
258
|
-
resp = @http_client.get(candidate)
|
259
|
-
if resp.status == 404
|
260
|
-
candidate = [url.chomp('/'), 'update'].join('/')
|
261
|
-
end
|
262
|
-
candidate
|
270
|
+
# Assume the /update/json handler
|
271
|
+
return [url.chomp('/'), 'update', 'json'].join('/')
|
263
272
|
end
|
264
|
-
|
265
|
-
|
266
273
|
end
|
data/lib/traject/thread_pool.rb
CHANGED
@@ -50,11 +50,24 @@ module Traject
|
|
50
50
|
class ThreadPool
|
51
51
|
attr_reader :pool_size, :queue_capacity
|
52
52
|
|
53
|
+
@@disable_concurrency = false
|
54
|
+
|
55
|
+
# Calling Traject::ThreadPool.disable_concurrency! permanently and irrevocably (for program execution)
|
56
|
+
# forces all ThreadPools to have a pool_size of 0 -- running all work inline -- so should disable all
|
57
|
+
# use of threads in Traject.
|
58
|
+
def self.disable_concurrency! ; @@disable_concurrency = true ; end
|
59
|
+
def self.concurrency_disabled? ; @@disable_concurrency ; end
|
60
|
+
|
53
61
|
# First arg is pool size, 0 or nil and we'll be a null/no-op pool which executes
|
54
62
|
# work in caller thread.
|
55
63
|
def initialize(pool_size)
|
56
64
|
@thread_pool = nil # assume we don't have one
|
57
65
|
@exceptions_caught_queue = [] # start off without exceptions
|
66
|
+
|
67
|
+
if self.class.concurrency_disabled?
|
68
|
+
pool_size = 0
|
69
|
+
end
|
70
|
+
|
58
71
|
unless pool_size.nil? || pool_size == 0
|
59
72
|
@pool_size = pool_size.to_i
|
60
73
|
@queue_capacity = pool_size * 3
|
data/lib/traject/util.rb
CHANGED
@@ -60,7 +60,6 @@ module Traject
|
|
60
60
|
if line.start_with?(file_path)
|
61
61
|
if m = /\A.*\:(\d+)\:in/.match(line)
|
62
62
|
return m[1].to_i
|
63
|
-
break
|
64
63
|
end
|
65
64
|
end
|
66
65
|
end
|
@@ -116,11 +115,24 @@ module Traject
|
|
116
115
|
result << queue.deq(:raise_if_empty)
|
117
116
|
end
|
118
117
|
rescue ThreadError
|
119
|
-
# Need do nothing, queue was concurrently popped, no biggie
|
118
|
+
# Nothing needs to be done: the queue was concurrently popped, no biggie, but let's
|
119
|
+
# stop iterating and return what we've got.
|
120
|
+
return result
|
120
121
|
end
|
121
122
|
|
122
123
|
return result
|
123
124
|
end
|
124
125
|
|
126
|
+
def self.is_jruby?
|
127
|
+
unless defined?(@is_jruby)
|
128
|
+
@is_jruby = defined?(JRUBY_VERSION)
|
129
|
+
end
|
130
|
+
@is_jruby
|
131
|
+
end
|
132
|
+
# How can we refer to an io object input in logs? For now, if it's a file-like
|
133
|
+
# object, we can use #path.
|
134
|
+
def self.io_name(io_like_object)
|
135
|
+
io_like_object.path if io_like_object.respond_to?(:path)
|
136
|
+
end
|
125
137
|
end
|
126
138
|
end
|
data/lib/traject/version.rb
CHANGED
data/test/debug_writer_test.rb
CHANGED
@@ -9,7 +9,7 @@ describe 'Simple output' do
|
|
9
9
|
before do
|
10
10
|
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
11
11
|
@indexer = Traject::Indexer.new
|
12
|
-
@indexer.
|
12
|
+
@indexer.configure do
|
13
13
|
to_field "id", extract_marc("001", :first => true)
|
14
14
|
to_field "title", extract_marc("245ab")
|
15
15
|
end
|
@@ -46,7 +46,7 @@ describe 'Simple output' do
|
|
46
46
|
"record_num_1 title #{@title}",
|
47
47
|
]
|
48
48
|
assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
|
49
|
-
assert_match
|
49
|
+
assert_match(/At least one record \(<record #1>\) doesn't define field 'id'/, logger_strio.string)
|
50
50
|
@writer.close
|
51
51
|
|
52
52
|
end
|
@@ -68,7 +68,7 @@ describe 'Simple output' do
|
|
68
68
|
"record_num_1 title #{@title}",
|
69
69
|
]
|
70
70
|
assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
|
71
|
-
assert_match
|
71
|
+
assert_match(/At least one record \(<record #1, output_id:2710183>\) doesn't define field 'iden'/, logger_strio.string)
|
72
72
|
writer.close
|
73
73
|
|
74
74
|
end
|
@@ -39,13 +39,13 @@ describe "Delimited/CSV Writers" do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
it "outputs a header if asked to" do
|
42
|
-
|
42
|
+
Traject::DelimitedWriter.new(@settings)
|
43
43
|
@out.string.chomp.must_equal %w[four one two].join("\t")
|
44
44
|
end
|
45
45
|
|
46
46
|
it "doesn't output a header if asked not to" do
|
47
47
|
@settings['delimited_writer.header'] = 'false'
|
48
|
-
|
48
|
+
Traject::DelimitedWriter.new(@settings)
|
49
49
|
@out.string.must_be_empty
|
50
50
|
end
|
51
51
|
|
@@ -69,7 +69,7 @@ describe "Delimited/CSV Writers" do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
it "writes the header" do
|
72
|
-
|
72
|
+
Traject::CSVWriter.new(@settings)
|
73
73
|
@out.string.chomp.must_equal 'four,one,two'
|
74
74
|
end
|
75
75
|
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'traject/experimental_nokogiri_streaming_reader'
|
3
|
+
|
4
|
+
# Streaming nokogiri reader is experimental, half-finished, and not supported for real use.
|
5
|
+
describe "Traject::ExperimentalNokogiriStreamingReader" do
|
6
|
+
describe "with namespaces" do
|
7
|
+
before do
|
8
|
+
@namespaces = { "oai" => "http://www.openarchives.org/OAI/2.0/" }
|
9
|
+
@xml_sample_path = support_file_path("sample-oai-pmh.xml")
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "invalid settings" do
|
13
|
+
it "default_namespaces not a hash raises" do
|
14
|
+
error = assert_raises(ArgumentError) {
|
15
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
|
16
|
+
"nokogiri.namespaces" => "i am not a hash",
|
17
|
+
})
|
18
|
+
}
|
19
|
+
assert(error.message =~ /nokogiri.namespaces must be a hash/)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "each_record_xpath with unregistered prefix raises" do
|
23
|
+
error = assert_raises(ArgumentError) {
|
24
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
|
25
|
+
"nokogiri.namespaces" => @namespaces,
|
26
|
+
"nokogiri.each_record_xpath" => "//foo:bar"
|
27
|
+
})
|
28
|
+
}
|
29
|
+
assert(error.message =~ %r{Can't find namespace prefix 'foo' in '//foo:bar'})
|
30
|
+
end
|
31
|
+
|
32
|
+
it "raises on some unsupported xpath" do
|
33
|
+
error = assert_raises(ArgumentError) {
|
34
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
|
35
|
+
"nokogiri.namespaces" => @namespaces,
|
36
|
+
"nokogiri.each_record_xpath" => "//oai:record[@id='foo']"
|
37
|
+
})
|
38
|
+
}
|
39
|
+
assert(error.message =~ /Only very simple xpaths supported\./)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "fixed path" do
|
44
|
+
before do
|
45
|
+
@each_record_xpath = "/oai:OAI-PMH/oai:ListRecords/oai:record"
|
46
|
+
end
|
47
|
+
|
48
|
+
it "reads" do
|
49
|
+
shared_tests
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "floating path" do
|
54
|
+
before do
|
55
|
+
@each_record_xpath = "//oai:record"
|
56
|
+
end
|
57
|
+
|
58
|
+
it "reads" do
|
59
|
+
shared_tests
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
describe "extra_xpath_hooks" do
|
65
|
+
it "catches oai-pmh resumption token" do
|
66
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
|
67
|
+
"nokogiri.namespaces" => @namespaces,
|
68
|
+
"nokogiri.each_record_xpath" => "//oai:record",
|
69
|
+
"nokogiri_reader.extra_xpath_hooks" => {
|
70
|
+
"//oai:resumptionToken" => lambda do |node, clipboard|
|
71
|
+
clipboard[:resumptionToken] = node.text
|
72
|
+
end
|
73
|
+
}
|
74
|
+
})
|
75
|
+
_records = @reader.to_a
|
76
|
+
assert_equal "oai_dc.f(2018-05-03T18:09:08Z).u(2018-06-15T19:25:21Z).t(6387):100", @reader.clipboard[:resumptionToken]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "outer namespaces" do
|
81
|
+
it "are preserved" do
|
82
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(support_file_path("namespace-test.xml")), {
|
83
|
+
"nokogiri.namespaces" => { mytop: "http://example.org/top" },
|
84
|
+
"nokogiri.each_record_xpath" => "//mytop:record"
|
85
|
+
})
|
86
|
+
yielded_records = []
|
87
|
+
@reader.each { |record|
|
88
|
+
yielded_records << record
|
89
|
+
}
|
90
|
+
|
91
|
+
assert yielded_records.length > 0
|
92
|
+
|
93
|
+
expected_namespaces = {"xmlns"=>"http://example.org/top", "xmlns:a"=>"http://example.org/a", "xmlns:b"=>"http://example.org/b"}
|
94
|
+
yielded_records.each do |rec|
|
95
|
+
assert_equal expected_namespaces, rec.namespaces
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "without namespaces" do
|
102
|
+
before do
|
103
|
+
@namespaces = {}
|
104
|
+
@xml_sample_path = support_file_path("sample-oai-no-namespace.xml")
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "fixed path" do
|
108
|
+
before do
|
109
|
+
@each_record_xpath = "/OAI-PMH/ListRecords/record"
|
110
|
+
end
|
111
|
+
|
112
|
+
it "reads" do
|
113
|
+
shared_tests
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
describe "floating path" do
|
118
|
+
before do
|
119
|
+
@each_record_xpath = "//record"
|
120
|
+
end
|
121
|
+
|
122
|
+
it "reads" do
|
123
|
+
shared_tests
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
def shared_tests
|
130
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
|
131
|
+
"nokogiri.namespaces" => @namespaces,
|
132
|
+
"nokogiri.each_record_xpath" => @each_record_xpath
|
133
|
+
})
|
134
|
+
|
135
|
+
yielded_records = []
|
136
|
+
@reader.each { |record|
|
137
|
+
yielded_records << record
|
138
|
+
}
|
139
|
+
|
140
|
+
|
141
|
+
manually_extracted = Nokogiri::XML.parse(File.open(@xml_sample_path)).xpath(@each_record_xpath, @namespaces)
|
142
|
+
manually_extracted.collect do |node|
|
143
|
+
# nokogiri makes it so hard to reliably get an Element to serialize to XML with all
|
144
|
+
# its inherited namespace declarations. :( We're only doing this for testing purposes
|
145
|
+
# anyway. This may not handle everything, but handles what we need in the test right now
|
146
|
+
if node.namespace
|
147
|
+
node["xmlns"] = node.namespace.href
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
assert_length manually_extracted.size, yielded_records
|
152
|
+
assert yielded_records.all? {|r| r.kind_of? Nokogiri::XML::Document }
|
153
|
+
assert_equal manually_extracted.collect(&:to_xml), yielded_records.collect(&:root).collect(&:to_xml)
|
154
|
+
end
|
155
|
+
|
156
|
+
describe "without each_record_xpath" do
|
157
|
+
before do
|
158
|
+
@xml_sample_path = support_file_path("namespace-test.xml")
|
159
|
+
end
|
160
|
+
it "yields whole file as one record" do
|
161
|
+
@reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {})
|
162
|
+
|
163
|
+
yielded_records = @reader.to_a
|
164
|
+
|
165
|
+
assert_length 1, yielded_records
|
166
|
+
assert_equal Nokogiri::XML.parse(File.open(@xml_sample_path)).to_xml, yielded_records.first.to_xml
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -5,7 +5,7 @@ describe "Traject::Indexer::Context" do
|
|
5
5
|
describe "source_record_id" do
|
6
6
|
before do
|
7
7
|
@record = MARC::Reader.new(support_file_path('test_data.utf8.mrc')).first
|
8
|
-
@context = Traject::Indexer::Context.new
|
8
|
+
@context = Traject::Indexer::Context.new(source_record_id_proc: Traject::Indexer::MarcIndexer.new.source_record_id_proc)
|
9
9
|
@record_001 = " 00282214 " # from the mrc file
|
10
10
|
end
|
11
11
|
|
@@ -13,23 +13,33 @@ describe "Traject::Indexer::Context" do
|
|
13
13
|
@context.source_record = @record
|
14
14
|
assert_equal @record_001, @context.source_record_id
|
15
15
|
end
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
describe "#record_inspect" do
|
19
|
+
before do
|
20
|
+
@record = MARC::Reader.new(support_file_path('test_data.utf8.mrc')).first
|
21
|
+
@source_record_id_proc = Traject::Indexer::MarcIndexer.new.source_record_id_proc
|
22
|
+
@record_001 = " 00282214 " # from the mrc file
|
21
23
|
|
22
|
-
|
23
|
-
@
|
24
|
-
@
|
25
|
-
assert_equal 'the_record_id', @context.source_record_id
|
24
|
+
@position = 10
|
25
|
+
@input_name = "some_file.mrc"
|
26
|
+
@position_in_input = 10
|
26
27
|
end
|
27
28
|
|
28
|
-
it "
|
29
|
-
@context
|
30
|
-
|
31
|
-
|
29
|
+
it "can print complete inspect label" do
|
30
|
+
@context = Traject::Indexer::Context.new(
|
31
|
+
source_record: @record,
|
32
|
+
source_record_id_proc: @source_record_id_proc,
|
33
|
+
position: @position,
|
34
|
+
input_name: @input_name,
|
35
|
+
position_in_input: @position_in_input
|
36
|
+
)
|
37
|
+
@context.output_hash["id"] = "output_id"
|
38
|
+
|
39
|
+
assert_equal "<record ##{@position} (#{@input_name} ##{@position_in_input}), source_id:#{@record_001} output_id:output_id>", @context.record_inspect
|
32
40
|
end
|
41
|
+
|
33
42
|
end
|
34
43
|
|
44
|
+
|
35
45
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe 'Custom mapping error handler' do
|
4
|
+
# the exception thrown by the custom handler
|
5
|
+
class CustomFakeException < StandardError; end
|
6
|
+
|
7
|
+
let(:indexer) { Traject::Indexer.new }
|
8
|
+
|
9
|
+
it 'invokes the default handler when custom handler is not set' do
|
10
|
+
output = StringIO.new
|
11
|
+
logger =Logger.new(output)
|
12
|
+
indexer.logger = logger
|
13
|
+
indexer.configure do
|
14
|
+
to_field 'id' do |_, _, _|
|
15
|
+
raise CustomFakeException, "I just like raising errors"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
e = assert_raises(CustomFakeException) do
|
20
|
+
indexer.map_record({})
|
21
|
+
end
|
22
|
+
|
23
|
+
assert_equal "I just like raising errors", e.message
|
24
|
+
assert output.string =~ /while executing \(to_field \"id\" at .*error_handler_test.rb:\d+\)/
|
25
|
+
assert output.string =~ /CustomFakeException: I just like raising errors/
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'invokes the custom handler when set' do
|
29
|
+
indexer.configure do
|
30
|
+
settings do
|
31
|
+
provide 'mapping_rescue', -> (ctx, e) {
|
32
|
+
raise CustomFakeException, "custom handler called #{ctx.record_inspect}: #{ctx.index_step.inspect}, #{e.inspect}"
|
33
|
+
}
|
34
|
+
end
|
35
|
+
|
36
|
+
to_field 'id' do |_context , _exception|
|
37
|
+
raise 'this was always going to fail'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
e = assert_raises(CustomFakeException) { indexer.map_record({}) }
|
41
|
+
assert e.message =~ /\(to_field \"id\" at .*error_handler_test.rb:\d+\)/
|
42
|
+
end
|
43
|
+
|
44
|
+
it "custom handler can skip and continue" do
|
45
|
+
indexer.configure do
|
46
|
+
settings do
|
47
|
+
provide "mapping_rescue", -> (context, exception) {
|
48
|
+
context.skip!
|
49
|
+
}
|
50
|
+
end
|
51
|
+
|
52
|
+
to_field 'id' do |_context , _exception|
|
53
|
+
raise 'this was always going to fail'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
assert_nil indexer.map_record({})
|
58
|
+
end
|
59
|
+
end
|