traject 2.3.4 → 3.0.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
data/lib/traject/solr_json_writer.rb CHANGED
@@ -47,6 +47,8 @@ require 'concurrent' # for atomic_fixnum
 class Traject::SolrJsonWriter
   include Traject::QualifiedConstGet
 
+  URI_REGEXP = URI::Parser.new.make_regexp.freeze
+
   DEFAULT_MAX_SKIPPED = 0
   DEFAULT_BATCH_SIZE = 100
 
@@ -105,6 +107,18 @@ class Traject::SolrJsonWriter
     end
   end
 
+  # Not part of standard writer API.
+  #
+  # If we are batching adds, and have some not-yet-written ones queued up --
+  # flush em all to solr.
+  #
+  # This should be thread-safe to call, but the write does take place in
+  # the caller's thread, no threading is done for you here, regardless of setting
+  # of solr_writer.thread_pool
+  def flush
+    send_batch( Traject::Util.drain_queue(@batched_queue) )
+  end
+
   # Send the given batch of contexts. If something goes wrong, send
   # them one at a time.
   # @param [Array<Traject::Indexer::Context>] an array of contexts
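For context, a minimal usage sketch of the new non-standard `flush` method; the Solr URL, field values, and batch size below are invented for the example and not taken from the gem's documentation:

```ruby
# Hypothetical illustration of SolrJsonWriter#flush (added above).
require 'traject'

writer = Traject::SolrJsonWriter.new(
  "solr.update_url"        => "http://localhost:8983/solr/mycore/update/json",
  "solr_writer.batch_size" => 100
)

context = Traject::Indexer::Context.new
context.output_hash["id"]    = ["doc-1"]
context.output_hash["title"] = ["Example title"]

writer.put(context)   # queued; the batch of 100 is not full, so nothing is sent yet
writer.flush          # force the queued adds out now, in the calling thread
writer.close          # sends anything left (and commits only if configured to)
```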
@@ -147,7 +161,7 @@ class Traject::SolrJsonWriter
       else
         msg = "Solr error response: #{resp.status}: #{resp.body}"
       end
-      logger.error "Could not add record #{c.
+      logger.error "Could not add record #{c.record_inspect}: #{msg}"
       logger.debug(c.source_record.to_s)
 
       @skipped_record_incrementer.increment
@@ -236,7 +250,7 @@ class Traject::SolrJsonWriter
 
   # If we've got a solr.update_url, make sure it's ok
   def check_solr_update_url(url)
-    unless /^#{
+    unless /^#{URI_REGEXP}$/.match(url)
       raise ArgumentError.new("#{self.class.name} setting `solr.update_url` doesn't look like a URL: `#{url}`")
     end
     url
@@ -249,18 +263,11 @@ class Traject::SolrJsonWriter
     end
 
     # Not a URL? Bail
-    unless /^#{
+    unless /^#{URI_REGEXP}$/.match(url)
       raise ArgumentError.new("#{self.class.name} setting `solr.url` doesn't look like a URL: `#{url}`")
     end
 
-    #
-
-    resp = @http_client.get(candidate)
-    if resp.status == 404
-      candidate = [url.chomp('/'), 'update'].join('/')
-    end
-    candidate
+    # Assume the /update/json handler
+    return [url.chomp('/'), 'update', 'json'].join('/')
   end
-
-
 end
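In other words, when only `solr.url` is set, 3.0 derives the update URL purely by string manipulation instead of probing Solr with an HTTP GET. A rough illustration (the URL is a made-up example):

```ruby
# 2.x: GET solr.url + "/update/json", fall back to "/update" on a 404.
# 3.0: always assume the /update/json handler, no HTTP round trip.
solr_url   = "http://localhost:8983/solr/mycore"
update_url = [solr_url.chomp('/'), 'update', 'json'].join('/')
# => "http://localhost:8983/solr/mycore/update/json"
```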
data/lib/traject/thread_pool.rb CHANGED
@@ -50,11 +50,24 @@ module Traject
   class ThreadPool
     attr_reader :pool_size, :queue_capacity
 
+    @@disable_concurrency = false
+
+    # Calling Traject::ThreadPool.disable_concurrency! permanently and irrevocably (for program execution)
+    # forces all ThreadPools to have a pool_size of 0 -- running all work inline -- so should disable all
+    # use of threads in Traject.
+    def self.disable_concurrency! ; @@disable_concurrency = true ; end
+    def self.concurrency_disabled? ; @@disable_concurrency ; end
+
     # First arg is pool size, 0 or nil and we'll be a null/no-op pool which executes
     # work in caller thread.
     def initialize(pool_size)
       @thread_pool = nil # assume we don't have one
       @exceptions_caught_queue = [] # start off without exceptions
+
+      if self.class.concurrency_disabled?
+        pool_size = 0
+      end
+
       unless pool_size.nil? || pool_size == 0
         @pool_size = pool_size.to_i
         @queue_capacity = pool_size * 3
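A small sketch of how the new class-level switch might be used, for instance to make debugging deterministic; the `do_work` method is a placeholder, not part of traject:

```ruby
# Hypothetical use of the new switch; irreversible for the rest of the process.
require 'traject'
require 'traject/thread_pool'

Traject::ThreadPool.disable_concurrency!

pool = Traject::ThreadPool.new(4)        # now acts like a pool_size-0 (inline) pool
pool.maybe_in_thread_pool { do_work }    # block runs in the caller's thread
```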
data/lib/traject/util.rb CHANGED
@@ -60,7 +60,6 @@ module Traject
         if line.start_with?(file_path)
           if m = /\A.*\:(\d+)\:in/.match(line)
             return m[1].to_i
-            break
           end
         end
       end
@@ -116,11 +115,24 @@
           result << queue.deq(:raise_if_empty)
         end
       rescue ThreadError
-        # Need do nothing, queue was concurrently popped, no biggie
+        # Need do nothing, queue was concurrently popped, no biggie, but let's
+        # stop iterating and return what we've got.
+        return result
       end
 
       return result
     end
 
+    def self.is_jruby?
+      unless defined?(@is_jruby)
+        @is_jruby = defined?(JRUBY_VERSION)
+      end
+      @is_jruby
+    end
+    # How can we refer to an io object input in logs? For now, if it's a file-like
+    # object, we can use #path.
+    def self.io_name(io_like_object)
+      io_like_object.path if io_like_object.respond_to?(:path)
+    end
   end
 end
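A quick sketch of the two helpers added above; the file name is an example:

```ruby
require 'stringio'
require 'traject'

Traject::Util.is_jruby?                            # true only under JRuby, memoized
Traject::Util.io_name(File.open("records.xml"))    # => "records.xml" (object has #path)
Traject::Util.io_name(StringIO.new("<doc/>"))      # => nil (no #path to report)
```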
data/lib/traject/version.rb CHANGED
data/test/debug_writer_test.rb CHANGED
@@ -9,7 +9,7 @@ describe 'Simple output' do
   before do
     @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
     @indexer = Traject::Indexer.new
-    @indexer.
+    @indexer.configure do
       to_field "id", extract_marc("001", :first => true)
       to_field "title", extract_marc("245ab")
     end
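`Indexer#configure` (used in the updated test above) is the 3.0 way to apply a configuration block programmatically. A minimal sketch, assuming the MARC indexer subclass so the `extract_marc` macro is available; the URL and field choices are invented:

```ruby
# Sketch only, not the gem's documented example.
require 'traject'

indexer = Traject::Indexer::MarcIndexer.new("solr.url" => "http://localhost:8983/solr/mycore")
indexer.configure do
  to_field "id",    extract_marc("001", first: true)
  to_field "title", extract_marc("245ab")
end
```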
@@ -46,7 +46,7 @@ describe 'Simple output' do
       "record_num_1 title #{@title}",
     ]
     assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
-    assert_match
+    assert_match(/At least one record \(<record #1>\) doesn't define field 'id'/, logger_strio.string)
     @writer.close
 
   end
@@ -68,7 +68,7 @@ describe 'Simple output' do
       "record_num_1 title #{@title}",
     ]
     assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
-    assert_match
+    assert_match(/At least one record \(<record #1, output_id:2710183>\) doesn't define field 'iden'/, logger_strio.string)
     writer.close
 
   end
data/test/delimited_writer_test.rb CHANGED
@@ -39,13 +39,13 @@ describe "Delimited/CSV Writers" do
     end
 
     it "outputs a header if asked to" do
-
+      Traject::DelimitedWriter.new(@settings)
      @out.string.chomp.must_equal %w[four one two].join("\t")
     end
 
     it "doesn't output a header if asked not to" do
       @settings['delimited_writer.header'] = 'false'
-
+      Traject::DelimitedWriter.new(@settings)
      @out.string.must_be_empty
     end
 
@@ -69,7 +69,7 @@ describe "Delimited/CSV Writers" do
     end
 
     it "writes the header" do
-
+      Traject::CSVWriter.new(@settings)
      @out.string.chomp.must_equal 'four,one,two'
     end
 
data/test/experimental_nokogiri_streaming_reader_test.rb ADDED
@@ -0,0 +1,169 @@
+require 'test_helper'
+require 'traject/experimental_nokogiri_streaming_reader'
+
+# Streaming nokogiri reader is experimental, half-finished, and not supported for real use.
+describe "Traject::ExperimentalNokogiriStreamingReader" do
+  describe "with namespaces" do
+    before do
+      @namespaces = { "oai" => "http://www.openarchives.org/OAI/2.0/" }
+      @xml_sample_path = support_file_path("sample-oai-pmh.xml")
+    end
+
+    describe "invalid settings" do
+      it "default_namespaces not a hash raises" do
+        error = assert_raises(ArgumentError) {
+          @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
+            "nokogiri.namespaces" => "i am not a hash",
+          })
+        }
+        assert(error.message =~ /nokogiri.namespaces must be a hash/)
+      end
+
+      it "each_record_xpath with unregistered prefix raises" do
+        error = assert_raises(ArgumentError) {
+          @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
+            "nokogiri.namespaces" => @namespaces,
+            "nokogiri.each_record_xpath" => "//foo:bar"
+          })
+        }
+        assert(error.message =~ %r{Can't find namespace prefix 'foo' in '//foo:bar'})
+      end
+
+      it "raises on some unsupported xpath" do
+        error = assert_raises(ArgumentError) {
+          @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
+            "nokogiri.namespaces" => @namespaces,
+            "nokogiri.each_record_xpath" => "//oai:record[@id='foo']"
+          })
+        }
+        assert(error.message =~ /Only very simple xpaths supported\./)
+      end
+    end
+
+    describe "fixed path" do
+      before do
+        @each_record_xpath = "/oai:OAI-PMH/oai:ListRecords/oai:record"
+      end
+
+      it "reads" do
+        shared_tests
+      end
+    end
+
+    describe "floating path" do
+      before do
+        @each_record_xpath = "//oai:record"
+      end
+
+      it "reads" do
+        shared_tests
+      end
+    end
+
+
+    describe "extra_xpath_hooks" do
+      it "catches oai-pmh resumption token" do
+        @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
+          "nokogiri.namespaces" => @namespaces,
+          "nokogiri.each_record_xpath" => "//oai:record",
+          "nokogiri_reader.extra_xpath_hooks" => {
+            "//oai:resumptionToken" => lambda do |node, clipboard|
+              clipboard[:resumptionToken] = node.text
+            end
+          }
+        })
+        _records = @reader.to_a
+        assert_equal "oai_dc.f(2018-05-03T18:09:08Z).u(2018-06-15T19:25:21Z).t(6387):100", @reader.clipboard[:resumptionToken]
+      end
+    end
+
+    describe "outer namespaces" do
+      it "are preserved" do
+        @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(support_file_path("namespace-test.xml")), {
+          "nokogiri.namespaces" => { mytop: "http://example.org/top" },
+          "nokogiri.each_record_xpath" => "//mytop:record"
+        })
+        yielded_records = []
+        @reader.each { |record|
+          yielded_records << record
+        }
+
+        assert yielded_records.length > 0
+
+        expected_namespaces = {"xmlns"=>"http://example.org/top", "xmlns:a"=>"http://example.org/a", "xmlns:b"=>"http://example.org/b"}
+        yielded_records.each do |rec|
+          assert_equal expected_namespaces, rec.namespaces
+        end
+      end
+    end
+  end
+
+  describe "without namespaces" do
+    before do
+      @namespaces = {}
+      @xml_sample_path = support_file_path("sample-oai-no-namespace.xml")
+    end
+
+    describe "fixed path" do
+      before do
+        @each_record_xpath = "/OAI-PMH/ListRecords/record"
+      end
+
+      it "reads" do
+        shared_tests
+      end
+    end
+
+    describe "floating path" do
+      before do
+        @each_record_xpath = "//record"
+      end
+
+      it "reads" do
+        shared_tests
+      end
+    end
+  end
+
+
+  def shared_tests
+    @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {
+      "nokogiri.namespaces" => @namespaces,
+      "nokogiri.each_record_xpath" => @each_record_xpath
+    })
+
+    yielded_records = []
+    @reader.each { |record|
+      yielded_records << record
+    }
+
+
+    manually_extracted = Nokogiri::XML.parse(File.open(@xml_sample_path)).xpath(@each_record_xpath, @namespaces)
+    manually_extracted.collect do |node|
+      # nokogiri makes it so hard to reliably get an Element to serialize to XML with all
+      # it's inherited namespace declerations. :( We're only doing this for testing purposes
+      # anyway. This may not handle everything, but handles what we need in the test right now
+      if node.namespace
+        node["xmlns"] = node.namespace.href
+      end
+    end
+
+    assert_length manually_extracted.size, yielded_records
+    assert yielded_records.all? {|r| r.kind_of? Nokogiri::XML::Document }
+    assert_equal manually_extracted.collect(&:to_xml), yielded_records.collect(&:root).collect(&:to_xml)
+  end
+
+  describe "without each_record_xpath" do
+    before do
+      @xml_sample_path = support_file_path("namespace-test.xml")
+    end
+    it "yields whole file as one record" do
+      @reader = Traject::ExperimentalNokogiriStreamingReader.new(File.open(@xml_sample_path), {})
+
+      yielded_records = @reader.to_a
+
+      assert_length 1, yielded_records
+      assert_equal Nokogiri::XML.parse(File.open(@xml_sample_path)).to_xml, yielded_records.first.to_xml
+    end
+  end
+end
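The experimental streaming reader above is driven by the same `nokogiri.*` settings used by the supported `Traject::NokogiriReader`. A hedged sketch of those settings in ordinary use; the file name is an example:

```ruby
# Sketch: reading each <record> out of an OAI-PMH style file with the
# supported (non-streaming) NokogiriReader; the streaming reader is experimental.
require 'traject'
require 'traject/nokogiri_reader'

reader = Traject::NokogiriReader.new(File.open("sample-oai-pmh.xml"), {
  "nokogiri.namespaces"        => { "oai" => "http://www.openarchives.org/OAI/2.0/" },
  "nokogiri.each_record_xpath" => "//oai:record"
})

reader.each do |record|
  # each yielded record is a Nokogiri::XML::Document for one matched node
  puts record.root.name
end
```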
data/test/indexer/context_test.rb CHANGED
@@ -5,7 +5,7 @@ describe "Traject::Indexer::Context" do
   describe "source_record_id" do
     before do
       @record = MARC::Reader.new(support_file_path('test_data.utf8.mrc')).first
-      @context = Traject::Indexer::Context.new
+      @context = Traject::Indexer::Context.new(source_record_id_proc: Traject::Indexer::MarcIndexer.new.source_record_id_proc)
       @record_001 = " 00282214 " # from the mrc file
     end
 
@@ -13,23 +13,33 @@ describe "Traject::Indexer::Context" do
       @context.source_record = @record
       assert_equal @record_001, @context.source_record_id
     end
+  end
 
-
-
-
-
+  describe "#record_inspect" do
+    before do
+      @record = MARC::Reader.new(support_file_path('test_data.utf8.mrc')).first
+      @source_record_id_proc = Traject::Indexer::MarcIndexer.new.source_record_id_proc
+      @record_001 = " 00282214 " # from the mrc file
 
-
-      @
-      @
-      assert_equal 'the_record_id', @context.source_record_id
+      @position = 10
+      @input_name = "some_file.mrc"
+      @position_in_input = 10
     end
 
-    it "
-      @context
-
-
+    it "can print complete inspect label" do
+      @context = Traject::Indexer::Context.new(
+        source_record: @record,
+        source_record_id_proc: @source_record_id_proc,
+        position: @position,
+        input_name: @input_name,
+        position_in_input: @position_in_input
+      )
+      @context.output_hash["id"] = "output_id"
+
+      assert_equal "<record ##{@position} (#{@input_name} ##{@position_in_input}), source_id:#{@record_001} output_id:output_id>", @context.record_inspect
     end
+
   end
 
+
 end
data/test/indexer/error_handler_test.rb ADDED
@@ -0,0 +1,59 @@
+require 'test_helper'
+
+describe 'Custom mapping error handler' do
+  # the exception thrown by the custom handler
+  class CustomFakeException < StandardError; end
+
+  let(:indexer) { Traject::Indexer.new }
+
+  it 'invokes the default handler when custom handler is not set' do
+    output = StringIO.new
+    logger =Logger.new(output)
+    indexer.logger = logger
+    indexer.configure do
+      to_field 'id' do |_, _, _|
+        raise CustomFakeException, "I just like raising errors"
+      end
+    end
+
+    e = assert_raises(CustomFakeException) do
+      indexer.map_record({})
+    end
+
+    assert_equal "I just like raising errors", e.message
+    assert output.string =~ /while executing \(to_field \"id\" at .*error_handler_test.rb:\d+\)/
+    assert output.string =~ /CustomFakeException: I just like raising errors/
+  end
+
+  it 'invokes the custom handler when set' do
+    indexer.configure do
+      settings do
+        provide 'mapping_rescue', -> (ctx, e) {
+          raise CustomFakeException, "custom handler called #{ctx.record_inspect}: #{ctx.index_step.inspect}, #{e.inspect}"
+        }
+      end
+
+      to_field 'id' do |_context , _exception|
+        raise 'this was always going to fail'
+      end
+    end
+    e = assert_raises(CustomFakeException) { indexer.map_record({}) }
+    assert e.message =~ /\(to_field \"id\" at .*error_handler_test.rb:\d+\)/
+  end
+
+  it "custom handler can skip and continue" do
+    indexer.configure do
+      settings do
+        provide "mapping_rescue", -> (context, exception) {
+          context.skip!
+        }
+      end
+
+      to_field 'id' do |_context , _exception|
+        raise 'this was always going to fail'
+      end
+    end
+
+    assert_nil indexer.map_record({})
+  end
+end
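For reference, a sketch of what the new `mapping_rescue` setting might look like in an ordinary traject config file, mirroring the "skip and continue" test above; the log message is invented:

```ruby
# Sketch of a mapping_rescue handler: log the failure and skip the record
# instead of aborting the whole run.
settings do
  provide "mapping_rescue", ->(context, exception) {
    context.logger.error("Skipping #{context.record_inspect}: #{exception}")
    context.skip!
  }
end
```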