traject 2.3.4 → 3.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -0,0 +1,177 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'traject/indexer'
|
5
|
+
|
6
|
+
# should be built into every indexer
|
7
|
+
describe "Traject::Macros::Transformation" do
|
8
|
+
before do
|
9
|
+
@indexer = Traject::Indexer.new
|
10
|
+
@record = nil
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "translation_map" do
|
14
|
+
it "translates" do
|
15
|
+
@indexer.configure do
|
16
|
+
to_field "cataloging_agency", literal("DLC"), translation_map("marc_040a_translate_test")
|
17
|
+
end
|
18
|
+
output = @indexer.map_record(@record)
|
19
|
+
assert_equal ["Library of Congress"], output["cataloging_agency"]
|
20
|
+
end
|
21
|
+
|
22
|
+
it "can merge multiple" do
|
23
|
+
@indexer.configure do
|
24
|
+
to_field "result", literal("key_to_be_overridden"), translation_map("ruby_map", "yaml_map")
|
25
|
+
end
|
26
|
+
output = @indexer.map_record(@record)
|
27
|
+
assert_equal ["value_from_yaml"], output["result"]
|
28
|
+
end
|
29
|
+
|
30
|
+
it "can merge multiple with hash" do
|
31
|
+
@indexer.configure do
|
32
|
+
to_field "result", literal("key_to_be_overridden"), translation_map("ruby_map", "yaml_map", {"key_to_be_overridden" => "value_from_inline_hash"})
|
33
|
+
end
|
34
|
+
output = @indexer.map_record(@record)
|
35
|
+
assert_equal ["value_from_inline_hash"], output["result"]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "transform" do
|
40
|
+
it "transforms with block" do
|
41
|
+
@indexer.configure do
|
42
|
+
to_field "sample_field", literal("one"), literal("two"), transform(&:upcase)
|
43
|
+
end
|
44
|
+
output = @indexer.map_record(@record)
|
45
|
+
assert_equal ["ONE", "TWO"], output["sample_field"]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "transforms with proc arg" do
|
49
|
+
@indexer.configure do
|
50
|
+
to_field "sample_field", literal("one"), literal("two"), transform(->(val) { val.tr('aeiou', '!') })
|
51
|
+
end
|
52
|
+
output = @indexer.map_record(@record)
|
53
|
+
assert_equal ["!n!", "tw!"], output["sample_field"]
|
54
|
+
end
|
55
|
+
|
56
|
+
it "transforms with both, in correct order" do
|
57
|
+
@indexer.configure do
|
58
|
+
to_field "sample_field", literal("one"), literal("two"), transform(->(val) { val.tr('aeiou', '!') }, &:upcase)
|
59
|
+
end
|
60
|
+
output = @indexer.map_record(@record)
|
61
|
+
assert_equal ["!N!", "TW!"], output["sample_field"]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "default" do
|
66
|
+
it "adds default to empty accumulator" do
|
67
|
+
@indexer.configure do
|
68
|
+
to_field "test", default("default")
|
69
|
+
end
|
70
|
+
output = @indexer.map_record(@record)
|
71
|
+
assert_equal ["default"], output["test"]
|
72
|
+
end
|
73
|
+
|
74
|
+
it "does not add default if value present" do
|
75
|
+
@indexer.configure do
|
76
|
+
to_field "test", literal("value"), default("defaut")
|
77
|
+
end
|
78
|
+
output = @indexer.map_record(@record)
|
79
|
+
assert_equal ["value"], output["test"]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "first_only" do
|
84
|
+
it "takes only first in multi-value" do
|
85
|
+
@indexer.configure do
|
86
|
+
to_field "test", literal("one"), literal("two"), literal("three"), first_only
|
87
|
+
end
|
88
|
+
output = @indexer.map_record(@record)
|
89
|
+
assert_equal ["one"], output["test"]
|
90
|
+
end
|
91
|
+
|
92
|
+
it "no-ops on nil" do
|
93
|
+
@indexer.configure do
|
94
|
+
to_field "test", first_only
|
95
|
+
end
|
96
|
+
output = @indexer.map_record(@record)
|
97
|
+
assert_nil output["test"]
|
98
|
+
end
|
99
|
+
|
100
|
+
it "no-ops on single value" do
|
101
|
+
@indexer.configure do
|
102
|
+
to_field "test", literal("one"), first_only
|
103
|
+
end
|
104
|
+
output = @indexer.map_record(@record)
|
105
|
+
assert_equal ["one"], output["test"]
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
describe "unique" do
|
110
|
+
it "uniqs" do
|
111
|
+
@indexer.configure do
|
112
|
+
to_field "test", literal("one"), literal("two"), literal("one"), literal("three"), unique
|
113
|
+
end
|
114
|
+
output = @indexer.map_record(@record)
|
115
|
+
assert_equal ["one", "two", "three"], output["test"]
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe "strip" do
|
120
|
+
it "strips" do
|
121
|
+
@indexer.configure do
|
122
|
+
to_field "test", literal(" one"), literal(" two "), strip
|
123
|
+
end
|
124
|
+
output = @indexer.map_record(@record)
|
125
|
+
assert_equal ["one", "two"], output["test"]
|
126
|
+
end
|
127
|
+
|
128
|
+
it "strips unicode whitespace" do
|
129
|
+
@indexer.configure do
|
130
|
+
to_field "test", literal(" \u00A0 \u2002 one \u202F "), strip
|
131
|
+
end
|
132
|
+
output = @indexer.map_record(@record)
|
133
|
+
assert_equal ["one"], output["test"]
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
describe "split" do
|
138
|
+
it "splits" do
|
139
|
+
@indexer.configure do
|
140
|
+
to_field "test", literal("one.two"), split(".")
|
141
|
+
end
|
142
|
+
output = @indexer.map_record(@record)
|
143
|
+
assert_equal ["one", "two"], output["test"]
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "append" do
|
148
|
+
it "appends suffix" do
|
149
|
+
@indexer.configure do
|
150
|
+
to_field "test", literal("one"), literal("two"), append(".suffix")
|
151
|
+
end
|
152
|
+
output = @indexer.map_record(@record)
|
153
|
+
assert_equal ["one.suffix", "two.suffix"], output["test"]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
describe "prepend" do
|
158
|
+
it "prepends prefix" do
|
159
|
+
@indexer.configure do
|
160
|
+
to_field "test", literal("one"), literal("two"), prepend("prefix.")
|
161
|
+
end
|
162
|
+
output = @indexer.map_record(@record)
|
163
|
+
assert_equal ["prefix.one", "prefix.two"], output["test"]
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
describe "gsub" do
|
168
|
+
it "gsubs" do
|
169
|
+
@indexer.configure do
|
170
|
+
to_field "test", literal("one1212two23three"), gsub(/\d+/, ' ')
|
171
|
+
end
|
172
|
+
output = @indexer.map_record(@record)
|
173
|
+
assert_equal ["one two three"], output["test"]
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
@@ -196,12 +196,11 @@ describe "Traject::Indexer#map_record" do
|
|
196
196
|
end
|
197
197
|
|
198
198
|
@indexer.to_field('afterSkip') do |rec, acc|
|
199
|
-
|
199
|
+
raise ArgumentError, "intentional, should never happen"
|
200
200
|
end
|
201
201
|
|
202
202
|
output = @indexer.map_record(@record)
|
203
|
-
|
204
|
-
assert_nil output['afterSkip']
|
203
|
+
assert_nil output
|
205
204
|
end
|
206
205
|
|
207
206
|
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::NokogiriIndexer" do
|
4
|
+
before do
|
5
|
+
Traject::Indexer.send(:default_settings=, Traject::Indexer.default_settings.merge("solr_writer.thread_pool" => 0, "processing_thread_pool" => 0))
|
6
|
+
|
7
|
+
|
8
|
+
@xml_sample_path = support_file_path("sample-oai-pmh.xml")
|
9
|
+
@indexer = Traject::Indexer::NokogiriIndexer.new("writer_class_name" => "Traject::ArrayWriter", "solr_writer.thread_pool" => 0, "processing_thread_pool" => 0)
|
10
|
+
@namespaces = {
|
11
|
+
"oai" => "http://www.openarchives.org/OAI/2.0/",
|
12
|
+
"dc" => "http://purl.org/dc/elements/1.1/",
|
13
|
+
"oai_dc" => "http://www.openarchives.org/OAI/2.0/oai_dc/",
|
14
|
+
"edm" => "http://www.europeana.eu/schemas/edm/"
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
it "smoke test" do
|
19
|
+
namespaces = @namespaces
|
20
|
+
@indexer.configure do
|
21
|
+
settings do
|
22
|
+
provide "nokogiri.namespaces", namespaces
|
23
|
+
provide "nokogiri.each_record_xpath", "//oai:record"
|
24
|
+
end
|
25
|
+
to_field "id", extract_xpath("//oai:metadata/oai_dc:dc/dc:identifier"), first_only
|
26
|
+
to_field "title", extract_xpath("//oai:metadata/oai_dc:dc/dc:title")
|
27
|
+
end
|
28
|
+
|
29
|
+
@indexer.process(File.open(@xml_sample_path))
|
30
|
+
|
31
|
+
results = @indexer.writer.values
|
32
|
+
|
33
|
+
source_doc = File.open(@xml_sample_path) { |file| Nokogiri::XML.parse(file) } # block form closes the fixture file after parsing
|
34
|
+
|
35
|
+
assert_equal source_doc.xpath("//oai:record", @namespaces).count, results.count
|
36
|
+
assert(results.all? { |hash|
|
37
|
+
hash["id"] && hash["id"].length == 1 &&
|
38
|
+
hash["title"] && hash["title"].length >= 1
|
39
|
+
}, "expected results have expected values")
|
40
|
+
end
|
41
|
+
|
42
|
+
it "namespaces to extract_xpath" do
|
43
|
+
namespaces = @namespaces.merge(edm: "http://this.is.wrong")
|
44
|
+
@indexer.configure do
|
45
|
+
settings do
|
46
|
+
provide "nokogiri.namespaces", namespaces
|
47
|
+
provide "nokogiri.each_record_xpath", "//oai:record"
|
48
|
+
end
|
49
|
+
to_field "rights", extract_xpath("//oai:metadata/oai_dc:dc/edm:rights", ns: { edm: "http://www.europeana.eu/schemas/edm/" })
|
50
|
+
end
|
51
|
+
|
52
|
+
@indexer.process(File.open(@xml_sample_path))
|
53
|
+
|
54
|
+
results = @indexer.writer.values
|
55
|
+
|
56
|
+
refute_empty results.last["rights"]
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "xpath to non-terminal element" do
|
60
|
+
before do
|
61
|
+
@xml = <<-EOS
|
62
|
+
<record>
|
63
|
+
<name>
|
64
|
+
<first>José</first>
|
65
|
+
<last>Lopez</last>
|
66
|
+
</name>
|
67
|
+
<name>
|
68
|
+
<first>Sue</first>
|
69
|
+
<last>Jones</last>
|
70
|
+
</name>
|
71
|
+
</record>
|
72
|
+
EOS
|
73
|
+
|
74
|
+
@indexer.configure do
|
75
|
+
settings do
|
76
|
+
provide "nokogiri.each_record_xpath", "//record"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
it "outputs text" do
|
82
|
+
@indexer.configure { to_field "name", extract_xpath("/record/name") }
|
83
|
+
@indexer.process(StringIO.new(@xml))
|
84
|
+
results = @indexer.writer.values
|
85
|
+
|
86
|
+
assert_equal( {"name" => ["José Lopez", "Sue Jones"]}, results.first )
|
87
|
+
end
|
88
|
+
|
89
|
+
it "outputs Nokogiri::XML::Element with to_text: false" do
|
90
|
+
@indexer.configure { to_field "name", extract_xpath("/record/name", to_text: false) }
|
91
|
+
@indexer.process(StringIO.new(@xml))
|
92
|
+
results = @indexer.writer.values
|
93
|
+
|
94
|
+
values = results.first["name"]
|
95
|
+
|
96
|
+
assert(values.each { |result|
|
97
|
+
result["name"].kind_of?(Nokogiri::XML::Element) &&
|
98
|
+
result["name"].name == "name"
|
99
|
+
})
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#process_record" do
|
4
|
+
before do
|
5
|
+
@writer = Traject::ArrayWriter.new
|
6
|
+
@indexer = Traject::Indexer.new(writer: @writer) do
|
7
|
+
to_field "record", lambda { |rec, acc| acc << rec }
|
8
|
+
end
|
9
|
+
@record = {key: "value"}
|
10
|
+
end
|
11
|
+
|
12
|
+
it "sends to writer" do
|
13
|
+
@indexer.process_record(@record)
|
14
|
+
assert_equal [{"record" => [@record] }], @writer.values
|
15
|
+
end
|
16
|
+
|
17
|
+
it "returns context" do
|
18
|
+
context = @indexer.process_record(@record)
|
19
|
+
assert context.is_a?(Traject::Indexer::Context)
|
20
|
+
assert_equal @record, context.source_record
|
21
|
+
end
|
22
|
+
|
23
|
+
it "skips if skipped" do
|
24
|
+
@indexer = Traject::Indexer.new(writer: @writer) do
|
25
|
+
to_field "record", lambda { |rec, acc, context| acc << rec; context.skip! }
|
26
|
+
end
|
27
|
+
context = @indexer.process_record(@record)
|
28
|
+
|
29
|
+
assert context.skip?
|
30
|
+
assert_equal [], @writer.values
|
31
|
+
end
|
32
|
+
|
33
|
+
it "raises exceptions out" do
|
34
|
+
@indexer = Traject::Indexer.new(writer: @writer) do
|
35
|
+
to_field "record", lambda { |rec, acc, context| acc << rec; raise ArgumentError, "intentional" }
|
36
|
+
end
|
37
|
+
assert_raises(ArgumentError) do
|
38
|
+
@indexer.process_record(@record)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
it "aliases <<" do
|
43
|
+
assert_equal @indexer.method(:process_record), @indexer.method(:<<)
|
44
|
+
|
45
|
+
@indexer << @record
|
46
|
+
end
|
47
|
+
|
48
|
+
it "raises on completed indexer" do
|
49
|
+
@indexer.complete
|
50
|
+
assert_raises Traject::Indexer::CompletedStateError do
|
51
|
+
@indexer.process_record(@record)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#process_with" do
|
4
|
+
let(:input_records) { [
|
5
|
+
{ one: "one" },
|
6
|
+
{ two: "two" },
|
7
|
+
{ three: "three" }
|
8
|
+
] }
|
9
|
+
let(:array_writer) { Traject::ArrayWriter.new }
|
10
|
+
let(:indexer) {
|
11
|
+
Traject::Indexer.new do
|
12
|
+
to_field "records", lambda { |rec, acc|
|
13
|
+
acc << rec
|
14
|
+
}
|
15
|
+
end
|
16
|
+
}
|
17
|
+
|
18
|
+
it "processes" do
|
19
|
+
writer = indexer.process_with(input_records, array_writer)
|
20
|
+
assert_equal([{"records"=>[{:one=>"one"}]}, {"records"=>[{:two=>"two"}]}, {"records"=>[{:three=>"three"}]}], writer.values)
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "calls close" do
|
24
|
+
before do
|
25
|
+
array_writer.extend(Module.new do
|
26
|
+
def close
|
27
|
+
@close_called = true
|
28
|
+
end
|
29
|
+
def close_called?
|
30
|
+
@close_called
|
31
|
+
end
|
32
|
+
end)
|
33
|
+
end
|
34
|
+
|
35
|
+
it "calls by default" do
|
36
|
+
writer = indexer.process_with(input_records, array_writer)
|
37
|
+
assert writer.close_called?
|
38
|
+
end
|
39
|
+
|
40
|
+
it "does not call if told not to" do
|
41
|
+
writer = indexer.process_with(input_records, array_writer, close_writer: false)
|
42
|
+
assert ! writer.close_called?
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "after_processing steps" do
|
47
|
+
let(:indexer) {
|
48
|
+
Traject::Indexer.new do
|
49
|
+
after_processing do
|
50
|
+
raise "Don't call me"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
}
|
54
|
+
it "are not called" do
|
55
|
+
# should not raise
|
56
|
+
indexer.process_with(input_records, array_writer)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
describe "with block as destination" do
|
61
|
+
it "calls block for each record" do
|
62
|
+
received = []
|
63
|
+
indexer.process_with(input_records) do |context|
|
64
|
+
received << context
|
65
|
+
end
|
66
|
+
|
67
|
+
assert_equal 3, received.length
|
68
|
+
assert received.all? { |o| o.kind_of?(Traject::Indexer::Context)}
|
69
|
+
assert_equal input_records.collect { |r| [r] }, received.collect { |c| c.output_hash["records"] }
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe "exceptions" do
|
74
|
+
let(:indexer) {
|
75
|
+
Traject::Indexer.new do
|
76
|
+
to_field "foo", lambda { |rec, acc|
|
77
|
+
if rec.keys.include?(:one)
|
78
|
+
raise ArgumentError, "intentional"
|
79
|
+
end
|
80
|
+
|
81
|
+
acc << rec
|
82
|
+
}
|
83
|
+
end
|
84
|
+
}
|
85
|
+
|
86
|
+
describe "by default" do
|
87
|
+
it "raises" do
|
88
|
+
assert_raises(ArgumentError) do
|
89
|
+
indexer.process_with(input_records, array_writer)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "with rescue_with" do
|
95
|
+
it "calls block and keeps processing" do
|
96
|
+
rescued = []
|
97
|
+
rescue_lambda = lambda do |context, exception|
|
98
|
+
rescued << {
|
99
|
+
context: context,
|
100
|
+
exception: exception
|
101
|
+
}
|
102
|
+
end
|
103
|
+
|
104
|
+
_writer = indexer.process_with(input_records, array_writer, rescue_with: rescue_lambda)
|
105
|
+
|
106
|
+
# not including the one that raised
|
107
|
+
assert_equal 2, array_writer.contexts.length
|
108
|
+
# and the rescue_with handler was called once, with the context and the exception
|
109
|
+
|
110
|
+
assert_equal 1, rescued.length
|
111
|
+
assert rescued.first[:context].is_a?(Traject::Indexer::Context)
|
112
|
+
assert_equal ArgumentError, rescued.first[:exception].class
|
113
|
+
assert_equal "intentional", rescued.first[:exception].message
|
114
|
+
end
|
115
|
+
|
116
|
+
it "can raise from rescue" do
|
117
|
+
rescue_lambda = lambda do |context, exception|
|
118
|
+
raise exception
|
119
|
+
end
|
120
|
+
|
121
|
+
assert_raises(ArgumentError) do
|
122
|
+
indexer.process_with(input_records, array_writer, rescue_with: rescue_lambda) # the handler re-raises, so the ArgumentError must propagate
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
describe "skipped records" do
|
128
|
+
let(:indexer) {
|
129
|
+
Traject::Indexer.new do
|
130
|
+
to_field "foo", literal("value")
|
131
|
+
each_record do |record, context|
|
132
|
+
context.skip!
|
133
|
+
end
|
134
|
+
end
|
135
|
+
}
|
136
|
+
it "calls on_skipped, does not send to writer" do
|
137
|
+
skip_calls = []
|
138
|
+
on_skipped = lambda { |*args| skip_calls << args }
|
139
|
+
|
140
|
+
writer = indexer.process_with(input_records, array_writer, on_skipped: on_skipped)
|
141
|
+
|
142
|
+
assert_equal [], writer.values, "nothing sent to writer"
|
143
|
+
assert_equal input_records.count, skip_calls.count, "skip proc called"
|
144
|
+
assert skip_calls.all? {|a| a.length == 1 && a[0].kind_of?(Traject::Indexer::Context) }, "skip proc called with single arg"
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|