traject 3.0.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -4
- data/CHANGES.md +65 -0
- data/README.md +9 -4
- data/doc/indexing_rules.md +5 -6
- data/doc/programmatic_use.md +25 -1
- data/doc/settings.md +4 -0
- data/doc/xml.md +12 -0
- data/lib/traject/indexer.rb +40 -4
- data/lib/traject/indexer/context.rb +45 -0
- data/lib/traject/indexer/step.rb +8 -12
- data/lib/traject/line_writer.rb +36 -4
- data/lib/traject/macros/marc21.rb +2 -2
- data/lib/traject/macros/marc21_semantics.rb +15 -12
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/nokogiri_reader.rb +17 -19
- data/lib/traject/oai_pmh_nokogiri_reader.rb +9 -3
- data/lib/traject/solr_json_writer.rb +167 -29
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/delimited_writer_test.rb +14 -16
- data/test/indexer/class_level_configuration_test.rb +127 -0
- data/test/indexer/context_test.rb +64 -1
- data/test/indexer/error_handler_test.rb +18 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/nokogiri_reader_test.rb +66 -3
- data/test/solr_json_writer_test.rb +175 -7
- data/test/test_support/date_resort_to_264.marc +1 -0
- data/traject.gemspec +4 -4
- metadata +37 -16
--- /dev/null
+++ b/data/test/indexer/class_level_configuration_test.rb
@@ -0,0 +1,127 @@
+require 'test_helper'
+
+describe "Class-level configuration of Indexer sub-class" do
+  # Declaring a class inline in minitest isn't great, this really is a globally
+  # available class now, other tests shouldn't re-use this class name. But it works
+  # for testing for now.
+  class TestIndexerSubclass < Traject::Indexer
+    configure do
+      settings do
+        provide "class_level", "TestIndexerSubclass"
+      end
+
+      to_field "field", literal("value")
+      each_record do |rec, context|
+        context.output_hash["from_each_record"] ||= []
+        context.output_hash["from_each_record"] << "value"
+      end
+    end
+
+    def self.default_settings
+      @default_settings ||= super.merge(
+        "set_by_default_setting_no_override" => "TestIndexerSubclass",
+        "set_by_default_setting" => "TestIndexerSubclass"
+      )
+    end
+  end
+
+
+  before do
+    @indexer = TestIndexerSubclass.new
+  end
+
+  it "uses class-level configuration" do
+    result = @indexer.map_record(Object.new)
+
+    assert_equal ['value'], result['field']
+    assert_equal ['value'], result['from_each_record']
+  end
+
+  it "uses class-level configuration and instance-level configuration" do
+    @indexer.configure do
+      to_field "field", literal("from-instance-config")
+      to_field "instance_field", literal("from-instance-config")
+    end
+
+    result = @indexer.map_record(Object.new)
+    assert_equal ['value', 'from-instance-config'], result['field']
+    assert_equal ['from-instance-config'], result["instance_field"]
+  end
+
+  describe "multiple class-level configure" do
+    class MultipleConfigureIndexer < Traject::Indexer
+      configure do
+        to_field "field", literal("value")
+      end
+      configure do
+        to_field "field", literal("value from second configure")
+        to_field "second_call", literal("value from second configure")
+      end
+    end
+
+    before do
+      @indexer = MultipleConfigureIndexer.new
+    end
+
+    it "lets you call class-level configure multiple times and aggregates" do
+      result = @indexer.map_record(Object.new)
+      assert_equal ['value', 'value from second configure'], result['field']
+      assert_equal ['value from second configure'], result['second_call']
+    end
+  end
+
+  describe "with multi-level subclass" do
+    class TestIndexerSubclassSubclass < TestIndexerSubclass
+      configure do
+        settings do
+          provide "class_level", "TestIndexerSubclassSubclass"
+        end
+
+        to_field "field", literal("from-sub-subclass")
+        to_field "subclass_field", literal("from-sub-subclass")
+      end
+
+      def self.default_settings
+        @default_settings ||= super.merge(
+          "set_by_default_setting" => "TestIndexerSubclassSubclass"
+        )
+      end
+
+    end
+
+    before do
+      @indexer = TestIndexerSubclassSubclass.new
+    end
+
+    it "lets subclass override settings 'provide'" do
+      skip("This would be nice but is currently architecturally hard")
+      assert_equal "TestIndexerSubclassSubclass", @indexer.settings["class_level"]
+    end
+
+    it "lets subclass override default settings" do
+      assert_equal "TestIndexerSubclassSubclass", @indexer.settings["set_by_default_setting"]
+      assert_equal "TestIndexerSubclass", @indexer.settings["set_by_default_setting_no_override"]
+    end
+
+    it "uses configuration from all inheritance" do
+      result = @indexer.map_record(Object.new)
+
+      assert_equal ['value', 'from-sub-subclass'], result['field']
+      assert_equal ['value'], result['from_each_record']
+      assert_equal ['from-sub-subclass'], result['subclass_field']
+    end
+
+    it "uses configuration from all inheritance plus instance" do
+      @indexer.configure do
+        to_field "field", literal("from-instance")
+        to_field "instance_field", literal("from-instance")
+      end
+
+      result = @indexer.map_record(Object.new)
+
+      assert_equal ['value', 'from-sub-subclass', 'from-instance'], result['field']
+      assert_equal ['from-instance'], result['instance_field']
+    end
+  end
+
+end
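
The class-level `configure` exercised above lets indexing rules live on a reusable `Traject::Indexer` subclass rather than being re-declared on each instance. A minimal sketch of hypothetical use (the class, field, and setting names here are illustrative, not from the gem):

```ruby
require 'traject'

# Hypothetical reusable indexer: rules declared at the class level apply
# to every instance, and each instance can still layer on its own config.
class MyAppIndexer < Traject::Indexer
  configure do
    settings do
      provide "solr.url", "http://localhost:8983/solr/my-core" # illustrative URL
    end
    to_field "source", literal("my_app")
  end
end

indexer = MyAppIndexer.new
indexer.configure do
  to_field "extra", literal("per-instance value")
end
indexer.map_record(Object.new) # any source record object, as in the tests above
```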

--- a/data/test/indexer/context_test.rb
+++ b/data/test/indexer/context_test.rb
@@ -38,8 +38,71 @@ describe "Traject::Indexer::Context" do

      assert_equal "<record ##{@position} (#{@input_name} ##{@position_in_input}), source_id:#{@record_001} output_id:output_id>", @context.record_inspect
    end
-
  end

+  describe "#add_output" do
+    before do
+      @context = Traject::Indexer::Context.new
+    end
+    it "adds one value to nil" do
+      @context.add_output(:key, "value")
+      assert_equal @context.output_hash, { "key" => ["value"] }
+    end
+
+    it "adds multiple values to nil" do
+      @context.add_output(:key, "value1", "value2")
+      assert_equal @context.output_hash, { "key" => ["value1", "value2"] }
+    end
+
+    it "adds one value to existing accumulator" do
+      @context.output_hash["key"] = ["value1"]
+      @context.add_output(:key, "value2")
+      assert_equal @context.output_hash, { "key" => ["value1", "value2"] }
+    end
+
+    it "uniqs by default" do
+      @context.output_hash["key"] = ["value1"]
+      @context.add_output(:key, "value1")
+      assert_equal @context.output_hash, { "key" => ["value1"] }
+    end
+
+    it "does not unique if allow_duplicate_values" do
+      @context.settings = { Traject::Indexer::ToFieldStep::ALLOW_DUPLICATE_VALUES => true }
+      @context.output_hash["key"] = ["value1"]
+
+      @context.add_output(:key, "value1")
+      assert_equal @context.output_hash, { "key" => ["value1", "value1"] }
+    end
+
+    it "ignores nil values by default" do
+      @context.add_output(:key, "value1", nil, "value2")
+      assert_equal @context.output_hash, { "key" => ["value1", "value2"] }
+    end
+
+    it "allows nil values if allow_nil_values" do
+      @context.settings = { Traject::Indexer::ToFieldStep::ALLOW_NIL_VALUES => true }

+      @context.add_output(:key, "value1", nil, "value2")
+      assert_equal @context.output_hash, { "key" => ["value1", nil, "value2"] }
+    end
+
+    it "ignores empty array by default" do
+      @context.add_output(:key)
+      @context.add_output(:key, nil)
+
+      assert_nil @context.output_hash["key"]
+    end
+
+    it "allows empty field if allow_empty_fields" do
+      @context.settings = { Traject::Indexer::ToFieldStep::ALLOW_EMPTY_FIELDS => true }
+
+      @context.add_output(:key, nil)
+      assert_equal @context.output_hash, { "key" => [] }
+    end
+
+    it "can add to multiple fields" do
+      @context.add_output(["field1", "field2"], "value1", "value2")
+      assert_equal @context.output_hash, { "field1" => ["value1", "value2"], "field2" => ["value1", "value2"] }
+    end
+  end
end
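
`Context#add_output`, tested above, is also convenient in hand-written indexing logic. A short sketch of hypothetical use inside an indexer configuration (field names are illustrative):

```ruby
require 'traject'

indexer = Traject::Indexer.new
indexer.configure do
  # add_output appends values to output_hash, creating the key as needed;
  # by default it uniqs values and drops nils (see the ALLOW_* settings
  # exercised in the tests above).
  each_record do |record, context|
    context.add_output("record_format", "xml")
    # one call can target several fields at once:
    context.add_output(["facet_a", "facet_b"], "value1", "value2")
  end
end
```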

--- a/data/test/indexer/error_handler_test.rb
+++ b/data/test/indexer/error_handler_test.rb
@@ -56,4 +56,22 @@ describe 'Custom mapping error handler' do

    assert_nil indexer.map_record({})
  end
+
+  it "uses logger from settings" do
+    desired_logger = Logger.new("/dev/null")
+    set_logger = nil
+    indexer.configure do
+      settings do
+        provide "logger", desired_logger
+        provide "mapping_rescue", -> (ctx, e) {
+          set_logger = ctx.logger
+        }
+      end
+      to_field 'id' do |_context, _exception|
+        raise 'this was always going to fail'
+      end
+    end
+    indexer.map_record({})
+    assert_equal desired_logger.object_id, set_logger.object_id
+  end
end
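
As the test above shows, the `mapping_rescue` setting accepts any callable taking `(context, exception)`, and the context now exposes the configured logger. A hedged sketch of a custom handler (the log message is illustrative):

```ruby
require 'traject'

indexer = Traject::Indexer.new
indexer.configure do
  settings do
    # Hypothetical handler: log the failing record and re-raise, aborting
    # the run instead of silently skipping the record.
    provide "mapping_rescue", lambda { |context, exception|
      context.logger.error("mapping failed for #{context.record_inspect}: #{exception}")
      raise exception
    }
  end
end
```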

--- a/data/test/indexer/macros/macros_marc21_semantics_test.rb
+++ b/data/test/indexer/macros/macros_marc21_semantics_test.rb
@@ -197,6 +197,10 @@ describe "Traject::Macros::Marc21Semantics" do
    # we take the first date. And need to deal with the u.
    assert_equal 1845, Marc21Semantics.publication_date(@record)
  end
+  it "resorts to 264c" do
+    @record = MARC::Reader.new(support_file_path "date_resort_to_264.marc").to_a.first
+    assert_equal 2015, Marc21Semantics.publication_date(@record)
+  end
  it "resorts to 260c" do
    @record = MARC::Reader.new(support_file_path "date_resort_to_260.marc").to_a.first
    assert_equal 1980, Marc21Semantics.publication_date(@record)

--- a/data/test/indexer/nokogiri_indexer_test.rb
+++ b/data/test/indexer/nokogiri_indexer_test.rb
@@ -109,6 +109,41 @@ describe "Traject::NokogiriIndexer" do
        result["name"].name == "name"
      })
    end
+  end

+  describe "xpath to attribute" do
+    let(:indexer) do
+      namespaces = @namespaces
+      Traject::Indexer::NokogiriIndexer.new("nokogiri.namespaces" => namespaces,
+                                            "nokogiri.each_record_xpath" => "//oai:record") do
+        to_field "status", extract_xpath("//oai:record/oai:header/@status")
+      end
+    end
+
+    let(:records) { Traject::NokogiriReader.new(StringIO.new(
+      <<-XML
+        <?xml version="1.0" encoding="UTF-8"?>
+        <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+          <responseDate>2020-03-03T04:16:09Z</responseDate>
+          <request verb="ListRecords" metadataPrefix="marc21" set="blacklight" from="2020-03-02T20:47:11Z">https://na02.alma.exlibrisgroup.com/view/oai/01TULI_INST/request</request>
+          <ListRecords>
+            <record>
+              <header status="deleted">
+                <identifier>oai:alma.01TULI_INST:991025803889703811</identifier>
+                <datestamp>2020-03-03T03:54:35Z</datestamp>
+                <setSpec>blacklight</setSpec>
+                <setSpec>rapid_print_journals</setSpec>
+                <setSpec>blacklight_qa</setSpec>
+              </header>
+            </record>
+          </ListRecords>
+        </OAI-PMH>
+      XML
+    ), []).to_a }
+
+    it "extracts the correct attribute" do
+      statuses = indexer.map_record(records.first)["status"]
+      assert_equal ["deleted"], statuses
+    end
  end
end

--- a/data/test/nokogiri_reader_test.rb
+++ b/data/test/nokogiri_reader_test.rb
@@ -1,6 +1,12 @@
require 'test_helper'
require 'traject/nokogiri_reader'

+# Note that JRuby Nokogiri can treat namespaces differently than MRI nokogiri.
+# Particularly when we extract elements from a larger document with `each_record_xpath`,
+# and put them in their own document, in JRuby nokogiri the xmlns declarations
+# can end up on different elements than expected, although the document should
+# be semantically equivalent to an XML-namespace-aware processor. See:
+# https://github.com/sparklemotion/nokogiri/issues/1875
describe "Traject::NokogiriReader" do
  describe "with namespaces" do
    before do
@@ -80,8 +86,22 @@ describe "Traject::NokogiriReader" do
      assert yielded_records.length > 0

      expected_namespaces = {"xmlns"=>"http://example.org/top", "xmlns:a"=>"http://example.org/a", "xmlns:b"=>"http://example.org/b"}
-
-
+
+      if !Traject::Util.is_jruby?
+        yielded_records.each do |rec|
+          assert_equal expected_namespaces, rec.namespaces
+        end
+      else
+        # jruby nokogiri shuffles things around, all we can really do is test that the namespaces
+        # are somewhere in the doc :( We rely on other tests to test semantic equivalence.
+        yielded_records.each do |rec|
+          assert_equal expected_namespaces, rec.collect_namespaces
+        end
+
+        whole_doc = Nokogiri::XML.parse(File.open(support_file_path("namespace-test.xml")))
+        whole_doc.xpath("//mytop:record", mytop: "http://example.org/top").each_with_index do |original_el, i|
+          assert ns_semantic_equivalent_xml?(original_el, yielded_records[i])
+        end
      end
    end
  end
@@ -114,6 +134,16 @@ describe "Traject::NokogiriReader" do
    end
  end

+  describe "strict_mode" do
+    it "raises on non-well-formed" do
+      # invalid because two sibling root nodes, XML requires one root node
+      reader = Traject::NokogiriReader.new(StringIO.new("<doc></doc><doc></doc>"), {"nokogiri.strict_mode" => "true" })
+      assert_raises(Nokogiri::XML::SyntaxError) {
+        reader.each { |r| }
+      }
+    end
+  end
+

  def shared_tests
    @reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
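
For reference, the `nokogiri.strict_mode` setting introduced above makes the reader raise `Nokogiri::XML::SyntaxError` on ill-formed input instead of parsing what it can. A minimal sketch (the file name and xpath are illustrative):

```ruby
require 'traject/nokogiri_reader'

# Hypothetical reader setup: strict_mode turns malformed XML into an
# immediate Nokogiri::XML::SyntaxError rather than best-effort parsing.
reader = Traject::NokogiriReader.new(
  File.open("records.xml"),
  "nokogiri.each_record_xpath" => "//record",
  "nokogiri.strict_mode"       => "true"
)
reader.each { |doc| puts doc.root.name }
```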
@@ -139,7 +169,40 @@ describe "Traject::NokogiriReader" do

    assert_length manually_extracted.size, yielded_records
    assert yielded_records.all? {|r| r.kind_of? Nokogiri::XML::Document }
-
+
+    expected_xml = manually_extracted
+    actual_xml = yielded_records.collect(&:root)
+
+    expected_xml.size.times do |i|
+      if !Traject::Util.is_jruby?
+        assert_equal expected_xml[i-1].to_xml, actual_xml[i-1].to_xml
+      else
+        # jruby shuffles the xmlns declarations around, but they should
+        # be semantically equivalent to a namespace-aware processor
+        assert ns_semantic_equivalent_xml?(expected_xml[i-1], actual_xml[i-1])
+      end
+    end
+  end
+
+  # Jruby nokogiri can shuffle around where the `xmlns:ns` declarations appear, although it
+  # _ought_ not to be semantically different for a namespace-aware parser -- nodes are still in
+  # same namespaces. JRuby may differ from what MRI does with same code, and may differ from
+  # the way an element appeared in input when extracting records from a larger input doc.
+  # There isn't much we can do about this, but we can write a recursive method
+  # that hopefully compares XML to make sure it really is semantically equivalent to
+  # a namespace-aware processor, and hope we got that right.
+  def ns_semantic_equivalent_xml?(noko_a, noko_b)
+    noko_a = noko_a.root if noko_a.kind_of?(Nokogiri::XML::Document)
+    noko_b = noko_b.root if noko_b.kind_of?(Nokogiri::XML::Document)
+
+    noko_a.name == noko_b.name &&
+      noko_a.namespace&.prefix == noko_b.namespace&.prefix &&
+      noko_a.namespace&.href == noko_b.namespace&.href &&
+      noko_a.attributes == noko_b.attributes &&
+      noko_a.children.length == noko_b.children.length &&
+      noko_a.children.each_with_index.all? do |a_child, index|
+        ns_semantic_equivalent_xml?(a_child, noko_b.children[index])
+      end
  end

  describe "without each_record_xpath" do

--- a/data/test/solr_json_writer_test.rb
+++ b/data/test/solr_json_writer_test.rb
@@ -137,6 +137,26 @@ describe "Traject::SolrJsonWriter" do
    assert_length 1, JSON.parse(post_args[1][1]), "second batch posted with last remaining doc"
  end

+  it "retries batch as individual records on failure" do
+    @writer = create_writer("solr_writer.batch_size" => 2, "solr_writer.max_skipped" => 10)
+    @fake_http_client.response_status = 500
+
+    2.times do |i|
+      @writer.put context_with({"id" => "doc_#{i}", "key" => "value"})
+    end
+    @writer.close
+
+    # 1 batch, then 2 for re-trying each individually
+    assert_length 3, @fake_http_client.post_args
+
+    batch_update = @fake_http_client.post_args.first
+    assert_length 2, JSON.parse(batch_update[1])
+
+    individual_update1, individual_update2 = @fake_http_client.post_args[1], @fake_http_client.post_args[2]
+    assert_length 1, JSON.parse(individual_update1[1])
+    assert_length 1, JSON.parse(individual_update2[1])
+  end
+
  it "can #flush" do
    2.times do |i|
      doc = {"id" => "doc_#{i}", "key" => "value"}
@@ -150,15 +170,137 @@ describe "Traject::SolrJsonWriter" do
    assert_length 1, @fake_http_client.post_args, "Has flushed to solr"
  end

-  it "
-
-
-
+  it "defaults to not setting basic authentication" do
+    settings = { "solr.url" => "http://example.com/solr/foo" }
+    writer = Traject::SolrJsonWriter.new(settings)
+    auth = writer.instance_variable_get("@http_client")
+      .www_auth.basic_auth.instance_variable_get("@auth")
+    assert(auth.empty?)
+  end
+
+  it "allows basic authentication setup" do
+    settings = {
+      "solr.url" => "http://example.com/solr/foo",
+      "solr_writer.basic_auth_user" => "foo",
+      "solr_writer.basic_auth_password" => "bar",
+    }
+
+    writer = Traject::SolrJsonWriter.new(settings)
+    auth = writer.instance_variable_get("@http_client")
+      .www_auth.basic_auth.instance_variable_get("@auth")
+    assert(!auth.empty?)
+  end
+
+  describe "commit" do
+    it "commits on close when set" do
+      @writer = create_writer("solr.url" => "http://example.com", "solr_writer.commit_on_close" => "true")
+      @writer.put context_with({"id" => "one", "key" => ["value1", "value2"]})
+      @writer.close
+
+      last_solr_get = @fake_http_client.get_args.last
+
+      assert_equal "http://example.com/update/json?commit=true", last_solr_get[0]
+    end
+
+    it "commits on close with commit_solr_update_args" do
+      @writer = create_writer(
+        "solr.url" => "http://example.com",
+        "solr_writer.commit_on_close" => "true",
+        "solr_writer.commit_solr_update_args" => { softCommit: true }
+      )
+      @writer.put context_with({"id" => "one", "key" => ["value1", "value2"]})
+      @writer.close
+
+      last_solr_get = @fake_http_client.get_args.last
+
+      assert_equal "http://example.com/update/json?softCommit=true", last_solr_get[0]
+    end
+
+    it "can manually send commit" do
+      @writer = create_writer("solr.url" => "http://example.com")
+      @writer.commit
+
+      last_solr_get = @fake_http_client.get_args.last
+      assert_equal "http://example.com/update/json?commit=true", last_solr_get[0]
+    end
+
+    it "can manually send commit with specified args" do
+      @writer = create_writer("solr.url" => "http://example.com", "solr_writer.commit_solr_update_args" => { softCommit: true })
+      @writer.commit(commit: true, optimize: true, waitFlush: false)
+      last_solr_get = @fake_http_client.get_args.last
+      assert_equal "http://example.com/update/json?commit=true&optimize=true&waitFlush=false", last_solr_get[0]
+    end
+
+    it "uses commit_solr_update_args settings by default" do
+      @writer = create_writer(
+        "solr.url" => "http://example.com",
+        "solr_writer.commit_solr_update_args" => { softCommit: true }
+      )
+      @writer.commit
+
+      last_solr_get = @fake_http_client.get_args.last
+      assert_equal "http://example.com/update/json?softCommit=true", last_solr_get[0]
    end

-
+    it "overrides commit_solr_update_args with method arg" do
+      @writer = create_writer(
+        "solr.url" => "http://example.com",
+        "solr_writer.commit_solr_update_args" => { softCommit: true, foo: "bar" }
+      )
+      @writer.commit(commit: true)

-
-
+      last_solr_get = @fake_http_client.get_args.last
+      assert_equal "http://example.com/update/json?commit=true", last_solr_get[0]
+    end
+  end
+
+  describe "solr_writer.solr_update_args" do
+    before do
+      @writer = create_writer("solr_writer.solr_update_args" => { softCommit: true })
+    end
+
+    it "sends update args" do
+      @writer.put context_with({"id" => "one", "key" => ["value1", "value2"]})
+      @writer.close
+
+      assert_equal 1, @fake_http_client.post_args.count
+
+      post_args = @fake_http_client.post_args.first
+
+      assert_equal "http://example.com/solr/update/json?softCommit=true", post_args[0]
+    end
+
+    it "sends update args with delete" do
+      @writer.delete("test-id")
+      @writer.close
+
+      assert_equal 1, @fake_http_client.post_args.count
+
+      post_args = @fake_http_client.post_args.first
+
+      assert_equal "http://example.com/solr/update/json?softCommit=true", post_args[0]
+    end
+
+    it "sends update args on individual-retry after batch failure" do
+      @writer = create_writer(
+        "solr_writer.batch_size" => 2,
+        "solr_writer.max_skipped" => 10,
+        "solr_writer.solr_update_args" => { softCommit: true }
+      )
+      @fake_http_client.response_status = 500
+
+      2.times do |i|
+        @writer.put context_with({"id" => "doc_#{i}", "key" => "value"})
+      end
+      @writer.close
+
+      # 1 batch, then 2 for re-trying each individually
+      assert_length 3, @fake_http_client.post_args
+
+      individual_update1, individual_update2 = @fake_http_client.post_args[1], @fake_http_client.post_args[2]
+      assert_equal "http://example.com/solr/update/json?softCommit=true", individual_update1[0]
+      assert_equal "http://example.com/solr/update/json?softCommit=true", individual_update2[0]
+    end
+  end

  describe "skipped records" do
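
Taken together, the commit behavior tested above is driven entirely by settings, with `#commit` also available for manual use. A sketch under those assumptions (the URL is illustrative):

```ruby
require 'traject'

# Hypothetical writer setup: soft-commit on every update request, and
# send a commit when the writer closes.
writer = Traject::SolrJsonWriter.new(
  "solr.url"                            => "http://localhost:8983/solr/core",
  "solr_writer.solr_update_args"        => { softCommit: true },
  "solr_writer.commit_on_close"         => "true",
  "solr_writer.commit_solr_update_args" => { softCommit: true }
)

# A manual commit; query params passed here override the configured args.
writer.commit(commit: true, optimize: true)
```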
@@ -225,6 +367,32 @@ describe "Traject::SolrJsonWriter" do
      logged = strio.string
      assert_includes logged, 'ArgumentError: bad stuff'
    end
+  end

+  describe "#delete" do
+    it "deletes" do
+      id = "123456"
+      @writer.delete(id)
+
+      post_args = @fake_http_client.post_args.first
+      assert_equal "http://example.com/solr/update/json", post_args[0]
+      assert_equal JSON.generate({"delete" => id}), post_args[1]
+    end
+
+    it "raises on non-200 http response" do
+      @fake_http_client.response_status = 500
+      assert_raises(RuntimeError) do
+        @writer.delete("12345")
+      end
+    end
+  end
+
+  describe "#delete_all!" do
+    it "deletes all" do
+      @writer.delete_all!
+      post_args = @fake_http_client.post_args.first
+      assert_equal "http://example.com/solr/update/json", post_args[0]
+      assert_equal JSON.generate({"delete" => { "query" => "*:*"}}), post_args[1]
+    end
  end
end
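
Finally, a short sketch of the new delete APIs shown in the last hunk (the URL and id are illustrative):

```ruby
require 'traject'

writer = Traject::SolrJsonWriter.new("solr.url" => "http://localhost:8983/solr/core")

writer.delete("doc-id-123")  # posts {"delete" => "doc-id-123"} to update/json
writer.delete_all!           # posts {"delete" => {"query" => "*:*"}}
writer.commit                # then commit, e.g. update/json?commit=true
```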