traject 2.3.4 → 3.0.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -25,7 +25,7 @@ memory_writer_class = Class.new do
|
|
25
25
|
describe "Traject::Indexer#process" do
|
26
26
|
before do
|
27
27
|
# no threading for these tests
|
28
|
-
@indexer = Traject::Indexer.new("processing_thread_pool" => nil)
|
28
|
+
@indexer = Traject::Indexer::MarcIndexer.new("processing_thread_pool" => nil)
|
29
29
|
@indexer.writer_class = memory_writer_class
|
30
30
|
@file = File.open(support_file_path "test_data.utf8.mrc")
|
31
31
|
end
|
@@ -68,7 +68,7 @@ describe "Traject::Indexer#process" do
|
|
68
68
|
|
69
69
|
require 'traject/null_writer'
|
70
70
|
it "calls after_processing after processing" do
|
71
|
-
@indexer = Traject::Indexer.new(
|
71
|
+
@indexer = Traject::Indexer::MarcIndexer.new(
|
72
72
|
"writer_class_name" => "Traject::NullWriter"
|
73
73
|
)
|
74
74
|
@file = File.open(support_file_path "test_data.utf8.mrc")
|
@@ -87,6 +87,37 @@ describe "Traject::Indexer#process" do
|
|
87
87
|
assert_equal [:one, :two], called, "Both after_processing hooks called, in order"
|
88
88
|
end
|
89
89
|
|
90
|
+
it "calls after_processing from #run_after_processing_steps" do
|
91
|
+
@indexer = Traject::Indexer.new(
|
92
|
+
"writer_class_name" => "Traject::NullWriter"
|
93
|
+
)
|
94
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
95
|
+
|
96
|
+
called = []
|
97
|
+
|
98
|
+
@indexer.after_processing do
|
99
|
+
called << :one
|
100
|
+
end
|
101
|
+
@indexer.after_processing do
|
102
|
+
called << :two
|
103
|
+
end
|
104
|
+
|
105
|
+
@indexer.run_after_processing_steps
|
106
|
+
assert_equal [:one, :two], called, "Both after_processing hooks called, in order"
|
107
|
+
end
|
108
|
+
|
109
|
+
it "can't be run twice" do
|
110
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
111
|
+
@indexer = Traject::Indexer::MarcIndexer.new(
|
112
|
+
"writer_class_name" => "Traject::NullWriter"
|
113
|
+
)
|
114
|
+
@indexer.process(@file)
|
115
|
+
|
116
|
+
assert_raises Traject::Indexer::CompletedStateError do
|
117
|
+
@indexer.process(@file)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
90
121
|
describe "demo_config.rb" do
|
91
122
|
before do
|
92
123
|
@indexer = Traject::Indexer.new(
|
@@ -102,4 +133,23 @@ describe "Traject::Indexer#process" do
|
|
102
133
|
end
|
103
134
|
end
|
104
135
|
|
136
|
+
describe "multi stream" do
|
137
|
+
before do
|
138
|
+
@file2 = File.open(support_file_path "george_eliot.marc")
|
139
|
+
@file1 = File.open(support_file_path "musical_cage.marc")
|
140
|
+
@indexer = Traject::Indexer::MarcIndexer.new do
|
141
|
+
self.writer_class = memory_writer_class
|
142
|
+
to_field "title", extract_marc("245")
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
it "parses and loads" do
|
147
|
+
@indexer.process([@file1, @file2])
|
148
|
+
# kinda ridic, yeah.
|
149
|
+
output_hashes = memory_writer_class.class_variable_get("@@last_writer_settings")["memory_writer.added"].collect(&:output_hash)
|
150
|
+
|
151
|
+
assert_length 2, output_hashes
|
152
|
+
assert output_hashes.all? { |hash| hash["title"].length > 0 }
|
153
|
+
end
|
154
|
+
end
|
105
155
|
end
|
@@ -5,10 +5,10 @@ describe "Traject::Indexer#settings" do
|
|
5
5
|
@indexer = Traject::Indexer.new
|
6
6
|
end
|
7
7
|
|
8
|
-
it "starts out a Hash, that
|
8
|
+
it "starts out a Hash, that uses it's defaults" do
|
9
9
|
assert_kind_of Hash, @indexer.settings
|
10
10
|
|
11
|
-
Traject::Indexer
|
11
|
+
Traject::Indexer.default_settings.each_pair do |key, value|
|
12
12
|
assert_equal value, @indexer.settings[key]
|
13
13
|
end
|
14
14
|
end
|
@@ -16,13 +16,15 @@ describe "Traject::Indexer#settings" do
|
|
16
16
|
it "can fill_in_defaults!" do
|
17
17
|
@indexer.settings.fill_in_defaults!
|
18
18
|
|
19
|
-
assert_equal Traject::Indexer
|
19
|
+
assert_equal Traject::Indexer.default_settings, @indexer.settings
|
20
20
|
end
|
21
21
|
|
22
22
|
it "doesn't overwrite with fill_in_defaults!" do
|
23
|
-
key = Traject::Indexer
|
23
|
+
key = Traject::Indexer.default_settings.keys.first
|
24
24
|
@indexer.settings[ key ] = "MINE KEEP IT"
|
25
25
|
|
26
|
+
assert_equal "MINE KEEP IT", @indexer.settings[key]
|
27
|
+
|
26
28
|
@indexer.settings.fill_in_defaults!
|
27
29
|
|
28
30
|
assert_equal "MINE KEEP IT", @indexer.settings[key]
|
@@ -36,7 +38,7 @@ describe "Traject::Indexer#settings" do
|
|
36
38
|
end
|
37
39
|
|
38
40
|
it "has settings DSL to set" do
|
39
|
-
@indexer.
|
41
|
+
@indexer.configure do
|
40
42
|
settings do
|
41
43
|
store "foo", "foo"
|
42
44
|
end
|
@@ -124,28 +126,36 @@ describe "Traject::Indexer#settings" do
|
|
124
126
|
assert_equal( {"a" => "a", "password" => "[hidden]", "some_password" => "[hidden]", "some.password" => "[hidden]"}, parsed)
|
125
127
|
end
|
126
128
|
end
|
127
|
-
|
128
|
-
describe "
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
assert_equal "Traject::Marc4JReader", @indexer.settings['reader_class_name']
|
136
|
-
else
|
137
|
-
assert_equal "Traject::MarcReader", @indexer.settings['reader_class_name']
|
129
|
+
|
130
|
+
describe "order of precedence" do
|
131
|
+
it "args beat 'provides'" do
|
132
|
+
# args come from command-line in typical use
|
133
|
+
|
134
|
+
@indexer = Traject::Indexer.new(sample: "from args")
|
135
|
+
@indexer.settings do
|
136
|
+
provide :sample, "from config"
|
138
137
|
end
|
138
|
+
@indexer.settings.fill_in_defaults!
|
139
|
+
|
140
|
+
assert_equal "from args", @indexer.settings["sample"]
|
139
141
|
end
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
142
|
+
|
143
|
+
it "args beat defaults" do
|
144
|
+
key = Traject::Indexer.default_settings.keys.first
|
145
|
+
@indexer = Traject::Indexer.new(key.to_sym => "from args")
|
146
|
+
@indexer.settings.fill_in_defaults!
|
147
|
+
|
148
|
+
assert_equal "from args", @indexer.settings[key]
|
149
|
+
end
|
150
|
+
|
151
|
+
it "provide beats defaults" do
|
152
|
+
key = Traject::Indexer.default_settings.keys.first
|
153
|
+
@indexer.settings do
|
154
|
+
provide key, "from config"
|
148
155
|
end
|
156
|
+
@indexer.settings.fill_in_defaults!
|
157
|
+
|
158
|
+
assert_equal "from config", @indexer.settings[key]
|
149
159
|
end
|
150
160
|
end
|
151
161
|
|
@@ -69,9 +69,34 @@ describe "Traject::Indexer.to_field" do
|
|
69
69
|
assert_equal ['hello'], output['foo']
|
70
70
|
end
|
71
71
|
|
72
|
+
describe "supports multiple procs" do
|
73
|
+
it "with no block" do
|
74
|
+
@indexer.to_field "foo",
|
75
|
+
lambda {|record, acc| acc << "one"},
|
76
|
+
lambda {|record, acc| acc << "two"},
|
77
|
+
lambda {|record, acc| acc << "three"}
|
72
78
|
|
73
|
-
|
74
|
-
|
79
|
+
output = @indexer.map_record('never looked at')
|
80
|
+
assert_equal ['one', 'two', 'three'], output['foo']
|
81
|
+
end
|
75
82
|
|
83
|
+
it "with a block too" do
|
84
|
+
@indexer.to_field "foo",
|
85
|
+
lambda {|record, acc| acc << "one"},
|
86
|
+
lambda {|record, acc| acc << "two"} do |record, acc|
|
87
|
+
acc << "three"
|
88
|
+
end
|
76
89
|
|
90
|
+
output = @indexer.map_record('never looked at')
|
91
|
+
assert_equal ['one', 'two', 'three'], output['foo']
|
92
|
+
end
|
93
|
+
end
|
77
94
|
|
95
|
+
describe "with an array argument" do
|
96
|
+
it "indexes to multiple fields" do
|
97
|
+
@indexer.to_field ["field1", "field2", "field3"], lambda {|rec, acc| acc << "value" }
|
98
|
+
output = @indexer.map_record('never looked at')
|
99
|
+
assert_equal({ "field1" => ["value"], "field2" => ["value"], "field3" => ["value"] }, output)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
data/test/marc_extractor_test.rb
CHANGED
@@ -28,7 +28,7 @@ describe "Traject::MarcExtractor" do
|
|
28
28
|
|
29
29
|
assert_kind_of Array, spec.subfields
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
it "parses specset from an array" do
|
33
33
|
parsed = Traject::MarcExtractor::SpecSet.new(%w[245abcde 810 700|*4|bcd])
|
34
34
|
assert_equal parsed.tags, %w[245 810 700]
|
@@ -60,17 +60,17 @@ describe "Traject::MarcExtractor" do
|
|
60
60
|
assert_equal "4", spec700.indicator2
|
61
61
|
assert_equal %w{b c d}, spec700.subfields
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
it "parses from an array" do
|
65
65
|
parsed = Traject::MarcExtractor::Spec.hash_from_string(%w[245abcde 810 700|*4|bcd])
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
_spec245 = parsed['245'].first
|
67
|
+
_spec810 = parsed['810'].first
|
68
|
+
_spec700 = parsed['700'].first
|
69
69
|
|
70
70
|
assert_length 3, parsed
|
71
71
|
end
|
72
|
-
|
73
|
-
|
72
|
+
|
73
|
+
|
74
74
|
|
75
75
|
it "parses fixed field byte offsets" do
|
76
76
|
parsed = Traject::MarcExtractor::Spec.hash_from_string("005[5]:008[7-10]")
|
data/test/marc_reader_test.rb
CHANGED
@@ -50,13 +50,13 @@ describe "Traject::MarcReader" do
|
|
50
50
|
a245a = array.first['245']['a']
|
51
51
|
|
52
52
|
assert a245a.encoding.name, "UTF-8"
|
53
|
-
assert a245a.valid_encoding?
|
53
|
+
assert a245a.valid_encoding?
|
54
54
|
assert_equal "Por uma outra globalização :", a245a
|
55
55
|
end
|
56
56
|
|
57
57
|
it "replaces unicode character reference in Marc8 transcode" do
|
58
58
|
file = File.new(support_file_path("escaped_character_reference.marc8.marc"))
|
59
|
-
|
59
|
+
|
60
60
|
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8") # binary type is default
|
61
61
|
record = Traject::MarcReader.new(file, settings).to_a.first
|
62
62
|
|
@@ -67,7 +67,7 @@ describe "Traject::MarcReader" do
|
|
67
67
|
file = File.new(support_file_path "one-marc8.mrc")
|
68
68
|
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "ADFADFADF")
|
69
69
|
assert_raises(ArgumentError) do
|
70
|
-
|
70
|
+
_record = Traject::MarcReader.new(file, settings).to_a.first
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
@@ -78,7 +78,7 @@ describe "Traject::MarcReader" do
|
|
78
78
|
reader = Traject::MarcReader.new(file, settings)
|
79
79
|
|
80
80
|
record = reader.to_a.first
|
81
|
-
|
81
|
+
|
82
82
|
value = record['300']['a']
|
83
83
|
|
84
84
|
assert_equal value.encoding.name, "UTF-8"
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'traject/nokogiri_reader'
|
3
|
+
|
4
|
+
describe "Traject::NokogiriReader" do
|
5
|
+
describe "with namespaces" do
|
6
|
+
before do
|
7
|
+
@namespaces = { "oai" => "http://www.openarchives.org/OAI/2.0/" }
|
8
|
+
@xml_sample_path = support_file_path("sample-oai-pmh.xml")
|
9
|
+
end
|
10
|
+
|
11
|
+
describe "invalid settings" do
|
12
|
+
it "default_namespaces not a hash raises" do
|
13
|
+
error = assert_raises(ArgumentError) {
|
14
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
15
|
+
"nokogiri.namespaces" => "i am not a hash",
|
16
|
+
})
|
17
|
+
}
|
18
|
+
assert(error.message =~ /nokogiri.namespaces must be a hash/)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "each_record_xpath with unregistered prefix raises" do
|
22
|
+
error = assert_raises(ArgumentError) {
|
23
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
24
|
+
"nokogiri.namespaces" => @namespaces,
|
25
|
+
"nokogiri.each_record_xpath" => "//foo:bar"
|
26
|
+
})
|
27
|
+
}
|
28
|
+
assert(error.message =~ %r{Can't find namespace prefix 'foo' in '//foo:bar'})
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "fixed path" do
|
33
|
+
before do
|
34
|
+
@each_record_xpath = "/oai:OAI-PMH/oai:ListRecords/oai:record"
|
35
|
+
end
|
36
|
+
|
37
|
+
it "reads" do
|
38
|
+
shared_tests
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "floating path" do
|
43
|
+
before do
|
44
|
+
@each_record_xpath = "//oai:record"
|
45
|
+
end
|
46
|
+
|
47
|
+
it "reads" do
|
48
|
+
shared_tests
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
describe "extra_xpath_hooks" do
|
54
|
+
it "catches oai-pmh resumption token" do
|
55
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
56
|
+
"nokogiri.namespaces" => @namespaces,
|
57
|
+
"nokogiri.each_record_xpath" => "//oai:record",
|
58
|
+
"nokogiri_reader.extra_xpath_hooks" => {
|
59
|
+
"//oai:resumptionToken" => lambda do |node, clipboard|
|
60
|
+
clipboard[:resumptionToken] = node.text
|
61
|
+
end
|
62
|
+
}
|
63
|
+
})
|
64
|
+
_records = @reader.to_a
|
65
|
+
assert_equal "oai_dc.f(2018-05-03T18:09:08Z).u(2018-06-15T19:25:21Z).t(6387):100", @reader.clipboard[:resumptionToken]
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
describe "outer namespaces" do
|
70
|
+
it "are preserved" do
|
71
|
+
@reader = Traject::NokogiriReader.new(File.open(support_file_path("namespace-test.xml")), {
|
72
|
+
"nokogiri.namespaces" => { mytop: "http://example.org/top" },
|
73
|
+
"nokogiri.each_record_xpath" => "//mytop:record"
|
74
|
+
})
|
75
|
+
yielded_records = []
|
76
|
+
@reader.each { |record|
|
77
|
+
yielded_records << record
|
78
|
+
}
|
79
|
+
|
80
|
+
assert yielded_records.length > 0
|
81
|
+
|
82
|
+
expected_namespaces = {"xmlns"=>"http://example.org/top", "xmlns:a"=>"http://example.org/a", "xmlns:b"=>"http://example.org/b"}
|
83
|
+
yielded_records.each do |rec|
|
84
|
+
assert_equal expected_namespaces, rec.namespaces
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "without namespaces" do
|
91
|
+
before do
|
92
|
+
@namespaces = {}
|
93
|
+
@xml_sample_path = support_file_path("sample-oai-no-namespace.xml")
|
94
|
+
end
|
95
|
+
|
96
|
+
describe "fixed path" do
|
97
|
+
before do
|
98
|
+
@each_record_xpath = "/OAI-PMH/ListRecords/record"
|
99
|
+
end
|
100
|
+
|
101
|
+
it "reads" do
|
102
|
+
shared_tests
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe "floating path" do
|
107
|
+
before do
|
108
|
+
@each_record_xpath = "//record"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "reads" do
|
112
|
+
shared_tests
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
def shared_tests
|
119
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
120
|
+
"nokogiri.namespaces" => @namespaces,
|
121
|
+
"nokogiri.each_record_xpath" => @each_record_xpath
|
122
|
+
})
|
123
|
+
|
124
|
+
yielded_records = []
|
125
|
+
@reader.each { |record|
|
126
|
+
yielded_records << record
|
127
|
+
}
|
128
|
+
|
129
|
+
|
130
|
+
manually_extracted = Nokogiri::XML.parse(File.open(@xml_sample_path)).xpath(@each_record_xpath, @namespaces)
|
131
|
+
manually_extracted.collect do |node|
|
132
|
+
# nokogiri makes it so hard to reliably get an Element to serialize to XML with all
|
133
|
+
# its inherited namespace declarations. :( We're only doing this for testing purposes
|
134
|
+
# anyway. This may not handle everything, but handles what we need in the test right now
|
135
|
+
if node.namespace
|
136
|
+
node["xmlns"] = node.namespace.href
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
assert_length manually_extracted.size, yielded_records
|
141
|
+
assert yielded_records.all? {|r| r.kind_of? Nokogiri::XML::Document }
|
142
|
+
assert_equal manually_extracted.collect(&:to_xml), yielded_records.collect(&:root).collect(&:to_xml)
|
143
|
+
end
|
144
|
+
|
145
|
+
describe "without each_record_xpath" do
|
146
|
+
before do
|
147
|
+
@xml_sample_path = support_file_path("namespace-test.xml")
|
148
|
+
end
|
149
|
+
it "yields whole file as one record" do
|
150
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {})
|
151
|
+
|
152
|
+
yielded_records = @reader.to_a
|
153
|
+
|
154
|
+
assert_length 1, yielded_records
|
155
|
+
assert_equal Nokogiri::XML.parse(File.open(@xml_sample_path)).to_xml, yielded_records.first.to_xml
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'traject/oai_pmh_nokogiri_reader'
|
3
|
+
|
4
|
+
describe "Traject::OaiPmhNokogiriReader" do
|
5
|
+
|
6
|
+
it "smoke test" do
|
7
|
+
@reader = Traject::OaiPmhNokogiriReader.new(nil,
|
8
|
+
"oai_pmh.start_url" => "http://example.com/oai?verb=ListRecords&metadataPrefix=oai_dc"
|
9
|
+
)
|
10
|
+
|
11
|
+
fetched = @reader.to_a
|
12
|
+
|
13
|
+
assert_length 2, fetched
|
14
|
+
end
|
15
|
+
|
16
|
+
before do
|
17
|
+
stub_request(:get, "http://example.com/oai?metadataPrefix=oai_dc&verb=ListRecords").
|
18
|
+
to_return(status: 200, body: File.read(support_file_path("oai-pmh-one-record-first.xml")))
|
19
|
+
|
20
|
+
stub_request(:get, "http://example.com/oai?resumptionToken=dummy_resumption&verb=ListRecords").
|
21
|
+
to_return(status: 200, body: File.read(support_file_path("oai-pmh-one-record-2.xml")))
|
22
|
+
end
|
23
|
+
end
|