traject 2.3.4 → 3.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +16 -9
- data/CHANGES.md +74 -1
- data/Gemfile +2 -1
- data/README.md +104 -53
- data/Rakefile +8 -1
- data/doc/indexing_rules.md +79 -63
- data/doc/programmatic_use.md +218 -0
- data/doc/settings.md +28 -1
- data/doc/xml.md +134 -0
- data/lib/traject.rb +5 -0
- data/lib/traject/array_writer.rb +34 -0
- data/lib/traject/command_line.rb +18 -22
- data/lib/traject/debug_writer.rb +2 -5
- data/lib/traject/experimental_nokogiri_streaming_reader.rb +276 -0
- data/lib/traject/hashie/indifferent_access_fix.rb +25 -0
- data/lib/traject/indexer.rb +321 -92
- data/lib/traject/indexer/context.rb +39 -13
- data/lib/traject/indexer/marc_indexer.rb +30 -0
- data/lib/traject/indexer/nokogiri_indexer.rb +30 -0
- data/lib/traject/indexer/settings.rb +36 -53
- data/lib/traject/indexer/step.rb +27 -33
- data/lib/traject/macros/marc21.rb +37 -12
- data/lib/traject/macros/nokogiri_macros.rb +43 -0
- data/lib/traject/macros/transformation.rb +162 -0
- data/lib/traject/marc_extractor.rb +2 -0
- data/lib/traject/ndj_reader.rb +1 -1
- data/lib/traject/nokogiri_reader.rb +179 -0
- data/lib/traject/oai_pmh_nokogiri_reader.rb +159 -0
- data/lib/traject/solr_json_writer.rb +19 -12
- data/lib/traject/thread_pool.rb +13 -0
- data/lib/traject/util.rb +14 -2
- data/lib/traject/version.rb +1 -1
- data/test/debug_writer_test.rb +3 -3
- data/test/delimited_writer_test.rb +3 -3
- data/test/experimental_nokogiri_streaming_reader_test.rb +169 -0
- data/test/indexer/context_test.rb +23 -13
- data/test/indexer/error_handler_test.rb +59 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +46 -46
- data/test/indexer/macros/marc21/extract_all_marc_values_test.rb +1 -1
- data/test/indexer/macros/marc21/extract_marc_test.rb +19 -9
- data/test/indexer/macros/marc21/serialize_marc_test.rb +4 -4
- data/test/indexer/macros/to_field_test.rb +2 -2
- data/test/indexer/macros/transformation_test.rb +177 -0
- data/test/indexer/map_record_test.rb +2 -3
- data/test/indexer/nokogiri_indexer_test.rb +103 -0
- data/test/indexer/process_record_test.rb +55 -0
- data/test/indexer/process_with_test.rb +148 -0
- data/test/indexer/read_write_test.rb +52 -2
- data/test/indexer/settings_test.rb +34 -24
- data/test/indexer/to_field_test.rb +27 -2
- data/test/marc_extractor_test.rb +7 -7
- data/test/marc_reader_test.rb +4 -4
- data/test/nokogiri_reader_test.rb +158 -0
- data/test/oai_pmh_nokogiri_reader_test.rb +23 -0
- data/test/solr_json_writer_test.rb +24 -28
- data/test/test_helper.rb +8 -2
- data/test/test_support/namespace-test.xml +7 -0
- data/test/test_support/nokogiri_demo_config.rb +17 -0
- data/test/test_support/oai-pmh-one-record-2.xml +24 -0
- data/test/test_support/oai-pmh-one-record-first.xml +24 -0
- data/test/test_support/sample-oai-no-namespace.xml +197 -0
- data/test/test_support/sample-oai-pmh.xml +197 -0
- data/test/thread_pool_test.rb +38 -0
- data/test/translation_map_test.rb +3 -3
- data/test/translation_maps/ruby_map.rb +2 -1
- data/test/translation_maps/yaml_map.yaml +2 -1
- data/traject.gemspec +4 -11
- metadata +92 -6
@@ -25,7 +25,7 @@ memory_writer_class = Class.new do
|
|
25
25
|
describe "Traject::Indexer#process" do
|
26
26
|
before do
|
27
27
|
# no threading for these tests
|
28
|
-
@indexer = Traject::Indexer.new("processing_thread_pool" => nil)
|
28
|
+
@indexer = Traject::Indexer::MarcIndexer.new("processing_thread_pool" => nil)
|
29
29
|
@indexer.writer_class = memory_writer_class
|
30
30
|
@file = File.open(support_file_path "test_data.utf8.mrc")
|
31
31
|
end
|
@@ -68,7 +68,7 @@ describe "Traject::Indexer#process" do
|
|
68
68
|
|
69
69
|
require 'traject/null_writer'
|
70
70
|
it "calls after_processing after processing" do
|
71
|
-
@indexer = Traject::Indexer.new(
|
71
|
+
@indexer = Traject::Indexer::MarcIndexer.new(
|
72
72
|
"writer_class_name" => "Traject::NullWriter"
|
73
73
|
)
|
74
74
|
@file = File.open(support_file_path "test_data.utf8.mrc")
|
@@ -87,6 +87,37 @@ describe "Traject::Indexer#process" do
|
|
87
87
|
assert_equal [:one, :two], called, "Both after_processing hooks called, in order"
|
88
88
|
end
|
89
89
|
|
90
|
+
it "calls after_processing from #run_after_processing_steps" do
|
91
|
+
@indexer = Traject::Indexer.new(
|
92
|
+
"writer_class_name" => "Traject::NullWriter"
|
93
|
+
)
|
94
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
95
|
+
|
96
|
+
called = []
|
97
|
+
|
98
|
+
@indexer.after_processing do
|
99
|
+
called << :one
|
100
|
+
end
|
101
|
+
@indexer.after_processing do
|
102
|
+
called << :two
|
103
|
+
end
|
104
|
+
|
105
|
+
@indexer.run_after_processing_steps
|
106
|
+
assert_equal [:one, :two], called, "Both after_processing hooks called, in order"
|
107
|
+
end
|
108
|
+
|
109
|
+
it "can't be run twice" do
|
110
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
111
|
+
@indexer = Traject::Indexer::MarcIndexer.new(
|
112
|
+
"writer_class_name" => "Traject::NullWriter"
|
113
|
+
)
|
114
|
+
@indexer.process(@file)
|
115
|
+
|
116
|
+
assert_raises Traject::Indexer::CompletedStateError do
|
117
|
+
@indexer.process(@file)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
90
121
|
describe "demo_config.rb" do
|
91
122
|
before do
|
92
123
|
@indexer = Traject::Indexer.new(
|
@@ -102,4 +133,23 @@ describe "Traject::Indexer#process" do
|
|
102
133
|
end
|
103
134
|
end
|
104
135
|
|
136
|
+
describe "multi stream" do
|
137
|
+
before do
|
138
|
+
@file2 = File.open(support_file_path "george_eliot.marc")
|
139
|
+
@file1 = File.open(support_file_path "musical_cage.marc")
|
140
|
+
@indexer = Traject::Indexer::MarcIndexer.new do
|
141
|
+
self.writer_class = memory_writer_class
|
142
|
+
to_field "title", extract_marc("245")
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
it "parses and loads" do
|
147
|
+
@indexer.process([@file1, @file2])
|
148
|
+
# kinda ridic, yeah.
|
149
|
+
output_hashes = memory_writer_class.class_variable_get("@@last_writer_settings")["memory_writer.added"].collect(&:output_hash)
|
150
|
+
|
151
|
+
assert_length 2, output_hashes
|
152
|
+
assert output_hashes.all? { |hash| hash["title"].length > 0 }
|
153
|
+
end
|
154
|
+
end
|
105
155
|
end
|
@@ -5,10 +5,10 @@ describe "Traject::Indexer#settings" do
|
|
5
5
|
@indexer = Traject::Indexer.new
|
6
6
|
end
|
7
7
|
|
8
|
-
it "starts out a Hash, that
|
8
|
+
it "starts out a Hash, that uses it's defaults" do
|
9
9
|
assert_kind_of Hash, @indexer.settings
|
10
10
|
|
11
|
-
Traject::Indexer
|
11
|
+
Traject::Indexer.default_settings.each_pair do |key, value|
|
12
12
|
assert_equal value, @indexer.settings[key]
|
13
13
|
end
|
14
14
|
end
|
@@ -16,13 +16,15 @@ describe "Traject::Indexer#settings" do
|
|
16
16
|
it "can fill_in_defaults!" do
|
17
17
|
@indexer.settings.fill_in_defaults!
|
18
18
|
|
19
|
-
assert_equal Traject::Indexer
|
19
|
+
assert_equal Traject::Indexer.default_settings, @indexer.settings
|
20
20
|
end
|
21
21
|
|
22
22
|
it "doesn't overwrite with fill_in_defaults!" do
|
23
|
-
key = Traject::Indexer
|
23
|
+
key = Traject::Indexer.default_settings.keys.first
|
24
24
|
@indexer.settings[ key ] = "MINE KEEP IT"
|
25
25
|
|
26
|
+
assert_equal "MINE KEEP IT", @indexer.settings[key]
|
27
|
+
|
26
28
|
@indexer.settings.fill_in_defaults!
|
27
29
|
|
28
30
|
assert_equal "MINE KEEP IT", @indexer.settings[key]
|
@@ -36,7 +38,7 @@ describe "Traject::Indexer#settings" do
|
|
36
38
|
end
|
37
39
|
|
38
40
|
it "has settings DSL to set" do
|
39
|
-
@indexer.
|
41
|
+
@indexer.configure do
|
40
42
|
settings do
|
41
43
|
store "foo", "foo"
|
42
44
|
end
|
@@ -124,28 +126,36 @@ describe "Traject::Indexer#settings" do
|
|
124
126
|
assert_equal( {"a" => "a", "password" => "[hidden]", "some_password" => "[hidden]", "some.password" => "[hidden]"}, parsed)
|
125
127
|
end
|
126
128
|
end
|
127
|
-
|
128
|
-
describe "
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
assert_equal "Traject::Marc4JReader", @indexer.settings['reader_class_name']
|
136
|
-
else
|
137
|
-
assert_equal "Traject::MarcReader", @indexer.settings['reader_class_name']
|
129
|
+
|
130
|
+
describe "order of precedence" do
|
131
|
+
it "args beat 'provides'" do
|
132
|
+
# args come from command-line in typical use
|
133
|
+
|
134
|
+
@indexer = Traject::Indexer.new(sample: "from args")
|
135
|
+
@indexer.settings do
|
136
|
+
provide :sample, "from config"
|
138
137
|
end
|
138
|
+
@indexer.settings.fill_in_defaults!
|
139
|
+
|
140
|
+
assert_equal "from args", @indexer.settings["sample"]
|
139
141
|
end
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
142
|
+
|
143
|
+
it "args beat defaults" do
|
144
|
+
key = Traject::Indexer.default_settings.keys.first
|
145
|
+
@indexer = Traject::Indexer.new(key.to_sym => "from args")
|
146
|
+
@indexer.settings.fill_in_defaults!
|
147
|
+
|
148
|
+
assert_equal "from args", @indexer.settings[key]
|
149
|
+
end
|
150
|
+
|
151
|
+
it "provide beats defaults" do
|
152
|
+
key = Traject::Indexer.default_settings.keys.first
|
153
|
+
@indexer.settings do
|
154
|
+
provide key, "from config"
|
148
155
|
end
|
156
|
+
@indexer.settings.fill_in_defaults!
|
157
|
+
|
158
|
+
assert_equal "from config", @indexer.settings[key]
|
149
159
|
end
|
150
160
|
end
|
151
161
|
|
@@ -69,9 +69,34 @@ describe "Traject::Indexer.to_field" do
|
|
69
69
|
assert_equal ['hello'], output['foo']
|
70
70
|
end
|
71
71
|
|
72
|
+
describe "supports multiple procs" do
|
73
|
+
it "with no block" do
|
74
|
+
@indexer.to_field "foo",
|
75
|
+
lambda {|record, acc| acc << "one"},
|
76
|
+
lambda {|record, acc| acc << "two"},
|
77
|
+
lambda {|record, acc| acc << "three"}
|
72
78
|
|
73
|
-
|
74
|
-
|
79
|
+
output = @indexer.map_record('never looked at')
|
80
|
+
assert_equal ['one', 'two', 'three'], output['foo']
|
81
|
+
end
|
75
82
|
|
83
|
+
it "with a block too" do
|
84
|
+
@indexer.to_field "foo",
|
85
|
+
lambda {|record, acc| acc << "one"},
|
86
|
+
lambda {|record, acc| acc << "two"} do |record, acc|
|
87
|
+
acc << "three"
|
88
|
+
end
|
76
89
|
|
90
|
+
output = @indexer.map_record('never looked at')
|
91
|
+
assert_equal ['one', 'two', 'three'], output['foo']
|
92
|
+
end
|
93
|
+
end
|
77
94
|
|
95
|
+
describe "with an array argument" do
|
96
|
+
it "indexes to multiple fields" do
|
97
|
+
@indexer.to_field ["field1", "field2", "field3"], lambda {|rec, acc| acc << "value" }
|
98
|
+
output = @indexer.map_record('never looked at')
|
99
|
+
assert_equal({ "field1" => ["value"], "field2" => ["value"], "field3" => ["value"] }, output)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
data/test/marc_extractor_test.rb
CHANGED
@@ -28,7 +28,7 @@ describe "Traject::MarcExtractor" do
|
|
28
28
|
|
29
29
|
assert_kind_of Array, spec.subfields
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
it "parses specset from an array" do
|
33
33
|
parsed = Traject::MarcExtractor::SpecSet.new(%w[245abcde 810 700|*4|bcd])
|
34
34
|
assert_equal parsed.tags, %w[245 810 700]
|
@@ -60,17 +60,17 @@ describe "Traject::MarcExtractor" do
|
|
60
60
|
assert_equal "4", spec700.indicator2
|
61
61
|
assert_equal %w{b c d}, spec700.subfields
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
it "parses from an array" do
|
65
65
|
parsed = Traject::MarcExtractor::Spec.hash_from_string(%w[245abcde 810 700|*4|bcd])
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
_spec245 = parsed['245'].first
|
67
|
+
_spec810 = parsed['810'].first
|
68
|
+
_spec700 = parsed['700'].first
|
69
69
|
|
70
70
|
assert_length 3, parsed
|
71
71
|
end
|
72
|
-
|
73
|
-
|
72
|
+
|
73
|
+
|
74
74
|
|
75
75
|
it "parses fixed field byte offsets" do
|
76
76
|
parsed = Traject::MarcExtractor::Spec.hash_from_string("005[5]:008[7-10]")
|
data/test/marc_reader_test.rb
CHANGED
@@ -50,13 +50,13 @@ describe "Traject::MarcReader" do
|
|
50
50
|
a245a = array.first['245']['a']
|
51
51
|
|
52
52
|
assert a245a.encoding.name, "UTF-8"
|
53
|
-
assert a245a.valid_encoding?
|
53
|
+
assert a245a.valid_encoding?
|
54
54
|
assert_equal "Por uma outra globalização :", a245a
|
55
55
|
end
|
56
56
|
|
57
57
|
it "replaces unicode character reference in Marc8 transcode" do
|
58
58
|
file = File.new(support_file_path("escaped_character_reference.marc8.marc"))
|
59
|
-
|
59
|
+
|
60
60
|
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "MARC-8") # binary type is default
|
61
61
|
record = Traject::MarcReader.new(file, settings).to_a.first
|
62
62
|
|
@@ -67,7 +67,7 @@ describe "Traject::MarcReader" do
|
|
67
67
|
file = File.new(support_file_path "one-marc8.mrc")
|
68
68
|
settings = Traject::Indexer::Settings.new("marc_source.encoding" => "ADFADFADF")
|
69
69
|
assert_raises(ArgumentError) do
|
70
|
-
|
70
|
+
_record = Traject::MarcReader.new(file, settings).to_a.first
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
@@ -78,7 +78,7 @@ describe "Traject::MarcReader" do
|
|
78
78
|
reader = Traject::MarcReader.new(file, settings)
|
79
79
|
|
80
80
|
record = reader.to_a.first
|
81
|
-
|
81
|
+
|
82
82
|
value = record['300']['a']
|
83
83
|
|
84
84
|
assert_equal value.encoding.name, "UTF-8"
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'traject/nokogiri_reader'
|
3
|
+
|
4
|
+
describe "Traject::NokogiriReader" do
|
5
|
+
describe "with namespaces" do
|
6
|
+
before do
|
7
|
+
@namespaces = { "oai" => "http://www.openarchives.org/OAI/2.0/" }
|
8
|
+
@xml_sample_path = support_file_path("sample-oai-pmh.xml")
|
9
|
+
end
|
10
|
+
|
11
|
+
describe "invalid settings" do
|
12
|
+
it "default_namespaces not a hash raises" do
|
13
|
+
error = assert_raises(ArgumentError) {
|
14
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
15
|
+
"nokogiri.namespaces" => "i am not a hash",
|
16
|
+
})
|
17
|
+
}
|
18
|
+
assert(error.message =~ /nokogiri.namespaces must be a hash/)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "each_record_xpath with unregistered prefix raises" do
|
22
|
+
error = assert_raises(ArgumentError) {
|
23
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
24
|
+
"nokogiri.namespaces" => @namespaces,
|
25
|
+
"nokogiri.each_record_xpath" => "//foo:bar"
|
26
|
+
})
|
27
|
+
}
|
28
|
+
assert(error.message =~ %r{Can't find namespace prefix 'foo' in '//foo:bar'})
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "fixed path" do
|
33
|
+
before do
|
34
|
+
@each_record_xpath = "/oai:OAI-PMH/oai:ListRecords/oai:record"
|
35
|
+
end
|
36
|
+
|
37
|
+
it "reads" do
|
38
|
+
shared_tests
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "floating path" do
|
43
|
+
before do
|
44
|
+
@each_record_xpath = "//oai:record"
|
45
|
+
end
|
46
|
+
|
47
|
+
it "reads" do
|
48
|
+
shared_tests
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
describe "extra_xpath_hooks" do
|
54
|
+
it "catches oai-pmh resumption token" do
|
55
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
56
|
+
"nokogiri.namespaces" => @namespaces,
|
57
|
+
"nokogiri.each_record_xpath" => "//oai:record",
|
58
|
+
"nokogiri_reader.extra_xpath_hooks" => {
|
59
|
+
"//oai:resumptionToken" => lambda do |node, clipboard|
|
60
|
+
clipboard[:resumptionToken] = node.text
|
61
|
+
end
|
62
|
+
}
|
63
|
+
})
|
64
|
+
_records = @reader.to_a
|
65
|
+
assert_equal "oai_dc.f(2018-05-03T18:09:08Z).u(2018-06-15T19:25:21Z).t(6387):100", @reader.clipboard[:resumptionToken]
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
describe "outer namespaces" do
|
70
|
+
it "are preserved" do
|
71
|
+
@reader = Traject::NokogiriReader.new(File.open(support_file_path("namespace-test.xml")), {
|
72
|
+
"nokogiri.namespaces" => { mytop: "http://example.org/top" },
|
73
|
+
"nokogiri.each_record_xpath" => "//mytop:record"
|
74
|
+
})
|
75
|
+
yielded_records = []
|
76
|
+
@reader.each { |record|
|
77
|
+
yielded_records << record
|
78
|
+
}
|
79
|
+
|
80
|
+
assert yielded_records.length > 0
|
81
|
+
|
82
|
+
expected_namespaces = {"xmlns"=>"http://example.org/top", "xmlns:a"=>"http://example.org/a", "xmlns:b"=>"http://example.org/b"}
|
83
|
+
yielded_records.each do |rec|
|
84
|
+
assert_equal expected_namespaces, rec.namespaces
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "without namespaces" do
|
91
|
+
before do
|
92
|
+
@namespaces = {}
|
93
|
+
@xml_sample_path = support_file_path("sample-oai-no-namespace.xml")
|
94
|
+
end
|
95
|
+
|
96
|
+
describe "fixed path" do
|
97
|
+
before do
|
98
|
+
@each_record_xpath = "/OAI-PMH/ListRecords/record"
|
99
|
+
end
|
100
|
+
|
101
|
+
it "reads" do
|
102
|
+
shared_tests
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe "floating path" do
|
107
|
+
before do
|
108
|
+
@each_record_xpath = "//record"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "reads" do
|
112
|
+
shared_tests
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
|
118
|
+
def shared_tests
|
119
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {
|
120
|
+
"nokogiri.namespaces" => @namespaces,
|
121
|
+
"nokogiri.each_record_xpath" => @each_record_xpath
|
122
|
+
})
|
123
|
+
|
124
|
+
yielded_records = []
|
125
|
+
@reader.each { |record|
|
126
|
+
yielded_records << record
|
127
|
+
}
|
128
|
+
|
129
|
+
|
130
|
+
manually_extracted = Nokogiri::XML.parse(File.open(@xml_sample_path)).xpath(@each_record_xpath, @namespaces)
|
131
|
+
manually_extracted.collect do |node|
|
132
|
+
# nokogiri makes it so hard to reliably get an Element to serialize to XML with all
|
133
|
+
# its inherited namespace declarations. :( We're only doing this for testing purposes
|
134
|
+
# anyway. This may not handle everything, but handles what we need in the test right now
|
135
|
+
if node.namespace
|
136
|
+
node["xmlns"] = node.namespace.href
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
assert_length manually_extracted.size, yielded_records
|
141
|
+
assert yielded_records.all? {|r| r.kind_of? Nokogiri::XML::Document }
|
142
|
+
assert_equal manually_extracted.collect(&:to_xml), yielded_records.collect(&:root).collect(&:to_xml)
|
143
|
+
end
|
144
|
+
|
145
|
+
describe "without each_record_xpath" do
|
146
|
+
before do
|
147
|
+
@xml_sample_path = support_file_path("namespace-test.xml")
|
148
|
+
end
|
149
|
+
it "yields whole file as one record" do
|
150
|
+
@reader = Traject::NokogiriReader.new(File.open(@xml_sample_path), {})
|
151
|
+
|
152
|
+
yielded_records = @reader.to_a
|
153
|
+
|
154
|
+
assert_length 1, yielded_records
|
155
|
+
assert_equal Nokogiri::XML.parse(File.open(@xml_sample_path)).to_xml, yielded_records.first.to_xml
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'traject/oai_pmh_nokogiri_reader'
|
3
|
+
|
4
|
+
describe "Traject::OaiPmhNokogiriReader" do
|
5
|
+
|
6
|
+
it "smoke test" do
|
7
|
+
@reader = Traject::OaiPmhNokogiriReader.new(nil,
|
8
|
+
"oai_pmh.start_url" => "http://example.com/oai?verb=ListRecords&metadataPrefix=oai_dc"
|
9
|
+
)
|
10
|
+
|
11
|
+
fetched = @reader.to_a
|
12
|
+
|
13
|
+
assert_length 2, fetched
|
14
|
+
end
|
15
|
+
|
16
|
+
before do
|
17
|
+
stub_request(:get, "http://example.com/oai?metadataPrefix=oai_dc&verb=ListRecords").
|
18
|
+
to_return(status: 200, body: File.read(support_file_path("oai-pmh-one-record-first.xml")))
|
19
|
+
|
20
|
+
stub_request(:get, "http://example.com/oai?resumptionToken=dummy_resumption&verb=ListRecords").
|
21
|
+
to_return(status: 200, body: File.read(support_file_path("oai-pmh-one-record-2.xml")))
|
22
|
+
end
|
23
|
+
end
|