fieldhand 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
1
module Fieldhand
  # Wrapper around a single set element from a repository response.
  #
  # A set is an optional construct for grouping items for the purpose of
  # selective harvesting.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Set
  class Set
    # The element this set was constructed with.
    attr_reader :element

    # Store the element; nothing is read from it until a reader is called.
    def initialize(element)
      @element = element
    end

    # The text of the element's setSpec child, memoized after the first call.
    def spec
      @spec ||= begin
        spec_node = element.setSpec
        spec_node.text
      end
    end

    # The text of the element's setName child, memoized after the first call.
    def name
      @name ||= begin
        name_node = element.setName
        name_node.text
      end
    end

    # Whatever the element's `locate` returns for 'setDescription', memoized.
    def descriptions
      @descriptions ||= element.locate('setDescription')
    end

    # A set's string form is its spec, so a Set can be used where a set spec
    # string is expected.
    def to_s
      spec
    end
  end
end
@@ -0,0 +1,94 @@
1
+ require 'fieldhand/arguments'
2
+ require 'fieldhand/set'
3
+ require 'fieldhand/metadata_format'
4
+ require 'ox'
5
+ require 'date'
6
+ require 'time'
7
+
8
module Fieldhand
  # Examples covering how Arguments turns the options given to its
  # constructor into the hash returned by #to_query.
  RSpec.describe Arguments do
    describe '#to_query' do
      it 'returns a metadata prefix of "oai_dc" by default' do
        query = described_class.new.to_query

        expect(query).to eq('metadataPrefix' => 'oai_dc')
      end

      it 'allows overriding the metadata prefix' do
        query = described_class.new(:metadata_prefix => 'xoai').to_query

        expect(query).to eq('metadataPrefix' => 'xoai')
      end

      it 'allows overriding the metadata prefix with a Metadata Format' do
        xml = '<metadataFormat><metadataPrefix>xoai</metadataPrefix></metadataFormat>'
        metadata_format = MetadataFormat.new(::Ox.parse(xml))
        query = described_class.new(:metadata_prefix => metadata_format).to_query

        expect(query).to eq('metadataPrefix' => 'xoai')
      end

      it 'allows passing a resumption token' do
        query = described_class.new(:resumption_token => 'foo').to_query

        expect(query).to include('resumptionToken' => 'foo')
      end

      it 'allows passing a from datestamp' do
        query = described_class.new(:from => '2001-01-01').to_query

        expect(query).to include('from' => '2001-01-01')
      end

      it 'converts a Date from datestamp to a string' do
        from_date = ::Date.new(2001, 1, 1)
        query = described_class.new(:from => from_date).to_query

        expect(query).to include('from' => '2001-01-01')
      end

      it 'converts a Time from datestamp to a string' do
        from_time = ::Time.utc(2001, 1, 1, 0, 0, 0)
        query = described_class.new(:from => from_time).to_query

        expect(query).to include('from' => '2001-01-01T00:00:00Z')
      end

      it 'allows passing an until datestamp' do
        query = described_class.new(:until => '2001-01-01').to_query

        expect(query).to include('until' => '2001-01-01')
      end

      it 'converts a Date until datestamp to a string' do
        until_date = ::Date.new(2001, 1, 1)
        query = described_class.new(:until => until_date).to_query

        expect(query).to include('until' => '2001-01-01')
      end

      it 'converts a Time until datestamp to a string' do
        until_time = ::Time.utc(2001, 1, 1, 0, 0, 0)
        query = described_class.new(:until => until_time).to_query

        expect(query).to include('until' => '2001-01-01T00:00:00Z')
      end

      it 'allows passing a set spec' do
        query = described_class.new(:set => 'A').to_query

        expect(query).to include('set' => 'A')
      end

      it 'allows passing a Set as a set spec' do
        set = Set.new(::Ox.parse('<set><setSpec>A</setSpec></set>'))
        query = described_class.new(:set => set).to_query

        expect(query).to include('set' => 'A')
      end

      it 'raises an error when given unknown arguments' do
        # Construction stays outside the expectation block: only #to_query
        # is asserted to raise.
        invalid = described_class.new(:foo => 'bar')

        expect { invalid.to_query }.to raise_error(::ArgumentError)
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/datestamp'
2
+
3
module Fieldhand
  # Examples for Datestamp.parse (string to Date/Time) and Datestamp.unparse
  # (Date/Time/String to string).
  RSpec.describe Datestamp do
    describe '.parse' do
      it 'parses date-granularity datestamps into Dates' do
        parsed = described_class.parse('2001-01-01')

        expect(parsed).to eq(::Date.new(2001, 1, 1))
      end

      it 'parses time-granularity datestamps into Times' do
        parsed = described_class.parse('2001-01-01T00:00:00Z')

        expect(parsed).to eq(::Time.utc(2001, 1, 1, 0, 0, 0))
      end
    end

    describe '.unparse' do
      it 'unparses Dates into date-granularity datestamps' do
        unparsed = described_class.unparse(::Date.new(2001, 1, 1))

        expect(unparsed).to eq('2001-01-01')
      end

      it 'unparses Times into time-granularity datestamps' do
        unparsed = described_class.unparse(::Time.utc(2001, 1, 1, 0, 0, 0))

        expect(unparsed).to eq('2001-01-01T00:00:00Z')
      end

      it 'unparses non UTC Times into time-granularity datestamps' do
        # 01:00 at +01:00 is midnight UTC, hence the expected string.
        local_time = ::Time.parse('2001-01-01 01:00:00 +0100')
        unparsed = described_class.unparse(local_time)

        expect(unparsed).to eq('2001-01-01T00:00:00Z')
      end

      it 'unparses strings into themselves' do
        unparsed = described_class.unparse('2001-01-01')

        expect(unparsed).to eq('2001-01-01')
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/header'
2
+ require 'ox'
3
+
4
module Fieldhand
  # Examples for Header#deleted? and Header#datestamp, each built from a
  # small inline XML header parsed with Ox.
  RSpec.describe Header do
    describe '#deleted?' do
      it 'is true when the status is deleted' do
        header = described_class.new(::Ox.parse('<header status="deleted"/>'))

        expect(header).to be_deleted
      end

      it 'is false when there is no status' do
        header = described_class.new(::Ox.parse('<header/>'))

        expect(header).not_to be_deleted
      end
    end

    describe '#datestamp' do
      it 'supports time-granularity datestamps' do
        xml = '<header><datestamp>2001-01-01T00:00:00Z</datestamp></header>'
        header = described_class.new(::Ox.parse(xml))

        expect(header.datestamp).to eq(::Time.utc(2001, 1, 1, 0, 0, 0))
      end

      it 'supports date-granularity datestamps' do
        xml = '<header><datestamp>2001-01-01</datestamp></header>'
        header = described_class.new(::Ox.parse(xml))

        expect(header.datestamp).to eq(::Date.new(2001, 1, 1))
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'fieldhand/identify'
2
+ require 'ox'
3
+
4
module Fieldhand
  # Examples for Identify#base_url and Identify#earliest_datestamp, each
  # built from a minimal inline Identify document parsed with Ox.
  RSpec.describe Identify do
    describe '#base_url' do
      it 'returns the base URL as a URI' do
        xml = <<-XML
          <Identify>
            <baseURL>http://www.example.com/oai</baseURL>
          </Identify>
        XML
        identify = described_class.new(::Ox.parse(xml))

        expect(identify.base_url).to eq(URI('http://www.example.com/oai'))
      end
    end

    describe '#earliest_datestamp' do
      it 'supports time datestamps' do
        xml = <<-XML
          <Identify>
            <earliestDatestamp>1990-02-01T12:00:00Z</earliestDatestamp>
          </Identify>
        XML
        identify = described_class.new(::Ox.parse(xml))

        expect(identify.earliest_datestamp).to eq(::Time.utc(1990, 2, 1, 12, 0, 0))
      end

      it 'supports date datestamps' do
        xml = <<-XML
          <Identify>
            <earliestDatestamp>1990-02-01</earliestDatestamp>
          </Identify>
        XML
        identify = described_class.new(::Ox.parse(xml))

        expect(identify.earliest_datestamp).to eq(::Date.new(1990, 2, 1))
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'fieldhand/metadata_format'
2
+ require 'ox'
3
+
4
module Fieldhand
  # Example checking that a MetadataFormat's string form is its prefix.
  RSpec.describe MetadataFormat do
    describe '#to_s' do
      it 'returns the prefix' do
        xml = '<metadataFormat><metadataPrefix>xoai</metadataPrefix></metadataFormat>'
        metadata_format = described_class.new(::Ox.parse(xml))

        expect(metadata_format.to_s).to eq('xoai')
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ require 'fieldhand/paginator'
2
+
3
module Fieldhand
  # Examples checking that Paginator#items raises the matching error class
  # for each error fixture served by the stubbed repository. Every example
  # calls `.first` on the result of #items before asserting the error.
  RSpec.describe Paginator do
    describe '#items' do
      it 'raises a Bad Argument Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=Identify&bad=Argument',
                         'bad_argument_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('Identify', 'Identify', 'bad' => 'Argument').first
        end.to raise_error(BadArgumentError)
      end

      it 'raises a Bad Resumption Token Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&resumptionToken=foo',
                         'bad_resumption_token_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('ListRecords', 'ListRecords/record', 'resumptionToken' => 'foo').first
        end.to raise_error(BadResumptionTokenError)
      end

      it 'raises a Bad Verb Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=Bad',
                         'bad_verb_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('Bad', 'Bad').first
        end.to raise_error(BadVerbError)
      end

      it 'raises a Cannot Disseminate Format Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=bad',
                         'cannot_disseminate_format_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'bad').first
        end.to raise_error(CannotDisseminateFormatError)
      end

      it 'raises an ID Does Not Exist Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=bad',
                         'id_does_not_exist_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('GetRecord', 'GetRecord/record', 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
        end.to raise_error(IdDoesNotExistError)
      end

      it 'raises a No Records Match Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc&from=2999-01-01',
                         'no_records_match_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').first
        end.to raise_error(NoRecordsMatchError)
      end

      it 'raises a No Metadata Formats Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats&identifier=bad',
                         'no_metadata_formats_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', 'identifier' => 'bad').first
        end.to raise_error(NoMetadataFormatsError)
      end

      it 'raises a No Set Hierarchy Error if returned from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListSets',
                         'no_set_hierarchy_error.xml')
        pager = described_class.new('http://www.example.com/oai')

        expect do
          pager.items('ListSets', 'ListSets/set').first
        end.to raise_error(NoSetHierarchyError)
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/record'
2
+ require 'ox'
3
+
4
module Fieldhand
  # Examples for Record#deleted? and Record#about, each built from a small
  # inline XML record parsed with Ox.
  RSpec.describe Record do
    describe '#deleted?' do
      it 'is true when the record has a deleted status' do
        record = described_class.new(::Ox.parse('<record><header status="deleted"/></record>'))

        expect(record).to be_deleted
      end

      it 'is false when the record does not have a status' do
        record = described_class.new(::Ox.parse('<record><header/></record>'))

        expect(record).not_to be_deleted
      end
    end

    describe '#about' do
      it 'returns an empty array if there are no about elements' do
        record = described_class.new(::Ox.parse('<record/>'))

        expect(record.about).to be_empty
      end

      it 'returns about sections when present' do
        record = described_class.new(::Ox.parse('<record><about/><about/></record>'))
        about_sections = record.about

        expect(about_sections.size).to eq(2)
      end
    end
  end
end
@@ -0,0 +1,191 @@
1
+ require 'fieldhand/repository'
2
+
3
module Fieldhand
  # Examples for Repository against stubbed HTTP responses.
  #
  # Several examples have no explicit expectation: they only consume the
  # result so that the stubbed request is actually issued.
  RSpec.describe Repository do
    describe '#metadata_formats' do
      it 'returns the supported metadata formats for this repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats', 'list_metadata_formats.xml')
        repository = described_class.new('http://www.example.com/oai')
        formats = repository.metadata_formats.to_a

        expect(formats.size).to eq(1)
      end

      it 'populates metadata formats with the right information' do
        stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats', 'list_metadata_formats.xml')
        repository = described_class.new('http://www.example.com/oai')
        format = repository.metadata_formats.first

        expect(format).to have_attributes(:prefix => 'oai_dc',
                                          :schema => URI('http://www.openarchives.org/OAI/2.0/oai_dc.xsd'),
                                          :namespace => URI('http://www.openarchives.org/OAI/2.0/oai_dc/'))
      end

      it 'raises an error if the connection times out' do
        stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
          to_timeout
        repository = described_class.new('http://www.example.com/oai')

        expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
      end

      it 'raises an error if the connection resets' do
        stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
          to_raise(::Errno::ECONNRESET)
        repository = described_class.new('http://www.example.com/oai')

        expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
      end

      it 'raises an error if the host is unreachable' do
        stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
          to_raise(::Errno::EHOSTUNREACH)
        repository = described_class.new('http://www.example.com/oai')

        expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
      end

      it 'supports an optional identifier argument' do
        stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats&identifier=foobar',
                         'list_metadata_formats.xml')
        repository = described_class.new('http://www.example.com/oai')

        # Consumed only to trigger the request with the identifier argument.
        repository.metadata_formats('foobar').to_a
      end
    end

    describe '#sets' do
      it 'returns the sets for this repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListSets', 'list_sets_2.xml')
        repository = described_class.new('http://www.example.com/oai')
        set_b = repository.sets.first

        expect(set_b).to have_attributes(:spec => 'B', :name => 'Set B.')
      end

      it 'paginates over all sets for this repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListSets', 'list_sets_1.xml')
        stub_oai_request('http://www.example.com/oai?verb=ListSets&resumptionToken=foobar', 'list_sets_2.xml')
        repository = described_class.new('http://www.example.com/oai')
        sets = repository.sets.to_a

        expect(sets.size).to eq(2)
      end

      it 'raises an error if the connection times out while consuming' do
        stub_request(:get, 'http://www.example.com/oai?verb=ListSets').to_timeout
        repository = described_class.new('http://www.example.com/oai')

        expect { repository.sets.to_a }.to raise_error(NetworkError)
      end
    end

    describe '#records' do
      it 'defaults to using a metadata prefix of oai_dc' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
        repository = described_class.new('http://www.example.com/oai')

        # Consumed only to trigger the request with the default prefix.
        repository.records.first
      end

      it 'returns all records for this repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&resumptionToken=foobar', 'list_records_2.xml')
        repository = described_class.new('http://www.example.com/oai')
        records = repository.records(:metadata_prefix => 'oai_dc').to_a

        expect(records.size).to eq(4)
      end

      it 'populates records with the right information' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
        repository = described_class.new('http://www.example.com/oai')
        record = repository.records.first

        expect(record).to have_attributes(:identifier => 'oai:oai.datacite.org:32355',
                                          :datestamp => ::Time.xmlschema('2011-07-07T11:19:03Z'),
                                          :sets => %w[TIB TIB.DAGST])
      end

      it 'populates deleted records with the right information' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_2.xml')
        repository = described_class.new('http://www.example.com/oai')
        record = repository.records.first

        expect(record).to have_attributes(:status => 'deleted',
                                          :datestamp => ::Time.xmlschema('2011-03-04T14:18:47Z'),
                                          :sets => %w[BL BL.WAP])
      end

      it 'supports passing extra arguments to the request' do
        stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc&from=2001-01-01&until=2002-01-01',
                         'list_records_2.xml')
        repository = described_class.new('http://www.example.com/oai')

        # The result must be consumed for this example to exercise anything.
        # Every other #records example consumes with .first/.to_a, which
        # suggests the result is lazily enumerated (NOTE(review): confirm
        # against Repository#records). Without .first no request would be
        # made and the example would pass vacuously.
        repository.records(:from => '2001-01-01', :until => '2002-01-01').first
      end
    end

    describe '#identifiers' do
      it 'defaults to a metadata prefix of "oai_dc"' do
        stub_oai_request('http://www.example.com/oai?verb=ListIdentifiers&metadataPrefix=oai_dc',
                         'list_identifiers.xml')
        repository = described_class.new('http://www.example.com/oai')

        # Consumed only to trigger the request with the default prefix.
        repository.identifiers.first
      end

      it 'returns all headers from the repository' do
        stub_oai_request('http://www.example.com/oai?verb=ListIdentifiers&metadataPrefix=oai_dc',
                         'list_identifiers.xml')
        repository = described_class.new('http://www.example.com/oai')
        headers = repository.identifiers(:metadata_prefix => 'oai_dc').to_a

        expect(headers.size).to eq(2)
      end
    end

    describe '#identify' do
      it 'returns information about the repository' do
        stub_oai_request('http://www.example.com/oai?verb=Identify', 'identify.xml')
        repository = described_class.new('http://www.example.com/oai')
        identify = repository.identify

        expect(identify).to have_attributes(:name => 'DataCite MDS',
                                            :base_url => URI('http://oai.datacite.org/oai'),
                                            :protocol_version => '2.0',
                                            :earliest_datestamp => ::Time.xmlschema('2011-01-01T00:00:00Z'),
                                            :deleted_record => 'persistent',
                                            :granularity => 'YYYY-MM-DDThh:mm:ssZ',
                                            :admin_emails => %w[admin@datacite.org],
                                            :compression => %w[gzip deflate])
      end

      it 'supports HTTPS repositories' do
        stub_oai_request('https://www.example.com/oai?verb=Identify', 'identify.xml')
        repository = described_class.new('https://www.example.com/oai')

        expect(repository.identify).not_to be_nil
      end
    end

    describe '#get' do
      it 'defaults to a metadata prefix of "oai_dc"' do
        stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:oai.datacite.org:32356',
                         'get_record.xml')
        repository = described_class.new('http://www.example.com/oai')

        repository.get('oai:oai.datacite.org:32356')
      end

      it 'fetches the record by identifier' do
        stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:oai.datacite.org:32356',
                         'get_record.xml')
        repository = described_class.new('http://www.example.com/oai')

        expect(repository.get('oai:oai.datacite.org:32356', :metadata_prefix => 'oai_dc')).
          to have_attributes(:identifier => 'oai:oai.datacite.org:32356')
      end
    end
  end
end