fieldhand 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ module Fieldhand
2
+ # A set is an optional construct for grouping items for the purpose of selective harvesting.
3
+ # Wraps a set element and memoizes its setSpec text (what #to_s returns), setName text and setDescription matches.
4
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Set
5
+ class Set
6
+ attr_reader :element
7
+
8
+ def initialize(element)
9
+ @element = element
10
+ end
11
+
12
+ def to_s
13
+ spec
14
+ end
15
+
16
+ def spec
17
+ @spec ||= element.setSpec.text
18
+ end
19
+
20
+ def name
21
+ @name ||= element.setName.text
22
+ end
23
+
24
+ def descriptions
25
+ @descriptions ||= element.locate('setDescription')
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,94 @@
1
+ require 'fieldhand/arguments'
2
+ require 'fieldhand/set'
3
+ require 'fieldhand/metadata_format'
4
+ require 'ox'
5
+ require 'date'
6
+ require 'time'
7
+
8
+ module Fieldhand
9
+ RSpec.describe Arguments do
10
+ describe '#to_query' do
11
+ it 'returns a metadata prefix of "oai_dc" by default' do
12
+ arguments = described_class.new
13
+
14
+ expect(arguments.to_query).to eq('metadataPrefix' => 'oai_dc')
15
+ end
16
+
17
+ it 'allows overriding the metadata prefix' do
18
+ arguments = described_class.new(:metadata_prefix => 'xoai')
19
+
20
+ expect(arguments.to_query).to eq('metadataPrefix' => 'xoai')
21
+ end
22
+
23
+ it 'allows overriding the metadata prefix with a Metadata Format' do
24
+ element = ::Ox.parse('<metadataFormat><metadataPrefix>xoai</metadataPrefix></metadataFormat>')
25
+ format = MetadataFormat.new(element)
26
+ arguments = described_class.new(:metadata_prefix => format)
27
+
28
+ expect(arguments.to_query).to eq('metadataPrefix' => 'xoai')
29
+ end
30
+
31
+ it 'allows passing a resumption token' do
32
+ arguments = described_class.new(:resumption_token => 'foo')
33
+
34
+ expect(arguments.to_query).to include('resumptionToken' => 'foo')
35
+ end
36
+
37
+ it 'allows passing a from datestamp' do
38
+ arguments = described_class.new(:from => '2001-01-01')
39
+
40
+ expect(arguments.to_query).to include('from' => '2001-01-01')
41
+ end
42
+
43
+ it 'converts a Date from datestamp to a string' do
44
+ arguments = described_class.new(:from => ::Date.new(2001, 1, 1))
45
+
46
+ expect(arguments.to_query).to include('from' => '2001-01-01')
47
+ end
48
+
49
+ it 'converts a Time from datestamp to a string' do
50
+ arguments = described_class.new(:from => ::Time.utc(2001, 1, 1, 0, 0, 0))
51
+
52
+ expect(arguments.to_query).to include('from' => '2001-01-01T00:00:00Z')
53
+ end
54
+
55
+ it 'allows passing an until datestamp' do
56
+ arguments = described_class.new(:until => '2001-01-01')
57
+
58
+ expect(arguments.to_query).to include('until' => '2001-01-01')
59
+ end
60
+
61
+ it 'converts a Date until datestamp to a string' do
62
+ arguments = described_class.new(:until => ::Date.new(2001, 1, 1))
63
+
64
+ expect(arguments.to_query).to include('until' => '2001-01-01')
65
+ end
66
+
67
+ it 'converts a Time until datestamp to a string' do
68
+ arguments = described_class.new(:until => ::Time.utc(2001, 1, 1, 0, 0, 0))
69
+
70
+ expect(arguments.to_query).to include('until' => '2001-01-01T00:00:00Z')
71
+ end
72
+
73
+ it 'allows passing a set spec' do
74
+ arguments = described_class.new(:set => 'A')
75
+
76
+ expect(arguments.to_query).to include('set' => 'A')
77
+ end
78
+
79
+ it 'allows passing a Set as a set spec' do
80
+ element = ::Ox.parse('<set><setSpec>A</setSpec></set>')
81
+ set = Set.new(element)
82
+ arguments = described_class.new(:set => set)
83
+
84
+ expect(arguments.to_query).to include('set' => 'A')
85
+ end
86
+
87
+ it 'raises an error when given unknown arguments' do
88
+ arguments = described_class.new(:foo => 'bar')
89
+
90
+ expect { arguments.to_query }.to raise_error(::ArgumentError)
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/datestamp'
2
+
3
+ module Fieldhand
4
+ RSpec.describe Datestamp do
5
+ describe '.parse' do
6
+ it 'parses date-granularity datestamps into Dates' do
7
+ expect(described_class.parse('2001-01-01')).
8
+ to eq(::Date.new(2001, 1, 1))
9
+ end
10
+
11
+ it 'parses time-granularity datestamps into Times' do
12
+ expect(described_class.parse('2001-01-01T00:00:00Z')).
13
+ to eq(::Time.utc(2001, 1, 1, 0, 0, 0))
14
+ end
15
+ end
16
+
17
+ describe '.unparse' do
18
+ it 'unparses Dates into date-granularity datestamps' do
19
+ expect(described_class.unparse(::Date.new(2001, 1, 1))).
20
+ to eq('2001-01-01')
21
+ end
22
+
23
+ it 'unparses Times into time-granularity datestamps' do
24
+ expect(described_class.unparse(::Time.utc(2001, 1, 1, 0, 0, 0))).
25
+ to eq('2001-01-01T00:00:00Z')
26
+ end
27
+
28
+ it 'unparses non UTC Times into time-granularity datestamps' do
29
+ expect(described_class.unparse(::Time.parse('2001-01-01 01:00:00 +0100'))).
30
+ to eq('2001-01-01T00:00:00Z')
31
+ end
32
+
33
+ it 'unparses strings into themselves' do
34
+ expect(described_class.unparse('2001-01-01')).to eq('2001-01-01')
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/header'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ RSpec.describe Header do
6
+ describe '#deleted?' do
7
+ it 'is true when the status is deleted' do
8
+ element = ::Ox.parse('<header status="deleted"/>')
9
+ header = described_class.new(element)
10
+
11
+ expect(header).to be_deleted
12
+ end
13
+
14
+ it 'is false when there is no status' do
15
+ element = ::Ox.parse('<header/>')
16
+ header = described_class.new(element)
17
+
18
+ expect(header).not_to be_deleted
19
+ end
20
+ end
21
+
22
+ describe '#datestamp' do
23
+ it 'supports time-granularity datestamps' do
24
+ element = ::Ox.parse('<header><datestamp>2001-01-01T00:00:00Z</datestamp></header>')
25
+ header = described_class.new(element)
26
+
27
+ expect(header.datestamp).to eq(::Time.utc(2001, 1, 1, 0, 0, 0))
28
+ end
29
+
30
+ it 'supports date-granularity datestamps' do
31
+ element = ::Ox.parse('<header><datestamp>2001-01-01</datestamp></header>')
32
+ header = described_class.new(element)
33
+
34
+ expect(header.datestamp).to eq(::Date.new(2001, 1, 1))
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,43 @@
1
+ require 'fieldhand/identify'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ RSpec.describe Identify do
6
+ describe '#base_url' do
7
+ it 'returns the base URL as a URI' do
8
+ element = ::Ox.parse(<<-XML)
9
+ <Identify>
10
+ <baseURL>http://www.example.com/oai</baseURL>
11
+ </Identify>
12
+ XML
13
+ identify = described_class.new(element)
14
+
15
+ expect(identify.base_url).to eq(URI('http://www.example.com/oai'))
16
+ end
17
+ end
18
+
19
+ describe '#earliest_datestamp' do
20
+ it 'supports time datestamps' do
21
+ element = ::Ox.parse(<<-XML)
22
+ <Identify>
23
+ <earliestDatestamp>1990-02-01T12:00:00Z</earliestDatestamp>
24
+ </Identify>
25
+ XML
26
+ identify = described_class.new(element)
27
+
28
+ expect(identify.earliest_datestamp).to eq(::Time.utc(1990, 2, 1, 12, 0, 0))
29
+ end
30
+
31
+ it 'supports date datestamps' do
32
+ element = ::Ox.parse(<<-XML)
33
+ <Identify>
34
+ <earliestDatestamp>1990-02-01</earliestDatestamp>
35
+ </Identify>
36
+ XML
37
+ identify = described_class.new(element)
38
+
39
+ expect(identify.earliest_datestamp).to eq(::Date.new(1990, 2, 1))
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,15 @@
1
+ require 'fieldhand/metadata_format'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ RSpec.describe MetadataFormat do
6
+ describe '#to_s' do
7
+ it 'returns the prefix' do
8
+ element = ::Ox.parse('<metadataFormat><metadataPrefix>xoai</metadataPrefix></metadataFormat>')
9
+ format = described_class.new(element)
10
+
11
+ expect(format.to_s).to eq('xoai')
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,84 @@
1
+ require 'fieldhand/paginator'
2
+
3
+ module Fieldhand
4
+ RSpec.describe Paginator do
5
+ describe '#items' do
6
+ it 'raises a Bad Argument Error if returned from the repository' do
7
+ stub_oai_request('http://www.example.com/oai?verb=Identify&bad=Argument',
8
+ 'bad_argument_error.xml')
9
+ paginator = described_class.new('http://www.example.com/oai')
10
+
11
+ expect { paginator.items('Identify', 'Identify', 'bad' => 'Argument').first }.
12
+ to raise_error(BadArgumentError)
13
+ end
14
+
15
+ it 'raises a Bad Resumption Token Error if returned from the repository' do
16
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&resumptionToken=foo',
17
+ 'bad_resumption_token_error.xml')
18
+ paginator = described_class.new('http://www.example.com/oai')
19
+
20
+ expect { paginator.items('ListRecords', 'ListRecords/record', 'resumptionToken' => 'foo').first }.
21
+ to raise_error(BadResumptionTokenError)
22
+ end
23
+
24
+ it 'raises a Bad Verb Error if returned from the repository' do
25
+ stub_oai_request('http://www.example.com/oai?verb=Bad',
26
+ 'bad_verb_error.xml')
27
+ paginator = described_class.new('http://www.example.com/oai')
28
+
29
+ expect { paginator.items('Bad', 'Bad').first }.
30
+ to raise_error(BadVerbError)
31
+ end
32
+
33
+ it 'raises a Cannot Disseminate Format Error if returned from the repository' do
34
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=bad',
35
+ 'cannot_disseminate_format_error.xml')
36
+ paginator = described_class.new('http://www.example.com/oai')
37
+
38
+ expect { paginator.items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'bad').first }.
39
+ to raise_error(CannotDisseminateFormatError)
40
+ end
41
+
42
+ it 'raises an ID Does Not Exist Error if returned from the repository' do
43
+ stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=bad',
44
+ 'id_does_not_exist_error.xml')
45
+ paginator = described_class.new('http://www.example.com/oai')
46
+
47
+ expect {
48
+ paginator.items('GetRecord', 'GetRecord/record', 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
49
+ }.to raise_error(IdDoesNotExistError)
50
+ end
51
+
52
+ it 'raises a No Records Match Error if returned from the repository' do
53
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc&from=2999-01-01',
54
+ 'no_records_match_error.xml')
55
+ paginator = described_class.new('http://www.example.com/oai')
56
+
57
+ expect {
58
+ paginator.
59
+ items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').
60
+ first
61
+ }.to raise_error(NoRecordsMatchError)
62
+ end
63
+
64
+ it 'raises a No Metadata Formats Error if returned from the repository' do
65
+ stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats&identifier=bad',
66
+ 'no_metadata_formats_error.xml')
67
+ paginator = described_class.new('http://www.example.com/oai')
68
+
69
+ expect {
70
+ paginator.items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', 'identifier' => 'bad').first
71
+ }.to raise_error(NoMetadataFormatsError)
72
+ end
73
+
74
+ it 'raises a No Set Hierarchy Error if returned from the repository' do
75
+ stub_oai_request('http://www.example.com/oai?verb=ListSets',
76
+ 'no_set_hierarchy_error.xml')
77
+ paginator = described_class.new('http://www.example.com/oai')
78
+
79
+ expect { paginator.items('ListSets', 'ListSets/set').first }.
80
+ to raise_error(NoSetHierarchyError)
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,38 @@
1
+ require 'fieldhand/record'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ RSpec.describe Record do
6
+ describe '#deleted?' do
7
+ it 'is true when the record has a deleted status' do
8
+ element = ::Ox.parse('<record><header status="deleted"/></record>')
9
+ record = described_class.new(element)
10
+
11
+ expect(record).to be_deleted
12
+ end
13
+
14
+ it 'is false when the record does not have a status' do
15
+ element = ::Ox.parse('<record><header/></record>')
16
+ record = described_class.new(element)
17
+
18
+ expect(record).not_to be_deleted
19
+ end
20
+ end
21
+
22
+ describe '#about' do
23
+ it 'returns an empty array if there are no about elements' do
24
+ element = ::Ox.parse('<record/>')
25
+ record = described_class.new(element)
26
+
27
+ expect(record.about).to be_empty
28
+ end
29
+
30
+ it 'returns about sections when present' do
31
+ element = ::Ox.parse('<record><about/><about/></record>')
32
+ record = described_class.new(element)
33
+
34
+ expect(record.about.size).to eq(2)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,191 @@
1
+ require 'fieldhand/repository'
2
+
3
+ module Fieldhand
4
+ RSpec.describe Repository do
5
+ describe '#metadata_formats' do
6
+ it 'returns the supported metadata formats for this repository' do
7
+ stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats', 'list_metadata_formats.xml')
8
+ repository = described_class.new('http://www.example.com/oai')
9
+ formats = repository.metadata_formats.to_a
10
+
11
+ expect(formats.size).to eq(1)
12
+ end
13
+
14
+ it 'populates metadata formats with the right information' do
15
+ stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats', 'list_metadata_formats.xml')
16
+ repository = described_class.new('http://www.example.com/oai')
17
+ format = repository.metadata_formats.first
18
+
19
+ expect(format).to have_attributes(:prefix => 'oai_dc',
20
+ :schema => URI('http://www.openarchives.org/OAI/2.0/oai_dc.xsd'),
21
+ :namespace => URI('http://www.openarchives.org/OAI/2.0/oai_dc/'))
22
+ end
23
+
24
+ it 'raises an error if the connection times out' do
25
+ stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
26
+ to_timeout
27
+ repository = described_class.new('http://www.example.com/oai')
28
+
29
+ expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
30
+ end
31
+
32
+ it 'raises an error if the connection resets' do
33
+ stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
34
+ to_raise(::Errno::ECONNRESET)
35
+ repository = described_class.new('http://www.example.com/oai')
36
+
37
+ expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
38
+ end
39
+
40
+ it 'raises an error if the host is unreachable' do
41
+ stub_request(:get, 'http://www.example.com/oai?verb=ListMetadataFormats').
42
+ to_raise(::Errno::EHOSTUNREACH)
43
+ repository = described_class.new('http://www.example.com/oai')
44
+
45
+ expect { repository.metadata_formats.to_a }.to raise_error(NetworkError)
46
+ end
47
+
48
+ it 'supports an optional identifier argument' do
49
+ stub_oai_request('http://www.example.com/oai?verb=ListMetadataFormats&identifier=foobar',
50
+ 'list_metadata_formats.xml')
51
+ repository = described_class.new('http://www.example.com/oai')
52
+
53
+ repository.metadata_formats('foobar').to_a
54
+ end
55
+ end
56
+
57
+ describe '#sets' do
58
+ it 'returns the sets for this repository' do
59
+ stub_oai_request('http://www.example.com/oai?verb=ListSets', 'list_sets_2.xml')
60
+ repository = described_class.new('http://www.example.com/oai')
61
+ set_b = repository.sets.first
62
+
63
+ expect(set_b).to have_attributes(:spec => 'B', :name => 'Set B.')
64
+ end
65
+
66
+ it 'paginates over all sets for this repository' do
67
+ stub_oai_request('http://www.example.com/oai?verb=ListSets', 'list_sets_1.xml')
68
+ stub_oai_request('http://www.example.com/oai?verb=ListSets&resumptionToken=foobar', 'list_sets_2.xml')
69
+ repository = described_class.new('http://www.example.com/oai')
70
+ sets = repository.sets.to_a
71
+
72
+ expect(sets.size).to eq(2)
73
+ end
74
+
75
+ it 'raises an error if the connection times out while consuming' do
76
+ stub_request(:get, 'http://www.example.com/oai?verb=ListSets').to_timeout
77
+ repository = described_class.new('http://www.example.com/oai')
78
+
79
+ expect { repository.sets.to_a }.to raise_error(NetworkError)
80
+ end
81
+ end
82
+
83
+ describe '#records' do
84
+ it 'defaults to using a metadata prefix of oai_dc' do
85
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
86
+ repository = described_class.new('http://www.example.com/oai')
87
+
88
+ repository.records.first
89
+ end
90
+
91
+ it 'returns all records for this repository' do
92
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
93
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&resumptionToken=foobar', 'list_records_2.xml')
94
+ repository = described_class.new('http://www.example.com/oai')
95
+ records = repository.records(:metadata_prefix => 'oai_dc').to_a
96
+
97
+ expect(records.size).to eq(4)
98
+ end
99
+
100
+ it 'populates records with the right information' do
101
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_1.xml')
102
+ repository = described_class.new('http://www.example.com/oai')
103
+ record = repository.records.first
104
+
105
+ expect(record).to have_attributes(:identifier => 'oai:oai.datacite.org:32355',
106
+ :datestamp => ::Time.xmlschema('2011-07-07T11:19:03Z'),
107
+ :sets => %w[TIB TIB.DAGST])
108
+ end
109
+
110
+ it 'populates deleted records with the right information' do
111
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc', 'list_records_2.xml')
112
+ repository = described_class.new('http://www.example.com/oai')
113
+ record = repository.records.first
114
+
115
+ expect(record).to have_attributes(:status => 'deleted',
116
+ :datestamp => ::Time.xmlschema('2011-03-04T14:18:47Z'),
117
+ :sets => %w[BL BL.WAP])
118
+ end
119
+
120
+ it 'supports passing extra arguments to the request' do
121
+ stub_oai_request('http://www.example.com/oai?verb=ListRecords&metadataPrefix=oai_dc&from=2001-01-01&until=2002-01-01',
122
+ 'list_records_2.xml')
123
+ repository = described_class.new('http://www.example.com/oai')
124
+
125
+ repository.records(:from => '2001-01-01', :until => '2002-01-01')
126
+ end
127
+ end
128
+
129
+ describe '#identifiers' do
130
+ it 'defaults to a metadata prefix of "oai_dc"' do
131
+ stub_oai_request('http://www.example.com/oai?verb=ListIdentifiers&metadataPrefix=oai_dc',
132
+ 'list_identifiers.xml')
133
+ repository = described_class.new('http://www.example.com/oai')
134
+
135
+ repository.identifiers.first
136
+ end
137
+
138
+ it 'returns all headers from the repository' do
139
+ stub_oai_request('http://www.example.com/oai?verb=ListIdentifiers&metadataPrefix=oai_dc',
140
+ 'list_identifiers.xml')
141
+ repository = described_class.new('http://www.example.com/oai')
142
+ headers = repository.identifiers(:metadata_prefix => 'oai_dc').to_a
143
+
144
+ expect(headers.size).to eq(2)
145
+ end
146
+ end
147
+
148
+ describe '#identify' do
149
+ it 'returns information about the repository' do
150
+ stub_oai_request('http://www.example.com/oai?verb=Identify', 'identify.xml')
151
+ repository = described_class.new('http://www.example.com/oai')
152
+ identify = repository.identify
153
+
154
+ expect(identify).to have_attributes(:name => 'DataCite MDS',
155
+ :base_url => URI('http://oai.datacite.org/oai'),
156
+ :protocol_version => '2.0',
157
+ :earliest_datestamp => ::Time.xmlschema('2011-01-01T00:00:00Z'),
158
+ :deleted_record => 'persistent',
159
+ :granularity => 'YYYY-MM-DDThh:mm:ssZ',
160
+ :admin_emails => %w[admin@datacite.org],
161
+ :compression => %w[gzip deflate])
162
+ end
163
+
164
+ it 'supports HTTPS repositories' do
165
+ stub_oai_request('https://www.example.com/oai?verb=Identify', 'identify.xml')
166
+ repository = described_class.new('https://www.example.com/oai')
167
+
168
+ expect(repository.identify).not_to be_nil
169
+ end
170
+ end
171
+
172
+ describe '#get' do
173
+ it 'defaults to a metadata prefix of "oai_dc"' do
174
+ stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:oai.datacite.org:32356',
175
+ 'get_record.xml')
176
+ repository = described_class.new('http://www.example.com/oai')
177
+
178
+ repository.get('oai:oai.datacite.org:32356')
179
+ end
180
+
181
+ it 'fetches the record by identifier' do
182
+ stub_oai_request('http://www.example.com/oai?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:oai.datacite.org:32356',
183
+ 'get_record.xml')
184
+ repository = described_class.new('http://www.example.com/oai')
185
+
186
+ expect(repository.get('oai:oai.datacite.org:32356', :metadata_prefix => 'oai_dc')).
187
+ to have_attributes(:identifier => 'oai:oai.datacite.org:32356')
188
+ end
189
+ end
190
+ end
191
+ end