fieldhand 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +538 -0
- data/lib/fieldhand.rb +1 -0
- data/lib/fieldhand/arguments.rb +42 -0
- data/lib/fieldhand/datestamp.rb +28 -0
- data/lib/fieldhand/header.rb +42 -0
- data/lib/fieldhand/identify.rb +51 -0
- data/lib/fieldhand/logger.rb +28 -0
- data/lib/fieldhand/metadata_format.rb +30 -0
- data/lib/fieldhand/paginator.rb +90 -0
- data/lib/fieldhand/record.rb +46 -0
- data/lib/fieldhand/repository.rb +95 -0
- data/lib/fieldhand/set.rb +28 -0
- data/spec/fieldhand/arguments_spec.rb +94 -0
- data/spec/fieldhand/datestamp_spec.rb +38 -0
- data/spec/fieldhand/header_spec.rb +38 -0
- data/spec/fieldhand/identify_spec.rb +43 -0
- data/spec/fieldhand/metadata_format_spec.rb +15 -0
- data/spec/fieldhand/paginator_spec.rb +84 -0
- data/spec/fieldhand/record_spec.rb +38 -0
- data/spec/fieldhand/repository_spec.rb +191 -0
- data/spec/fieldhand/set_spec.rb +31 -0
- data/spec/spec_helper.rb +28 -0
- metadata +145 -0
data/lib/fieldhand.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'fieldhand/repository'
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
|
3
|
+
module Fieldhand
  # Translates the options accepted by Fieldhand (symbol keys with arbitrary values) into the string-keyed query
  # parameters that are sent to an OAI-PMH repository.
  class Arguments
    # Every supported option, mapped to the name of the query parameter it is sent as.
    VALID_KEYS = {
      :metadata_prefix => 'metadataPrefix',
      :resumption_token => 'resumptionToken',
      :from => 'from',
      :until => 'until',
      :set => 'set'
    }.freeze

    attr_reader :options

    # options - a Hash keyed by any of the symbols in VALID_KEYS.
    def initialize(options = {})
      @options = options
    end

    # Returns a new Hash of query parameters: the defaults, overridden by each given option under its OAI-PMH
    # parameter name with its value converted to a string.
    #
    # Raises ::ArgumentError as soon as an option that is not listed in VALID_KEYS is encountered.
    def to_query
      query = defaults

      options.each do |key, value|
        parameter = VALID_KEYS.fetch(key) { raise ::ArgumentError, "unknown argument: #{key}" }
        query[parameter] = convert_value(key, value)
      end

      query
    end

    # The parameters used when no options override them. A fresh Hash is returned on every call, so to_query is
    # free to mutate it.
    def defaults
      { 'metadataPrefix' => 'oai_dc' }
    end

    private

    # The two date range options go through Datestamp.unparse; everything else is sent as its #to_s form.
    def convert_value(key, value)
      case key
      when :from, :until then Datestamp.unparse(value)
      else value.to_s
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
module Fieldhand
  # Converts between the datestamp strings used by OAI-PMH and Ruby date and time objects, supporting both day
  # granularity (e.g. "2001-01-01") and seconds granularity (e.g. "2001-01-01T00:00:00Z").
  class Datestamp
    # Parse a datestamp string.
    #
    # A 10 character string is treated as a day-granularity datestamp and returned as a ::Date; anything else is
    # parsed as an XML Schema dateTime and returned as a ::Time.
    def self.parse(datestamp)
      if datestamp.size == 10
        ::Date.strptime(datestamp)
      else
        ::Time.xmlschema(datestamp)
      end
    end

    # Convert a value into a datestamp string suitable for a request.
    #
    # * Strings are returned untouched so callers can supply a preformatted datestamp.
    # * DateTimes and Times are converted to UTC and formatted with a trailing "Z".
    # * Dates are formatted with day granularity.
    # * Anything else is expected to respond to #xmlschema.
    #
    # The given object is never modified.
    def self.unparse(datestamp)
      case datestamp
      when ::String
        datestamp
      when ::DateTime
        # DateTime is a subclass of Date so it has to be matched first, otherwise it would be formatted with its
        # local offset rather than in UTC.
        datestamp.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when ::Date
        datestamp.strftime
      when ::Time
        # Time#utc converts the receiver in place, which would alter the caller's object; getutc returns a copy.
        datestamp.getutc.xmlschema
      else
        datestamp.xmlschema
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
|
3
|
+
module Fieldhand
  # The header of a record: the part that identifies an item and carries what is needed for selective harvesting.
  #
  # A header is made up of:
  #
  # * a unique identifier for the item within the repository;
  # * a datestamp -- when the record was created, modified or deleted;
  # * zero or more setSpec elements naming the sets the item belongs to;
  # * an optional status attribute, which is "deleted" when the record is no longer available in the requested
  #   metadata format (only for repositories that keep track of deletions).
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#header
  class Header
    attr_reader :element

    # element - the parsed <header> element; it is read lazily by the methods below.
    def initialize(element)
      @element = element
    end

    # The text of the <identifier> child, memoized.
    def identifier
      @identifier ||= element.identifier.text
    end

    # The <datestamp> child parsed by Datestamp.parse, memoized.
    def datestamp
      @datestamp ||= begin
        text = element.datestamp.text
        Datestamp.parse(text)
      end
    end

    # The text of every <setSpec> child as an Array, memoized.
    def sets
      @sets ||= element.locate('setSpec/^String')
    end

    # The value of the element's status attribute (nil when it has none).
    def status
      element['status']
    end

    # Whether the status attribute marks this record as deleted.
    def deleted?
      status == 'deleted'
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Fieldhand
  # The description a repository gives of itself in response to an Identify request. Every reader below pulls its
  # value out of the wrapped element on first use and memoizes it.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
  class Identify
    attr_reader :element

    # element - the parsed <Identify> element.
    def initialize(element)
      @element = element
    end

    # The text of <repositoryName>.
    def name
      @name ||= element.repositoryName.text
    end

    # The <baseURL> converted to a URI.
    def base_url
      @base_url ||= begin
        url = element.baseURL.text
        URI(url)
      end
    end

    # The text of <protocolVersion>.
    def protocol_version
      @protocol_version ||= element.protocolVersion.text
    end

    # The <earliestDatestamp> parsed by Datestamp.parse.
    def earliest_datestamp
      @earliest_datestamp ||= begin
        text = element.earliestDatestamp.text
        Datestamp.parse(text)
      end
    end

    # The text of <deletedRecord>.
    def deleted_record
      @deleted_record ||= element.deletedRecord.text
    end

    # The text of <granularity>.
    def granularity
      @granularity ||= element.granularity.text
    end

    # The text of every <adminEmail> child, as an Array.
    def admin_emails
      @admin_emails ||= element.locate('adminEmail/^String')
    end

    # The text of every <compression> child, as an Array.
    def compression
      @compression ||= element.locate('compression/^String')
    end

    # Every <description> child, left as unparsed elements.
    def descriptions
      @descriptions ||= element.locate('description')
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'rbconfig'
|
3
|
+
|
4
|
+
module Fieldhand
  # Supplies the default logger: one that writes to the platform's null device so that messages are discarded.
  module Logger
    module_function

    # Returns a new ::Logger that writes to the null device.
    def null
      ::Logger.new(null_device)
    end

    # Returns the path of the null device on this platform, a backport of more recent Rubies' File::NULL.
    # See https://github.com/marcandre/backports/blob/v3.8.0/lib/backports/1.9.3/file/null.rb
    def null_device
      # When RUBY_PLATFORM is just "java", ask RbConfig for the host operating system instead.
      platform = ::RUBY_PLATFORM == 'java' ? ::RbConfig::CONFIG['host_os'] : ::RUBY_PLATFORM

      if platform =~ /mswin|mingw/i
        'NUL'
      elsif platform =~ /amiga/i
        'NIL:'
      elsif platform =~ /openvms/i
        'NL:'
      else
        '/dev/null'
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Fieldhand
  # One of the metadata formats a repository is able to disseminate. Values are read from the wrapped element on
  # first use and memoized.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
  class MetadataFormat
    attr_reader :element

    # element - the parsed <metadataFormat> element.
    def initialize(element)
      @element = element
    end

    # The text of <metadataPrefix>.
    def prefix
      @prefix ||= element.metadataPrefix.text
    end

    # A format's string form is its prefix.
    def to_s
      prefix
    end

    # The <schema> location converted to a URI.
    def schema
      @schema ||= begin
        location = element.schema.text
        URI(location)
      end
    end

    # The <metadataNamespace> converted to a URI.
    def namespace
      @namespace ||= begin
        name = element.metadataNamespace.text
        URI(name)
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'fieldhand/logger'
|
2
|
+
require 'ox'
|
3
|
+
require 'cgi'
|
4
|
+
require 'net/http'
|
5
|
+
require 'uri'
|
6
|
+
|
7
|
+
module Fieldhand
  # Raised when a request could not be completed, or the repository did not answer with a successful HTTP response.
  NetworkError = ::Class.new(::StandardError)

  # The parent of every error that a repository reports through an OAI-PMH <error> element.
  ProtocolError = ::Class.new(::StandardError)
  BadArgumentError = ::Class.new(ProtocolError)
  BadResumptionTokenError = ::Class.new(ProtocolError)
  BadVerbError = ::Class.new(ProtocolError)
  CannotDisseminateFormatError = ::Class.new(ProtocolError)
  IdDoesNotExistError = ::Class.new(ProtocolError)
  NoRecordsMatchError = ::Class.new(ProtocolError)
  NoMetadataFormatsError = ::Class.new(ProtocolError)
  NoSetHierarchyError = ::Class.new(ProtocolError)

  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
  # results using a resumption token.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
  class Paginator
    # The value of an <error> element's code attribute, mapped to the error class raised for it.
    ERROR_CODES = {
      'badArgument' => BadArgumentError,
      'badResumptionToken' => BadResumptionTokenError,
      'badVerb' => BadVerbError,
      'cannotDisseminateFormat' => CannotDisseminateFormatError,
      'idDoesNotExist' => IdDoesNotExistError,
      'noRecordsMatch' => NoRecordsMatchError,
      'noMetadataFormats' => NoMetadataFormatsError,
      'noSetHierarchy' => NoSetHierarchyError
    }.freeze

    attr_reader :uri, :logger, :http

    # uri    - the base URL of the repository, as a URI or a String.
    # logger - receives an info message per request and a debug message per resumption token.
    #
    # No connection is opened here; the HTTP client is only configured (with SSL when the scheme is https).
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
      @http = ::Net::HTTP.new(@uri.host, @uri.port)
      @http.use_ssl = true if @uri.scheme == 'https'
    end

    # Request `verb` from the repository and yield every element found at `path` in the response, then keep
    # requesting further pages for as long as a response carries a non-empty resumption token.
    #
    # verb  - the OAI-PMH verb, e.g. 'ListRecords'.
    # path  - the location of the wanted elements relative to the document root, e.g. 'ListRecords/record'.
    # query - extra query parameters for the first request only; follow-up requests send just the verb and the
    #         resumption token.
    #
    # Returns an Enumerator when called without a block.
    # Raises NetworkError if a request fails and the matching ProtocolError subclass if the response contains an
    # error with a code listed in ERROR_CODES.
    def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      return enum_for(:items, verb, path, query) unless block_given?

      loop do
        document = ::Ox.parse(request(query.merge('verb' => verb)))

        document.root.locate('error').each do |error|
          convert_error(error)
        end

        document.root.locate(path).each do |item|
          yield item
        end

        resumption_token = document.root.locate('?/resumptionToken/^String').first
        break unless resumption_token

        logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
        query = { 'resumptionToken' => resumption_token }
      end
    end

    private

    # Perform a GET against the repository with the given query parameters and return the response body.
    #
    # Anything other than a successful (2xx) response raises NetworkError rather than handing a body that is
    # unlikely to be an OAI-PMH document to the XML parser. Timeouts and any other failures while making the
    # request are raised as NetworkError too.
    def request(query = {})
      request_uri = uri.dup
      request_uri.query = encode_query(query)

      logger.info('Fieldhand') { "GET #{request_uri}" }
      response = http.get(request_uri.request_uri)
      unless response.is_a?(::Net::HTTPSuccess)
        raise NetworkError, "unexpected HTTP #{response.code} response requesting #{query}"
      end

      response.body
    rescue NetworkError
      # Already the right class (raised just above): don't wrap it a second time.
      raise
    rescue ::Timeout::Error => e
      raise NetworkError, "timeout requesting #{query}: #{e}"
    rescue => e
      raise NetworkError, "error requesting #{query}: #{e}"
    end

    # Raise the error class registered for the code of the given <error> element, using the element's text as the
    # message. Errors with a code that is not in ERROR_CODES are ignored.
    def convert_error(error)
      return unless ERROR_CODES.key?(error['code'])

      raise ERROR_CODES.fetch(error['code']), error.text
    end

    # Build a URL-encoded query string from a Hash of parameters.
    #
    # Keys and values are converted with #to_s first so that symbols, numbers and objects with a meaningful string
    # form can be passed, instead of failing inside CGI.escape.
    def encode_query(query = {})
      query.map { |key, value| "#{::CGI.escape(key.to_s)}=#{::CGI.escape(value.to_s)}" }.join('&')
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'fieldhand/header'
|
2
|
+
|
3
|
+
module Fieldhand
  # A record: the metadata of an item expressed in a single format, together with its header and any optional
  # "about" containers.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Record
  class Record
    attr_reader :element

    # element - the parsed <record> element.
    def initialize(element)
      @element = element
    end

    # The record's <header> child wrapped in a Header, memoized. The five readers that follow simply forward to it.
    def header
      @header ||= Header.new(element.header)
    end

    # See Header#identifier.
    def identifier
      header.identifier
    end

    # See Header#datestamp.
    def datestamp
      header.datestamp
    end

    # See Header#sets.
    def sets
      header.sets
    end

    # See Header#status.
    def status
      header.status
    end

    # See Header#deleted?.
    def deleted?
      header.deleted?
    end

    # The first <metadata> child as an unparsed element, or nil when the record has none. Memoized.
    def metadata
      @metadata ||= begin
        matches = element.locate('metadata[0]')
        matches.first
      end
    end

    # Every <about> child as an Array of unparsed elements, memoized.
    def about
      @about ||= element.locate('about')
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'fieldhand/arguments'
|
2
|
+
require 'fieldhand/header'
|
3
|
+
require 'fieldhand/identify'
|
4
|
+
require 'fieldhand/logger'
|
5
|
+
require 'fieldhand/metadata_format'
|
6
|
+
require 'fieldhand/paginator'
|
7
|
+
require 'fieldhand/record'
|
8
|
+
require 'fieldhand/set'
|
9
|
+
require 'uri'
|
10
|
+
|
11
|
+
module Fieldhand
  # A repository is a network accessible server that can process the 6 OAI-PMH requests.
  #
  # Each request is exposed as a method. The list methods yield one wrapped result at a time (fetching further
  # pages through the paginator) and return an Enumerator when called without a block.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html
  class Repository
    attr_reader :uri, :logger

    # uri    - the base URL of the repository, as a URI or a String.
    # logger - handed to the paginator that performs the requests.
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
    end

    # Send an Identify request and return the repository's description as an Identify.
    def identify
      paginator.
        items('Identify', 'Identify').
        map { |identify| Identify.new(identify) }.
        first
    end

    # Yield a MetadataFormat for every format the repository supports, or only those available for the item with
    # the given identifier when one is passed.
    def metadata_formats(identifier = nil)
      return enum_for(:metadata_formats, identifier) unless block_given?

      arguments = {}
      arguments['identifier'] = identifier if identifier

      paginator.
        items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', arguments).
        each do |format|
          yield MetadataFormat.new(format)
        end
    end

    # Yield a Set for every set in the repository.
    def sets
      return enum_for(:sets) unless block_given?

      paginator.
        items('ListSets', 'ListSets/set').
        each do |set|
          yield Set.new(set)
        end
    end

    # Yield a Record for every record matching the given arguments (see Arguments::VALID_KEYS).
    #
    # Raises ::ArgumentError for an unknown argument.
    def records(arguments = {})
      return enum_for(:records, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListRecords', 'ListRecords/record', query).
        each do |record|
          yield Record.new(record)
        end
    end

    # Yield a Header for every record matching the given arguments (see Arguments::VALID_KEYS).
    #
    # Raises ::ArgumentError for an unknown argument.
    def identifiers(arguments = {})
      return enum_for(:identifiers, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListIdentifiers', 'ListIdentifiers/header', query).
        each do |header|
          yield Header.new(header)
        end
    end

    # Fetch the single record with the given identifier and return it as a Record.
    #
    # arguments - may contain :metadata_prefix (defaults to 'oai_dc'); other keys are ignored.
    def get(identifier, arguments = {})
      query = {
        'identifier' => identifier,
        # Converted with #to_s, as the list requests do through Arguments, so that a MetadataFormat (or a symbol)
        # can be given here as well as a plain string.
        'metadataPrefix' => arguments.fetch(:metadata_prefix, 'oai_dc').to_s
      }

      paginator.
        items('GetRecord', 'GetRecord/record', query).
        map { |record| Record.new(record) }.
        first
    end

    private

    # The paginator that performs every request for this repository, created on first use.
    def paginator
      @paginator ||= Paginator.new(uri, logger)
    end
  end
end
|