fieldhand 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +538 -0
- data/lib/fieldhand.rb +1 -0
- data/lib/fieldhand/arguments.rb +42 -0
- data/lib/fieldhand/datestamp.rb +28 -0
- data/lib/fieldhand/header.rb +42 -0
- data/lib/fieldhand/identify.rb +51 -0
- data/lib/fieldhand/logger.rb +28 -0
- data/lib/fieldhand/metadata_format.rb +30 -0
- data/lib/fieldhand/paginator.rb +90 -0
- data/lib/fieldhand/record.rb +46 -0
- data/lib/fieldhand/repository.rb +95 -0
- data/lib/fieldhand/set.rb +28 -0
- data/spec/fieldhand/arguments_spec.rb +94 -0
- data/spec/fieldhand/datestamp_spec.rb +38 -0
- data/spec/fieldhand/header_spec.rb +38 -0
- data/spec/fieldhand/identify_spec.rb +43 -0
- data/spec/fieldhand/metadata_format_spec.rb +15 -0
- data/spec/fieldhand/paginator_spec.rb +84 -0
- data/spec/fieldhand/record_spec.rb +38 -0
- data/spec/fieldhand/repository_spec.rb +191 -0
- data/spec/fieldhand/set_spec.rb +31 -0
- data/spec/spec_helper.rb +28 -0
- metadata +145 -0
data/lib/fieldhand.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'fieldhand/repository'
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
|
3
|
+
module Fieldhand
  # A class for converting Fieldhand arguments into OAI-PMH query parameters.
  #
  # The recognised argument keys are the Symbols listed in VALID_KEYS. Any other key makes #to_query raise an
  # ArgumentError. An argument whose value is nil is treated as if it had not been given at all.
  class Arguments
    # Maps each accepted argument key to the name of the query parameter it is sent as.
    VALID_KEYS = {
      :metadata_prefix => 'metadataPrefix',
      :resumption_token => 'resumptionToken',
      :from => 'from',
      :until => 'until',
      :set => 'set'
    }.freeze

    attr_reader :options

    # options - a Hash of arguments keyed by the Symbols in VALID_KEYS (default: no arguments).
    def initialize(options = {})
      @options = options
    end

    # Build the Hash of query parameters for the stored options, starting from #defaults.
    #
    # Keys are validated before values are looked at, so an unknown key raises even when its value is nil.
    # Known keys with a nil value are skipped: previously `:from => nil` crashed inside Datestamp.unparse and
    # `:set => nil` produced an empty `set` parameter.
    #
    # Returns a Hash of String parameter names to String values.
    # Raises ::ArgumentError if an option key is not in VALID_KEYS.
    def to_query
      options.inject(defaults) do |query, (key, value)|
        raise ::ArgumentError, "unknown argument: #{key}" unless VALID_KEYS.key?(key)

        query[VALID_KEYS.fetch(key)] = convert_value(key, value) unless value.nil?

        query
      end
    end

    # The parameters sent when the caller does not override them. A fresh Hash is returned on every call, so
    # #to_query can safely add to it.
    def defaults
      { 'metadataPrefix' => 'oai_dc' }
    end

    private

    # Turn an argument value into its query string form: :from and :until go through Datestamp.unparse, every
    # other argument is converted with to_s.
    def convert_value(key, value)
      return value.to_s unless key == :from || key == :until

      Datestamp.unparse(value)
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
module Fieldhand
  # A class to handle datestamps of varying granularity.
  class Datestamp
    # Convert a datestamp String into a Ruby date or time object.
    #
    # A 10 character datestamp is parsed with ::Date.strptime using its default format; anything else is parsed
    # with ::Time.xmlschema.
    #
    # Returns a ::Date or a ::Time.
    def self.parse(datestamp)
      if datestamp.size == 10
        ::Date.strptime(datestamp)
      else
        ::Time.xmlschema(datestamp)
      end
    end

    # Convert a String, ::Date, ::DateTime, ::Time or any object responding to xmlschema into a datestamp String.
    #
    # Strings are returned untouched. Times and DateTimes are expressed in UTC with a trailing "Z".
    def self.unparse(datestamp)
      case datestamp
      when ::String
        datestamp
      when ::DateTime
        # DateTime is a subclass of Date, so it has to be matched first; otherwise it would be formatted with its
        # local offset (e.g. "+01:00") instead of being converted to UTC.
        datestamp.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when ::Date
        datestamp.strftime
      when ::Time
        # getutc returns a new Time; utc would convert the caller's object in place (and fail if it is frozen).
        datestamp.getutc.xmlschema
      else
        datestamp.xmlschema
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
|
3
|
+
module Fieldhand
  # The header of an item: its unique identifier plus the properties used for selective harvesting.
  #
  # A header is made up of:
  #
  # * the unique identifier of the item within the repository;
  # * the datestamp -- the date of creation, modification or deletion of the record;
  # * zero or more setSpec elements -- the sets the item belongs to;
  # * an optional status attribute whose value "deleted" signals that the metadata format is no longer available
  #   for the item (depending on the repository's support for deletions).
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#header
  class Header
    attr_reader :element

    # element - the parsed header element this object reads from.
    def initialize(element)
      @element = element
    end

    # The value of the element's "status" attribute, as returned by element['status'].
    def status
      element['status']
    end

    # True when the status attribute is exactly "deleted".
    def deleted?
      current_status = status
      current_status == 'deleted'
    end

    # The text of the identifier child element (memoised).
    def identifier
      return @identifier if @identifier

      @identifier = element.identifier.text
    end

    # The datestamp child element's text parsed by Datestamp.parse (memoised).
    def datestamp
      return @datestamp if @datestamp

      raw_datestamp = element.datestamp.text
      @datestamp = Datestamp.parse(raw_datestamp)
    end

    # Whatever element.locate finds for the path 'setSpec/^String' (memoised).
    def sets
      return @sets if @sets

      @sets = element.locate('setSpec/^String')
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'fieldhand/datestamp'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
module Fieldhand
  # A repository's description of itself.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
  class Identify
    attr_reader :element

    # element - the parsed Identify element this object reads from.
    def initialize(element)
      @element = element
    end

    # Text of the repositoryName child (memoised).
    def name
      return @name if @name

      @name = element.repositoryName.text
    end

    # Text of the baseURL child converted with URI() (memoised).
    def base_url
      return @base_url if @base_url

      raw_url = element.baseURL.text
      @base_url = URI(raw_url)
    end

    # Text of the protocolVersion child (memoised).
    def protocol_version
      return @protocol_version if @protocol_version

      @protocol_version = element.protocolVersion.text
    end

    # Text of the earliestDatestamp child parsed by Datestamp.parse (memoised).
    def earliest_datestamp
      return @earliest_datestamp if @earliest_datestamp

      raw_datestamp = element.earliestDatestamp.text
      @earliest_datestamp = Datestamp.parse(raw_datestamp)
    end

    # Text of the deletedRecord child (memoised).
    def deleted_record
      return @deleted_record if @deleted_record

      @deleted_record = element.deletedRecord.text
    end

    # Text of the granularity child (memoised).
    def granularity
      return @granularity if @granularity

      @granularity = element.granularity.text
    end

    # Result of locating 'adminEmail/^String' under the element (memoised).
    def admin_emails
      return @admin_emails if @admin_emails

      @admin_emails = element.locate('adminEmail/^String')
    end

    # Result of locating 'compression/^String' under the element (memoised).
    def compression
      return @compression if @compression

      @compression = element.locate('compression/^String')
    end

    # Result of locating 'description' under the element (memoised).
    def descriptions
      return @descriptions if @descriptions

      @descriptions = element.locate('description')
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'rbconfig'
|
3
|
+
|
4
|
+
module Fieldhand
  # A default null logger for discarding log messages.
  module Logger
    module_function

    # Build a new logger that discards every message.
    #
    # ::Logger is given no log device at all rather than a path to the null device: it then drops messages
    # without opening a file, so creating many loggers (one per default-constructed caller) no longer leaves
    # open handles on the null device behind.
    #
    # Returns a new ::Logger on every call.
    def null
      ::Logger.new(nil)
    end

    # Determine the null device on this platform, a backport of more recent Rubies' File::NULL
    # See https://github.com/marcandre/backports/blob/v3.8.0/lib/backports/1.9.3/file/null.rb
    #
    # Returns the device name as a String.
    def null_device
      platform = ::RUBY_PLATFORM
      # RUBY_PLATFORM is just "java" in that case, so ask RbConfig for the host operating system instead.
      platform = ::RbConfig::CONFIG['host_os'] if platform == 'java'

      case platform
      when /mswin|mingw/i then 'NUL'
      when /amiga/i then 'NIL:'
      when /openvms/i then 'NL:'
      else
        '/dev/null'
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Fieldhand
  # One of the metadata formats a repository can disseminate.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
  class MetadataFormat
    attr_reader :element

    # element - the parsed metadataFormat element this object reads from.
    def initialize(element)
      @element = element
    end

    # Text of the metadataPrefix child (memoised).
    def prefix
      return @prefix if @prefix

      @prefix = element.metadataPrefix.text
    end

    # The String form of a format is its prefix.
    def to_s
      prefix
    end

    # Text of the schema child converted with URI() (memoised).
    def schema
      return @schema if @schema

      schema_location = element.schema.text
      @schema = URI(schema_location)
    end

    # Text of the metadataNamespace child converted with URI() (memoised).
    def namespace
      return @namespace if @namespace

      namespace_location = element.metadataNamespace.text
      @namespace = URI(namespace_location)
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'fieldhand/logger'
|
2
|
+
require 'ox'
|
3
|
+
require 'cgi'
|
4
|
+
require 'net/http'
|
5
|
+
require 'uri'
|
6
|
+
|
7
|
+
module Fieldhand
  # Raised when the HTTP request itself fails (see Paginator#request).
  NetworkError = ::Class.new(::StandardError)
  # Base class for the errors a repository reports inside its response.
  ProtocolError = ::Class.new(::StandardError)
  BadArgumentError = ::Class.new(ProtocolError)
  BadResumptionTokenError = ::Class.new(ProtocolError)
  BadVerbError = ::Class.new(ProtocolError)
  CannotDisseminateFormatError = ::Class.new(ProtocolError)
  IdDoesNotExistError = ::Class.new(ProtocolError)
  NoRecordsMatchError = ::Class.new(ProtocolError)
  NoMetadataFormatsError = ::Class.new(ProtocolError)
  NoSetHierarchyError = ::Class.new(ProtocolError)

  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
  # results using a resumption token.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
  class Paginator
    # Maps the "code" attribute of an error element to the exception class raised for it.
    ERROR_CODES = {
      'badArgument' => BadArgumentError,
      'badResumptionToken' => BadResumptionTokenError,
      'badVerb' => BadVerbError,
      'cannotDisseminateFormat' => CannotDisseminateFormatError,
      'idDoesNotExist' => IdDoesNotExistError,
      'noRecordsMatch' => NoRecordsMatchError,
      'noMetadataFormats' => NoMetadataFormatsError,
      'noSetHierarchy' => NoSetHierarchyError
    }.freeze

    attr_reader :uri, :logger, :http

    # uri    - the repository's base URL as a URI or a String.
    # logger - where requests and resumption tokens are logged (default: Logger.null).
    #
    # Builds the Net::HTTP client for the URI's host and port, with SSL switched on for https URLs.
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
      @http = ::Net::HTTP.new(@uri.host, @uri.port)
      @http.use_ssl = true if @uri.scheme == 'https'
    end

    # Yield every element found at `path` in the response to `verb`, requesting further pages for as long as the
    # response carries a resumption token.
    #
    # verb  - the OAI-PMH verb to send as the "verb" parameter.
    # path  - the path passed to `locate` on the response's root element to find the items.
    # query - a Hash of additional query parameters for the first request (default: none).
    #
    # Returns an Enumerator when called without a block.
    # Raises the matching ProtocolError subclass when the response contains an error with a known code, and
    # NetworkError when the request fails.
    def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      return enum_for(:items, verb, path, query) unless block_given?

      loop do
        document = ::Ox.parse(request(query.merge('verb' => verb)))

        document.root.locate('error').each do |error|
          convert_error(error)
        end

        document.root.locate(path).each do |item|
          yield item
        end

        resumption_token = document.root.locate('?/resumptionToken/^String').first
        break unless resumption_token

        logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
        # Follow-up requests carry only the token (the verb is merged back in at the top of the loop).
        query = { 'resumptionToken' => resumption_token }
      end
    end

    private

    # Perform a GET against the repository with the given query parameters and return the response body.
    #
    # Any StandardError raised while requesting (timeouts included) is re-raised as a NetworkError.
    # NOTE(review): the HTTP status of the response is not inspected here; the body is returned whatever it is.
    def request(query = {})
      request_uri = uri.dup
      request_uri.query = encode_query(query)

      logger.info('Fieldhand') { "GET #{request_uri}" }
      http.get(request_uri.request_uri).body
    rescue ::Timeout::Error => e
      raise NetworkError, "timeout requesting #{query}: #{e}"
    rescue => e
      raise NetworkError, "error requesting #{query}: #{e}"
    end

    # Raise the exception registered in ERROR_CODES for the error element's code, using the element's text as the
    # message. Errors with an unregistered code are ignored.
    def convert_error(error)
      return unless ERROR_CODES.key?(error['code'])

      raise ERROR_CODES.fetch(error['code']), error.text
    end

    # Serialise a Hash of parameters into an escaped query string.
    #
    # Keys and values are converted with to_s before escaping, so Symbols and other non-String values (for
    # example an object whose to_s is its query form) are accepted instead of being rejected by CGI.escape.
    def encode_query(query = {})
      query.map { |key, value| "#{::CGI.escape(key.to_s)}=#{::CGI.escape(value.to_s)}" }.join('&')
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'fieldhand/header'
|
2
|
+
|
3
|
+
module Fieldhand
|
4
|
+
# A record is metadata expressed in a single format.
|
5
|
+
#
|
6
|
+
# See https://www.openarchives.org/OAI/openarchivesprotocol.html#Record
|
7
|
+
class Record
|
8
|
+
attr_reader :element
|
9
|
+
|
10
|
+
def initialize(element)
|
11
|
+
@element = element
|
12
|
+
end
|
13
|
+
|
14
|
+
def deleted?
|
15
|
+
header.deleted?
|
16
|
+
end
|
17
|
+
|
18
|
+
def status
|
19
|
+
header.status
|
20
|
+
end
|
21
|
+
|
22
|
+
def identifier
|
23
|
+
header.identifier
|
24
|
+
end
|
25
|
+
|
26
|
+
def datestamp
|
27
|
+
header.datestamp
|
28
|
+
end
|
29
|
+
|
30
|
+
def sets
|
31
|
+
header.sets
|
32
|
+
end
|
33
|
+
|
34
|
+
def metadata
|
35
|
+
@metadata ||= element.locate('metadata[0]').first
|
36
|
+
end
|
37
|
+
|
38
|
+
def about
|
39
|
+
@about ||= element.locate('about')
|
40
|
+
end
|
41
|
+
|
42
|
+
def header
|
43
|
+
@header ||= Header.new(element.header)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'fieldhand/arguments'
|
2
|
+
require 'fieldhand/header'
|
3
|
+
require 'fieldhand/identify'
|
4
|
+
require 'fieldhand/logger'
|
5
|
+
require 'fieldhand/metadata_format'
|
6
|
+
require 'fieldhand/paginator'
|
7
|
+
require 'fieldhand/record'
|
8
|
+
require 'fieldhand/set'
|
9
|
+
require 'uri'
|
10
|
+
|
11
|
+
module Fieldhand
  # A repository is a network accessible server that can process the 6 OAI-PMH requests.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html
  class Repository
    attr_reader :uri, :logger

    # uri    - the repository's base URL as a URI or a String.
    # logger - the logger handed to the paginator (default: Logger.null).
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
    end

    # Send the Identify verb and return the first result wrapped in an Identify (nil if there is none).
    def identify
      paginator.
        items('Identify', 'Identify').
        map { |identify| Identify.new(identify) }.
        first
    end

    # Yield a MetadataFormat for every format returned by ListMetadataFormats.
    #
    # identifier - when given, sent as the "identifier" parameter (default: nil, parameter omitted).
    #
    # Returns an Enumerator when called without a block.
    def metadata_formats(identifier = nil)
      return enum_for(:metadata_formats, identifier) unless block_given?

      arguments = {}
      arguments['identifier'] = identifier if identifier

      paginator.
        items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', arguments).
        each do |format|
          yield MetadataFormat.new(format)
        end
    end

    # Yield a Set for every set returned by ListSets.
    #
    # Returns an Enumerator when called without a block.
    def sets
      return enum_for(:sets) unless block_given?

      paginator.
        items('ListSets', 'ListSets/set').
        each do |set|
          yield Set.new(set)
        end
    end

    # Yield a Record for every record returned by ListRecords.
    #
    # arguments - a Hash converted to query parameters by Arguments#to_query (default: none).
    #
    # Returns an Enumerator when called without a block.
    def records(arguments = {})
      return enum_for(:records, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListRecords', 'ListRecords/record', query).
        each do |record|
          yield Record.new(record)
        end
    end

    # Yield a Header for every header returned by ListIdentifiers.
    #
    # arguments - a Hash converted to query parameters by Arguments#to_query (default: none).
    #
    # Returns an Enumerator when called without a block.
    def identifiers(arguments = {})
      return enum_for(:identifiers, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListIdentifiers', 'ListIdentifiers/header', query).
        each do |header|
          yield Header.new(header)
        end
    end

    # Send GetRecord for a single identifier and return the first result wrapped in a Record (nil if none).
    #
    # identifier - sent as the "identifier" parameter.
    # arguments  - a Hash; only :metadata_prefix is read (default: 'oai_dc').
    #
    # The metadata prefix is converted with to_s, matching how #records and #identifiers treat it via Arguments,
    # so an object such as a MetadataFormat (whose to_s is its prefix) can be passed here as well.
    def get(identifier, arguments = {})
      query = {
        'identifier' => identifier,
        'metadataPrefix' => arguments.fetch(:metadata_prefix, 'oai_dc').to_s
      }

      paginator.
        items('GetRecord', 'GetRecord/record', query).
        map { |record| Record.new(record) }.
        first
    end

    private

    # The Paginator for this repository's URI and logger, built on first use and then reused.
    def paginator
      @paginator ||= Paginator.new(uri, logger)
    end
  end
end
|