fieldhand 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ require 'fieldhand/repository'
@@ -0,0 +1,42 @@
1
+ require 'fieldhand/datestamp'
2
+
3
module Fieldhand
  # A class for converting Fieldhand arguments into OAI-PMH query parameters.
  #
  # Options are given with Ruby-style symbol keys (e.g. :metadata_prefix) and are translated into the string
  # parameter names listed in VALID_KEYS.
  class Arguments
    # Maps each supported option key to the query parameter name it is sent as.
    VALID_KEYS = {
      :metadata_prefix => 'metadataPrefix',
      :resumption_token => 'resumptionToken',
      :from => 'from',
      :until => 'until',
      :set => 'set'
    }.freeze

    attr_reader :options

    # options - a Hash keyed by any of the keys in VALID_KEYS.
    def initialize(options = {})
      @options = options
    end

    # Build the Hash of query parameters for the given options.
    #
    # The default metadata prefix is included unless a resumption token is being sent: OAI-PMH defines
    # resumptionToken as an exclusive argument, so a defaulted metadataPrefix must not accompany it. Any option
    # the caller passes explicitly is always kept.
    #
    # Raises ArgumentError if an option key is not one of VALID_KEYS.
    def to_query
      options.inject(initial_query) do |query, (key, value)|
        raise ::ArgumentError, "unknown argument: #{key}" unless VALID_KEYS.key?(key)

        query[VALID_KEYS.fetch(key)] = convert_value(key, value)

        query
      end
    end

    # The query parameters used when the caller does not override them.
    def defaults
      { 'metadataPrefix' => 'oai_dc' }
    end

    private

    # The Hash the query is accumulated into: empty when resuming a list request, the defaults otherwise.
    def initial_query
      options.key?(:resumption_token) ? {} : defaults
    end

    # Datestamp options are serialised by Datestamp.unparse; every other value is converted with to_s.
    def convert_value(key, value)
      return value.to_s unless key == :from || key == :until

      Datestamp.unparse(value)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'date'
2
+ require 'time'
3
+
4
module Fieldhand
  # A class to handle datestamps of varying granularity.
  class Datestamp
    # Convert a datestamp string into a Ruby object.
    #
    # A 10-character string is treated as a day-granularity date and returned as a Date; anything else is parsed
    # as an XML Schema date-time and returned as a Time.
    def self.parse(datestamp)
      if datestamp.size == 10
        ::Date.strptime(datestamp)
      else
        ::Time.xmlschema(datestamp)
      end
    end

    # Convert a String, Date, DateTime, Time or other object responding to xmlschema into a datestamp string.
    #
    # Strings are passed through untouched. The argument is never modified.
    def self.unparse(datestamp)
      case datestamp
      when ::String
        datestamp
      when ::DateTime
        # DateTime is a subclass of Date, so it must be matched first: its default strftime output carries a
        # numeric offset rather than the UTC "Z" form produced for Time values below.
        datestamp.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when ::Date
        datestamp.strftime
      when ::Time
        # getutc returns a new Time; Time#utc would convert the caller's object in place.
        datestamp.getutc.xmlschema
      else
        datestamp.xmlschema
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'fieldhand/datestamp'
2
+
3
module Fieldhand
  # Wraps a parsed OAI-PMH header element, which carries:
  #
  # * the unique identifier of an item in a repository;
  # * the datestamp of creation, modification or deletion of the record, used for selective harvesting;
  # * zero or more setSpec elements giving the set membership of the item;
  # * an optional status attribute whose value "deleted" indicates the record has been withdrawn.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#header
  class Header
    attr_reader :element

    # element - the parsed header element to read values from.
    def initialize(element)
      @element = element
    end

    # The raw value of the element's status attribute.
    def status
      element['status']
    end

    # True when the status attribute is exactly "deleted".
    def deleted?
      status == 'deleted'
    end

    # The text of the identifier child element.
    def identifier
      @identifier ||= begin
        node = element.identifier
        node.text
      end
    end

    # The text of the datestamp child element, converted by Datestamp.parse.
    def datestamp
      @datestamp ||= begin
        raw = element.datestamp.text
        Datestamp.parse(raw)
      end
    end

    # The string contents of every setSpec child element.
    def sets
      @sets ||= element.locate('setSpec/^String')
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'fieldhand/datestamp'
2
+ require 'uri'
3
+
4
module Fieldhand
  # Information about a repository, read from a parsed Identify response element.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
  class Identify
    attr_reader :element

    # element - the parsed Identify element to read values from.
    def initialize(element)
      @element = element
    end

    # The text of the repositoryName element.
    def name
      @name ||= begin
        node = element.repositoryName
        node.text
      end
    end

    # The text of the baseURL element as a URI.
    def base_url
      @base_url ||= begin
        location = element.baseURL.text
        URI(location)
      end
    end

    # The text of the protocolVersion element.
    def protocol_version
      @protocol_version ||= begin
        node = element.protocolVersion
        node.text
      end
    end

    # The text of the earliestDatestamp element, converted by Datestamp.parse.
    def earliest_datestamp
      @earliest_datestamp ||= begin
        raw = element.earliestDatestamp.text
        Datestamp.parse(raw)
      end
    end

    # The text of the deletedRecord element.
    def deleted_record
      @deleted_record ||= begin
        node = element.deletedRecord
        node.text
      end
    end

    # The text of the granularity element.
    def granularity
      @granularity ||= begin
        node = element.granularity
        node.text
      end
    end

    # The string contents of every adminEmail element.
    def admin_emails
      @admin_emails ||= element.locate('adminEmail/^String')
    end

    # The string contents of every compression element.
    def compression
      @compression ||= element.locate('compression/^String')
    end

    # Every description element, returned as located (not converted to strings).
    def descriptions
      @descriptions ||= element.locate('description')
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'logger'
2
+ require 'rbconfig'
3
+
4
module Fieldhand
  # A default null logger for discarding log messages.
  module Logger
    module_function

    # Build a standard library Logger that writes to the platform's null device.
    def null
      device = null_device
      ::Logger.new(device)
    end

    # Determine the null device on this platform, a backport of more recent Rubies' File::NULL
    # See https://github.com/marcandre/backports/blob/v3.8.0/lib/backports/1.9.3/file/null.rb
    def null_device
      # When RUBY_PLATFORM is just "java", the underlying operating system is read from RbConfig instead.
      platform = ::RUBY_PLATFORM == 'java' ? ::RbConfig::CONFIG['host_os'] : ::RUBY_PLATFORM

      return 'NUL' if platform =~ /mswin|mingw/i
      return 'NIL:' if platform =~ /amiga/i
      return 'NL:' if platform =~ /openvms/i

      '/dev/null'
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'uri'
2
+
3
module Fieldhand
  # A metadata format supported by the repository, read from a parsed metadataFormat element.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
  class MetadataFormat
    attr_reader :element

    # element - the parsed metadataFormat element to read values from.
    def initialize(element)
      @element = element
    end

    # The text of the metadataPrefix element.
    def prefix
      @prefix ||= begin
        node = element.metadataPrefix
        node.text
      end
    end

    # A format's string form is its prefix.
    def to_s
      prefix
    end

    # The text of the schema element as a URI.
    def schema
      @schema ||= begin
        location = element.schema.text
        URI(location)
      end
    end

    # The text of the metadataNamespace element as a URI.
    def namespace
      @namespace ||= begin
        location = element.metadataNamespace.text
        URI(location)
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'fieldhand/logger'
2
+ require 'ox'
3
+ require 'cgi'
4
+ require 'net/http'
5
+ require 'uri'
6
+
7
module Fieldhand
  # Raised when a request fails for any reason before a response body is obtained.
  NetworkError = ::Class.new(::StandardError)

  # Base class for errors reported by the repository in an <error> element; one subclass per error code.
  ProtocolError = ::Class.new(::StandardError)
  BadArgumentError = ::Class.new(ProtocolError)
  BadResumptionTokenError = ::Class.new(ProtocolError)
  BadVerbError = ::Class.new(ProtocolError)
  CannotDisseminateFormatError = ::Class.new(ProtocolError)
  IdDoesNotExistError = ::Class.new(ProtocolError)
  NoRecordsMatchError = ::Class.new(ProtocolError)
  NoMetadataFormatsError = ::Class.new(ProtocolError)
  NoSetHierarchyError = ::Class.new(ProtocolError)

  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
  # results using a resumption token.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
  class Paginator
    # Maps the code attribute of an <error> element to the exception class raised for it.
    ERROR_CODES = {
      'badArgument' => BadArgumentError,
      'badResumptionToken' => BadResumptionTokenError,
      'badVerb' => BadVerbError,
      'cannotDisseminateFormat' => CannotDisseminateFormatError,
      'idDoesNotExist' => IdDoesNotExistError,
      'noRecordsMatch' => NoRecordsMatchError,
      'noMetadataFormats' => NoMetadataFormatsError,
      'noSetHierarchy' => NoSetHierarchyError
    }.freeze

    attr_reader :uri, :logger, :http

    # uri    - the repository base URL, as a URI or a String.
    # logger - the logger that requests and resumption tokens are reported to.
    #
    # Creating a paginator only configures the HTTP client; no request is made until items is enumerated.
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
      @http = ::Net::HTTP.new(@uri.host, @uri.port)
      @http.use_ssl = true if @uri.scheme == 'https'
    end

    # Yield every element found at path in the response to verb, requesting further pages for as long as the
    # response contains a non-empty resumption token. Returns an Enumerator when no block is given.
    #
    # verb  - the OAI-PMH verb to request.
    # path  - the location of the wanted elements, relative to the response's root element.
    # query - a Hash of extra query parameters for the first request; later requests send only the verb and
    #         the resumption token.
    #
    # Raises the mapped ProtocolError subclass when the response contains an <error> with a known code, and
    # NetworkError when the request itself fails.
    def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      return enum_for(:items, verb, path, query) unless block_given?

      loop do
        document = ::Ox.parse(request(query.merge('verb' => verb)))

        document.root.locate('error').each do |error|
          convert_error(error)
        end

        document.root.locate(path).each do |item|
          yield item
        end

        resumption_token = document.root.locate('?/resumptionToken/^String').first
        break unless resumption_token

        logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
        query = { 'resumptionToken' => resumption_token }
      end
    end

    private

    # Perform a GET against the repository with the given query parameters and return the response body.
    # Every failure, timeouts included, is re-raised as a NetworkError.
    def request(query = {})
      request_uri = uri.dup
      request_uri.query = encode_query(query)

      logger.info('Fieldhand') { "GET #{request_uri}" }
      http.get(request_uri.request_uri).body
    rescue ::Timeout::Error => e
      raise NetworkError, "timeout requesting #{query}: #{e}"
    rescue => e
      raise NetworkError, "error requesting #{query}: #{e}"
    end

    # Raise the exception mapped to the error element's code; elements with an unrecognised code are ignored.
    def convert_error(error)
      return unless ERROR_CODES.key?(error['code'])

      raise ERROR_CODES.fetch(error['code']), error.text
    end

    # Serialise a Hash into a URL-encoded query string.
    #
    # Keys and values are converted with to_s first so that non-String values (e.g. a Symbol or a
    # MetadataFormat given as a metadata prefix) are encoded instead of failing inside CGI.escape and being
    # reported as a network error.
    def encode_query(query = {})
      query.map { |k, v| ::CGI.escape(k.to_s) << '=' << ::CGI.escape(v.to_s) }.join('&')
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'fieldhand/header'
2
+
3
module Fieldhand
  # A record is metadata expressed in a single format.
  #
  # Header-related readers are answered by the record's Header.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Record
  class Record
    attr_reader :element

    # element - the parsed record element to read values from.
    def initialize(element)
      @element = element
    end

    # The record's header child element wrapped in a Header.
    def header
      @header ||= begin
        node = element.header
        Header.new(node)
      end
    end

    # See Header#deleted?
    def deleted?
      header.deleted?
    end

    # See Header#status
    def status
      header.status
    end

    # See Header#identifier
    def identifier
      header.identifier
    end

    # See Header#datestamp
    def datestamp
      header.datestamp
    end

    # See Header#sets
    def sets
      header.sets
    end

    # The first element located by the path "metadata[0]", or nil when nothing is located.
    def metadata
      @metadata ||= begin
        located = element.locate('metadata[0]')
        located.first
      end
    end

    # Every about child element of the record.
    def about
      @about ||= element.locate('about')
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'fieldhand/arguments'
2
+ require 'fieldhand/header'
3
+ require 'fieldhand/identify'
4
+ require 'fieldhand/logger'
5
+ require 'fieldhand/metadata_format'
6
+ require 'fieldhand/paginator'
7
+ require 'fieldhand/record'
8
+ require 'fieldhand/set'
9
+ require 'uri'
10
+
11
module Fieldhand
  # A repository is a network accessible server that can process the 6 OAI-PMH requests.
  #
  # The list methods yield one wrapped result at a time and return an Enumerator when called without a block.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html
  class Repository
    attr_reader :uri, :logger

    # uri    - the repository base URL, as a URI or a String.
    # logger - the logger handed to the paginator that performs the requests.
    def initialize(uri, logger = Logger.null)
      @uri =
        if uri.is_a?(::URI)
          uri
        else
          URI(uri)
        end
      @logger = logger
    end

    # Request Identify and return the first result as an Identify, or nil if there is none.
    def identify
      elements = paginator.items('Identify', 'Identify')

      elements.map { |element| Identify.new(element) }.first
    end

    # Yield a MetadataFormat for each format the repository lists, optionally restricted to one identifier.
    def metadata_formats(identifier = nil)
      return enum_for(:metadata_formats, identifier) unless block_given?

      query = identifier ? { 'identifier' => identifier } : {}
      formats = paginator.items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', query)

      formats.each { |element| yield MetadataFormat.new(element) }
    end

    # Yield a Set for each set the repository lists.
    def sets
      return enum_for(:sets) unless block_given?

      listed = paginator.items('ListSets', 'ListSets/set')

      listed.each { |element| yield Set.new(element) }
    end

    # Yield a Record for each record matching the given arguments (see Arguments for the accepted keys).
    def records(arguments = {})
      return enum_for(:records, arguments) unless block_given?

      query = Arguments.new(arguments).to_query
      listed = paginator.items('ListRecords', 'ListRecords/record', query)

      listed.each { |element| yield Record.new(element) }
    end

    # Yield a Header for each identifier matching the given arguments (see Arguments for the accepted keys).
    def identifiers(arguments = {})
      return enum_for(:identifiers, arguments) unless block_given?

      query = Arguments.new(arguments).to_query
      listed = paginator.items('ListIdentifiers', 'ListIdentifiers/header', query)

      listed.each { |element| yield Header.new(element) }
    end

    # Request a single record by identifier and return it as a Record, or nil if there is none.
    #
    # Only the :metadata_prefix argument is read; it defaults to "oai_dc".
    def get(identifier, arguments = {})
      prefix = arguments.fetch(:metadata_prefix, 'oai_dc')
      query = { 'identifier' => identifier, 'metadataPrefix' => prefix }
      found = paginator.items('GetRecord', 'GetRecord/record', query)

      found.map { |element| Record.new(element) }.first
    end

    private

    # The paginator shared by every request made through this repository, created on first use.
    def paginator
      @paginator ||= Paginator.new(uri, logger)
    end
  end
end