fieldhand 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ require 'fieldhand/repository'
@@ -0,0 +1,42 @@
1
+ require 'fieldhand/datestamp'
2
+
3
module Fieldhand
  # A class for converting Fieldhand arguments into OAI-PMH query parameters.
  #
  # Takes a collection of symbol-keyed options (see VALID_KEYS) and produces a Hash keyed by the matching OAI-PMH
  # argument names, with every value converted to a string.
  class Arguments
    # Maps each supported Fieldhand option to the name of the OAI-PMH argument it is sent as.
    VALID_KEYS = {
      :metadata_prefix => 'metadataPrefix',
      :resumption_token => 'resumptionToken',
      :from => 'from',
      :until => 'until',
      :set => 'set'
    }.freeze

    attr_reader :options

    # options - a Hash whose keys are all members of VALID_KEYS (validated lazily by to_query).
    def initialize(options = {})
      @options = options
    end

    # Build the query Hash for a request, starting from the defaults and letting each given option override them.
    #
    # Returns a Hash of OAI-PMH argument name to String value.
    # Raises ::ArgumentError if any option key is not listed in VALID_KEYS.
    def to_query
      options.inject(defaults) do |query, (key, value)|
        raise ::ArgumentError, "unknown argument: #{key}" unless VALID_KEYS.key?(key)

        query[VALID_KEYS.fetch(key)] = convert_value(key, value)

        query
      end
    end

    # The query parameters assumed when the caller does not supply them. A fresh Hash is returned on every call
    # because to_query mutates it.
    #
    # resumptionToken is an exclusive argument in OAI-PMH: it must not be accompanied by any other argument, so no
    # default metadataPrefix is added when a resumption token has been given.
    def defaults
      return {} if options.any? { |key, _value| key == :resumption_token }

      { 'metadataPrefix' => 'oai_dc' }
    end

    private

    # Convert a single option value to its query string form: :from and :until go through Datestamp.unparse,
    # everything else through to_s.
    def convert_value(key, value)
      return value.to_s unless key == :from || key == :until

      Datestamp.unparse(value)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'date'
2
+ require 'time'
3
+
4
module Fieldhand
  # A class to handle datestamps of varying granularity.
  #
  # Converts between the string datestamps used in requests and responses and Ruby Date/Time objects.
  class Datestamp
    # Parse a datestamp string.
    #
    # A 10 character string is treated as day granularity and parsed with Date.strptime (default format); anything
    # else is parsed with Time.xmlschema.
    #
    # Returns a ::Date or a ::Time.
    def self.parse(datestamp)
      if datestamp.size == 10
        ::Date.strptime(datestamp)
      else
        ::Time.xmlschema(datestamp)
      end
    end

    # Convert a datestamp into the string sent to the repository.
    #
    # * Strings are returned untouched.
    # * DateTimes and Times are expressed in UTC with a trailing "Z".
    # * Dates are formatted with Date#strftime's default format.
    # * Any other object is asked for its own xmlschema.
    def self.unparse(datestamp)
      case datestamp
      when ::String
        datestamp
      when ::DateTime
        # DateTime is a subclass of Date so it must be matched first, otherwise it would be formatted by the Date
        # branch with its local offset instead of being converted to UTC.
        datestamp.new_offset(0).strftime('%Y-%m-%dT%H:%M:%SZ')
      when ::Date
        datestamp.strftime
      when ::Time
        # getutc returns a new Time; Time#utc would convert the caller's object in place.
        datestamp.getutc.xmlschema
      else
        datestamp.xmlschema
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'fieldhand/datestamp'
2
+
3
module Fieldhand
  # A wrapper around a parsed OAI-PMH header element, exposing the information used for selective harvesting:
  #
  # * identifier -- the unique identifier of the item in the repository;
  # * datestamp -- when the record was created, modified or deleted;
  # * sets -- the setSpec values describing the item's set membership (possibly none);
  # * status -- an optional attribute; a value of deleted marks the record as withdrawn.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#header
  class Header
    attr_reader :element

    # element - the parsed header element to read values from.
    def initialize(element)
      @element = element
    end

    # The raw value of the element's status attribute.
    def status
      element['status']
    end

    # True only when the status attribute is exactly 'deleted'.
    def deleted?
      status == 'deleted'
    end

    # The text of the identifier child element (memoized).
    def identifier
      @identifier ||= begin
        node = element.identifier
        node.text
      end
    end

    # The text of the datestamp child element, parsed by Datestamp.parse (memoized).
    def datestamp
      @datestamp ||= begin
        raw = element.datestamp.text
        Datestamp.parse(raw)
      end
    end

    # The string contents of every setSpec child element (memoized).
    def sets
      @sets ||= begin
        specs = element.locate('setSpec/^String')
        specs
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'fieldhand/datestamp'
2
+ require 'uri'
3
+
4
module Fieldhand
  # A wrapper around a parsed Identify response element, describing the repository itself.
  #
  # Each reader pulls its value from the wrapped element on first use and memoizes it.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
  class Identify
    attr_reader :element

    # element - the parsed Identify element to read values from.
    def initialize(element)
      @element = element
    end

    # The text of the repositoryName element.
    def name
      @name ||= begin
        node = element.repositoryName
        node.text
      end
    end

    # The text of the baseURL element as a URI.
    def base_url
      @base_url ||= begin
        address = element.baseURL.text
        URI(address)
      end
    end

    # The text of the protocolVersion element.
    def protocol_version
      @protocol_version ||= begin
        node = element.protocolVersion
        node.text
      end
    end

    # The text of the earliestDatestamp element, parsed by Datestamp.parse.
    def earliest_datestamp
      @earliest_datestamp ||= begin
        raw = element.earliestDatestamp.text
        Datestamp.parse(raw)
      end
    end

    # The text of the deletedRecord element.
    def deleted_record
      @deleted_record ||= begin
        node = element.deletedRecord
        node.text
      end
    end

    # The text of the granularity element.
    def granularity
      @granularity ||= begin
        node = element.granularity
        node.text
      end
    end

    # The string contents of every adminEmail element.
    def admin_emails
      @admin_emails ||= begin
        emails = element.locate('adminEmail/^String')
        emails
      end
    end

    # The string contents of every compression element.
    def compression
      @compression ||= begin
        encodings = element.locate('compression/^String')
        encodings
      end
    end

    # Every description element, returned as located (not converted to strings).
    def descriptions
      @descriptions ||= begin
        containers = element.locate('description')
        containers
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'logger'
2
+ require 'rbconfig'
3
+
4
module Fieldhand
  # Supplies a silent default logger: a standard ::Logger pointed at the platform's null device.
  module Logger
    module_function

    # Build a new ::Logger that writes to null_device.
    def null
      device = null_device
      ::Logger.new(device)
    end

    # Work out the path of the null device on this platform, a backport of more recent Rubies' File::NULL.
    # See https://github.com/marcandre/backports/blob/v3.8.0/lib/backports/1.9.3/file/null.rb
    #
    # When RUBY_PLATFORM is 'java' the host operating system from RbConfig is inspected instead.
    def null_device
      host = ::RUBY_PLATFORM == 'java' ? ::RbConfig::CONFIG['host_os'] : ::RUBY_PLATFORM

      if host =~ /mswin|mingw/i
        'NUL'
      elsif host =~ /amiga/i
        'NIL:'
      elsif host =~ /openvms/i
        'NL:'
      else
        '/dev/null'
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'uri'
2
+
3
module Fieldhand
  # A wrapper around a parsed metadataFormat element: one metadata format the repository can disseminate.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
  class MetadataFormat
    attr_reader :element

    # element - the parsed metadataFormat element to read values from.
    def initialize(element)
      @element = element
    end

    # The text of the metadataPrefix element (memoized).
    def prefix
      @prefix ||= begin
        node = element.metadataPrefix
        node.text
      end
    end

    # A format stringifies to its prefix.
    def to_s
      prefix
    end

    # The text of the schema element as a URI (memoized).
    def schema
      @schema ||= begin
        location = element.schema.text
        URI(location)
      end
    end

    # The text of the metadataNamespace element as a URI (memoized).
    def namespace
      @namespace ||= begin
        location = element.metadataNamespace.text
        URI(location)
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'fieldhand/logger'
2
+ require 'ox'
3
+ require 'cgi'
4
+ require 'net/http'
5
+ require 'uri'
6
+
7
module Fieldhand
  # Raised by Paginator when performing a request fails, whether through a timeout or any other StandardError.
  NetworkError = ::Class.new(::StandardError)

  # Parent class of every error raised for an OAI-PMH error code (see Paginator::ERROR_CODES).
  ProtocolError = ::Class.new(::StandardError)
  BadArgumentError = ::Class.new(ProtocolError)
  BadResumptionTokenError = ::Class.new(ProtocolError)
  BadVerbError = ::Class.new(ProtocolError)
  CannotDisseminateFormatError = ::Class.new(ProtocolError)
  IdDoesNotExistError = ::Class.new(ProtocolError)
  NoRecordsMatchError = ::Class.new(ProtocolError)
  NoMetadataFormatsError = ::Class.new(ProtocolError)
  NoSetHierarchyError = ::Class.new(ProtocolError)

  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
  # results using a resumption token.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
  class Paginator
    # Maps the code attribute of an error element in a response to the exception class raised for it.
    ERROR_CODES = {
      'badArgument' => BadArgumentError,
      'badResumptionToken' => BadResumptionTokenError,
      'badVerb' => BadVerbError,
      'cannotDisseminateFormat' => CannotDisseminateFormatError,
      'idDoesNotExist' => IdDoesNotExistError,
      'noRecordsMatch' => NoRecordsMatchError,
      'noMetadataFormats' => NoMetadataFormatsError,
      'noSetHierarchy' => NoSetHierarchyError
    }.freeze

    attr_reader :uri, :logger, :http

    # uri    - the base URL of the repository as a URI or a String.
    # logger - receives an info message per request and a debug message per resumption token.
    #
    # A Net::HTTP client is created for the URI's host and port, with SSL enabled for https URIs.
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
      @http = ::Net::HTTP.new(@uri.host, @uri.port)
      @http.use_ssl = true if @uri.scheme == 'https'
    end

    # Yield every element found at path in the response to the given verb, requesting further pages for as long as
    # the repository returns a resumption token.
    #
    # verb  - the OAI-PMH verb, sent as the 'verb' query parameter.
    # path  - the location of the wanted elements relative to the document root.
    # query - a Hash of further query parameters for the first request.
    #
    # Returns an Enumerator when called without a block.
    # Raises the matching ERROR_CODES class if the response contains an error element with a known code, and
    # NetworkError if a request fails.
    def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      return enum_for(:items, verb, path, query) unless block_given?

      loop do
        document = ::Ox.parse(request(query.merge('verb' => verb)))

        document.root.locate('error').each do |error|
          convert_error(error)
        end

        document.root.locate(path).each do |item|
          yield item
        end

        resumption_token = document.root.locate('?/resumptionToken/^String').first
        break unless resumption_token

        logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
        # Follow-up requests carry only the token (the verb is merged back in at the top of the loop).
        query = { 'resumptionToken' => resumption_token }
      end
    end

    private

    # Perform a GET against the repository with the given query parameters and return the response body.
    #
    # Timeouts are rescued explicitly as well as StandardError; both are re-raised as NetworkError.
    def request(query = {})
      request_uri = uri.dup
      request_uri.query = encode_query(query)

      logger.info('Fieldhand') { "GET #{request_uri}" }
      http.get(request_uri.request_uri).body
    rescue ::Timeout::Error => e
      raise NetworkError, "timeout requesting #{query}: #{e}"
    rescue => e
      raise NetworkError, "error requesting #{query}: #{e}"
    end

    # Raise the exception class registered for the error element's code, using the element's text as the message.
    # Error elements with a code that is not in ERROR_CODES are ignored.
    def convert_error(error)
      return unless ERROR_CODES.key?(error['code'])

      raise ERROR_CODES.fetch(error['code']), error.text
    end

    # Serialise a Hash of parameters into an escaped query string.
    #
    # Keys and values are converted with to_s first because CGI.escape only accepts Strings; without this a Symbol
    # or other non-String value would raise inside request and surface as a misleading NetworkError.
    def encode_query(query = {})
      query.map { |k, v| ::CGI.escape(k.to_s) << '=' << ::CGI.escape(v.to_s) }.join('&')
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'fieldhand/header'
2
+
3
module Fieldhand
  # A wrapper around a parsed record element: metadata for an item expressed in a single format, together with its
  # header and any about containers.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Record
  class Record
    attr_reader :element

    # element - the parsed record element to read values from.
    def initialize(element)
      @element = element
    end

    # The record's header child element wrapped in a Header (memoized). The readers below that are not defined on
    # the record element itself simply ask the header.
    def header
      @header ||= begin
        node = element.header
        Header.new(node)
      end
    end

    # Delegates to Header#deleted?.
    def deleted?
      header.deleted?
    end

    # Delegates to Header#status.
    def status
      header.status
    end

    # Delegates to Header#identifier.
    def identifier
      header.identifier
    end

    # Delegates to Header#datestamp.
    def datestamp
      header.datestamp
    end

    # Delegates to Header#sets.
    def sets
      header.sets
    end

    # The first metadata child element, or nil when none is located (memoized).
    def metadata
      @metadata ||= begin
        matches = element.locate('metadata[0]')
        matches.first
      end
    end

    # Every about child element, as located (memoized).
    def about
      @about ||= begin
        containers = element.locate('about')
        containers
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'fieldhand/arguments'
2
+ require 'fieldhand/header'
3
+ require 'fieldhand/identify'
4
+ require 'fieldhand/logger'
5
+ require 'fieldhand/metadata_format'
6
+ require 'fieldhand/paginator'
7
+ require 'fieldhand/record'
8
+ require 'fieldhand/set'
9
+ require 'uri'
10
+
11
module Fieldhand
  # A repository is a network accessible server that can process the 6 OAI-PMH requests.
  #
  # Each public method issues one kind of request through a Paginator and wraps the elements it yields in the
  # matching Fieldhand class.
  #
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html
  class Repository
    attr_reader :uri, :logger

    # uri    - the base URL of the repository as a URI or a String.
    # logger - passed on to the Paginator; defaults to a logger that discards its output.
    def initialize(uri, logger = Logger.null)
      @uri = uri.is_a?(::URI) ? uri : URI(uri)
      @logger = logger
    end

    # Issue an Identify request.
    #
    # Returns an Identify for the first Identify element of the response, or nil if there is none.
    def identify
      paginator.
        items('Identify', 'Identify').
        map { |identify| Identify.new(identify) }.
        first
    end

    # Issue a ListMetadataFormats request, yielding a MetadataFormat for each format returned.
    #
    # identifier - optionally restricts the request to the formats available for one item.
    #
    # Returns an Enumerator when called without a block.
    def metadata_formats(identifier = nil)
      return enum_for(:metadata_formats, identifier) unless block_given?

      arguments = {}
      arguments['identifier'] = identifier if identifier

      paginator.
        items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', arguments).
        each do |format|
          yield MetadataFormat.new(format)
        end
    end

    # Issue a ListSets request, yielding a Set for each set returned.
    #
    # Returns an Enumerator when called without a block.
    def sets
      return enum_for(:sets) unless block_given?

      paginator.
        items('ListSets', 'ListSets/set').
        each do |set|
          yield Set.new(set)
        end
    end

    # Issue a ListRecords request, yielding a Record for each record returned.
    #
    # arguments - options converted to query parameters by Arguments#to_query, which raises ::ArgumentError for
    #             keys it does not know.
    #
    # Returns an Enumerator when called without a block.
    def records(arguments = {})
      return enum_for(:records, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListRecords', 'ListRecords/record', query).
        each do |record|
          yield Record.new(record)
        end
    end

    # Issue a ListIdentifiers request, yielding a Header for each header returned.
    #
    # arguments - options converted to query parameters by Arguments#to_query, which raises ::ArgumentError for
    #             keys it does not know.
    #
    # Returns an Enumerator when called without a block.
    def identifiers(arguments = {})
      return enum_for(:identifiers, arguments) unless block_given?

      query = Arguments.new(arguments).to_query

      paginator.
        items('ListIdentifiers', 'ListIdentifiers/header', query).
        each do |header|
          yield Header.new(header)
        end
    end

    # Issue a GetRecord request for a single item.
    #
    # identifier - the identifier of the wanted item.
    # arguments  - only :metadata_prefix is read, defaulting to 'oai_dc'. It is converted with to_s, as records and
    #              identifiers do through Arguments, so a MetadataFormat or Symbol is accepted as well as a String.
    #
    # Returns a Record for the first record element of the response, or nil if there is none.
    def get(identifier, arguments = {})
      query = {
        'identifier' => identifier,
        'metadataPrefix' => arguments.fetch(:metadata_prefix, 'oai_dc').to_s
      }

      paginator.
        items('GetRecord', 'GetRecord/record', query).
        map { |record| Record.new(record) }.
        first
    end

    private

    # The Paginator shared by every request made through this repository (memoized).
    def paginator
      @paginator ||= Paginator.new(uri, logger)
    end
  end
end