fieldhand 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe77387e599f04fe11981420174c0c53c6f1b3ce
4
- data.tar.gz: f3bc88686b42bca448c52a06988273dcae890b8f
3
+ metadata.gz: cbb2e358fe735e190c98cdbb891d1e061ac9c94a
4
+ data.tar.gz: 64d94a760f881779790518e59a9508a1678d2be2
5
5
  SHA512:
6
- metadata.gz: aac19ca6cdda6acc837c4d8f2e33874fccaa30ee91b3e78e9f0e0a5948b93712785507e960a3c13cfa55da963695b7ce695ddd86333358041335868ee6dca6bd
7
- data.tar.gz: e1dfdb2adf541cf2a4dc2c3cfbe39da2b5843595b484ff6a7fcf31f22694431a38b09dcbbbc855d6ee3426ad789ce3fcea45f4263f215b62001be4cf076c8f48
6
+ metadata.gz: 59ba22ea64add43e36fa2dab85230818515950ca78ddaa4e9db6fb322de59b9b59ecf7ae4fcbdc3cf85f177ebc084773bfaf96f86da15e5843b148296e529de6
7
+ data.tar.gz: 114f1acb87901c6935dea29b9b03e7d115ca937486f069ef4af939a1f3a39592fe4a9d9abb50557ac7ff8ac9f27676f6b7cfb619a79cb9313aeb82f6b46e73ec
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A Ruby library for harvesting metadata from [OAI-PMH](https://www.openarchives.org/OAI/openarchivesprotocol.html) repositories.
4
4
 
5
- **Current version:** 0.3.0
5
+ **Current version:** 0.3.1
6
6
  **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2
7
7
 
8
8
  ## Installation
@@ -2,6 +2,16 @@ require 'fieldhand/datestamp'
2
2
 
3
3
  module Fieldhand
4
4
  # A class for converting Fieldhand arguments into OAI-PMH query parameters.
5
+ #
6
+ # Specifically:
7
+ #
8
+ # * :metadata_prefix
9
+ # * :resumption_token
10
+ # * :from
11
+ # * :until
12
+ # * :set
13
+ #
14
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPRequestFormat
5
15
  class Arguments
6
16
  VALID_KEYS = {
7
17
  :metadata_prefix => 'metadataPrefix',
@@ -13,10 +23,32 @@ module Fieldhand
13
23
 
14
24
  attr_reader :options
15
25
 
26
+ # Return a new `Arguments` with the given `Hash`.
16
27
  def initialize(options = {})
17
28
  @options = options
18
29
  end
19
30
 
31
+ # Return a query as a `Hash` suitable for encoding as a query string in an OAI-PMH request.
32
+ #
33
+ # Converts arguments passed with symbol keys into the corresponding strings as defined in the OAI-PMH protocol,
34
+ # converting values into the appropriate format (e.g. `Time`s, `Date`s, `MetadataFormat`s and `Set`s into strings).
35
+ #
36
+ # Defaults to returning a metadata prefix of "oai_dc".
37
+ #
38
+ # Raises an `ArgumentError` if an unknown argument is encountered.
39
+ #
40
+ # # Examples
41
+ #
42
+ # ```
43
+ # Fieldhand::Arguments.new(:metadata_prefix => 'xoai', :from => Date.new(2001, 1, 1)).to_query
44
+ # #=> { "metadataPrefix" => "xoai", "from" => "2001-01-01" }
45
+ #
46
+ # Fieldhand::Arguments.new(:until => Time.utc(2001, 1, 1, 12, 0, 0)).to_query
47
+ # #=> { "metadataPrefix"=>"oai_dc", "until" => "2001-01-01T12:00:00Z" }
48
+ #
49
+ # Fieldhand::Arguments.new(:foo => "bar").to_query
50
+ # # ArgumentError: unknown argument: foo
51
+ # ```
20
52
  def to_query
21
53
  options.inject(defaults) do |query, (key, value)|
22
54
  raise ::ArgumentError, "unknown argument: #{key}" unless VALID_KEYS.key?(key)
@@ -27,12 +59,12 @@ module Fieldhand
27
59
  end
28
60
  end
29
61
 
62
+ private
63
+
30
64
  def defaults
31
65
  { 'metadataPrefix' => 'oai_dc' }
32
66
  end
33
67
 
34
- private
35
-
36
68
  def convert_value(key, value)
37
69
  return value.to_s unless key == :from || key == :until
38
70
 
@@ -4,6 +4,10 @@ require 'time'
4
4
  module Fieldhand
5
5
  # A class to handle datestamps of varying granularity.
6
6
  class Datestamp
7
+ # Return either a `Date` or `Time` for the given string datestamp.
8
+ #
9
+ # As repositories may only support date-level granularity rather than time-level granularity, we need to handle both
10
+ # types of datestamp.
7
11
  def self.parse(datestamp)
8
12
  if datestamp.size == 10
9
13
  ::Date.strptime(datestamp)
@@ -12,14 +16,21 @@ module Fieldhand
12
16
  end
13
17
  end
14
18
 
19
+ # Return a string UTC datestamp given a string, `Date`, `Time` or anything responding to `xmlschema`.
20
+ #
21
+ # The granularity of the resulting datestamp depends on the input type:
22
+ #
23
+ # * Strings are returned untouched (assuming they are already formatted datestamps)
24
+ # * Dates will return a date-level granularity datestamp, e.g. 2001-01-01
25
+ # * Times will return a time-level granularity UTC datestamp, e.g. 2001-01-01T00:00:00Z
26
+ # * DateTimes will return a time-level granularity UTC datestamp, e.g. 2001-01-01T00:00:00Z
27
+ # * Anything else is assumed to respond to `xmlschema`
15
28
  def self.unparse(datestamp)
16
29
  case datestamp
17
- when ::String
18
- datestamp
19
- when ::Date
20
- datestamp.strftime
21
- when ::Time
22
- datestamp.utc.xmlschema
30
+ when ::String then datestamp
31
+ when ::DateTime then unparse(::Time.xmlschema(datestamp.to_s))
32
+ when ::Date then datestamp.strftime
33
+ when ::Time then datestamp.utc.xmlschema
23
34
  else
24
35
  datestamp.xmlschema
25
36
  end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/record'
2
+
3
+ module Fieldhand
4
+ # A parser for GetRecord responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord
7
+ class GetRecordParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser populated with the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Record`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('GetRecord/record').
20
+ map { |item| Record.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -15,27 +15,38 @@ module Fieldhand
15
15
  class Header
16
16
  attr_reader :element, :response_date
17
17
 
18
+ # Return a new Header with the given element and an optional response date.
19
+ #
20
+ # Defaults the response date to the current time.
18
21
  def initialize(element, response_date = Time.now)
19
22
  @element = element
20
23
  @response_date = response_date
21
24
  end
22
25
 
26
+ # Test whether this item is marked as deleted or not.
27
+ #
28
+ # Note that a repository's support for deleted records can be interrogated through the `Identify` request, see
29
+ # https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
23
30
  def deleted?
24
31
  status == 'deleted'
25
32
  end
26
33
 
34
+ # Return the optional status of this item.
27
35
  def status
28
36
  element['status']
29
37
  end
30
38
 
39
+ # Return the unique identifier of this item.
31
40
  def identifier
32
41
  @identifier ||= element.identifier.text
33
42
  end
34
43
 
44
+ # Return the UTC datestamp of this item.
35
45
  def datestamp
36
46
  @datestamp ||= Datestamp.parse(element.datestamp.text)
37
47
  end
38
48
 
49
+ # Return any set memberships of this item.
39
50
  def sets
40
51
  @sets ||= element.locate('setSpec/^String')
41
52
  end
@@ -8,43 +8,71 @@ module Fieldhand
8
8
  class Identify
9
9
  attr_reader :element, :response_date
10
10
 
11
+ # Return a new Identify with the given element and optional response date.
12
+ #
13
+ # Defaults the response date to the current time.
11
14
  def initialize(element, response_date = Time.now)
12
15
  @element = element
13
16
  @response_date = response_date
14
17
  end
15
18
 
19
+ # Return the human readable name for the repository.
16
20
  def name
17
21
  @name ||= element.repositoryName.text
18
22
  end
19
23
 
24
+ # Return the base URL of the repository as a URI.
25
+ #
26
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPRequestFormat
20
27
  def base_url
21
28
  @base_url ||= URI(element.baseURL.text)
22
29
  end
23
30
 
31
+ # Return the version of the OAI-PMH protocol supported by the repository as a string.
24
32
  def protocol_version
25
33
  @protocol_version ||= element.protocolVersion.text
26
34
  end
27
35
 
36
+ # Return the guaranteed lower limit of all datestamps recording changes, modifications, or deletions in the
37
+ # repository as a `Date` or `Time` depending on the granularity of the repository.
28
38
  def earliest_datestamp
29
39
  @earliest_datestamp ||= Datestamp.parse(element.earliestDatestamp.text)
30
40
  end
31
41
 
42
+ # Return the manner in which the repository supports the notion of deleted records as a string.
43
+ #
44
+ # Possible values are:
45
+ #
46
+ # * no
47
+ # * transient
48
+ # * persistent
49
+ #
50
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
32
51
  def deleted_record
33
52
  @deleted_record ||= element.deletedRecord.text
34
53
  end
35
54
 
55
+ # Return the finest harvesting granularity supported by the repository. The legitimate values are YYYY-MM-DD and
56
+ # YYYY-MM-DDThh:mm:ssZ with meanings as defined in ISO 8601.
57
+ #
58
+ # See http://www.w3.org/TR/NOTE-datetime
36
59
  def granularity
37
60
  @granularity ||= element.granularity.text
38
61
  end
39
62
 
63
+ # Return any e-mail addresses of administrators of the repository as an array of strings.
40
64
  def admin_emails
41
65
  @admin_emails ||= element.locate('adminEmail/^String')
42
66
  end
43
67
 
68
+ # Return any compression encodings supported by the repository as an array of strings.
44
69
  def compression
45
70
  @compression ||= element.locate('compression/^String')
46
71
  end
47
72
 
73
+ # Return any raw description elements used by communities to describe their repositories as an array of strings.
74
+ #
75
+ # As these can be in any format, Fieldhand does not attempt to parse the elements but leaves that to users.
48
76
  def descriptions
49
77
  @descriptions ||= element.locate('description')
50
78
  end
@@ -0,0 +1,26 @@
1
+ require 'fieldhand/identify'
2
+
3
+ module Fieldhand
4
+ # A parser for Identify responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
7
+ class IdentifyParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Identify`s found in the response.
16
+ #
17
+ # In reality, there will only ever be at most one `Identify` in a response but having a consistent interface with
18
+ # the other parsers keeps the supporting code simpler.
19
+ def items
20
+ response_parser.
21
+ root.
22
+ locate('Identify').
23
+ map { |item| Identify.new(item, response_parser.response_date) }
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/header'
2
+
3
+ module Fieldhand
4
+ # A parser for ListIdentifiers responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers
7
+ class ListIdentifiersParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Header`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListIdentifiers/header').
20
+ map { |item| Header.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/metadata_format'
2
+
3
+ module Fieldhand
4
+ # A parser for ListMetadataFormats responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
7
+ class ListMetadataFormatsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `MetadataFormat`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListMetadataFormats/metadataFormat').
20
+ map { |item| MetadataFormat.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/record'
2
+
3
+ module Fieldhand
4
+ # A parser for ListRecords responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListRecords
7
+ class ListRecordsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Record`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListRecords/record').
20
+ map { |item| Record.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/set'
2
+
3
+ module Fieldhand
4
+ # A parser for ListSets responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets
7
+ class ListSetsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Set`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListSets/set').
20
+ map { |item| Set.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -6,6 +6,7 @@ module Fieldhand
6
6
  module Logger
7
7
  module_function
8
8
 
9
+ # Return a new `Logger` that logs to the null device on this platform.
9
10
  def null
10
11
  ::Logger.new(null_device)
11
12
  end
@@ -7,23 +7,32 @@ module Fieldhand
7
7
  class MetadataFormat
8
8
  attr_reader :element, :response_date
9
9
 
10
+ # Return a new Metadata Format for the given element with an optional response date.
11
+ #
12
+ # The response date defaults to the current time.
10
13
  def initialize(element, response_date = Time.now)
11
14
  @element = element
12
15
  @response_date = response_date
13
16
  end
14
17
 
18
+ # Return the prefix as a string representation of the format.
19
+ #
20
+ # This makes it possible to pass a Metadata Format to methods that expect a string metadata prefix.
15
21
  def to_s
16
22
  prefix
17
23
  end
18
24
 
25
+ # Return the string metadata prefix for the format.
19
26
  def prefix
20
27
  @prefix ||= element.metadataPrefix.text
21
28
  end
22
29
 
30
+ # Return the location of an XML Schema describing the format as a URI.
23
31
  def schema
24
32
  @schema ||= URI(element.schema.text)
25
33
  end
26
34
 
35
+ # Return the XML Namespace URI for the format.
27
36
  def namespace
28
37
  @namespace ||= URI(element.metadataNamespace.text)
29
38
  end
@@ -1,40 +1,24 @@
1
- require 'fieldhand/datestamp'
2
1
  require 'fieldhand/logger'
3
- require 'ox'
2
+ require 'fieldhand/response_parser'
4
3
  require 'cgi'
5
4
  require 'net/http'
6
5
  require 'uri'
7
6
 
8
7
  module Fieldhand
9
8
  NetworkError = ::Class.new(::StandardError)
10
- ProtocolError = ::Class.new(::StandardError)
11
- BadArgumentError = ::Class.new(ProtocolError)
12
- BadResumptionTokenError = ::Class.new(ProtocolError)
13
- BadVerbError = ::Class.new(ProtocolError)
14
- CannotDisseminateFormatError = ::Class.new(ProtocolError)
15
- IdDoesNotExistError = ::Class.new(ProtocolError)
16
- NoRecordsMatchError = ::Class.new(ProtocolError)
17
- NoMetadataFormatsError = ::Class.new(ProtocolError)
18
- NoSetHierarchyError = ::Class.new(ProtocolError)
19
9
 
20
10
  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
21
11
  # results using a resumption token.
22
12
  #
23
13
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
24
14
  class Paginator
25
- ERROR_CODES = {
26
- 'badArgument' => BadArgumentError,
27
- 'badResumptionToken' => BadResumptionTokenError,
28
- 'badVerb' => BadVerbError,
29
- 'cannotDisseminateFormat' => CannotDisseminateFormatError,
30
- 'idDoesNotExist' => IdDoesNotExistError,
31
- 'noRecordsMatch' => NoRecordsMatchError,
32
- 'noMetadataFormats' => NoMetadataFormatsError,
33
- 'noSetHierarchy' => NoSetHierarchyError
34
- }.freeze
35
-
36
15
  attr_reader :uri, :logger, :http
37
16
 
17
+ # Return a new paginator for the given repository base URI and optional logger.
18
+ #
19
+ # The URI can be passed as either a `URI` or something that can be parsed as a URI such as a string.
20
+ #
21
+ # The logger will default to a null logger appropriate to this platform.
38
22
  def initialize(uri, logger = Logger.null)
39
23
  @uri = uri.is_a?(::URI) ? uri : URI(uri)
40
24
  @logger = logger
@@ -42,31 +26,56 @@ module Fieldhand
42
26
  @http.use_ssl = true if @uri.scheme == 'https'
43
27
  end
44
28
 
45
- def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
46
- return enum_for(:items, verb, path, query) unless block_given?
29
+ # Return an `Enumerator` of items retrieved from the repository with the given `verb` and `query`, parsed with the
30
+ # given `parser_class`.
31
+ #
32
+ # The query defaults to an empty hash but will be merged with the given `verb` when making requests to the
33
+ # repository.
34
+ #
35
+ # Expects instances of the `parser_class` to respond to `items`, returning an `Enumerable` list of items that will be yielded to
36
+ # the caller.
37
+ #
38
+ # Raises a `ProtocolError` for any errors in the response.
39
+ #
40
+ # Fieldhand attempts to handle all flow control for the user using resumption tokens from the response so they only
41
+ # need to handle lazy enumerators and not worry about pagination and underlying network requests.
42
+ #
43
+ # # Examples
44
+ #
45
+ # ```
46
+ # paginator = Fieldhand::Paginator.new('http://www.example.com/oai')
47
+ # paginator.items('ListRecords', Fieldhand::ListRecordsParser).take(10_000)
48
+ # #=> [#<Fieldhand::Record: ...>, ...]
49
+ # ```
50
+ #
51
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
52
+ def items(verb, parser_class, query = {})
53
+ return enum_for(:items, verb, parser_class, query) unless block_given?
47
54
 
48
55
  loop do
49
- document = ::Ox.parse(request(query.merge('verb' => verb)))
50
- response_date = document.root.locate('responseDate[0]/^String').map { |date| Datestamp.parse(date) }.first
51
-
52
- document.root.locate('error').each do |error|
53
- convert_error(error)
54
- end
55
-
56
- document.root.locate(path).each do |item|
57
- yield item, response_date
56
+ response_parser = parse_response(query.merge('verb' => verb))
57
+ parser_class.new(response_parser).items.each do |item|
58
+ yield item
58
59
  end
59
60
 
60
- resumption_token = document.root.locate('?/resumptionToken/^String').first
61
- break unless resumption_token
61
+ break unless response_parser.resumption_token
62
62
 
63
- logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
64
- query = { 'resumptionToken' => resumption_token }
63
+ logger.debug('Fieldhand') { "Resumption token for #{verb}: #{response_parser.resumption_token}" }
64
+ query = { 'resumptionToken' => response_parser.resumption_token }
65
65
  end
66
66
  end
67
67
 
68
68
  private
69
69
 
70
+ def parse_response(query = {})
71
+ response_parser = ResponseParser.new(request(query))
72
+ response_parser.errors.each do |error|
73
+ raise error
74
+ end
75
+
76
+ response_parser
77
+ end
78
+
70
79
  def request(query = {})
71
80
  request_uri = uri.dup
72
81
  request_uri.query = encode_query(query)
@@ -79,12 +88,6 @@ module Fieldhand
79
88
  raise NetworkError, "error requesting #{query}: #{e}"
80
89
  end
81
90
 
82
- def convert_error(error)
83
- return unless ERROR_CODES.key?(error['code'])
84
-
85
- raise ERROR_CODES.fetch(error['code']), error.text
86
- end
87
-
88
91
  def encode_query(query = {})
89
92
  query.map { |k, v| ::CGI.escape(k) << '=' << ::CGI.escape(v) }.join('&')
90
93
  end
@@ -8,39 +8,58 @@ module Fieldhand
8
8
  class Record
9
9
  attr_reader :element, :response_date
10
10
 
11
+ # Return a new Record for the given element with an optional response date.
12
+ #
13
+ # Defaults the response date to the current time.
11
14
  def initialize(element, response_date = Time.now)
12
15
  @element = element
13
16
  @response_date = response_date
14
17
  end
15
18
 
19
+ # Test whether this item is marked as deleted or not according to its header.
20
+ #
21
+ # Note that a repository's support for deleted records can be interrogated through the `Identify` request, see
22
+ # https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
16
23
  def deleted?
17
24
  header.deleted?
18
25
  end
19
26
 
27
+ # Return the optional status of this item according to its header.
20
28
  def status
21
29
  header.status
22
30
  end
23
31
 
32
+ # Return the unique identifier of this item according to its header.
24
33
  def identifier
25
34
  header.identifier
26
35
  end
27
36
 
37
+ # Return the UTC datestamp of this item according to its header as a `Date` or `Time` depending on the granularity
38
+ # of this repository.
28
39
  def datestamp
29
40
  header.datestamp
30
41
  end
31
42
 
43
+ # Return any set memberships of this item according to its header.
32
44
  def sets
33
45
  header.sets
34
46
  end
35
47
 
48
+ # Return the single manifestation of the metadata of this item as a string, if present.
49
+ #
50
+ # As metadata can be in any format, Fieldhand does not attempt to parse it but leaves that to the user.
36
51
  def metadata
37
52
  @metadata ||= element.locate('metadata[0]').map { |metadata| Ox.dump(metadata) }.first
38
53
  end
39
54
 
55
+ # Return any about elements describing the metadata of this record as an array of strings.
56
+ #
57
+ # As about elements can be in any format, Fieldhand does not attempt to parse them but leaves that to the user.
40
58
  def about
41
59
  @about ||= element.locate('about').map { |about| Ox.dump(about) }
42
60
  end
43
61
 
62
+ # Return the associated Header for this record.
44
63
  def header
45
64
  @header ||= Header.new(element.header)
46
65
  end
@@ -1,11 +1,12 @@
1
1
  require 'fieldhand/arguments'
2
- require 'fieldhand/header'
3
- require 'fieldhand/identify'
2
+ require 'fieldhand/get_record_parser'
3
+ require 'fieldhand/identify_parser'
4
+ require 'fieldhand/list_identifiers_parser'
5
+ require 'fieldhand/list_metadata_formats_parser'
6
+ require 'fieldhand/list_records_parser'
7
+ require 'fieldhand/list_sets_parser'
4
8
  require 'fieldhand/logger'
5
- require 'fieldhand/metadata_format'
6
9
  require 'fieldhand/paginator'
7
- require 'fieldhand/record'
8
- require 'fieldhand/set'
9
10
  require 'uri'
10
11
 
11
12
  module Fieldhand
@@ -15,75 +16,112 @@ module Fieldhand
15
16
  class Repository
16
17
  attr_reader :uri, :logger
17
18
 
19
+ # Return a new repository with the given base URL and an optional logger.
20
+ #
21
+ # The base URL can be passed as a `URI` or anything that can be parsed as a URI such as a string.
22
+ #
23
+ # Defaults to using a null logger specific to this platform.
18
24
  def initialize(uri, logger = Logger.null)
19
25
  @uri = uri.is_a?(::URI) ? uri : URI(uri)
20
26
  @logger = logger
21
27
  end
22
28
 
29
+ # Send an Identify request to the repository and return an `Identify` response.
30
+ #
31
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
32
+ # response.
33
+ #
34
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
23
35
  def identify
24
- paginator.
25
- items('Identify', 'Identify').
26
- map { |identify, response_date| Identify.new(identify, response_date) }.
27
- first
36
+ paginator.items('Identify', IdentifyParser).first
28
37
  end
29
38
 
39
+ # Send a ListMetadataFormats request to the repository (with an optional identifier) and return an `Enumerator` of
40
+ # `MetadataFormat`s.
41
+ #
42
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
43
+ # response.
44
+ #
45
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
30
46
  def metadata_formats(identifier = nil)
31
- return enum_for(:metadata_formats, identifier) unless block_given?
47
+ query = {}
48
+ query['identifier'] = identifier if identifier
32
49
 
33
- arguments = {}
34
- arguments['identifier'] = identifier if identifier
35
-
36
- paginator.
37
- items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', arguments).
38
- each do |format, response_date|
39
- yield MetadataFormat.new(format, response_date)
40
- end
50
+ paginator.items('ListMetadataFormats', ListMetadataFormatsParser, query)
41
51
  end
42
52
 
53
+ # Send a ListSets request to the repository and return an `Enumerator` of `Set`s.
54
+ #
55
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
56
+ # response.
57
+ #
58
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets
43
59
  def sets
44
- return enum_for(:sets) unless block_given?
45
-
46
- paginator.
47
- items('ListSets', 'ListSets/set').
48
- each do |set, response_date|
49
- yield Set.new(set, response_date)
50
- end
60
+ paginator.items('ListSets', ListSetsParser)
51
61
  end
52
62
 
63
+ # Send a ListRecords request to the repository with optional arguments and return an `Enumerator` of `Record`s.
64
+ #
65
+ # The following arguments can be used:
66
+ #
67
+ # * :metadata_prefix - The prefix of the metadata format to be used for record metadata, defaults to "oai_dc"
68
+ # * :from - A `Date`, `Time` or formatted string specifying a lower bound for datestamp-based selective harvesting
69
+ # * :until - A `Date`, `Time` or formatted string specifying an upper bound for datestamp-based selective harvesting
70
+ # * :set - A `Set` or string set spec which specifies set criteria for selective harvesting
71
+ # * :resumption_token - A valid resumption token for resuming a previous request (note that Fieldhand typically
72
+ # handles resumption internally so this should not normally be used)
73
+ #
74
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
75
+ # response.
76
+ #
77
+ # # Examples
78
+ #
79
+ # ```
80
+ # repository = Fieldhand::Repository.new('http://www.example.com/oai')
81
+ # repository.records.each do |record|
82
+ # next if record.deleted?
83
+ #
84
+ # puts record.metadata
85
+ # end
86
+ # ```
87
+ #
88
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListRecords
53
89
  def records(arguments = {})
54
- return enum_for(:records, arguments) unless block_given?
55
-
56
90
  query = Arguments.new(arguments).to_query
57
91
 
58
- paginator.
59
- items('ListRecords', 'ListRecords/record', query).
60
- each do |record, response_date|
61
- yield Record.new(record, response_date)
62
- end
92
+ paginator.items('ListRecords', ListRecordsParser, query)
63
93
  end
64
94
 
95
+ # Send a ListIdentifiers request to the repository with optional arguments and return an `Enumerator` of `Header`s.
96
+ #
97
+ # This supports the same arguments as `Fieldhand::Repository#records` but only returns record headers.
98
+ #
99
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
100
+ # response.
101
+ #
102
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers
65
103
  def identifiers(arguments = {})
66
- return enum_for(:identifiers, arguments) unless block_given?
67
-
68
104
  query = Arguments.new(arguments).to_query
69
105
 
70
- paginator.
71
- items('ListIdentifiers', 'ListIdentifiers/header', query).
72
- each do |header, response_date|
73
- yield Header.new(header, response_date)
74
- end
106
+ paginator.items('ListIdentifiers', ListIdentifiersParser, query)
75
107
  end
76
108
 
109
+ # Send a GetRecord request to the repository with the given identifier and optional metadata prefix and return a
110
+ # `Record`.
111
+ #
112
+ # Supports passing a :metadata_prefix argument with a given metadata prefix which otherwise defaults to "oai_dc".
113
+ #
114
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
115
+ # response.
116
+ #
117
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord
77
118
  def get(identifier, arguments = {})
78
119
  query = {
79
120
  'identifier' => identifier,
80
121
  'metadataPrefix' => arguments.fetch(:metadata_prefix, 'oai_dc')
81
122
  }
82
123
 
83
- paginator.
84
- items('GetRecord', 'GetRecord/record', query).
85
- map { |record, response_date| Record.new(record, response_date) }.
86
- first
124
+ paginator.items('GetRecord', GetRecordParser, query).first
87
125
  end
88
126
 
89
127
  private
@@ -0,0 +1,67 @@
1
+ require 'fieldhand/datestamp'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ ProtocolError = ::Class.new(::StandardError)
6
+ BadArgumentError = ::Class.new(ProtocolError)
7
+ BadResumptionTokenError = ::Class.new(ProtocolError)
8
+ BadVerbError = ::Class.new(ProtocolError)
9
+ CannotDisseminateFormatError = ::Class.new(ProtocolError)
10
+ IdDoesNotExistError = ::Class.new(ProtocolError)
11
+ NoRecordsMatchError = ::Class.new(ProtocolError)
12
+ NoMetadataFormatsError = ::Class.new(ProtocolError)
13
+ NoSetHierarchyError = ::Class.new(ProtocolError)
14
+
15
+ # A parser for elements common to all OAI-PMH HTTP responses.
16
+ #
17
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPResponseFormat
18
+ class ResponseParser
19
+ ERROR_CODES = {
20
+ 'badArgument' => BadArgumentError,
21
+ 'badResumptionToken' => BadResumptionTokenError,
22
+ 'badVerb' => BadVerbError,
23
+ 'cannotDisseminateFormat' => CannotDisseminateFormatError,
24
+ 'idDoesNotExist' => IdDoesNotExistError,
25
+ 'noRecordsMatch' => NoRecordsMatchError,
26
+ 'noMetadataFormats' => NoMetadataFormatsError,
27
+ 'noSetHierarchy' => NoSetHierarchyError
28
+ }.freeze
29
+
30
+ attr_reader :response
31
+
32
+ # Return a new parser for the given response body.
33
+ def initialize(response)
34
+ @response = response
35
+ end
36
+
37
+ # Return the response date as a `Date` or `Time` depending on the granularity of the repository.
38
+ def response_date
39
+ @response_date ||= root.locate('responseDate[0]/^String').map { |date| Datestamp.parse(date) }.first
40
+ end
41
+
42
+ # Return any errors found in the response as `ProtocolError`s.
43
+ #
44
+ # Note that this does not _raise_ the errors but simply returns them.
45
+ def errors
46
+ @errors ||= root.locate('error').map { |error| convert_error(error) }
47
+ end
48
+
49
+ # Return the resumption token from the response, if present.
50
+ def resumption_token
51
+ @resumption_token ||= root.locate('?/resumptionToken[0]/^String').first
52
+ end
53
+
54
+ # Return the root element of the parsed document.
55
+ def root
56
+ @root ||= ::Ox.parse(response).root
57
+ end
58
+
59
+ private
60
+
61
+ def convert_error(element)
62
+ return unless ERROR_CODES.key?(element['code'])
63
+
64
+ ERROR_CODES.fetch(element['code']).new(element.text)
65
+ end
66
+ end
67
+ end
data/lib/fieldhand/set.rb CHANGED
@@ -7,23 +7,34 @@ module Fieldhand
7
7
  class Set
8
8
  attr_reader :element, :response_date
9
9
 
10
+ # Return a Set with the given element and optional response date.
11
+ #
12
+ # Defaults the response date to the current time.
10
13
  def initialize(element, response_date = Time.now)
11
14
  @element = element
12
15
  @response_date = response_date
13
16
  end
14
17
 
18
+ # Return the set's spec as its string representation.
19
+ #
20
+ # This means that Sets can be used as arguments that expect a set spec.
15
21
  def to_s
16
22
  spec
17
23
  end
18
24
 
25
+ # Return the set's unique identifier within the repository.
19
26
  def spec
20
27
  @spec ||= element.setSpec.text
21
28
  end
22
29
 
30
+ # Return the set's short human-readable name.
23
31
  def name
24
32
  @name ||= element.setName.text
25
33
  end
26
34
 
35
+ # Return any descriptions of the set as an array of strings.
36
+ #
37
+ # As descriptions can be in any format, Fieldhand does not attempt to parse them but leaves this to the user.
27
38
  def descriptions
28
39
  @descriptions ||= element.locate('setDescription').map { |description| Ox.dump(description) }
29
40
  end
@@ -1,6 +1,6 @@
1
1
  require 'fieldhand/arguments'
2
- require 'fieldhand/set'
3
2
  require 'fieldhand/metadata_format'
3
+ require 'fieldhand/set'
4
4
  require 'ox'
5
5
  require 'date'
6
6
  require 'time'
@@ -33,6 +33,16 @@ module Fieldhand
33
33
  it 'unparses strings into themselves' do
34
34
  expect(described_class.unparse('2001-01-01')).to eq('2001-01-01')
35
35
  end
36
+
37
+ it 'unparses DateTimes into time-granularity datestamps' do
38
+ expect(described_class.unparse(::DateTime.new(2001, 1, 1, 0, 0, 0))).to eq('2001-01-01T00:00:00Z')
39
+ end
40
+
41
+ it 'unparses unknown types by calling xmlschema' do
42
+ datestamp = Struct.new(:xmlschema).new('2001-01-01')
43
+
44
+ expect(described_class.unparse(datestamp)).to eq('2001-01-01')
45
+ end
36
46
  end
37
47
  end
38
48
  end
@@ -1,3 +1,8 @@
1
+ require 'fieldhand/get_record_parser'
2
+ require 'fieldhand/identify_parser'
3
+ require 'fieldhand/list_metadata_formats_parser'
4
+ require 'fieldhand/list_records_parser'
5
+ require 'fieldhand/list_sets_parser'
1
6
  require 'fieldhand/paginator'
2
7
 
3
8
  module Fieldhand
@@ -8,7 +13,7 @@ module Fieldhand
8
13
  'bad_argument_error.xml')
9
14
  paginator = described_class.new('http://www.example.com/oai')
10
15
 
11
- expect { paginator.items('Identify', 'Identify', 'bad' => 'Argument').first }.
16
+ expect { paginator.items('Identify', IdentifyParser, 'bad' => 'Argument').first }.
12
17
  to raise_error(BadArgumentError)
13
18
  end
14
19
 
@@ -17,7 +22,7 @@ module Fieldhand
17
22
  'bad_resumption_token_error.xml')
18
23
  paginator = described_class.new('http://www.example.com/oai')
19
24
 
20
- expect { paginator.items('ListRecords', 'ListRecords/record', 'resumptionToken' => 'foo').first }.
25
+ expect { paginator.items('ListRecords', ListRecordsParser, 'resumptionToken' => 'foo').first }.
21
26
  to raise_error(BadResumptionTokenError)
22
27
  end
23
28
 
@@ -26,7 +31,7 @@ module Fieldhand
26
31
  'bad_verb_error.xml')
27
32
  paginator = described_class.new('http://www.example.com/oai')
28
33
 
29
- expect { paginator.items('Bad', 'Bad').first }.
34
+ expect { paginator.items('Bad', IdentifyParser).first }.
30
35
  to raise_error(BadVerbError)
31
36
  end
32
37
 
@@ -35,7 +40,7 @@ module Fieldhand
35
40
  'cannot_disseminate_format_error.xml')
36
41
  paginator = described_class.new('http://www.example.com/oai')
37
42
 
38
- expect { paginator.items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'bad').first }.
43
+ expect { paginator.items('ListRecords', ListRecordsParser, 'metadataPrefix' => 'bad').first }.
39
44
  to raise_error(CannotDisseminateFormatError)
40
45
  end
41
46
 
@@ -45,7 +50,7 @@ module Fieldhand
45
50
  paginator = described_class.new('http://www.example.com/oai')
46
51
 
47
52
  expect {
48
- paginator.items('GetRecord', 'GetRecord/record', 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
53
+ paginator.items('GetRecord', GetRecordParser, 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
49
54
  }.to raise_error(IdDoesNotExistError)
50
55
  end
51
56
 
@@ -56,7 +61,7 @@ module Fieldhand
56
61
 
57
62
  expect {
58
63
  paginator.
59
- items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').
64
+ items('ListRecords', ListRecordsParser, 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').
60
65
  first
61
66
  }.to raise_error(NoRecordsMatchError)
62
67
  end
@@ -67,7 +72,7 @@ module Fieldhand
67
72
  paginator = described_class.new('http://www.example.com/oai')
68
73
 
69
74
  expect {
70
- paginator.items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', 'identifier' => 'bad').first
75
+ paginator.items('ListMetadataFormats', ListMetadataFormatsParser, 'identifier' => 'bad').first
71
76
  }.to raise_error(NoMetadataFormatsError)
72
77
  end
73
78
 
@@ -76,7 +81,7 @@ module Fieldhand
76
81
  'no_set_hierarchy_error.xml')
77
82
  paginator = described_class.new('http://www.example.com/oai')
78
83
 
79
- expect { paginator.items('ListSets', 'ListSets/set').first }.
84
+ expect { paginator.items('ListSets', ListSetsParser).first }.
80
85
  to raise_error(NoSetHierarchyError)
81
86
  end
82
87
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fieldhand
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-05-08 00:00:00.000000000 Z
12
+ date: 2017-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ox
@@ -90,13 +90,20 @@ files:
90
90
  - lib/fieldhand.rb
91
91
  - lib/fieldhand/arguments.rb
92
92
  - lib/fieldhand/datestamp.rb
93
+ - lib/fieldhand/get_record_parser.rb
93
94
  - lib/fieldhand/header.rb
94
95
  - lib/fieldhand/identify.rb
96
+ - lib/fieldhand/identify_parser.rb
97
+ - lib/fieldhand/list_identifiers_parser.rb
98
+ - lib/fieldhand/list_metadata_formats_parser.rb
99
+ - lib/fieldhand/list_records_parser.rb
100
+ - lib/fieldhand/list_sets_parser.rb
95
101
  - lib/fieldhand/logger.rb
96
102
  - lib/fieldhand/metadata_format.rb
97
103
  - lib/fieldhand/paginator.rb
98
104
  - lib/fieldhand/record.rb
99
105
  - lib/fieldhand/repository.rb
106
+ - lib/fieldhand/response_parser.rb
100
107
  - lib/fieldhand/set.rb
101
108
  - spec/fieldhand/arguments_spec.rb
102
109
  - spec/fieldhand/datestamp_spec.rb