fieldhand 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe77387e599f04fe11981420174c0c53c6f1b3ce
4
- data.tar.gz: f3bc88686b42bca448c52a06988273dcae890b8f
3
+ metadata.gz: cbb2e358fe735e190c98cdbb891d1e061ac9c94a
4
+ data.tar.gz: 64d94a760f881779790518e59a9508a1678d2be2
5
5
  SHA512:
6
- metadata.gz: aac19ca6cdda6acc837c4d8f2e33874fccaa30ee91b3e78e9f0e0a5948b93712785507e960a3c13cfa55da963695b7ce695ddd86333358041335868ee6dca6bd
7
- data.tar.gz: e1dfdb2adf541cf2a4dc2c3cfbe39da2b5843595b484ff6a7fcf31f22694431a38b09dcbbbc855d6ee3426ad789ce3fcea45f4263f215b62001be4cf076c8f48
6
+ metadata.gz: 59ba22ea64add43e36fa2dab85230818515950ca78ddaa4e9db6fb322de59b9b59ecf7ae4fcbdc3cf85f177ebc084773bfaf96f86da15e5843b148296e529de6
7
+ data.tar.gz: 114f1acb87901c6935dea29b9b03e7d115ca937486f069ef4af939a1f3a39592fe4a9d9abb50557ac7ff8ac9f27676f6b7cfb619a79cb9313aeb82f6b46e73ec
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  A Ruby library for harvesting metadata from [OAI-PMH](https://www.openarchives.org/OAI/openarchivesprotocol.html) repositories.
4
4
 
5
- **Current version:** 0.3.0
5
+ **Current version:** 0.3.1
6
6
  **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2
7
7
 
8
8
  ## Installation
@@ -2,6 +2,16 @@ require 'fieldhand/datestamp'
2
2
 
3
3
  module Fieldhand
4
4
  # A class for converting Fieldhand arguments into OAI-PMH query parameters.
5
+ #
6
+ # Specifically:
7
+ #
8
+ # * :metadata_prefix
9
+ # * :resumption_token
10
+ # * :from
11
+ # * :until
12
+ # * :set
13
+ #
14
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPRequestFormat
5
15
  class Arguments
6
16
  VALID_KEYS = {
7
17
  :metadata_prefix => 'metadataPrefix',
@@ -13,10 +23,32 @@ module Fieldhand
13
23
 
14
24
  attr_reader :options
15
25
 
26
+ # Return a new `Arguments` with the given `Hash`.
16
27
  def initialize(options = {})
17
28
  @options = options
18
29
  end
19
30
 
31
+ # Return a query as a `Hash` suitable for encoding as a query string in an OAI-PMH request.
32
+ #
33
+ # Converts arguments passed with symbol keys into the corresponding strings as defined in the OAI-PMH protocol,
34
+ # converting values into the appropriate format (e.g. `Time`s, `Date`s, `MetadataFormat`s and `Set`s into strings).
35
+ #
36
+ # Defaults to returning a metadata prefix of "oai_dc".
37
+ #
38
+ # Raises an `ArgumentError` if an unknown argument is encountered.
39
+ #
40
+ # # Examples
41
+ #
42
+ # ```
43
+ # Fieldhand::Arguments.new(:metadata_prefix => 'xoai', :from => Date.new(2001, 1, 1)).to_query
44
+ # #=> { "metadataPrefix" => "xoai", "from" => "2001-01-01" }
45
+ #
46
+ # Fieldhand::Arguments.new(:until => Time.utc(2001, 1, 1, 12, 0, 0)).to_query
47
+ # #=> { "metadataPrefix" => "oai_dc", "until" => "2001-01-01T12:00:00Z" }
48
+ #
49
+ # Fieldhand::Arguments.new(:foo => "bar").to_query
50
+ # # ArgumentError: unknown argument: foo
51
+ # ```
20
52
  def to_query
21
53
  options.inject(defaults) do |query, (key, value)|
22
54
  raise ::ArgumentError, "unknown argument: #{key}" unless VALID_KEYS.key?(key)
@@ -27,12 +59,12 @@ module Fieldhand
27
59
  end
28
60
  end
29
61
 
62
+ private
63
+
30
64
  def defaults
31
65
  { 'metadataPrefix' => 'oai_dc' }
32
66
  end
33
67
 
34
- private
35
-
36
68
  def convert_value(key, value)
37
69
  return value.to_s unless key == :from || key == :until
38
70
 
@@ -4,6 +4,10 @@ require 'time'
4
4
  module Fieldhand
5
5
  # A class to handle datestamps of varying granularity.
6
6
  class Datestamp
7
+ # Return either a `Date` or `Time` for the given string datestamp.
8
+ #
9
+ # As repositories may only support date-level granularity rather than time-level granularity, we need to handle both
10
+ # types of datestamp.
7
11
  def self.parse(datestamp)
8
12
  if datestamp.size == 10
9
13
  ::Date.strptime(datestamp)
@@ -12,14 +16,21 @@ module Fieldhand
12
16
  end
13
17
  end
14
18
 
19
+ # Return a string UTC datestamp given a string, `Date`, `Time` or anything responding to `xmlschema`.
20
+ #
21
+ # The granularity of the resulting datestamp depends on the input type:
22
+ #
23
+ # * Strings are returned untouched (assuming they are already formatted datestamps)
24
+ # * Dates will return a date-level granularity datestamp, e.g. 2001-01-01
25
+ # * Times will return a time-level granularity UTC datestamp, e.g. 2001-01-01T00:00:00Z
26
+ # * DateTimes will return a time-level granularity UTC datestamp, e.g. 2001-01-01T00:00:00Z
27
+ # * Anything else is assumed to respond to `xmlschema`
15
28
  def self.unparse(datestamp)
16
29
  case datestamp
17
- when ::String
18
- datestamp
19
- when ::Date
20
- datestamp.strftime
21
- when ::Time
22
- datestamp.utc.xmlschema
30
+ when ::String then datestamp
31
+ when ::DateTime then unparse(::Time.xmlschema(datestamp.to_s))
32
+ when ::Date then datestamp.strftime
33
+ when ::Time then datestamp.utc.xmlschema
23
34
  else
24
35
  datestamp.xmlschema
25
36
  end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/record'
2
+
3
+ module Fieldhand
4
+ # A parser for GetRecord responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord
7
+ class GetRecordParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser populated with the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Record`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('GetRecord/record').
20
+ map { |item| Record.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -15,27 +15,38 @@ module Fieldhand
15
15
  class Header
16
16
  attr_reader :element, :response_date
17
17
 
18
+ # Return a new Header with the given element and an optional response date.
19
+ #
20
+ # Defaults the response date to the current time.
18
21
  def initialize(element, response_date = Time.now)
19
22
  @element = element
20
23
  @response_date = response_date
21
24
  end
22
25
 
26
+ # Test whether this item is marked as deleted or not.
27
+ #
28
+ # Note that a repository's support for deleted records can be interrogated through the `Identify` request, see
29
+ # https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
23
30
  def deleted?
24
31
  status == 'deleted'
25
32
  end
26
33
 
34
+ # Return the optional status of this item.
27
35
  def status
28
36
  element['status']
29
37
  end
30
38
 
39
+ # Return the unique identifier of this item.
31
40
  def identifier
32
41
  @identifier ||= element.identifier.text
33
42
  end
34
43
 
44
+ # Return the UTC datestamp of this item.
35
45
  def datestamp
36
46
  @datestamp ||= Datestamp.parse(element.datestamp.text)
37
47
  end
38
48
 
49
+ # Return any set memberships of this item.
39
50
  def sets
40
51
  @sets ||= element.locate('setSpec/^String')
41
52
  end
@@ -8,43 +8,71 @@ module Fieldhand
8
8
  class Identify
9
9
  attr_reader :element, :response_date
10
10
 
11
+ # Return a new Identify with the given element and optional response date.
12
+ #
13
+ # Defaults the response date to the current time.
11
14
  def initialize(element, response_date = Time.now)
12
15
  @element = element
13
16
  @response_date = response_date
14
17
  end
15
18
 
19
+ # Return the human readable name for the repository.
16
20
  def name
17
21
  @name ||= element.repositoryName.text
18
22
  end
19
23
 
24
+ # Return the base URL of the repository as a URI.
25
+ #
26
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPRequestFormat
20
27
  def base_url
21
28
  @base_url ||= URI(element.baseURL.text)
22
29
  end
23
30
 
31
+ # Return the version of the OAI-PMH protocol supported by the repository as a string.
24
32
  def protocol_version
25
33
  @protocol_version ||= element.protocolVersion.text
26
34
  end
27
35
 
36
+ # Return the guaranteed lower limit of all datestamps recording changes, modifications, or deletions in the
37
+ # repository as a `Date` or `Time` depending on the granularity of the repository.
28
38
  def earliest_datestamp
29
39
  @earliest_datestamp ||= Datestamp.parse(element.earliestDatestamp.text)
30
40
  end
31
41
 
42
+ # Return the manner in which the repository supports the notion of deleted records as a string.
43
+ #
44
+ # Possible values are:
45
+ #
46
+ # * no
47
+ # * transient
48
+ # * persistent
49
+ #
50
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
32
51
  def deleted_record
33
52
  @deleted_record ||= element.deletedRecord.text
34
53
  end
35
54
 
55
+ # Return the finest harvesting granularity supported by the repository. The legitimate values are YYYY-MM-DD and
56
+ # YYYY-MM-DDThh:mm:ssZ with meanings as defined in ISO 8601.
57
+ #
58
+ # See http://www.w3.org/TR/NOTE-datetime
36
59
  def granularity
37
60
  @granularity ||= element.granularity.text
38
61
  end
39
62
 
63
+ # Return any e-mail addresses of administrators of the repository as an array of strings.
40
64
  def admin_emails
41
65
  @admin_emails ||= element.locate('adminEmail/^String')
42
66
  end
43
67
 
68
+ # Return any compression encodings supported by the repository as an array of strings.
44
69
  def compression
45
70
  @compression ||= element.locate('compression/^String')
46
71
  end
47
72
 
73
+ # Return any raw description elements used by communities to describe their repositories as an array of elements.
74
+ #
75
+ # As these can be in any format, Fieldhand does not attempt to parse the elements but leaves that to users.
48
76
  def descriptions
49
77
  @descriptions ||= element.locate('description')
50
78
  end
@@ -0,0 +1,26 @@
1
+ require 'fieldhand/identify'
2
+
3
+ module Fieldhand
4
+ # A parser for Identify responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
7
+ class IdentifyParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Identify`s found in the response.
16
+ #
17
+ # In reality, there will only ever be at most one `Identify` in a response but having a consistent interface with
18
+ # the other parsers keeps the supporting code simpler.
19
+ def items
20
+ response_parser.
21
+ root.
22
+ locate('Identify').
23
+ map { |item| Identify.new(item, response_parser.response_date) }
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/header'
2
+
3
+ module Fieldhand
4
+ # A parser for ListIdentifiers responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers
7
+ class ListIdentifiersParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Header`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListIdentifiers/header').
20
+ map { |item| Header.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/metadata_format'
2
+
3
+ module Fieldhand
4
+ # A parser for ListMetadataFormats responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
7
+ class ListMetadataFormatsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `MetadataFormat`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListMetadataFormats/metadataFormat').
20
+ map { |item| MetadataFormat.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/record'
2
+
3
+ module Fieldhand
4
+ # A parser for ListRecords responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListRecords
7
+ class ListRecordsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Record`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListRecords/record').
20
+ map { |item| Record.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require 'fieldhand/set'
2
+
3
+ module Fieldhand
4
+ # A parser for ListSets responses.
5
+ #
6
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets
7
+ class ListSetsParser
8
+ attr_reader :response_parser
9
+
10
+ # Return a new parser for the given response parser.
11
+ def initialize(response_parser)
12
+ @response_parser = response_parser
13
+ end
14
+
15
+ # Return an array of `Set`s found in the response.
16
+ def items
17
+ response_parser.
18
+ root.
19
+ locate('ListSets/set').
20
+ map { |item| Set.new(item, response_parser.response_date) }
21
+ end
22
+ end
23
+ end
@@ -6,6 +6,7 @@ module Fieldhand
6
6
  module Logger
7
7
  module_function
8
8
 
9
+ # Return a new `Logger` that logs to the null device on this platform.
9
10
  def null
10
11
  ::Logger.new(null_device)
11
12
  end
@@ -7,23 +7,32 @@ module Fieldhand
7
7
  class MetadataFormat
8
8
  attr_reader :element, :response_date
9
9
 
10
+ # Return a new Metadata Format for the given element with an optional response date.
11
+ #
12
+ # The response date defaults to the current time.
10
13
  def initialize(element, response_date = Time.now)
11
14
  @element = element
12
15
  @response_date = response_date
13
16
  end
14
17
 
18
+ # Return the prefix as a string representation of the format.
19
+ #
20
+ # This makes it possible to pass a Metadata Format to methods that expect a string metadata prefix.
15
21
  def to_s
16
22
  prefix
17
23
  end
18
24
 
25
+ # Return the string metadata prefix for the format.
19
26
  def prefix
20
27
  @prefix ||= element.metadataPrefix.text
21
28
  end
22
29
 
30
+ # Return the location of an XML Schema describing the format as a URI.
23
31
  def schema
24
32
  @schema ||= URI(element.schema.text)
25
33
  end
26
34
 
35
+ # Return the XML Namespace URI for the format.
27
36
  def namespace
28
37
  @namespace ||= URI(element.metadataNamespace.text)
29
38
  end
@@ -1,40 +1,24 @@
1
- require 'fieldhand/datestamp'
2
1
  require 'fieldhand/logger'
3
- require 'ox'
2
+ require 'fieldhand/response_parser'
4
3
  require 'cgi'
5
4
  require 'net/http'
6
5
  require 'uri'
7
6
 
8
7
  module Fieldhand
9
8
  NetworkError = ::Class.new(::StandardError)
10
- ProtocolError = ::Class.new(::StandardError)
11
- BadArgumentError = ::Class.new(ProtocolError)
12
- BadResumptionTokenError = ::Class.new(ProtocolError)
13
- BadVerbError = ::Class.new(ProtocolError)
14
- CannotDisseminateFormatError = ::Class.new(ProtocolError)
15
- IdDoesNotExistError = ::Class.new(ProtocolError)
16
- NoRecordsMatchError = ::Class.new(ProtocolError)
17
- NoMetadataFormatsError = ::Class.new(ProtocolError)
18
- NoSetHierarchyError = ::Class.new(ProtocolError)
19
9
 
20
10
  # An abstraction over interactions with an OAI-PMH repository, handling requests, responses and paginating over
21
11
  # results using a resumption token.
22
12
  #
23
13
  # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
24
14
  class Paginator
25
- ERROR_CODES = {
26
- 'badArgument' => BadArgumentError,
27
- 'badResumptionToken' => BadResumptionTokenError,
28
- 'badVerb' => BadVerbError,
29
- 'cannotDisseminateFormat' => CannotDisseminateFormatError,
30
- 'idDoesNotExist' => IdDoesNotExistError,
31
- 'noRecordsMatch' => NoRecordsMatchError,
32
- 'noMetadataFormats' => NoMetadataFormatsError,
33
- 'noSetHierarchy' => NoSetHierarchyError
34
- }.freeze
35
-
36
15
  attr_reader :uri, :logger, :http
37
16
 
17
+ # Return a new paginator for the given repository base URI and optional logger.
18
+ #
19
+ # The URI can be passed as either a `URI` or something that can be parsed as a URI such as a string.
20
+ #
21
+ # The logger will default to a null logger appropriate to this platform.
38
22
  def initialize(uri, logger = Logger.null)
39
23
  @uri = uri.is_a?(::URI) ? uri : URI(uri)
40
24
  @logger = logger
@@ -42,31 +26,56 @@ module Fieldhand
42
26
  @http.use_ssl = true if @uri.scheme == 'https'
43
27
  end
44
28
 
45
- def items(verb, path, query = {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
46
- return enum_for(:items, verb, path, query) unless block_given?
29
+ # Return an `Enumerator` of items retrieved from the repository with the given `verb` and `query`, parsed with the
30
+ # given `parser_class`.
31
+ #
32
+ # The query defaults to an empty hash but will be merged with the given `verb` when making requests to the
33
+ # repository.
34
+ #
35
+ # Expects instances of `parser_class` to respond to `items`, returning an `Enumerable` list of items that will be yielded to
36
+ # the caller.
37
+ #
38
+ # Raises a `ProtocolError` for any errors in the response.
39
+ #
40
+ # Fieldhand attempts to handle all flow control for the user using resumption tokens from the response so they only
41
+ # need handle lazy enumerators and not worry about pagination and underlying network requests.
42
+ #
43
+ # # Examples
44
+ #
45
+ # ```
46
+ # paginator = Fieldhand::Paginator.new('http://www.example.com/oai')
47
+ # paginator.items('ListRecords', Fieldhand::ListRecordsParser).take(10_000)
48
+ # #=> [#<Fieldhand::Record: ...>, ...]
49
+ # ```
50
+ #
51
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl
52
+ def items(verb, parser_class, query = {})
53
+ return enum_for(:items, verb, parser_class, query) unless block_given?
47
54
 
48
55
  loop do
49
- document = ::Ox.parse(request(query.merge('verb' => verb)))
50
- response_date = document.root.locate('responseDate[0]/^String').map { |date| Datestamp.parse(date) }.first
51
-
52
- document.root.locate('error').each do |error|
53
- convert_error(error)
54
- end
55
-
56
- document.root.locate(path).each do |item|
57
- yield item, response_date
56
+ response_parser = parse_response(query.merge('verb' => verb))
57
+ parser_class.new(response_parser).items.each do |item|
58
+ yield item
58
59
  end
59
60
 
60
- resumption_token = document.root.locate('?/resumptionToken/^String').first
61
- break unless resumption_token
61
+ break unless response_parser.resumption_token
62
62
 
63
- logger.debug('Fieldhand') { "Resumption token for #{verb}: #{resumption_token}" }
64
- query = { 'resumptionToken' => resumption_token }
63
+ logger.debug('Fieldhand') { "Resumption token for #{verb}: #{response_parser.resumption_token}" }
64
+ query = { 'resumptionToken' => response_parser.resumption_token }
65
65
  end
66
66
  end
67
67
 
68
68
  private
69
69
 
70
+ def parse_response(query = {})
71
+ response_parser = ResponseParser.new(request(query))
72
+ response_parser.errors.each do |error|
73
+ raise error
74
+ end
75
+
76
+ response_parser
77
+ end
78
+
70
79
  def request(query = {})
71
80
  request_uri = uri.dup
72
81
  request_uri.query = encode_query(query)
@@ -79,12 +88,6 @@ module Fieldhand
79
88
  raise NetworkError, "error requesting #{query}: #{e}"
80
89
  end
81
90
 
82
- def convert_error(error)
83
- return unless ERROR_CODES.key?(error['code'])
84
-
85
- raise ERROR_CODES.fetch(error['code']), error.text
86
- end
87
-
88
91
  def encode_query(query = {})
89
92
  query.map { |k, v| ::CGI.escape(k) << '=' << ::CGI.escape(v) }.join('&')
90
93
  end
@@ -8,39 +8,58 @@ module Fieldhand
8
8
  class Record
9
9
  attr_reader :element, :response_date
10
10
 
11
+ # Return a new Record for the given element with an optional response date.
12
+ #
13
+ # Defaults the response date to the current time.
11
14
  def initialize(element, response_date = Time.now)
12
15
  @element = element
13
16
  @response_date = response_date
14
17
  end
15
18
 
19
+ # Test whether this item is marked as deleted or not according to its header.
20
+ #
21
+ # Note that a repository's support for deleted records can be interrogated through the `Identify` request, see
22
+ # https://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords
16
23
  def deleted?
17
24
  header.deleted?
18
25
  end
19
26
 
27
+ # Return the optional status of this item according to its header.
20
28
  def status
21
29
  header.status
22
30
  end
23
31
 
32
+ # Return the unique identifier of this item according to its header.
24
33
  def identifier
25
34
  header.identifier
26
35
  end
27
36
 
37
+ # Return the UTC datestamp of this item according to its header as a `Date` or `Time` depending on the granularity
38
+ # of this repository.
28
39
  def datestamp
29
40
  header.datestamp
30
41
  end
31
42
 
43
+ # Return any set memberships of this item according to its header.
32
44
  def sets
33
45
  header.sets
34
46
  end
35
47
 
48
+ # Return the single manifestation of the metadata of this item as a string, if present.
49
+ #
50
+ # As metadata can be in any format, Fieldhand does not attempt to parse it but leaves that to the user.
36
51
  def metadata
37
52
  @metadata ||= element.locate('metadata[0]').map { |metadata| Ox.dump(metadata) }.first
38
53
  end
39
54
 
55
+ # Return any about elements describing the metadata of this record as an array of strings.
56
+ #
57
+ # As about elements can be in any format, Fieldhand does not attempt to parse them but leaves that to the user.
40
58
  def about
41
59
  @about ||= element.locate('about').map { |about| Ox.dump(about) }
42
60
  end
43
61
 
62
+ # Return the associated Header for this record.
44
63
  def header
45
64
  @header ||= Header.new(element.header)
46
65
  end
@@ -1,11 +1,12 @@
1
1
  require 'fieldhand/arguments'
2
- require 'fieldhand/header'
3
- require 'fieldhand/identify'
2
+ require 'fieldhand/get_record_parser'
3
+ require 'fieldhand/identify_parser'
4
+ require 'fieldhand/list_identifiers_parser'
5
+ require 'fieldhand/list_metadata_formats_parser'
6
+ require 'fieldhand/list_records_parser'
7
+ require 'fieldhand/list_sets_parser'
4
8
  require 'fieldhand/logger'
5
- require 'fieldhand/metadata_format'
6
9
  require 'fieldhand/paginator'
7
- require 'fieldhand/record'
8
- require 'fieldhand/set'
9
10
  require 'uri'
10
11
 
11
12
  module Fieldhand
@@ -15,75 +16,112 @@ module Fieldhand
15
16
  class Repository
16
17
  attr_reader :uri, :logger
17
18
 
19
+ # Return a new repository with the given base URL and an optional logger.
20
+ #
21
+ # The base URL can be passed as a `URI` or anything that can be parsed as a URI such as a string.
22
+ #
23
+ # Defaults to using a null logger specific to this platform.
18
24
  def initialize(uri, logger = Logger.null)
19
25
  @uri = uri.is_a?(::URI) ? uri : URI(uri)
20
26
  @logger = logger
21
27
  end
22
28
 
29
+ # Send an Identify request to the repository and return an `Identify` response.
30
+ #
31
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
32
+ # response.
33
+ #
34
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#Identify
23
35
  def identify
24
- paginator.
25
- items('Identify', 'Identify').
26
- map { |identify, response_date| Identify.new(identify, response_date) }.
27
- first
36
+ paginator.items('Identify', IdentifyParser).first
28
37
  end
29
38
 
39
+ # Send a ListMetadataFormats request to the repository (with an optional identifier) and return an `Enumerator` of
40
+ # `MetadataFormat`s.
41
+ #
42
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
43
+ # response.
44
+ #
45
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
30
46
  def metadata_formats(identifier = nil)
31
- return enum_for(:metadata_formats, identifier) unless block_given?
47
+ query = {}
48
+ query['identifier'] = identifier if identifier
32
49
 
33
- arguments = {}
34
- arguments['identifier'] = identifier if identifier
35
-
36
- paginator.
37
- items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', arguments).
38
- each do |format, response_date|
39
- yield MetadataFormat.new(format, response_date)
40
- end
50
+ paginator.items('ListMetadataFormats', ListMetadataFormatsParser, query)
41
51
  end
42
52
 
53
+ # Send a ListSets request to the repository and return an `Enumerator` of `Set`s.
54
+ #
55
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
56
+ # response.
57
+ #
58
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets
43
59
  def sets
44
- return enum_for(:sets) unless block_given?
45
-
46
- paginator.
47
- items('ListSets', 'ListSets/set').
48
- each do |set, response_date|
49
- yield Set.new(set, response_date)
50
- end
60
+ paginator.items('ListSets', ListSetsParser)
51
61
  end
52
62
 
63
+ # Send a ListRecords request to the repository with optional arguments and return an `Enumerator` of `Record`s.
64
+ #
65
+ # The following arguments can be used:
66
+ #
67
+ # * :metadata_prefix - The prefix of the metadata format to be used for record metadata, defaults to "oai_dc"
68
+ # * :from - A `Date`, `Time` or formatted string specifying a lower bound for datestamp-based selective harvesting
69
+ # * :until - A `Date`, `Time` or formatted string specifying an upper bound for datestamp-based selective harvesting
70
+ # * :set - A `Set` or string set spec which specifies set criteria for selective harvesting
71
+ # * :resumption_token - A valid resumption token for resuming a previous request (note that Fieldhand typically
72
+ # handles resumption internally so this should not be normally used)
73
+ #
74
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
75
+ # response.
76
+ #
77
+ # # Examples
78
+ #
79
+ # ```
80
+ # repository = Fieldhand::Repository.new('http://www.example.com/oai')
81
+ # repository.records.each do |record|
82
+ # next if record.deleted?
83
+ #
84
+ # puts record.metadata
85
+ # end
86
+ # ```
87
+ #
88
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListRecords
53
89
  def records(arguments = {})
54
- return enum_for(:records, arguments) unless block_given?
55
-
56
90
  query = Arguments.new(arguments).to_query
57
91
 
58
- paginator.
59
- items('ListRecords', 'ListRecords/record', query).
60
- each do |record, response_date|
61
- yield Record.new(record, response_date)
62
- end
92
+ paginator.items('ListRecords', ListRecordsParser, query)
63
93
  end
64
94
 
95
+ # Send a ListIdentifiers request to the repository with optional arguments and return an `Enumerator` of `Header`s.
96
+ #
97
+ # This supports the same arguments as `Fieldhand::Repository#records` but only returns record headers.
98
+ #
99
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
100
+ # response.
101
+ #
102
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers
65
103
  def identifiers(arguments = {})
66
- return enum_for(:identifiers, arguments) unless block_given?
67
-
68
104
  query = Arguments.new(arguments).to_query
69
105
 
70
- paginator.
71
- items('ListIdentifiers', 'ListIdentifiers/header', query).
72
- each do |header, response_date|
73
- yield Header.new(header, response_date)
74
- end
106
+ paginator.items('ListIdentifiers', ListIdentifiersParser, query)
75
107
  end
76
108
 
109
+ # Send a GetRecord request to the repository with the given identifier and optional metadata prefix and return a
110
+ # `Record`.
111
+ #
112
+ # Supports passing a :metadata_prefix argument with a given metadata prefix which otherwise defaults to "oai_dc".
113
+ #
114
+ # Raises a `NetworkError` if there is an issue contacting the repository or a `ProtocolError` if received in
115
+ # response.
116
+ #
117
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord
77
118
  def get(identifier, arguments = {})
78
119
  query = {
79
120
  'identifier' => identifier,
80
121
  'metadataPrefix' => arguments.fetch(:metadata_prefix, 'oai_dc')
81
122
  }
82
123
 
83
- paginator.
84
- items('GetRecord', 'GetRecord/record', query).
85
- map { |record, response_date| Record.new(record, response_date) }.
86
- first
124
+ paginator.items('GetRecord', GetRecordParser, query).first
87
125
  end
88
126
 
89
127
  private
@@ -0,0 +1,67 @@
1
+ require 'fieldhand/datestamp'
2
+ require 'ox'
3
+
4
+ module Fieldhand
5
+ ProtocolError = ::Class.new(::StandardError)
6
+ BadArgumentError = ::Class.new(ProtocolError)
7
+ BadResumptionTokenError = ::Class.new(ProtocolError)
8
+ BadVerbError = ::Class.new(ProtocolError)
9
+ CannotDisseminateFormatError = ::Class.new(ProtocolError)
10
+ IdDoesNotExistError = ::Class.new(ProtocolError)
11
+ NoRecordsMatchError = ::Class.new(ProtocolError)
12
+ NoMetadataFormatsError = ::Class.new(ProtocolError)
13
+ NoSetHierarchyError = ::Class.new(ProtocolError)
14
+
15
+ # A parser for elements common to all OAI-PMH HTTP responses.
16
+ #
17
+ # See https://www.openarchives.org/OAI/openarchivesprotocol.html#HTTPResponseFormat
18
+ class ResponseParser
19
+ ERROR_CODES = {
20
+ 'badArgument' => BadArgumentError,
21
+ 'badResumptionToken' => BadResumptionTokenError,
22
+ 'badVerb' => BadVerbError,
23
+ 'cannotDisseminateFormat' => CannotDisseminateFormatError,
24
+ 'idDoesNotExist' => IdDoesNotExistError,
25
+ 'noRecordsMatch' => NoRecordsMatchError,
26
+ 'noMetadataFormats' => NoMetadataFormatsError,
27
+ 'noSetHierarchy' => NoSetHierarchyError
28
+ }.freeze
29
+
30
+ attr_reader :response
31
+
32
+ # Return a new parser for the given response body.
33
+ def initialize(response)
34
+ @response = response
35
+ end
36
+
37
+ # Return the response date as a `Date` or `Time` depending on the granularity of the repository.
38
+ def response_date
39
+ @response_date ||= root.locate('responseDate[0]/^String').map { |date| Datestamp.parse(date) }.first
40
+ end
41
+
42
+ # Return any errors found in the response as `ProtocolError`s.
43
+ #
44
+ # Note that this does not _raise_ the errors but simply returns them.
45
+ def errors
46
+ @errors ||= root.locate('error').map { |error| convert_error(error) }
47
+ end
48
+
49
+ # Return the resumption token from the response, if present.
50
+ def resumption_token
51
+ @resumption_token ||= root.locate('?/resumptionToken[0]/^String').first
52
+ end
53
+
54
+ # Return the root element of the parsed document.
55
+ def root
56
+ @root ||= ::Ox.parse(response).root
57
+ end
58
+
59
+ private
60
+
61
+ def convert_error(element)
62
+ return unless ERROR_CODES.key?(element['code'])
63
+
64
+ ERROR_CODES.fetch(element['code']).new(element.text)
65
+ end
66
+ end
67
+ end
data/lib/fieldhand/set.rb CHANGED
@@ -7,23 +7,34 @@ module Fieldhand
7
7
  class Set
8
8
  attr_reader :element, :response_date
9
9
 
10
+ # Return a Set with the given element and optional response date.
11
+ #
12
+ # Defaults the response date to the current time.
10
13
  def initialize(element, response_date = Time.now)
11
14
  @element = element
12
15
  @response_date = response_date
13
16
  end
14
17
 
18
+ # Return the set's spec as its string representation.
19
+ #
20
+ # This means that Sets can be used as arguments that expect a set spec.
15
21
  def to_s
16
22
  spec
17
23
  end
18
24
 
25
+ # Return the set's unique identifier within the repository.
19
26
  def spec
20
27
  @spec ||= element.setSpec.text
21
28
  end
22
29
 
30
+ # Return the set's short human-readable name.
23
31
  def name
24
32
  @name ||= element.setName.text
25
33
  end
26
34
 
35
+ # Return any descriptions of the set as an array of strings.
36
+ #
37
+ # As descriptions can be in any format, Fieldhand does not attempt to parse them but leaves this to the user.
27
38
  def descriptions
28
39
  @descriptions ||= element.locate('setDescription').map { |description| Ox.dump(description) }
29
40
  end
@@ -1,6 +1,6 @@
1
1
  require 'fieldhand/arguments'
2
- require 'fieldhand/set'
3
2
  require 'fieldhand/metadata_format'
3
+ require 'fieldhand/set'
4
4
  require 'ox'
5
5
  require 'date'
6
6
  require 'time'
@@ -33,6 +33,16 @@ module Fieldhand
33
33
  it 'unparses strings into themselves' do
34
34
  expect(described_class.unparse('2001-01-01')).to eq('2001-01-01')
35
35
  end
36
+
37
+ it 'unparses DateTimes into time-granularity datestamps' do
38
+ expect(described_class.unparse(::DateTime.new(2001, 1, 1, 0, 0, 0))).to eq('2001-01-01T00:00:00Z')
39
+ end
40
+
41
+ it 'unparses unknown types by calling xmlschema' do
42
+ datestamp = Struct.new(:xmlschema).new('2001-01-01')
43
+
44
+ expect(described_class.unparse(datestamp)).to eq('2001-01-01')
45
+ end
36
46
  end
37
47
  end
38
48
  end
@@ -1,3 +1,8 @@
1
+ require 'fieldhand/get_record_parser'
2
+ require 'fieldhand/identify_parser'
3
+ require 'fieldhand/list_metadata_formats_parser'
4
+ require 'fieldhand/list_records_parser'
5
+ require 'fieldhand/list_sets_parser'
1
6
  require 'fieldhand/paginator'
2
7
 
3
8
  module Fieldhand
@@ -8,7 +13,7 @@ module Fieldhand
8
13
  'bad_argument_error.xml')
9
14
  paginator = described_class.new('http://www.example.com/oai')
10
15
 
11
- expect { paginator.items('Identify', 'Identify', 'bad' => 'Argument').first }.
16
+ expect { paginator.items('Identify', IdentifyParser, 'bad' => 'Argument').first }.
12
17
  to raise_error(BadArgumentError)
13
18
  end
14
19
 
@@ -17,7 +22,7 @@ module Fieldhand
17
22
  'bad_resumption_token_error.xml')
18
23
  paginator = described_class.new('http://www.example.com/oai')
19
24
 
20
- expect { paginator.items('ListRecords', 'ListRecords/record', 'resumptionToken' => 'foo').first }.
25
+ expect { paginator.items('ListRecords', ListRecordsParser, 'resumptionToken' => 'foo').first }.
21
26
  to raise_error(BadResumptionTokenError)
22
27
  end
23
28
 
@@ -26,7 +31,7 @@ module Fieldhand
26
31
  'bad_verb_error.xml')
27
32
  paginator = described_class.new('http://www.example.com/oai')
28
33
 
29
- expect { paginator.items('Bad', 'Bad').first }.
34
+ expect { paginator.items('Bad', IdentifyParser).first }.
30
35
  to raise_error(BadVerbError)
31
36
  end
32
37
 
@@ -35,7 +40,7 @@ module Fieldhand
35
40
  'cannot_disseminate_format_error.xml')
36
41
  paginator = described_class.new('http://www.example.com/oai')
37
42
 
38
- expect { paginator.items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'bad').first }.
43
+ expect { paginator.items('ListRecords', ListRecordsParser, 'metadataPrefix' => 'bad').first }.
39
44
  to raise_error(CannotDisseminateFormatError)
40
45
  end
41
46
 
@@ -45,7 +50,7 @@ module Fieldhand
45
50
  paginator = described_class.new('http://www.example.com/oai')
46
51
 
47
52
  expect {
48
- paginator.items('GetRecord', 'GetRecord/record', 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
53
+ paginator.items('GetRecord', GetRecordParser, 'metadataPrefix' => 'oai_dc', 'identifier' => 'bad').first
49
54
  }.to raise_error(IdDoesNotExistError)
50
55
  end
51
56
 
@@ -56,7 +61,7 @@ module Fieldhand
56
61
 
57
62
  expect {
58
63
  paginator.
59
- items('ListRecords', 'ListRecords/record', 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').
64
+ items('ListRecords', ListRecordsParser, 'metadataPrefix' => 'oai_dc', 'from' => '2999-01-01').
60
65
  first
61
66
  }.to raise_error(NoRecordsMatchError)
62
67
  end
@@ -67,7 +72,7 @@ module Fieldhand
67
72
  paginator = described_class.new('http://www.example.com/oai')
68
73
 
69
74
  expect {
70
- paginator.items('ListMetadataFormats', 'ListMetadataFormats/metadataFormat', 'identifier' => 'bad').first
75
+ paginator.items('ListMetadataFormats', ListMetadataFormatsParser, 'identifier' => 'bad').first
71
76
  }.to raise_error(NoMetadataFormatsError)
72
77
  end
73
78
 
@@ -76,7 +81,7 @@ module Fieldhand
76
81
  'no_set_hierarchy_error.xml')
77
82
  paginator = described_class.new('http://www.example.com/oai')
78
83
 
79
- expect { paginator.items('ListSets', 'ListSets/set').first }.
84
+ expect { paginator.items('ListSets', ListSetsParser).first }.
80
85
  to raise_error(NoSetHierarchyError)
81
86
  end
82
87
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fieldhand
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-05-08 00:00:00.000000000 Z
12
+ date: 2017-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ox
@@ -90,13 +90,20 @@ files:
90
90
  - lib/fieldhand.rb
91
91
  - lib/fieldhand/arguments.rb
92
92
  - lib/fieldhand/datestamp.rb
93
+ - lib/fieldhand/get_record_parser.rb
93
94
  - lib/fieldhand/header.rb
94
95
  - lib/fieldhand/identify.rb
96
+ - lib/fieldhand/identify_parser.rb
97
+ - lib/fieldhand/list_identifiers_parser.rb
98
+ - lib/fieldhand/list_metadata_formats_parser.rb
99
+ - lib/fieldhand/list_records_parser.rb
100
+ - lib/fieldhand/list_sets_parser.rb
95
101
  - lib/fieldhand/logger.rb
96
102
  - lib/fieldhand/metadata_format.rb
97
103
  - lib/fieldhand/paginator.rb
98
104
  - lib/fieldhand/record.rb
99
105
  - lib/fieldhand/repository.rb
106
+ - lib/fieldhand/response_parser.rb
100
107
  - lib/fieldhand/set.rb
101
108
  - spec/fieldhand/arguments_spec.rb
102
109
  - spec/fieldhand/datestamp_spec.rb