oai 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
data/lib/oai.rb CHANGED
@@ -1,14 +1,8 @@
1
- require 'oai/xpath'
2
- require 'oai/response'
3
- require 'oai/exception'
4
- require 'oai/header'
5
- require 'oai/record'
6
- require 'oai/set'
7
- require 'oai/metadata_format'
1
+ require 'rubygems'
2
+ require 'date'
3
+
4
+ # Sub projects (client, provider) require their own libraries so the user
5
+ # can selectively load them.
8
6
  require 'oai/client'
9
- require 'oai/identify'
10
- require 'oai/list_identifiers'
11
- require 'oai/list_metadata_formats'
12
- require 'oai/get_record'
13
- require 'oai/list_records'
14
- require 'oai/list_sets'
7
+ require 'oai/provider'
8
+
data/lib/oai/client.rb CHANGED
@@ -1,7 +1,27 @@
1
+ # External dependencies
1
2
  require 'uri'
2
3
  require 'net/http'
3
4
  require 'cgi'
4
- require 'date'
5
+
6
+ if not defined?(OAI::Const::VERBS)
7
+ # Shared stuff
8
+ require 'oai/exception'
9
+ require 'oai/constants'
10
+ require 'oai/xpath'
11
+ require 'oai/set'
12
+ end
13
+
14
+ # Localize requires so user can select a subset of functionality
15
+ require 'oai/client/metadata_format'
16
+ require 'oai/client/response'
17
+ require 'oai/client/header'
18
+ require 'oai/client/record'
19
+ require 'oai/client/identify'
20
+ require 'oai/client/get_record'
21
+ require 'oai/client/list_identifiers'
22
+ require 'oai/client/list_metadata_formats'
23
+ require 'oai/client/list_records'
24
+ require 'oai/client/list_sets'
5
25
 
6
26
  module OAI
7
27
 
@@ -33,29 +53,35 @@ module OAI
33
53
  # The constructor which must be passed a valid base url for an oai
34
54
  # service:
35
55
  #
36
- # client = OAI::Harvseter.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
56
+ # client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
37
57
  #
38
58
  # If you want to see debugging messages on STDERR use:
39
59
  #
40
- # client = OAI::Harvester.new 'http://example.com', :debug => true
60
+ # client = OAI::Client.new 'http://example.com', :debug => true
41
61
  #
42
62
  # By default OAI verbs called on the client will return REXML::Element
43
63
  # objects for metadata records, however if you wish you can use the
44
64
  # :parser option to indicate you want to use 'libxml' instead, and get
45
65
  # back XML::Node objects
46
66
  #
47
- # client = OAI::Harvester.new 'http://example.com', :parser => 'libxml'
48
-
67
+ # client = OAI::Client.new 'http://example.com', :parser => 'libxml'
68
+ #
69
+ # === HIGH PERFORMANCE
70
+ #
71
+ # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
72
+ # use the :parser option when you construct your OAI::Client.
73
+ #
49
74
  def initialize(base_url, options={})
50
75
  @base = URI.parse base_url
51
76
  @debug = options.fetch(:debug, false)
52
77
  @parser = options.fetch(:parser, 'rexml')
78
+ @follow_redirects = options.fetch(:redirects, true)
53
79
 
54
80
  # load appropriate parser
55
81
  case @parser
56
82
  when 'libxml'
57
83
  begin
58
- require 'rubygems'
84
+ require 'rubygems'
59
85
  require 'xml/libxml'
60
86
  rescue
61
87
  raise OAI::Exception.new("xml/libxml not available")
@@ -74,15 +100,14 @@ module OAI
74
100
  # parser then you will get an XML::Node object instead.
75
101
 
76
102
  def identify
77
- return IdentifyResponse.new(do_request(:verb => 'Identify'))
103
+ return OAI::IdentifyResponse.new(do_request('Identify'))
78
104
  end
79
105
 
80
106
  # Equivalent to a ListMetadataFormats request. A ListMetadataFormatsResponse
81
107
  # object is returned to you.
82
108
 
83
109
  def list_metadata_formats(opts={})
84
- sanitize_verb_arguments 'ListMetadataFormats', opts, [:verb, :identifier]
85
- return ListMetadataFormatsResponse.new(do_request(opts))
110
+ return OAI::ListMetadataFormatsResponse.new(do_request('ListMetadataFormats', opts))
86
111
  end
87
112
 
88
113
  # Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
@@ -90,10 +115,7 @@ module OAI
90
115
  # supported by the server.
91
116
 
92
117
  def list_identifiers(opts={})
93
- sanitize_verb_arguments 'ListIdentifiers', opts,
94
- [:verb, :from, :until, :metadata_prefix, :set, :resumption_token]
95
- add_default_metadata_prefix opts
96
- return ListIdentifiersResponse.new(do_request(opts))
118
+ return OAI::ListIdentifiersResponse.new(do_request('ListIdentifiers', opts))
97
119
  end
98
120
 
99
121
  # Equivalent to a GetRecord request. You must supply an identifier
@@ -101,10 +123,7 @@ module OAI
101
123
  # which you can extract a OAI::Record object from.
102
124
 
103
125
  def get_record(opts={})
104
- sanitize_verb_arguments 'GetRecord', opts,
105
- [:verb, :identifier, :metadata_prefix]
106
- add_default_metadata_prefix opts
107
- return GetRecordResponse.new(do_request(opts))
126
+ return OAI::GetRecordResponse.new(do_request('GetRecord', opts))
108
127
  end
109
128
 
110
129
  # Equivalent to the ListRecords request. A ListRecordsResponse
@@ -115,10 +134,7 @@ module OAI
115
134
  # end
116
135
 
117
136
  def list_records(opts={})
118
- sanitize_verb_arguments 'ListRecords', opts, [:verb, :from, :until, :set,
119
- :resumption_token, :metadata_prefix]
120
- add_default_metadata_prefix opts
121
- return ListRecordsResponse.new(do_request(opts))
137
+ return OAI::ListRecordsResponse.new(do_request('ListRecords', opts))
122
138
  end
123
139
 
124
140
  # Equivalent to the ListSets request. A ListSetsResponse object
@@ -130,43 +146,39 @@ module OAI
130
146
  # end
131
147
 
132
148
  def list_sets(opts={})
133
- sanitize_verb_arguments 'ListSets', opts, [:verb, :resumptionToken]
134
- return ListSetsResponse.new(do_request(opts))
149
+ return OAI::ListSetsResponse.new(do_request('ListSets', opts))
135
150
  end
136
151
 
137
152
  private
138
153
 
139
- def do_request(hash)
140
- uri = @base.clone
141
-
142
- # build up the query string
143
- parts = hash.entries.map do |entry|
144
- key = studly(entry[0].to_s)
145
- value = entry[1]
146
- # dates get stringified using ISO8601, strings are url encoded
147
- value = case value
148
- when DateTime then value.strftime('%Y-%m-%dT%H:%M:%SZ');
149
- when Date then value.strftime('%Y-%m-%d')
150
- else CGI.escape(entry[1].to_s)
151
- end
152
- "#{key}=#{value}"
153
- end
154
- uri.query = parts.join('&')
155
- debug("doing request: #{uri.to_s}")
156
-
154
+ def do_request(verb, opts = nil)
157
155
  # fire off the request and return appropriate DOM object
158
- begin
159
- xml = Net::HTTP.get(uri)
160
- if @parser == 'libxml'
161
- # remove default namespace for oai-pmh since libxml
162
- # isn't able to use our xpaths to get at them
163
- # if you know a way around thins please let me know
164
- xml = xml.gsub(
165
- /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
166
- end
167
- return load_document(xml)
168
- rescue StandardError => e
169
- raise OAI::Exception, 'HTTP level error during OAI request: '+e, caller
156
+ uri = build_uri(verb, opts)
157
+ xml = get(uri)
158
+ if @parser == 'libxml'
159
+ # remove default namespace for oai-pmh since libxml
160
+ # isn't able to use our xpaths to get at them
161
+ # if you know a way around this please let me know
162
+ xml = xml.gsub(
163
+ /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
164
+ end
165
+ return load_document(xml)
166
+ end
167
+
168
+ def build_uri(verb, opts)
169
+ opts = validate_options(verb, opts)
170
+ uri = @base.clone
171
+ uri.query = "verb=" << verb
172
+ opts.each_pair { |k,v| uri.query << '&' << externalize(k) << '=' << encode(v) }
173
+ uri
174
+ end
175
+
176
+ def encode(value)
177
+ return CGI.escape(value) unless value.respond_to?(:strftime)
178
+ if value.respond_to?(:to_time) # Usually a DateTime or Time
179
+ value.to_time.utc.xmlschema
180
+ else # Assume something date like
181
+ value.strftime('%Y-%m-%d')
170
182
  end
171
183
  end
172
184
 
@@ -189,45 +201,83 @@ module OAI
189
201
  end
190
202
  end
191
203
 
192
- # convert foo_bar to fooBar thus allowing our ruby code to use
193
- # the typical underscore idiom
194
- def studly(s)
195
- s.gsub(/_(\w)/) do |match|
196
- match.sub! '_', ''
197
- match.upcase
204
+ # Do the actual HTTP get, following any temporary redirects
205
+ def get(uri)
206
+ response = Net::HTTP.get_response(uri)
207
+ case response
208
+ when Net::HTTPSuccess
209
+ return response.body
210
+ when Net::HTTPMovedPermanently
211
+ if @follow_redirects
212
+ response = get(URI.parse(response['location']))
213
+ else
214
+ raise ArgumentError, "Permanently Redirected to [#{response['location']}]"
215
+ end
216
+ when Net::HTTPTemporaryRedirect
217
+ response = get(URI.parse(response['location']))
218
+ else
219
+ raise ArgumentError, "#{response.code_type} [#{response.code}]"
198
220
  end
199
221
  end
200
222
 
201
- # add a metadata prefix unless it's there or we are working with
202
- # a resumption token, and having one added could cause problems
203
- def add_default_metadata_prefix(opts)
204
- unless opts.has_key? :metadata_prefix or opts.has_key? :resumption_token
205
- opts[:metadata_prefix] = 'oai_dc'
206
- end
223
+ def debug(msg)
224
+ $stderr.print("#{msg}\n") if @debug
207
225
  end
226
+
227
+ # Massage the standard OAI options to make them a bit more palatable.
228
+ def validate_options(verb, opts = {})
229
+ raise OAI::VerbException.new unless Const::VERBS.keys.include?(verb)
208
230
 
209
- def sanitize_verb_arguments(verb, opts, valid_opts)
210
- # opts could mistakenly not be a hash if the method was called wrong
211
- # client.get_record(12) instead of client.get_record(:identifier => 12)
212
- unless opts.kind_of?(Hash)
213
- raise OAI::Exception.new("method options must be passed as a hash")
214
- end
231
+ return {} if opts.nil?
215
232
 
216
- # add the verb
217
- opts[:verb] = verb
233
+ raise OAI::ArgumentException.new unless opts.respond_to?(:keys)
234
+
235
+ realopts = {}
236
+ # Internalize the hash
237
+ opts.keys.each do |key|
238
+ realopts[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = opts.delete(key)
239
+ end
240
+
241
+ return realopts if is_resumption?(realopts)
242
+
243
+ # add in a default metadataPrefix if none exists
244
+ if(Const::VERBS[verb].include?(:metadata_prefix))
245
+ realopts[:metadata_prefix] ||= 'oai_dc'
246
+ end
247
+
248
+ # Convert date-formatted strings into dates.
249
+ realopts[:from] = parse_date(realopts[:from]) if realopts[:from]
250
+ realopts[:until] = parse_date(realopts[:until]) if realopts[:until]
218
251
 
219
- # make sure options aren't using studly caps, and that they're legit
220
- opts.keys.each do |opt|
221
- if opt =~ /[A-Z]/
222
- raise OAI::Exception.new("#{opt} should use underscores")
223
- elsif not valid_opts.include? opt
224
- raise OAI::Exception.new("invalid option #{opt} in #{opts['verb']}")
225
- end
252
+ # check for any bad options
253
+ unless (realopts.keys - OAI::Const::VERBS[verb]).empty?
254
+ raise OAI::ArgumentException.new
226
255
  end
256
+ realopts
227
257
  end
228
-
229
- def debug(msg)
230
- $stderr.print("#{msg}\n") if @debug
258
+
259
+ def is_resumption?(opts)
260
+ if opts.keys.include?(:resumption_token)
261
+ return true if 1 == opts.keys.size
262
+ raise OAI::ArgumentException.new
263
+ end
264
+ end
265
+
266
+ # Convert our internal representations back into standard OAI options
267
+ def externalize(value)
268
+ value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
231
269
  end
270
+
271
+ def parse_date(value)
272
+ return value if value.respond_to?(:strftime)
273
+
274
+ # Oddly, Chronic doesn't parse a UTC-encoded datetime.
275
+ # Luckily Time does
276
+ dt = Chronic.parse(value) || Time.parse(value)
277
+ raise OAI::ArgumentError.new unless dt
278
+
279
+ dt.utc
280
+ end
281
+
232
282
  end
233
283
  end
File without changes
@@ -1,7 +1,7 @@
1
1
  module OAI
2
2
  class Header
3
3
  include OAI::XPath
4
- attr_accessor :identifier, :datestamp, :set_spec
4
+ attr_accessor :status, :identifier, :datestamp, :set_spec
5
5
 
6
6
  def initialize(element)
7
7
  @status = get_attribute(element, 'status')
@@ -11,7 +11,7 @@ module OAI
11
11
  end
12
12
 
13
13
  def deleted?
14
- return true unless @status == 'deleted'
14
+ return true if @status.to_s == "deleted"
15
15
  end
16
16
 
17
17
  end
File without changes
File without changes
@@ -12,7 +12,7 @@ module OAI
12
12
 
13
13
  def each
14
14
  for set_element in xpath_all(@doc, './/set')
15
- yield OAI::Set.new(set_element)
15
+ yield OAI::Set.parse(set_element)
16
16
  end
17
17
  end
18
18
  end
File without changes
@@ -19,7 +19,7 @@ module OAI
19
19
  message = error.content
20
20
  code = error.property('code')
21
21
  end
22
- raise OAI::Exception.new("#{message} [#{code}]")
22
+ raise OAI::Exception.new(message, code)
23
23
  end
24
24
 
25
25
  end
@@ -0,0 +1,34 @@
1
+ module OAI
2
+
3
+ module Const
4
+ # OAI defines six verbs with various allowable options.
5
+ VERBS = {
6
+ 'Identify' => [],
7
+ 'ListMetadataFormats' => [],
8
+ 'ListSets' => [:resumption_token], # unused currently
9
+ 'GetRecord' => [:identifier, :from, :until, :set, :metadata_prefix],
10
+ 'ListIdentifiers' => [:from, :until, :set, :metadata_prefix, :resumption_token],
11
+ 'ListRecords' => [:from, :until, :set, :metadata_prefix, :resumption_token]
12
+ }.freeze
13
+
14
+ RESERVED_WORDS = %w{type id}
15
+
16
+ # Two granularities are supported in OAI-PMH: day or seconds.
17
+ module Granularity
18
+ LOW = 'YYYY-MM-DD'
19
+ HIGH = 'YYYY-MM-DDThh:mm:ssZ'
20
+ end
21
+
22
+ # Repositories can support three different schemes for dealing with deletions.
23
+ # * NO - No deletions allowed
24
+ # * TRANSIENT - Deletions are supported but may not be permanently maintained.
25
+ # * PERSISTENT - Deletions are supported and are permanently maintained.
26
+ module Delete
27
+ NO = :no
28
+ TRANSIENT = :transient
29
+ PERSISTENT = :persistent
30
+ end
31
+
32
+ end
33
+
34
+ end
data/lib/oai/exception.rb CHANGED
@@ -1,4 +1,75 @@
1
1
  module OAI
2
+
3
+ # Standard error responses for problems serving OAI content. These
4
+ # messages will be wrapped in an XML response to the client.
5
+
2
6
  class Exception < RuntimeError
7
+ attr_reader :code
8
+
9
+ def initialize(message, code = nil)
10
+ super(message)
11
+ @code = code
12
+ end
3
13
  end
4
- end
14
+
15
+ class ArgumentException < Exception
16
+ def initialize()
17
+ super('The request includes ' \
18
+ 'illegal arguments, is missing required arguments, includes a ' \
19
+ 'repeated argument, or values for arguments have an illegal syntax.',
20
+ 'badArgument')
21
+ end
22
+ end
23
+
24
+ class VerbException < Exception
25
+ def initialize()
26
+ super('Value of the verb argument is not a legal OAI-PMH '\
27
+ 'verb, the verb argument is missing, or the verb argument is repeated.',
28
+ 'badVerb')
29
+ end
30
+ end
31
+
32
+ class FormatException < Exception
33
+ def initialize()
34
+ super('The metadata format identified by '\
35
+ 'the value given for the metadataPrefix argument is not supported '\
36
+ 'by the item or by the repository.', 'cannotDisseminateFormat')
37
+ end
38
+ end
39
+
40
+ class IdException < Exception
41
+ def initialize()
42
+ super('The value of the identifier argument is '\
43
+ 'unknown or illegal in this repository.', 'idDoesNotExist')
44
+ end
45
+ end
46
+
47
+ class NoMatchException < Exception
48
+ def initialize()
49
+ super('The combination of the values of the from, '\
50
+ 'until, set and metadataPrefix arguments results in an empty list.',
51
+ 'noRecordsMatch')
52
+ end
53
+ end
54
+
55
+ class MetadataFormatException < Exception
56
+ def initialize()
57
+ super('There are no metadata formats available '\
58
+ 'for the specified item.', 'noMetadataFormats')
59
+ end
60
+ end
61
+
62
+ class SetException < Exception
63
+ def initialize()
64
+ super('This repository does not support sets.', 'noSetHierarchy')
65
+ end
66
+ end
67
+
68
+ class ResumptionTokenException < Exception
69
+ def initialize()
70
+ super('The value of the resumptionToken argument is invalid or expired.',
71
+ 'badResumptionToken')
72
+ end
73
+ end
74
+
75
+ end