oai 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
data/lib/oai.rb CHANGED
@@ -1,14 +1,8 @@
1
- require 'oai/xpath'
2
- require 'oai/response'
3
- require 'oai/exception'
4
- require 'oai/header'
5
- require 'oai/record'
6
- require 'oai/set'
7
- require 'oai/metadata_format'
1
+ require 'rubygems'
2
+ require 'date'
3
+
4
+ # Sub projects (client, provider) require their own libraries so the user
5
+ # can selectively load them.
8
6
  require 'oai/client'
9
- require 'oai/identify'
10
- require 'oai/list_identifiers'
11
- require 'oai/list_metadata_formats'
12
- require 'oai/get_record'
13
- require 'oai/list_records'
14
- require 'oai/list_sets'
7
+ require 'oai/provider'
8
+
data/lib/oai/client.rb CHANGED
@@ -1,7 +1,27 @@
1
+ # External dependencies
1
2
  require 'uri'
2
3
  require 'net/http'
3
4
  require 'cgi'
4
- require 'date'
5
+
6
+ if not defined?(OAI::Const::VERBS)
7
+ # Shared stuff
8
+ require 'oai/exception'
9
+ require 'oai/constants'
10
+ require 'oai/xpath'
11
+ require 'oai/set'
12
+ end
13
+
14
+ # Localize requires so user can select a subset of functionality
15
+ require 'oai/client/metadata_format'
16
+ require 'oai/client/response'
17
+ require 'oai/client/header'
18
+ require 'oai/client/record'
19
+ require 'oai/client/identify'
20
+ require 'oai/client/get_record'
21
+ require 'oai/client/list_identifiers'
22
+ require 'oai/client/list_metadata_formats'
23
+ require 'oai/client/list_records'
24
+ require 'oai/client/list_sets'
5
25
 
6
26
  module OAI
7
27
 
@@ -33,29 +53,35 @@ module OAI
33
53
  # The constructor which must be passed a valid base url for an oai
34
54
  # service:
35
55
  #
36
- # client = OAI::Harvseter.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
56
+ # client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
37
57
  #
38
58
  # If you want to see debugging messages on STDERR use:
39
59
  #
40
- # client = OAI::Harvester.new 'http://example.com', :debug => true
60
+ # client = OAI::Client.new 'http://example.com', :debug => true
41
61
  #
42
62
  # By default OAI verbs called on the client will return REXML::Element
43
63
  # objects for metadata records, however if you wish you can use the
44
64
  # :parser option to indicate you want to use 'libxml' instead, and get
45
65
  # back XML::Node objects
46
66
  #
47
- # client = OAI::Harvester.new 'http://example.com', :parser => 'libxml'
48
-
67
+ # client = OAI::Client.new 'http://example.com', :parser => 'libxml'
68
+ #
69
+ # === HIGH PERFORMANCE
70
+ #
71
+ # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
72
+ # use the :parser option when you construct your OAI::Client.
73
+ #
49
74
  def initialize(base_url, options={})
50
75
  @base = URI.parse base_url
51
76
  @debug = options.fetch(:debug, false)
52
77
  @parser = options.fetch(:parser, 'rexml')
78
+ @follow_redirects = options.fetch(:redirects, true)
53
79
 
54
80
  # load appropriate parser
55
81
  case @parser
56
82
  when 'libxml'
57
83
  begin
58
- require 'rubygems'
84
+ require 'rubygems'
59
85
  require 'xml/libxml'
60
86
  rescue
61
87
  raise OAI::Exception.new("xml/libxml not available")
@@ -74,15 +100,14 @@ module OAI
74
100
  # parser then you will get an XML::Node object instead.
75
101
 
76
102
  def identify
77
- return IdentifyResponse.new(do_request(:verb => 'Identify'))
103
+ return OAI::IdentifyResponse.new(do_request('Identify'))
78
104
  end
79
105
 
80
106
  # Equivalent to a ListMetadataFormats request. A ListMetadataFormatsResponse
81
107
  # object is returned to you.
82
108
 
83
109
  def list_metadata_formats(opts={})
84
- sanitize_verb_arguments 'ListMetadataFormats', opts, [:verb, :identifier]
85
- return ListMetadataFormatsResponse.new(do_request(opts))
110
+ return OAI::ListMetadataFormatsResponse.new(do_request('ListMetadataFormats', opts))
86
111
  end
87
112
 
88
113
  # Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
@@ -90,10 +115,7 @@ module OAI
90
115
  # supported by the server.
91
116
 
92
117
  def list_identifiers(opts={})
93
- sanitize_verb_arguments 'ListIdentifiers', opts,
94
- [:verb, :from, :until, :metadata_prefix, :set, :resumption_token]
95
- add_default_metadata_prefix opts
96
- return ListIdentifiersResponse.new(do_request(opts))
118
+ return OAI::ListIdentifiersResponse.new(do_request('ListIdentifiers', opts))
97
119
  end
98
120
 
99
121
  # Equivalent to a GetRecord request. You must supply an identifier
@@ -101,10 +123,7 @@ module OAI
101
123
  # which you can extract a OAI::Record object from.
102
124
 
103
125
  def get_record(opts={})
104
- sanitize_verb_arguments 'GetRecord', opts,
105
- [:verb, :identifier, :metadata_prefix]
106
- add_default_metadata_prefix opts
107
- return GetRecordResponse.new(do_request(opts))
126
+ return OAI::GetRecordResponse.new(do_request('GetRecord', opts))
108
127
  end
109
128
 
110
129
  # Equivalent to the ListRecords request. A ListRecordsResponse
@@ -115,10 +134,7 @@ module OAI
115
134
  # end
116
135
 
117
136
  def list_records(opts={})
118
- sanitize_verb_arguments 'ListRecords', opts, [:verb, :from, :until, :set,
119
- :resumption_token, :metadata_prefix]
120
- add_default_metadata_prefix opts
121
- return ListRecordsResponse.new(do_request(opts))
137
+ return OAI::ListRecordsResponse.new(do_request('ListRecords', opts))
122
138
  end
123
139
 
124
140
  # Equivalent to the ListSets request. A ListSetsResponse object
@@ -130,43 +146,39 @@ module OAI
130
146
  # end
131
147
 
132
148
  def list_sets(opts={})
133
- sanitize_verb_arguments 'ListSets', opts, [:verb, :resumptionToken]
134
- return ListSetsResponse.new(do_request(opts))
149
+ return OAI::ListSetsResponse.new(do_request('ListSets', opts))
135
150
  end
136
151
 
137
152
  private
138
153
 
139
- def do_request(hash)
140
- uri = @base.clone
141
-
142
- # build up the query string
143
- parts = hash.entries.map do |entry|
144
- key = studly(entry[0].to_s)
145
- value = entry[1]
146
- # dates get stringified using ISO8601, strings are url encoded
147
- value = case value
148
- when DateTime then value.strftime('%Y-%m-%dT%H:%M:%SZ');
149
- when Date then value.strftime('%Y-%m-%d')
150
- else CGI.escape(entry[1].to_s)
151
- end
152
- "#{key}=#{value}"
153
- end
154
- uri.query = parts.join('&')
155
- debug("doing request: #{uri.to_s}")
156
-
154
+ def do_request(verb, opts = nil)
157
155
  # fire off the request and return appropriate DOM object
158
- begin
159
- xml = Net::HTTP.get(uri)
160
- if @parser == 'libxml'
161
- # remove default namespace for oai-pmh since libxml
162
- # isn't able to use our xpaths to get at them
163
- # if you know a way around this please let me know
164
- xml = xml.gsub(
165
- /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
166
- end
167
- return load_document(xml)
168
- rescue StandardError => e
169
- raise OAI::Exception, 'HTTP level error during OAI request: '+e, caller
156
+ uri = build_uri(verb, opts)
157
+ xml = get(uri)
158
+ if @parser == 'libxml'
159
+ # remove default namespace for oai-pmh since libxml
160
+ # isn't able to use our xpaths to get at them
161
+ # if you know a way around thins please let me know
162
+ xml = xml.gsub(
163
+ /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/, '')
164
+ end
165
+ return load_document(xml)
166
+ end
167
+
168
+ def build_uri(verb, opts)
169
+ opts = validate_options(verb, opts)
170
+ uri = @base.clone
171
+ uri.query = "verb=" << verb
172
+ opts.each_pair { |k,v| uri.query << '&' << externalize(k) << '=' << encode(v) }
173
+ uri
174
+ end
175
+
176
+ def encode(value)
177
+ return CGI.escape(value) unless value.respond_to?(:strftime)
178
+ if value.respond_to?(:to_time) # Usually a DateTime or Time
179
+ value.to_time.utc.xmlschema
180
+ else # Assume something date like
181
+ value.strftime('%Y-%m-%d')
170
182
  end
171
183
  end
172
184
 
@@ -189,45 +201,83 @@ module OAI
189
201
  end
190
202
  end
191
203
 
192
- # convert foo_bar to fooBar thus allowing our ruby code to use
193
- # the typical underscore idiom
194
- def studly(s)
195
- s.gsub(/_(\w)/) do |match|
196
- match.sub! '_', ''
197
- match.upcase
204
+ # Do the actual HTTP GET, following temporary redirects (and permanent ones when the :redirects option is enabled)
205
+ def get(uri)
206
+ response = Net::HTTP.get_response(uri)
207
+ case response
208
+ when Net::HTTPSuccess
209
+ return response.body
210
+ when Net::HTTPMovedPermanently
211
+ if @follow_redirects
212
+ response = get(URI.parse(response['location']))
213
+ else
214
+ raise ArgumentError, "Permanently Redirected to [#{response['location']}]"
215
+ end
216
+ when Net::HTTPTemporaryRedirect
217
+ response = get(URI.parse(response['location']))
218
+ else
219
+ raise ArgumentError, "#{response.code_type} [#{response.code}]"
198
220
  end
199
221
  end
200
222
 
201
- # add a metadata prefix unless it's there or we are working with
202
- # a resumption token, and having one added could cause problems
203
- def add_default_metadata_prefix(opts)
204
- unless opts.has_key? :metadata_prefix or opts.has_key? :resumption_token
205
- opts[:metadata_prefix] = 'oai_dc'
206
- end
223
+ def debug(msg)
224
+ $stderr.print("#{msg}\n") if @debug
207
225
  end
226
+
227
+ # Massage the standard OAI options to make them a bit more palatable.
228
+ def validate_options(verb, opts = {})
229
+ raise OAI::VerbException.new unless Const::VERBS.keys.include?(verb)
208
230
 
209
- def sanitize_verb_arguments(verb, opts, valid_opts)
210
- # opts could mistakenly not be a hash if the method was called wrong
211
- # client.get_record(12) instead of client.get_record(:identifier => 12)
212
- unless opts.kind_of?(Hash)
213
- raise OAI::Exception.new("method options must be passed as a hash")
214
- end
231
+ return {} if opts.nil?
215
232
 
216
- # add the verb
217
- opts[:verb] = verb
233
+ raise OAI::ArgumentException.new unless opts.respond_to?(:keys)
234
+
235
+ realopts = {}
236
+ # Internalize the hash
237
+ opts.keys.each do |key|
238
+ realopts[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = opts.delete(key)
239
+ end
240
+
241
+ return realopts if is_resumption?(realopts)
242
+
243
+ # add in a default metadataPrefix if none exists
244
+ if(Const::VERBS[verb].include?(:metadata_prefix))
245
+ realopts[:metadata_prefix] ||= 'oai_dc'
246
+ end
247
+
248
+ # Convert date-formatted strings into dates.
249
+ realopts[:from] = parse_date(realopts[:from]) if realopts[:from]
250
+ realopts[:until] = parse_date(realopts[:until]) if realopts[:until]
218
251
 
219
- # make sure options aren't using studly caps, and that they're legit
220
- opts.keys.each do |opt|
221
- if opt =~ /[A-Z]/
222
- raise OAI::Exception.new("#{opt} should use underscores")
223
- elsif not valid_opts.include? opt
224
- raise OAI::Exception.new("invalid option #{opt} in #{opts['verb']}")
225
- end
252
+ # check for any bad options
253
+ unless (realopts.keys - OAI::Const::VERBS[verb]).empty?
254
+ raise OAI::ArgumentException.new
226
255
  end
256
+ realopts
227
257
  end
228
-
229
- def debug(msg)
230
- $stderr.print("#{msg}\n") if @debug
258
+
259
+ def is_resumption?(opts)
260
+ if opts.keys.include?(:resumption_token)
261
+ return true if 1 == opts.keys.size
262
+ raise OAI::ArgumentException.new
263
+ end
264
+ end
265
+
266
+ # Convert our internal representations back into standard OAI options
267
+ def externalize(value)
268
+ value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
231
269
  end
270
+
271
+ def parse_date(value)
272
+ return value if value.respond_to?(:strftime)
273
+
274
+ # Oddly, Chronic doesn't parse a UTC-encoded datetime.
275
+ # Luckily Time does
276
+ dt = Chronic.parse(value) || Time.parse(value)
277
+ raise OAI::ArgumentError.new unless dt
278
+
279
+ dt.utc
280
+ end
281
+
232
282
  end
233
283
  end
File without changes
@@ -1,7 +1,7 @@
1
1
  module OAI
2
2
  class Header
3
3
  include OAI::XPath
4
- attr_accessor :identifier, :datestamp, :set_spec
4
+ attr_accessor :status, :identifier, :datestamp, :set_spec
5
5
 
6
6
  def initialize(element)
7
7
  @status = get_attribute(element, 'status')
@@ -11,7 +11,7 @@ module OAI
11
11
  end
12
12
 
13
13
  def deleted?
14
- return true unless @status == 'deleted'
14
+ return true if @status.to_s == "deleted"
15
15
  end
16
16
 
17
17
  end
File without changes
File without changes
@@ -12,7 +12,7 @@ module OAI
12
12
 
13
13
  def each
14
14
  for set_element in xpath_all(@doc, './/set')
15
- yield OAI::Set.new(set_element)
15
+ yield OAI::Set.parse(set_element)
16
16
  end
17
17
  end
18
18
  end
File without changes
@@ -19,7 +19,7 @@ module OAI
19
19
  message = error.content
20
20
  code = error.property('code')
21
21
  end
22
- raise OAI::Exception.new("#{message} [#{code}]")
22
+ raise OAI::Exception.new(message, code)
23
23
  end
24
24
 
25
25
  end
@@ -0,0 +1,34 @@
1
+ module OAI
2
+
3
+ module Const
4
+ # OAI defines six verbs with various allowable options.
5
+ VERBS = {
6
+ 'Identify' => [],
7
+ 'ListMetadataFormats' => [],
8
+ 'ListSets' => [:resumption_token], # unused currently
9
+ 'GetRecord' => [:identifier, :from, :until, :set, :metadata_prefix],
10
+ 'ListIdentifiers' => [:from, :until, :set, :metadata_prefix, :resumption_token],
11
+ 'ListRecords' => [:from, :until, :set, :metadata_prefix, :resumption_token]
12
+ }.freeze
13
+
14
+ RESERVED_WORDS = %w{type id}
15
+
16
+ # Two granularities are supported in OAI-PMH: daily or seconds.
17
+ module Granularity
18
+ LOW = 'YYYY-MM-DD'
19
+ HIGH = 'YYYY-MM-DDThh:mm:ssZ'
20
+ end
21
+
22
+ # Repositories can support three different schemes for dealing with deletions.
23
+ # * NO - No deletions allowed
24
+ # * TRANSIENT - Deletions are supported but may not be permanently maintained.
25
+ # * PERSISTENT - Deletions are supported and are permanently maintained.
26
+ module Delete
27
+ NO = :no
28
+ TRANSIENT = :transient
29
+ PERSISTENT = :persistent
30
+ end
31
+
32
+ end
33
+
34
+ end
data/lib/oai/exception.rb CHANGED
@@ -1,4 +1,75 @@
1
1
  module OAI
2
+
3
+ # Standard error responses for problems serving OAI content. These
4
+ # messages will be wrapped in an XML response to the client.
5
+
2
6
  class Exception < RuntimeError
7
+ attr_reader :code
8
+
9
+ def initialize(message, code = nil)
10
+ super(message)
11
+ @code = code
12
+ end
3
13
  end
4
- end
14
+
15
+ class ArgumentException < Exception
16
+ def initialize()
17
+ super('The request includes ' \
18
+ 'illegal arguments, is missing required arguments, includes a ' \
19
+ 'repeated argument, or values for arguments have an illegal syntax.',
20
+ 'badArgument')
21
+ end
22
+ end
23
+
24
+ class VerbException < Exception
25
+ def initialize()
26
+ super('Value of the verb argument is not a legal OAI-PMH '\
27
+ 'verb, the verb argument is missing, or the verb argument is repeated.',
28
+ 'badVerb')
29
+ end
30
+ end
31
+
32
+ class FormatException < Exception
33
+ def initialize()
34
+ super('The metadata format identified by '\
35
+ 'the value given for the metadataPrefix argument is not supported '\
36
+ 'by the item or by the repository.', 'cannotDisseminateFormat')
37
+ end
38
+ end
39
+
40
+ class IdException < Exception
41
+ def initialize()
42
+ super('The value of the identifier argument is '\
43
+ 'unknown or illegal in this repository.', 'idDoesNotExist')
44
+ end
45
+ end
46
+
47
+ class NoMatchException < Exception
48
+ def initialize()
49
+ super('The combination of the values of the from, '\
50
+ 'until, set and metadataPrefix arguments results in an empty list.',
51
+ 'noRecordsMatch')
52
+ end
53
+ end
54
+
55
+ class MetadataFormatException < Exception
56
+ def initialize()
57
+ super('There are no metadata formats available '\
58
+ 'for the specified item.', 'noMetadataFormats')
59
+ end
60
+ end
61
+
62
+ class SetException < Exception
63
+ def initialize()
64
+ super('This repository does not support sets.', 'noSetHierarchy')
65
+ end
66
+ end
67
+
68
+ class ResumptionTokenException < Exception
69
+ def initialize()
70
+ super('The value of the resumptionToken argument is invalid or expired.',
71
+ 'badResumptionToken')
72
+ end
73
+ end
74
+
75
+ end