oai 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
@@ -0,0 +1,300 @@
1
+ require 'active_support'
2
+ require 'builder'
3
+ require 'chronic'
4
+
5
+ if not defined?(OAI::Const::VERBS)
6
+ require 'oai/exception'
7
+ require 'oai/constants'
8
+ require 'oai/xpath'
9
+ require 'oai/set'
10
+ end
11
+
12
+ %w{ response metadata_format resumption_token model partial_result
13
+ response/record_response response/identify response/get_record
14
+ response/list_identifiers response/list_records
15
+ response/list_metadata_formats response/list_sets response/error
16
+ }.each { |lib| require File.dirname(__FILE__) + "/provider/#{lib}" }
17
+
18
+ if defined?(ActiveRecord)
19
+ require File.dirname(__FILE__) + "/provider/model/activerecord_wrapper"
20
+ require File.dirname(__FILE__) + "/provider/model/activerecord_caching_wrapper"
21
+ end
22
+
23
+ # = OAI::Provider
24
+ #
25
+ # Open Archives Initiative - Protocol for Metadata Harvesting see
26
+ # http://www.openarchives.org/
27
+ #
28
+ # == Features
29
+ # * Easily setup a simple repository
30
+ # * Simple integration with ActiveRecord
31
+ # * Dublin Core metadata format included
32
+ # * Easily add additional metadata formats
33
+ # * Adaptable to any data source
34
+ # * Simple resumption token support
35
+ #
36
+ # == Usage
37
+ #
38
+ # To create a functional provider either subclass Provider::Base,
39
+ # or reconfigure the defaults.
40
+ #
41
+ # === Sub classing a provider
42
+ #
43
+ # class MyProvider < OAI::Provider::Base
44
+ # repository_name 'My little OAI provider'
45
+ # repository_url 'http://localhost/provider'
46
+ # record_prefix 'oai:localhost'
47
+ # admin_email 'root@localhost' # String or Array
48
+ # source_model MyModel.new # Subclass of OAI::Provider::Model
49
+ # end
50
+ #
51
+ # === Configuring the default provider
52
+ #
53
+ # class OAI::Provider::Base
54
+ # repository_name 'My little OAI Provider'
55
+ # repository_url 'http://localhost/provider'
56
+ # record_prefix 'oai:localhost'
57
+ # admin_email 'root@localhost'
58
+ # source_model MyModel.new
59
+ # end
60
+ #
61
+ # The provider does allow a URL to be passed in at request processing time
62
+ # in case the repository URL cannot be determined ahead of time.
63
+ #
64
+ # == Integrating with frameworks
65
+ #
66
+ # === Camping
67
+ #
68
+ # In the Models module of your camping application post model definition:
69
+ #
70
+ # class CampingProvider < OAI::Provider::Base
71
+ # repository_name 'Camping Test OAI Repository'
72
+ # source_model ActiveRecordWrapper.new(YOUR_ACTIVE_RECORD_MODEL)
73
+ # end
74
+ #
75
+ # In the Controllers module:
76
+ #
77
+ # class Oai
78
+ # def get
79
+ # @headers['Content-Type'] = 'text/xml'
80
+ # provider = Models::CampingProvider.new
81
+ # provider.process_request(@input.merge(:url => "http:"+URL(Oai).to_s))
82
+ # end
83
+ # end
84
+ #
85
+ # The provider will be available at "/oai"
86
+ #
87
+ # === Rails
88
+ #
89
+ # At the bottom of environment.rb create an OAI provider:
90
+ #
91
+ # # forgive the standard blog example.
92
+ #
93
+ # require 'oai'
94
+ # class BlogProvider < OAI::Provider::Base
95
+ # repository_name 'My little OAI Provider'
96
+ # repository_url 'http://localhost:3000/provider'
97
+ # record_prefix 'oai:blog'
98
+ # admin_email 'root@localhost'
99
+ # source_model OAI::Provider::ActiveRecordWrapper.new(Post)
100
+ # end
101
+ #
102
+ # Create a custom controller:
103
+ #
104
+ # class OaiController < ApplicationController
105
+ # def index
106
+ # # Remove controller and action from the options. Rails adds them automatically.
107
+ # options = params.delete_if { |k,v| %w{controller action}.include?(k) }
108
+ # provider = BlogProvider.new
109
+ # response = provider.process_request(options)
110
+ # render :text => response, :content_type => 'text/xml'
111
+ # end
112
+ # end
113
+ #
114
+ # Special thanks to Jose Hales-Garcia for this solution.
115
+ #
116
+ # == Supporting custom metadata formats
117
+ #
118
+ # See OAI::Provider::Metadata::Format for details.
119
+ #
120
+ # == ActiveRecord Integration
121
+ #
122
+ # ActiveRecord integration is provided by the ActiveRecordWrapper class.
123
+ # It takes one required parameter, the class name of the AR class to wrap,
124
+ # and optional hash of options.
125
+ #
126
+ # Valid options include:
127
+ # * timestamp_field - Specifies the model field to use as the update
128
+ # filter. Defaults to 'updated_at'.
129
+ # * limit - Maximum number of records to return in each page/set.
130
+ # Defaults to 100. The wrapper will paginate the result via resumption tokens.
131
+ # Caution: specifying too large a limit will adversely affect performance.
132
+ #
133
+ # Mapping from an ActiveRecord object to a specific metadata format follows
134
+ # this set of rules:
135
+ #
136
+ # 1. Does Model#to_{metadata_prefix} exist? If so just return the result.
137
+ # 2. Does the model provide a map via Model.map_{metadata_prefix}? If so
138
+ # use the map to generate the xml document.
139
+ # 3. Loop thru the fields of the metadata format and check to see if the
140
+ # model responds to either the plural, or singular of the field.
141
+ #
142
+ # For maximum control of the xml metadata generated, it's usually best to
143
+ # provide a 'to_{metadata_prefix}' in the model. If using Builder be sure
144
+ # not to include any instruct! in the xml object.
145
+ #
146
+ # === Explicit creation example
147
+ #
148
+ # class Post < ActiveRecord::Base
149
+ # def to_oai_dc
150
+ # xml = Builder::XmlMarkup.new
151
+ # xml.tag!("oai_dc:dc",
152
+ # 'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
153
+ # 'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
154
+ # 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
155
+ # 'xsi:schemaLocation' =>
156
+ # %{http://www.openarchives.org/OAI/2.0/oai_dc/
157
+ # http://www.openarchives.org/OAI/2.0/oai_dc.xsd}) do
158
+ # xml.tag!('dc:title', title)
159
+ # xml.tag!('dc:description', text)
160
+ # xml.tag!('dc:creator', user)
161
+ # tags.each do |tag|
162
+ # xml.tag!('dc:subject', tag)
163
+ # end
164
+ # end
165
+ # xml.target!
166
+ # end
167
+ # end
168
+ #
169
+ # === Mapping Example
170
+ #
171
+ # # Extremely contrived mapping
172
+ # class Post < ActiveRecord::Base
173
+ # def self.map_oai_dc
174
+ # {:subject => :tags,
175
+ # :description => :text,
176
+ # :creator => :user,
177
+ # :contributor => :comments}
178
+ # end
179
+ # end
180
+ #
181
+ module OAI::Provider
182
+ class Base
183
+ include OAI::Provider
184
+
185
class << self
  # Registered metadata formats keyed by metadata prefix. nil until a format
  # has been registered on (or inherited by) this class.
  attr_reader :formats

  # Repository settings. Each writer also has a declarative alias below
  # (repository_name, repository_url, ...), used in class bodies.
  attr_accessor :name, :url, :prefix, :email, :delete_support, :granularity, :model

  # Makes +format+ selectable through its own +prefix+.
  def register_format(format)
    @formats ||= {}
    @formats[format.prefix] = format
  end

  # True when a format has been registered under +prefix+. Returns false
  # (instead of raising) when nothing has been registered yet.
  def format_supported?(prefix)
    !@formats.nil? && @formats.key?(prefix)
  end

  # Returns the format registered under +prefix+, or nil when there is none.
  def format(prefix)
    @formats && @formats[prefix]
  end

  protected

  # Seeds a new subclass with the parent's current configuration.
  # Hash and Array values are duplicated so that, for example, registering a
  # format on the subclass does not also register it on the parent; all other
  # values are copied by reference as before.
  def inherited(klass)
    instance_variables.each do |iv|
      value = instance_variable_get(iv)
      value = value.dup if value.is_a?(Hash) || value.is_a?(Array)
      klass.instance_variable_set(iv, value)
    end
  end

  # Declarative names for the writers, e.g. repository_name 'My repo'
  alias_method :repository_name,    :name=
  alias_method :repository_url,     :url=
  alias_method :record_prefix,      :prefix=
  alias_method :admin_email,        :email=
  alias_method :deletion_support,   :delete_support=
  alias_method :update_granularity, :granularity=
  alias_method :source_model,       :model=
end
219
+
220
+ # Default configuration of a repository
221
+ Base.repository_name 'Open Archives Initiative Data Provider'
222
+ Base.repository_url 'unknown'
223
+ Base.record_prefix 'oai:localhost'
224
+ Base.admin_email 'nobody@localhost'
225
+ Base.deletion_support OAI::Const::Delete::TRANSIENT
226
+ Base.update_granularity OAI::Const::Granularity::HIGH
227
+
228
+ Base.register_format(OAI::Provider::Metadata::DublinCore.instance)
229
+
230
+ # Equivalent to '&verb=Identify', returns information about the repository
231
# options - request arguments, handed unchanged to Response::Identify.
# Returns whatever the response object's to_xml produces.
def identify(options = {})
  response = Response::Identify.new(self.class, options)
  response.to_xml
end
234
+
235
+ # Equivalent to '&verb=ListSets', returns a list of sets that are supported
236
+ # by the repository or an error if sets are not supported.
237
# options - request arguments, handed unchanged to Response::ListSets.
# Returns whatever the response object's to_xml produces.
def list_sets(options = {})
  response = Response::ListSets.new(self.class, options)
  response.to_xml
end
240
+
241
+ # Equivalent to '&verb=ListMetadataFormats', returns a list of metadata formats
242
+ # supported by the repository.
243
# options - request arguments, handed unchanged to
#           Response::ListMetadataFormats.
# Returns whatever the response object's to_xml produces.
def list_metadata_formats(options = {})
  response = Response::ListMetadataFormats.new(self.class, options)
  response.to_xml
end
246
+
247
+ # Equivalent to '&verb=ListIdentifiers', returns a list of record headers that
248
+ # meet the supplied criteria.
249
# options - request arguments, handed unchanged to Response::ListIdentifiers.
# Returns whatever the response object's to_xml produces.
def list_identifiers(options = {})
  response = Response::ListIdentifiers.new(self.class, options)
  response.to_xml
end
252
+
253
+ # Equivalent to '&verb=ListRecords', returns a list of records that meet the
254
+ # supplied criteria.
255
# options - request arguments, handed unchanged to Response::ListRecords.
# Returns whatever the response object's to_xml produces.
def list_records(options = {})
  response = Response::ListRecords.new(self.class, options)
  response.to_xml
end
258
+
259
+ # Equivalent to '&verb=GetRecord', returns a record matching the required
260
+ # :identifier option
261
# options - request arguments, handed unchanged to Response::GetRecord.
# Returns whatever the response object's to_xml produces.
def get_record(options = {})
  response = Response::GetRecord.new(self.class, options)
  response.to_xml
end
264
+
265
+ # xml_response = process_request(:verb => 'ListRecords', :from => 'October',
266
+ # :until => 'November') # thanks Chronic!
267
+ #
268
+ # If you are implementing a web interface using process_request is the
269
+ # preferred way.
270
# Dispatches one OAI-PMH request to the method implementing its verb.
#
# params - request arguments. 'verb' and 'url' may be given under String or
#          Symbol keys; every other entry is passed to the verb method as-is.
#          The caller's hash is not modified.
#
# Returns the result of the verb method, or the XML of a Response::Error when
# the raised error responds to +code+. Any other error is re-raised.
def process_request(params = {})
  # Work on a copy so the caller's hash (e.g. a framework params object)
  # keeps its 'verb' and 'url' entries.
  params = params.dup

  # Allow the request to pass in a url. Accept both key styles, exactly as is
  # done for the verb, so a :url entry is not forwarded as a verb option.
  url = params.delete('url') || params.delete(:url)
  self.class.url = url if url

  verb = params.delete('verb') || params.delete(:verb)
  raise OAI::VerbException.new unless verb && OAI::Const::VERBS.key?(verb)

  send(methodize(verb), params)
rescue => err
  # Only errors exposing a +code+ are rendered as an error response.
  raise unless err.respond_to?(:code)
  Response::Error.new(self.class, err).to_xml
end
292
+
293
+ # Convert valid OAI-PMH verbs into ruby method calls
294
# Turns a CamelCase verb such as 'ListRecords' into the matching snake_case
# method name ('list_records'): every capital letter becomes an underscore
# plus its lowercase form, then the leading underscore is removed.
def methodize(verb)
  underscored = verb.gsub(/[A-Z]/) { |capital| "_#{capital.downcase}" }
  underscored.sub(/^_/, '')
end
297
+
298
+ end
299
+
300
+ end
@@ -0,0 +1,72 @@
1
+ module OAI::Provider::Metadata
2
+ # == Metadata Base Class
3
+ #
4
+ # MetadataFormat is the base class from which all other format classes
5
+ # should inherit. Format classes provide mapping of record fields into XML.
6
+ #
7
+ # * prefix - contains the metadata_prefix used to select the format
8
+ # * schema - location of the xml schema
9
+ # * namespace - location of the namespace document
10
+ # * element_namespace - the namespace portion of the XML elements
11
+ # * fields - list of fields in this metadata format
12
+ #
13
+ # See OAI::Provider::Metadata::DublinCore for an example
14
+ #
15
+ class Format
16
+ include Singleton
17
+
18
+ attr_accessor :prefix, :schema, :namespace, :element_namespace, :fields
19
+
20
+ # Provided a model, and a record belonging to that model this method
21
+ # will return an xml representation of the record. This is the method
22
+ # that should be extended if you need to create more complex xml
23
+ # representations.
24
# model  - consulted for an optional map_{prefix} field mapping.
# record - the object to serialise. If it responds to to_{prefix}, that
#          method's result is returned untouched.
#
# Otherwise one "{element_namespace}:{field}" element is written per value
# returned by value_for. A value that does not respond to +each+ (a String
# on Ruby >= 1.9, a number, a Time) is written as a single element, and a
# nil value is skipped, instead of raising NoMethodError.
#
# Returns the xml document as produced by the builder's target!.
def encode(model, record)
  custom_encoder = "to_#{prefix}"
  return record.send(custom_encoder) if record.respond_to?(custom_encoder)

  map_method = "map_#{prefix}"
  map = model.respond_to?(map_method) ? model.send(map_method) : {}

  xml = Builder::XmlMarkup.new
  xml.tag!("#{prefix}:#{element_namespace}", header_specification) do
    fields.each do |field|
      values = value_for(field, record, map)
      next if values.nil?

      values = [values] unless values.respond_to?(:each)
      values.each do |value|
        xml.tag! "#{element_namespace}:#{field}", value
      end
    end
  end
  xml.target!
end
41
+
42
+ private
43
+
44
+ # We try a bunch of different methods to get the data from the model.
45
+ #
46
+ # 1. Check if the model defines a field mapping for the field of
47
+ # interest.
48
+ # 2. Try calling the pluralized name method on the model.
49
+ # 3. Try calling the singular name method on the model
50
# Looks up the value(s) of +field+ on +record+.
#
# The method name is map[field] when the map has an entry for the field,
# otherwise the field name itself. The pluralized name is tried first, then
# the singular one; [] is returned when the record responds to neither.
#
# respond_to? is used rather than comparing against public_methods(false):
# that list holds Symbols on Ruby >= 1.9, so String comparisons never
# matched, and it leaves out inherited methods.
# NOTE(review): respond_to? also sees public methods inherited from
# superclasses; confirm no field name collides with one on your records.
def value_for(field, record, map)
  method = (map[field] || field).to_s
  plural = method.pluralize

  if record.respond_to?(plural)
    record.send(plural)
  elsif record.respond_to?(method)
    record.send(method)
  else
    []
  end
end
62
+
63
+ # Subclasses must override
64
# Placeholder for the attribute hash placed on the root metadata element by
# encode. Always raises NotImplementedError here; each concrete format
# supplies its own.
def header_specification
  raise NotImplementedError
end
67
+
68
+ end
69
+
70
+ end
71
+
72
+ Dir.glob(File.dirname(__FILE__) + '/metadata_format/*.rb').each {|lib| require lib}
@@ -0,0 +1,29 @@
1
+ module OAI::Provider::Metadata
2
+ # = OAI::Provider::Metadata::DublinCore
3
+ #
4
+ # Simple implementation of the Dublin Core metadata format.
5
+ class DublinCore < Format
6
+
7
# Fills in the fixed oai_dc settings: metadata prefix, schema and namespace
# locations, the 'dc' element namespace and the fifteen element names.
def initialize
  @prefix            = 'oai_dc'
  @element_namespace = 'dc'
  @namespace         = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
  @schema            = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'

  element_names = %w{ title creator subject description publisher
                      contributor date type format identifier
                      source language relation coverage rights }
  @fields = element_names.map { |element| element.to_sym }
end
16
+
17
# Attributes for the root oai_dc:dc element: the namespace declarations plus
# the xsi:schemaLocation pair.
#
# The oai_dc namespace and schema location are taken from @namespace and
# @schema (set in initialize) so the header cannot drift from them, and the
# schemaLocation pair is joined by a single space rather than by a raw line
# break plus source indentation.
def header_specification
  {
    'xmlns:oai_dc' => @namespace,
    'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
    'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
    'xsi:schemaLocation' => "#{@namespace} #{@schema}"
  }
end
27
+
28
+ end
29
+ end
@@ -0,0 +1,71 @@
1
+ module OAI::Provider
2
+ # = OAI::Provider::Model
3
+ #
4
+ # Model implementers should subclass OAI::Provider::Model and override
5
+ # Model#earliest, Model#latest, and Model#find. Optionally Model#sets and
6
+ # Model#deleted? can be used to support sets and record deletions. It
7
+ # is also the responsibility of the model implementer to account for
8
+ # resumption tokens if support is required. Models that don't support
9
+ # resumption tokens should raise an exception if a limit is requested
10
+ # during initialization.
11
+ #
12
+ # earliest - should return the earliest update time in the repository.
13
+ # latest - should return the most recent update time in the repository.
14
+ # sets - should return an array of sets supported by the repository.
15
+ # deleted? - individual records returned should respond true or false
16
+ # when sent the deleted? message.
17
+ #
18
+ # == Resumption Tokens
19
+ #
20
+ # For examples of using resumption tokens see the
21
+ # ActiveRecordWrapper, and ActiveRecordCachingWrapper classes.
22
+ #
23
+ # There are several helper models for dealing with resumption tokens please
24
+ # see the ResumptionToken class for more details.
25
+ #
26
+
27
class Model
  # Name of the field holding a record's last-update time.
  attr_reader :timestamp_field

  # limit           - stored in @limit for subclasses to use; nil by default.
  # timestamp_field - defaults to 'updated_at'.
  def initialize(limit = nil, timestamp_field = 'updated_at')
    @timestamp_field = timestamp_field
    @limit = limit
  end

  # Subclasses return the earliest timestamp available from this model.
  # Raises NotImplementedError here.
  def earliest
    raise NotImplementedError
  end

  # Subclasses return the latest timestamp available from this model.
  # Raises NotImplementedError here.
  def latest
    raise NotImplementedError
  end

  # Returns nil in this base class.
  def sets
    nil
  end

  # find is the core method of a model; it returns records from the model
  # based on the parameters passed in.
  #
  # <tt>selector</tt> can be a singular id, or the symbol :all
  # <tt>options</tt> is a hash of options to be used to constrain the query.
  #
  # Valid options:
  # * :from => earliest timestamp to be included in the results
  # * :until => latest timestamp to be included in the results
  # * :set => the set from which to retrieve the results
  # * :metadata_prefix => type of metadata requested (this may be useful if
  #   not all records are available in all formats)
  #
  # Raises NotImplementedError here; subclasses must override.
  def find(selector, options = {})
    raise NotImplementedError
  end

  # Returns false in this base class.
  def deleted?
    false
  end
end
70
+
71
+ end