oai 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
@@ -0,0 +1,300 @@
1
+ require 'active_support'
2
+ require 'builder'
3
+ require 'chronic'
4
+
5
+ if not defined?(OAI::Const::VERBS)
6
+ require 'oai/exception'
7
+ require 'oai/constants'
8
+ require 'oai/xpath'
9
+ require 'oai/set'
10
+ end
11
+
12
+ %w{ response metadata_format resumption_token model partial_result
13
+ response/record_response response/identify response/get_record
14
+ response/list_identifiers response/list_records
15
+ response/list_metadata_formats response/list_sets response/error
16
+ }.each { |lib| require File.dirname(__FILE__) + "/provider/#{lib}" }
17
+
18
+ if defined?(ActiveRecord)
19
+ require File.dirname(__FILE__) + "/provider/model/activerecord_wrapper"
20
+ require File.dirname(__FILE__) + "/provider/model/activerecord_caching_wrapper"
21
+ end
22
+
23
+ # = OAI::Provider
24
+ #
25
+ # Open Archives Initiative - Protocol for Metadata Harvesting see
26
+ # http://www.openarchives.org/
27
+ #
28
+ # == Features
29
+ # * Easily setup a simple repository
30
+ # * Simple integration with ActiveRecord
31
+ # * Dublin Core metadata format included
32
+ # * Easily add additional metadata formats
33
+ # * Adaptable to any data source
34
+ # * Simple resumption token support
35
+ #
36
+ # == Usage
37
+ #
38
+ # To create a functional provider either subclass Provider::Base,
39
+ # or reconfigure the defaults.
40
+ #
41
+ # === Sub classing a provider
42
+ #
43
+ # class MyProvider < OAI::Provider::Base
44
+ # repository_name 'My little OAI provider'
45
+ # repository_url 'http://localhost/provider'
46
+ # record_prefix 'oai:localhost'
47
+ # admin_email 'root@localhost' # String or Array
48
+ # source_model MyModel.new # Subclass of OAI::Provider::Model
49
+ # end
50
+ #
51
+ # === Configuring the default provider
52
+ #
53
+ # class OAI::Provider::Base
54
+ # repository_name 'My little OAI Provider'
55
+ # repository_url 'http://localhost/provider'
56
+ # record_prefix 'oai:localhost'
57
+ # admin_email 'root@localhost'
58
+ # source_model MyModel.new
59
+ # end
60
+ #
61
+ # The provider does allow a URL to be passed in at request processing time
62
+ # in case the repository URL cannot be determined ahead of time.
63
+ #
64
+ # == Integrating with frameworks
65
+ #
66
+ # === Camping
67
+ #
68
+ # In the Models module of your camping application post model definition:
69
+ #
70
+ # class CampingProvider < OAI::Provider::Base
71
+ # repository_name 'Camping Test OAI Repository'
72
+ # source_model ActiveRecordWrapper.new(YOUR_ACTIVE_RECORD_MODEL)
73
+ # end
74
+ #
75
+ # In the Controllers module:
76
+ #
77
+ # class Oai
78
+ # def get
79
+ # @headers['Content-Type'] = 'text/xml'
80
+ # provider = Models::CampingProvider.new
81
+ # provider.process_request(@input.merge(:url => "http:"+URL(Oai).to_s))
82
+ # end
83
+ # end
84
+ #
85
+ # The provider will be available at "/oai"
86
+ #
87
+ # === Rails
88
+ #
89
+ # At the bottom of environment.rb create a OAI Provider:
90
+ #
91
+ # # forgive the standard blog example.
92
+ #
93
+ # require 'oai'
94
+ # class BlogProvider < OAI::Provider::Base
95
+ # repository_name 'My little OAI Provider'
96
+ # repository_url 'http://localhost:3000/provider'
97
+ # record_prefix 'oai:blog'
98
+ # admin_email 'root@localhost'
99
+ # source_model OAI::Provider::ActiveRecordWrapper.new(Post)
100
+ # end
101
+ #
102
+ # Create a custom controller:
103
+ #
104
+ # class OaiController < ApplicationController
105
+ # def index
106
+ # # Remove controller and action from the options. Rails adds them automatically.
107
+ # options = params.delete_if { |k,v| %w{controller action}.include?(k) }
108
+ # provider = BlogProvider.new
109
+ # response = provider.process_request(options)
110
+ # render :text => response, :content_type => 'text/xml'
111
+ # end
112
+ # end
113
+ #
114
+ # Special thanks to Jose Hales-Garcia for this solution.
115
+ #
116
+ # == Supporting custom metadata formats
117
+ #
118
+ # See OAI::Provider::Metadata::Format for details.
119
+ #
120
+ # == ActiveRecord Integration
121
+ #
122
+ # ActiveRecord integration is provided by the ActiveRecordWrapper class.
123
+ # It takes one required parameter, the class name of the AR class to wrap,
124
+ # and an optional hash of options.
125
+ #
126
+ # Valid options include:
127
+ # * timestamp_field - Specifies the model field to use as the update
128
+ # filter. Defaults to 'updated_at'.
129
+ # * limit - Maximum number of records to return in each page/set.
130
+ # Defaults to 100. The wrapper will paginate the result via resumption tokens.
131
+ # Caution: specifying too large a limit will adversely affect performance.
132
+ #
133
+ # Mapping from an ActiveRecord object to a specific metadata format follows
134
+ # this set of rules:
135
+ #
136
+ # 1. Does Model#to_{metadata_prefix} exist? If so just return the result.
137
+ # 2. Does the model provide a map via Model.map_{metadata_prefix}? If so
138
+ # use the map to generate the xml document.
139
+ # 3. Loop through the fields of the metadata format and check to see if the
140
+ # model responds to either the plural, or singular of the field.
141
+ #
142
+ # For maximum control of the xml metadata generated, it's usually best to
143
+ # provide a 'to_{metadata_prefix}' in the model. If using Builder be sure
144
+ # not to include any instruct! in the xml object.
145
+ #
146
+ # === Explicit creation example
147
+ #
148
+ # class Post < ActiveRecord::Base
149
+ # def to_oai_dc
150
+ # xml = Builder::XmlMarkup.new
151
+ # xml.tag!("oai_dc:dc",
152
+ # 'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
153
+ # 'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
154
+ # 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
155
+ # 'xsi:schemaLocation' =>
156
+ # %{http://www.openarchives.org/OAI/2.0/oai_dc/
157
+ # http://www.openarchives.org/OAI/2.0/oai_dc.xsd}) do
158
+ # xml.tag!('dc:title', title)
159
+ # xml.tag!('dc:description', text)
160
+ # xml.tag!('dc:creator', user)
161
+ # tags.each do |tag|
162
+ # xml.tag!('dc:subject', tag)
163
+ # end
164
+ # end
165
+ # xml.target!
166
+ # end
167
+ # end
168
+ #
169
+ # === Mapping Example
170
+ #
171
+ # # Extremely contrived mapping
172
+ # class Post < ActiveRecord::Base
173
+ # def self.map_oai_dc
174
+ # {:subject => :tags,
175
+ # :description => :text,
176
+ # :creator => :user,
177
+ # :contributor => :comments}
178
+ # end
179
+ # end
180
+ #
181
+ module OAI::Provider
182
# Base class for OAI-PMH data providers.
#
# Repository settings live in class-level instance variables and are set
# through the DSL-style aliases defined below (repository_name,
# repository_url, record_prefix, admin_email, deletion_support,
# update_granularity, source_model).  Each OAI-PMH verb is exposed as an
# instance method that returns the XML built by the matching Response class.
class Base
  include OAI::Provider

  class << self
    # Registered metadata formats, keyed by metadata prefix.
    attr_reader :formats

    # NOTE(review): the :name accessor shadows Class#name on Base and its
    # subclasses; it returns the repository name, not the class name.
    attr_accessor :name, :url, :prefix, :email, :delete_support, :granularity, :model

    # Registers a metadata format under format.prefix, replacing any format
    # previously registered with the same prefix.
    def register_format(format)
      @formats ||= {}
      @formats[format.prefix] = format
    end

    # True when a format has been registered for +prefix+.  Returns false
    # (rather than raising) when no format has been registered at all.
    def format_supported?(prefix)
      !@formats.nil? && @formats.key?(prefix)
    end

    # Returns the format registered for +prefix+, or nil when there is none.
    def format(prefix)
      @formats && @formats[prefix]
    end

    protected

    # Hands the current configuration down to a new subclass by copying
    # every class-level instance variable.  Values are copied by reference,
    # so mutable values (such as the formats hash) are shared between the
    # parent and the subclass.
    def inherited(klass)
      super
      instance_variables.each do |iv|
        klass.instance_variable_set(iv, instance_variable_get(iv))
      end
    end

    # Configuration DSL: each alias is the writer of the matching accessor,
    # which allows "repository_name 'x'" inside a class body.
    alias_method :repository_name,    :name=
    alias_method :repository_url,     :url=
    alias_method :record_prefix,      :prefix=
    alias_method :admin_email,        :email=
    alias_method :deletion_support,   :delete_support=
    alias_method :update_granularity, :granularity=
    alias_method :source_model,       :model=

  end

  # Default configuration of a repository
  Base.repository_name 'Open Archives Initiative Data Provider'
  Base.repository_url 'unknown'
  Base.record_prefix 'oai:localhost'
  Base.admin_email 'nobody@localhost'
  Base.deletion_support OAI::Const::Delete::TRANSIENT
  Base.update_granularity OAI::Const::Granularity::HIGH

  Base.register_format(OAI::Provider::Metadata::DublinCore.instance)

  # Equivalent to '&verb=Identify', returns information about the repository
  def identify(options = {})
    Response::Identify.new(self.class, options).to_xml
  end

  # Equivalent to '&verb=ListSets', returns a list of sets that are supported
  # by the repository or an error if sets are not supported.
  def list_sets(options = {})
    Response::ListSets.new(self.class, options).to_xml
  end

  # Equivalent to '&verb=ListMetadataFormats', returns a list of metadata formats
  # supported by the repository.
  def list_metadata_formats(options = {})
    Response::ListMetadataFormats.new(self.class, options).to_xml
  end

  # Equivalent to '&verb=ListIdentifiers', returns a list of record headers that
  # meet the supplied criteria.
  def list_identifiers(options = {})
    Response::ListIdentifiers.new(self.class, options).to_xml
  end

  # Equivalent to '&verb=ListRecords', returns a list of records that meet the
  # supplied criteria.
  def list_records(options = {})
    Response::ListRecords.new(self.class, options).to_xml
  end

  # Equivalent to '&verb=GetRecord', returns a record matching the required
  # :identifier option
  def get_record(options = {})
    Response::GetRecord.new(self.class, options).to_xml
  end

  # Dispatches a request hash to the method implementing its verb and
  # returns the response XML.
  #
  #   xml_response = process_request('verb' => 'ListRecords',
  #     'from' => 'October', 'until' => 'November')
  #
  # If you are implementing a web interface using process_request is the
  # preferred way.
  #
  # +params+ may carry 'verb' and 'url' under String or Symbol keys; every
  # remaining entry is passed to the verb method as its options.  The
  # caller's hash is not modified.
  #
  # Errors that respond to #code are rendered as an error response; any
  # other error is re-raised unchanged.  A missing or unknown verb raises
  # OAI::VerbException, which goes through that same handling.
  def process_request(params = {})
    # Work on a copy so the caller's hash keeps its 'verb'/'url' entries.
    params = params.dup

    # Allow the request to pass in a url.
    # NOTE(review): this overwrites the class-level url, so it stays in
    # effect for later requests handled by the same provider class.
    url = params.delete('url') || params.delete(:url)
    self.class.url = url if url

    verb = params.delete('verb') || params.delete(:verb)

    unless verb and OAI::Const::VERBS.keys.include?(verb)
      raise OAI::VerbException.new
    end

    # Safe to dispatch dynamically: verb was checked against VERBS above.
    send(methodize(verb), params)
  rescue => err
    raise unless err.respond_to?(:code)
    Response::Error.new(self.class, err).to_xml
  end

  # Convert valid OAI-PMH verbs into ruby method calls,
  # e.g. 'ListRecords' becomes 'list_records'.
  def methodize(verb)
    verb.gsub(/[A-Z]/) {|m| "_#{m.downcase}"}.sub(/^\_/,'')
  end

end
299
+
300
+ end
@@ -0,0 +1,72 @@
1
+ module OAI::Provider::Metadata
2
# == Metadata Base Class
#
# Format is the base class from which all other format classes should
# inherit. Format classes provide mapping of record fields into XML.
#
# * prefix - contains the metadata_prefix used to select the format
# * schema - location of the xml schema
# * namespace - location of the namespace document
# * element_namespace - the namespace portion of the XML elements
# * fields - list of fields in this metadata format
#
# See OAI::Provider::Metadata::DublinCore for an example
#
class Format
  include Singleton

  attr_accessor :prefix, :schema, :namespace, :element_namespace, :fields

  # Provided a model, and a record belonging to that model this method
  # will return an xml representation of the record. This is the method
  # that should be extended if you need to create more complex xml
  # representations.
  #
  # When the record responds to "to_#{prefix}" its result is returned
  # as-is.  Otherwise one "#{element_namespace}:#{field}" element is
  # generated per value of every field; a field whose value is nil is
  # skipped, and a single (non-enumerable) value produces one element.
  def encode(model, record)
    if record.respond_to?("to_#{prefix}")
      record.send("to_#{prefix}")
    else
      xml = Builder::XmlMarkup.new
      map = model.respond_to?("map_#{prefix}") ? model.send("map_#{prefix}") : {}
      xml.tag!("#{prefix}:#{element_namespace}", header_specification) do
        fields.each do |field|
          values = value_for(field, record, map)
          next if values.nil?
          # Accessors may return one value instead of a collection.
          values = [values] unless values.respond_to?(:each)
          values.each do |value|
            xml.tag! "#{element_namespace}:#{field}", value
          end
        end
      end
      xml.target!
    end
  end

  private

  # We try a bunch of different methods to get the data from the record.
  #
  # 1. Check if the model defines a field mapping for the field of
  #    interest, and use the mapped name instead of the field name.
  # 2. Try calling the pluralized name method on the record.
  # 3. Try calling the singular name method on the record.
  #
  # Returns an empty array when the record responds to neither name.
  def value_for(field, record, map)
    method = map[field] ? map[field].to_s : field.to_s
    plural = method.pluralize

    # respond_to? works whether method lists hold Strings or Symbols.
    if record.respond_to?(plural)
      record.send plural
    elsif record.respond_to?(method)
      record.send method
    else
      []
    end
  end

  # Attributes for the root element of the generated document.
  # Subclasses must override.
  def header_specification
    raise NotImplementedError.new
  end

end
69
+
70
+ end
71
+
72
+ Dir.glob(File.dirname(__FILE__) + '/metadata_format/*.rb').each {|lib| require lib}
@@ -0,0 +1,29 @@
1
+ module OAI::Provider::Metadata
2
# = OAI::Provider::Metadata::DublinCore
#
# Simple implementation of the Dublin Core metadata format: the 'oai_dc'
# prefix, 'dc' element namespace and the fifteen Dublin Core fields.
class DublinCore < Format

  def initialize
    @prefix            = 'oai_dc'
    @schema            = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
    @namespace         = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    @element_namespace = 'dc'
    @fields = %w{ title creator subject description publisher
                  contributor date type format identifier
                  source language relation coverage rights
                }.map { |name| name.to_sym }
  end

  # Attributes placed on the root element of the generated document:
  # the namespace declarations and the schema location.
  def header_specification
    spec = {}
    spec['xmlns:oai_dc'] = "http://www.openarchives.org/OAI/2.0/oai_dc/"
    spec['xmlns:dc']     = "http://purl.org/dc/elements/1.1/"
    spec['xmlns:xsi']    = "http://www.w3.org/2001/XMLSchema-instance"
    spec['xsi:schemaLocation'] =
      %{http://www.openarchives.org/OAI/2.0/oai_dc/
        http://www.openarchives.org/OAI/2.0/oai_dc.xsd}
    spec
  end

end
29
+ end
@@ -0,0 +1,71 @@
1
+ module OAI::Provider
2
# = OAI::Provider::Model
#
# Abstract data source for a provider.  Implementers subclass
# OAI::Provider::Model and override Model#earliest, Model#latest and
# Model#find; Model#sets and Model#deleted? may be overridden as well to
# support sets and record deletions.  Supporting resumption tokens is the
# implementer's responsibility; a model that does not support them should
# raise an exception when a limit is requested during initialization.
#
# earliest - should return the earliest update time in the repository.
# latest   - should return the most recent update time in the repository.
# sets     - should return an array of sets supported by the repository.
# deleted? - individual records returned should respond true or false
#            when sent the deleted? message.
#
# == Resumption Tokens
#
# For examples of using resumption tokens see the
# ActiveRecordWrapper, and ActiveRecordCachingWrapper classes.
#
# There are several helper models for dealing with resumption tokens please
# see the ResumptionToken class for more details.
#
class Model
  attr_reader :timestamp_field

  # limit           - stored in @limit; nil by default.
  # timestamp_field - name of the field holding a record's update time,
  #                   'updated_at' by default.
  def initialize(limit = nil, timestamp_field = 'updated_at')
    @limit, @timestamp_field = limit, timestamp_field
  end

  # Subclasses return the earliest timestamp available from this model.
  def earliest
    raise NotImplementedError
  end

  # Subclasses return the latest timestamp available from this model.
  def latest
    raise NotImplementedError
  end

  # No sets unless a subclass says otherwise.
  def sets
    nil
  end

  # find is the core method of a model, it returns records from the model
  # based on the parameters passed in.
  #
  # <tt>selector</tt> can be a singular id, or the symbol :all
  # <tt>options</tt> is a hash of options to be used to constrain the query.
  #
  # Valid options:
  # * :from => earliest timestamp to be included in the results
  # * :until => latest timestamp to be included in the results
  # * :set => the set from which to retrieve the results
  # * :metadata_prefix => type of metadata requested (this may be useful if
  #   not all records are available in all formats)
  def find(selector, options = {})
    raise NotImplementedError
  end

  # Always false in the base class.
  def deleted?
    false
  end

end
70
+
71
+ end