oai_talia 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +81 -0
- data/Rakefile +127 -0
- data/bin/oai +68 -0
- data/examples/models/file_model.rb +63 -0
- data/examples/providers/dublin_core.rb +474 -0
- data/lib/oai/client/get_record.rb +15 -0
- data/lib/oai/client/header.rb +18 -0
- data/lib/oai/client/identify.rb +30 -0
- data/lib/oai/client/list_identifiers.rb +12 -0
- data/lib/oai/client/list_metadata_formats.rb +12 -0
- data/lib/oai/client/list_records.rb +21 -0
- data/lib/oai/client/list_sets.rb +19 -0
- data/lib/oai/client/metadata_format.rb +12 -0
- data/lib/oai/client/record.rb +26 -0
- data/lib/oai/client/response.rb +35 -0
- data/lib/oai/client.rb +301 -0
- data/lib/oai/constants.rb +34 -0
- data/lib/oai/exception.rb +75 -0
- data/lib/oai/harvester/config.rb +41 -0
- data/lib/oai/harvester/harvest.rb +150 -0
- data/lib/oai/harvester/logging.rb +70 -0
- data/lib/oai/harvester/mailer.rb +17 -0
- data/lib/oai/harvester/shell.rb +338 -0
- data/lib/oai/harvester.rb +39 -0
- data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
- data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
- data/lib/oai/provider/metadata_format.rb +143 -0
- data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
- data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
- data/lib/oai/provider/model.rb +74 -0
- data/lib/oai/provider/partial_result.rb +18 -0
- data/lib/oai/provider/response/error.rb +16 -0
- data/lib/oai/provider/response/get_record.rb +26 -0
- data/lib/oai/provider/response/identify.rb +25 -0
- data/lib/oai/provider/response/list_identifiers.rb +35 -0
- data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
- data/lib/oai/provider/response/list_records.rb +34 -0
- data/lib/oai/provider/response/list_sets.rb +23 -0
- data/lib/oai/provider/response/record_response.rb +70 -0
- data/lib/oai/provider/response.rb +161 -0
- data/lib/oai/provider/resumption_token.rb +106 -0
- data/lib/oai/provider.rb +304 -0
- data/lib/oai/set.rb +29 -0
- data/lib/oai/xpath.rb +75 -0
- data/lib/oai.rb +8 -0
- data/lib/test.rb +25 -0
- data/test/activerecord_provider/config/connection.rb +5 -0
- data/test/activerecord_provider/config/database.yml +6 -0
- data/test/activerecord_provider/database/ar_migration.rb +59 -0
- data/test/activerecord_provider/database/oaipmhtest +0 -0
- data/test/activerecord_provider/fixtures/dc.yml +1501 -0
- data/test/activerecord_provider/helpers/providers.rb +44 -0
- data/test/activerecord_provider/helpers/set_provider.rb +36 -0
- data/test/activerecord_provider/models/dc_field.rb +7 -0
- data/test/activerecord_provider/models/dc_set.rb +6 -0
- data/test/activerecord_provider/models/oai_token.rb +3 -0
- data/test/activerecord_provider/tc_ar_provider.rb +113 -0
- data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
- data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
- data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
- data/test/activerecord_provider/test_helper.rb +4 -0
- data/test/client/helpers/provider.rb +68 -0
- data/test/client/helpers/test_wrapper.rb +11 -0
- data/test/client/tc_exception.rb +36 -0
- data/test/client/tc_get_record.rb +37 -0
- data/test/client/tc_identify.rb +13 -0
- data/test/client/tc_libxml.rb +61 -0
- data/test/client/tc_list_identifiers.rb +52 -0
- data/test/client/tc_list_metadata_formats.rb +18 -0
- data/test/client/tc_list_records.rb +13 -0
- data/test/client/tc_list_sets.rb +19 -0
- data/test/client/tc_low_resolution_dates.rb +14 -0
- data/test/client/tc_utf8_escaping.rb +11 -0
- data/test/client/tc_xpath.rb +26 -0
- data/test/client/test_helper.rb +5 -0
- data/test/provider/models.rb +234 -0
- data/test/provider/tc_exceptions.rb +96 -0
- data/test/provider/tc_functional_tokens.rb +43 -0
- data/test/provider/tc_provider.rb +71 -0
- data/test/provider/tc_resumption_tokens.rb +46 -0
- data/test/provider/tc_simple_provider.rb +92 -0
- data/test/provider/test_helper.rb +36 -0
- data/test/test.xml +22 -0
- metadata +181 -0
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'builder' unless defined?(Builder)
|
2
|
+
module OAI
  module Provider
    module Response

      # Shared behaviour for the provider's verb responses.
      #
      # Subclasses declare the request options they understand with the
      # class-level macros +valid_parameters+, +required_parameters+ and
      # +default_parameters+. The constructor normalises the incoming options
      # (camelCase keys become snake_case symbols, +from+/+until+ become Time
      # values) and raises OAI::ArgumentException when validation fails.
      class Base
        attr_reader :provider, :options

        class << self
          attr_reader :valid_options, :default_options, :required_options

          # Adds +args+ to the option keys this response class accepts.
          # Returns the accumulated, de-duplicated list.
          def valid_parameters(*args)
            @valid_options = ((@valid_options || []) + args).uniq
          end

          # Merges +options+ into the defaults applied after validation.
          # A value that responds to +call+ is invoked with the response
          # instance when the default is needed (see populate_defaults).
          def default_parameters(options = {})
            @default_options ||= {}
            @default_options.merge!(options)
          end

          # Marks +args+ as mandatory. Required options are implicitly valid.
          def required_parameters(*args)
            valid_parameters(*args)
            @required_options = ((@required_options || []) + args).uniq
          end
        end

        # provider:: object answering +url+, +prefix+ and +format_supported?+
        # options::  request options; keys may be strings or symbols, in
        #            camelCase or snake_case
        #
        # Raises OAI::ArgumentException for invalid options or unparsable
        # dates, and OAI::FormatException for an unsupported metadata prefix.
        def initialize(provider, options = {})
          @provider = provider
          # Kept untouched so valid_times? can inspect the values as supplied.
          @original_options = options.dup
          @options = internalize(options)
          raise OAI::ArgumentException.new unless valid?
        end

        # Builds the OAI-PMH envelope and yields the Builder instance so the
        # caller can emit the verb-specific payload inside it.
        def response
          @builder = Builder::XmlMarkup.new
          @builder.instruct! :xml, :version => "1.0", :encoding => "UTF-8"
          @builder.tag!('OAI-PMH', header) do
            @builder.responseDate Time.now.utc.xmlschema
            # The request options are deliberately not echoed as attributes:
            # with them the output does not validate against OAI validators.
            @builder.request(provider.url) #-- OAI 2.0 Hack - removed request options
            yield @builder
          end
        end

        private

        # Namespace and schema attributes of the root OAI-PMH element.
        def header
          {
            'xmlns' => "http://www.openarchives.org/OAI/2.0/",
            'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
            'xsi:schemaLocation' =>
              "http://www.openarchives.org/OAI/2.0/\nhttp://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"
          }
        end

        # Strips the provider's record prefix (and the following slash)
        # from +id+.
        def extract_identifier(id)
          id.sub("#{provider.prefix}/", '')
        end

        # Validates @options against the class-level declarations and, on
        # success, fills in the defaults. A lone resumption token is always
        # accepted.
        def valid?
          return true if resumption?

          klass = self.class
          return true if klass.valid_options.nil? && options.empty?

          required = klass.required_options
          return false if required && !(required - @options.keys).empty?

          allowed = klass.valid_options
          return false if !@options.empty? && (allowed.nil? || allowed.empty?)
          return false unless (@options.keys - allowed).empty?
          return false unless valid_times?
          return false unless valid_format?

          populate_defaults
          true
        end

        # True when no metadata prefix was requested; otherwise the provider
        # must support it, else OAI::FormatException is raised.
        def valid_format?
          prefix = @options[:metadata_prefix]
          return true if prefix.nil?
          raise OAI::FormatException.new unless provider.format_supported?(prefix)
          true
        end

        # Checks the +from+/+until+ values as originally supplied: each must
        # be absent, a Time, or start with YYYY-MM-DD[Thh:mm:ssZ]; when both
        # have a length they must have the same length (same granularity).
        #
        # NOTE(review): only the symbol keys :from and :until are inspected,
        # while internalize accepts string keys as well, so string-keyed
        # requests skip this check. Confirm whether that is intended.
        def valid_times?
          from = @original_options[:from]
          til  = @original_options[:until]
          return false unless valid_time_value?(from) && valid_time_value?(til)

          if from.respond_to?(:length) && til.respond_to?(:length)
            return false if from.length != til.length
          end

          true
        end

        # Helper for valid_times?. The Time check runs before the pattern
        # match because Time has no =~ on Ruby >= 3.2 (Object#=~ was removed);
        # any other value without =~ is simply rejected.
        def valid_time_value?(value)
          return true if value.nil? || value.instance_of?(Time)
          return false unless value.respond_to?(:=~)
          !(value =~ /^\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\dZ)?/).nil?
        end

        # Fills every option that is still unset (nil or false) with the
        # class default. A class that never declared defaults has none to
        # apply.
        def populate_defaults
          (self.class.default_options || {}).each do |key, default|
            next if @options[key]
            @options[key] = default.respond_to?(:call) ? default.call(self) : default
          end
        end

        # Returns true when the resumption token is the only option, nil when
        # no token was supplied; a token combined with anything else raises
        # OAI::ArgumentException.
        def resumption?
          return unless @options.key?(:resumption_token)
          return true if @options.size == 1
          raise OAI::ArgumentException.new
        end

        # Convert our internal representations back into standard OAI options
        # (:metadata_prefix -> "metadataPrefix").
        def externalize(value)
          value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
        end

        # Returns +value+ unchanged when it already behaves like a date/time
        # (responds to +strftime+); otherwise parses it into a UTC Time.
        # Any parsing failure is reported as OAI::ArgumentException.
        def parse_date(value)
          return value if value.respond_to?(:strftime)

          Date.parse(value) # This will raise an exception for badly formatted dates
          Time.parse(value).utc # -- UTC Bug fix hack 8/08 not in core
        rescue
          raise OAI::ArgumentException.new
        end

        # Builds the internal option hash: keys are converted from camelCase
        # strings/symbols to snake_case symbols, values are duplicated, and
        # +from+/+until+ are parsed into time values.
        def internalize(hash = {})
          internal = {}
          hash.keys.each do |key|
            internal_key = key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern
            internal[internal_key] = hash[key].dup
          end

          # Convert date formatted strings into internal time values.
          internal[:from] = parse_date(internal[:from]) if internal[:from]
          internal[:until] = parse_date(internal[:until]) if internal[:until]

          internal
        end

      end

    end
  end
end
|
161
|
+
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'enumerator'
|
3
|
+
require File.dirname(__FILE__) + "/partial_result"
|
4
|
+
|
5
|
+
# Nested (rather than compact "OAI::Provider") module definitions keep OAI in
# the lexical constant scope and do not require OAI to be defined beforehand.
module OAI
  module Provider
    # = OAI::Provider::ResumptionToken
    #
    # The ResumptionToken class forms the basis of paging query results. It
    # provides several helper methods for dealing with resumption tokens.
    #
    # The string form of a token is
    #
    #   <prefix>[.s(<set>)][.f(<from>)][.u(<until>)]:<last>
    #
    # where <from>/<until> are UTC xmlschema timestamps and <last> is the
    # offset of the last record already delivered.
    #
    # NOTE(review): the string form is split on '.', so a set spec (or
    # metadata prefix) containing a dot will not round-trip through parse.
    class ResumptionToken
      attr_reader :prefix, :set, :from, :until, :last, :expiration, :total

      # Parses a token string and returns a ResumptionToken.
      #
      # Raises OAI::ResumptionTokenException when the string cannot be
      # parsed for any reason.
      def self.parse(token_string)
        options = {}
        matches = /(.+):(\d+)$/.match(token_string)
        options[:last] = matches.captures[1].to_i

        parts = matches.captures[0].split('.')
        options[:metadata_prefix] = parts.shift
        parts.each do |part|
          # Each remaining part looks like "<letter>(<value>)"; the leading
          # letter selects the condition.
          case part
          when /^s/
            options[:set] = part.sub(/^s\(/, '').sub(/\)$/, '')
          when /^f/
            options[:from] = Time.parse(part.sub(/^f\(/, '').sub(/\)$/, '')).localtime
          when /^u/
            options[:until] = Time.parse(part.sub(/^u\(/, '').sub(/\)$/, '')).localtime
          end
        end
        new(options)
      rescue StandardError
        # Fully qualified so the protocol error is raised regardless of how
        # the enclosing modules were opened.
        raise OAI::ResumptionTokenException.new
      end

      # Extracts the metadata prefix from a token string. The trailing
      # ":<offset>" is removed as well, so a token that carries no
      # set/from/until conditions ("oai_dc:25") still yields "oai_dc".
      def self.extract_format(token_string)
        token_string.split('.')[0].sub(/:\d+\z/, '')
      end

      # options::    hash with :metadata_prefix, :set, :last and optional
      #              :from / :until
      # expiration:: optional time; emitted as expirationDate by to_xml
      # total::      optional value emitted as completeListSize by to_xml
      def initialize(options, expiration = nil, total = nil)
        @prefix = options[:metadata_prefix]
        @set = options[:set]
        @last = options[:last]
        @from = options[:from] if options[:from]
        @until = options[:until] if options[:until]
        @expiration = expiration if expiration
        @total = total if total
      end

      # Convenience method for setting the offset of the next set of
      # results. Returns self so calls can be chained.
      def next(last)
        @last = last
        self
      end

      # Two tokens are equal when all of their attributes are equal.
      def ==(other)
        prefix == other.prefix &&
          set == other.set &&
          from == other.from &&
          self.until == other.until &&
          last == other.last &&
          expiration == other.expiration &&
          total == other.total
      end

      # Output an xml resumption token.
      def to_xml
        xml = Builder::XmlMarkup.new
        xml.resumptionToken(encode_conditions, hash_of_attributes)
        xml.target!
      end

      # Return a hash containing just the model selection parameters.
      def to_conditions_hash
        conditions = { :metadata_prefix => prefix }
        conditions[:set] = set if set
        conditions[:from] = from if from
        conditions[:until] = self.until if self.until
        conditions
      end

      # Return the string representation of the token minus the offset.
      def to_s
        encode_conditions.gsub(/:\w+?$/, '')
      end

      private

      # Serialises prefix, conditions and offset into the token string.
      def encode_conditions
        encoded_token = @prefix.to_s.dup
        encoded_token << ".s(#{set})" if set
        encoded_token << ".f(#{from.utc.xmlschema})" if from
        encoded_token << ".u(#{self.until.utc.xmlschema})" if self.until
        encoded_token << ":#{last}"
      end

      # XML attributes for the resumptionToken element; only the values
      # that were supplied are included.
      def hash_of_attributes
        attributes = {}
        attributes[:completeListSize] = total if total
        attributes[:expirationDate] = expiration.utc.xmlschema if expiration
        attributes
      end

    end

  end
end
|
data/lib/oai/provider.rb
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'singleton'
|
3
|
+
require 'builder'
|
4
|
+
|
5
|
+
if not defined?(OAI::Const::VERBS)
|
6
|
+
require 'oai/exception'
|
7
|
+
require 'oai/constants'
|
8
|
+
require 'oai/xpath'
|
9
|
+
require 'oai/set'
|
10
|
+
end
|
11
|
+
|
12
|
+
%w{ response metadata_format resumption_token model partial_result
|
13
|
+
response/record_response response/identify response/get_record
|
14
|
+
response/list_identifiers response/list_records
|
15
|
+
response/list_metadata_formats response/list_sets response/error
|
16
|
+
}.each { |lib| require File.dirname(__FILE__) + "/provider/#{lib}" }
|
17
|
+
|
18
|
+
if defined?(ActiveRecord)
|
19
|
+
require File.dirname(__FILE__) + "/provider/model/activerecord_wrapper"
|
20
|
+
require File.dirname(__FILE__) + "/provider/model/activerecord_caching_wrapper"
|
21
|
+
end
|
22
|
+
|
23
|
+
# = OAI::Provider
|
24
|
+
#
|
25
|
+
# Open Archives Initiative - Protocol for Metadata Harvesting see
|
26
|
+
# http://www.openarchives.org/
|
27
|
+
#
|
28
|
+
# == Features
|
29
|
+
# * Easily setup a simple repository
|
30
|
+
# * Simple integration with ActiveRecord
|
31
|
+
# * Dublin Core metadata format included
|
32
|
+
# * Easily add additional metadata formats
|
33
|
+
# * Adaptable to any data source
|
34
|
+
# * Simple resumption token support
|
35
|
+
#
|
36
|
+
# == Usage
|
37
|
+
#
|
38
|
+
# To create a functional provider either subclass Provider::Base,
|
39
|
+
# or reconfigure the defaults.
|
40
|
+
#
|
41
|
+
# === Sub classing a provider
|
42
|
+
#
|
43
|
+
#  class MyProvider < OAI::Provider::Base
|
44
|
+
# repository_name 'My little OAI provider'
|
45
|
+
# repository_url 'http://localhost/provider'
|
46
|
+
# record_prefix 'oai:localhost'
|
47
|
+
# admin_email 'root@localhost' # String or Array
|
48
|
+
# source_model MyModel.new # Subclass of OAI::Provider::Model
|
49
|
+
# end
|
50
|
+
#
|
51
|
+
# === Configuring the default provider
|
52
|
+
#
|
53
|
+
#  class OAI::Provider::Base
|
54
|
+
# repository_name 'My little OAI Provider'
|
55
|
+
# repository_url 'http://localhost/provider'
|
56
|
+
# record_prefix 'oai:localhost'
|
57
|
+
# admin_email 'root@localhost'
|
58
|
+
# source_model MyModel.new
|
59
|
+
# end
|
60
|
+
#
|
61
|
+
# The provider does allow a URL to be passed in at request processing time
|
62
|
+
# in case the repository URL cannot be determined ahead of time.
|
63
|
+
#
|
64
|
+
# == Integrating with frameworks
|
65
|
+
#
|
66
|
+
# === Camping
|
67
|
+
#
|
68
|
+
# In the Models module of your camping application post model definition:
|
69
|
+
#
|
70
|
+
# class CampingProvider < OAI::Provider::Base
|
71
|
+
# repository_name 'Camping Test OAI Repository'
|
72
|
+
# source_model ActiveRecordWrapper.new(YOUR_ACTIVE_RECORD_MODEL)
|
73
|
+
# end
|
74
|
+
#
|
75
|
+
# In the Controllers module:
|
76
|
+
#
|
77
|
+
# class Oai
|
78
|
+
# def get
|
79
|
+
# @headers['Content-Type'] = 'text/xml'
|
80
|
+
# provider = Models::CampingProvider.new
|
81
|
+
# provider.process_request(@input.merge(:url => "http:"+URL(Oai).to_s))
|
82
|
+
# end
|
83
|
+
# end
|
84
|
+
#
|
85
|
+
# The provider will be available at "/oai"
|
86
|
+
#
|
87
|
+
# === Rails
|
88
|
+
#
|
89
|
+
# At the bottom of environment.rb create a OAI Provider:
|
90
|
+
#
|
91
|
+
# # forgive the standard blog example.
|
92
|
+
#
|
93
|
+
# require 'oai'
|
94
|
+
# class BlogProvider < OAI::Provider::Base
|
95
|
+
# repository_name 'My little OAI Provider'
|
96
|
+
# repository_url 'http://localhost:3000/provider'
|
97
|
+
# record_prefix 'oai:blog'
|
98
|
+
# admin_email 'root@localhost'
|
99
|
+
# source_model OAI::Provider::ActiveRecordWrapper.new(Post)
|
100
|
+
# end
|
101
|
+
#
|
102
|
+
# Create a custom controller:
|
103
|
+
#
|
104
|
+
# class OaiController < ApplicationController
|
105
|
+
# def index
|
106
|
+
# # Remove controller and action from the options. Rails adds them automatically.
|
107
|
+
# options = params.delete_if { |k,v| %w{controller action}.include?(k) }
|
108
|
+
# provider = BlogProvider.new
|
109
|
+
# response = provider.process_request(options)
|
110
|
+
# render :text => response, :content_type => 'text/xml'
|
111
|
+
# end
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# Special thanks to Jose Hales-Garcia for this solution.
|
115
|
+
#
|
116
|
+
# == Supporting custom metadata formats
|
117
|
+
#
|
118
|
+
# See OAI::Provider::Metadata for details.
|
119
|
+
#
|
120
|
+
# == ActiveRecord Integration
|
121
|
+
#
|
122
|
+
# ActiveRecord integration is provided by the ActiveRecordWrapper class.
|
123
|
+
# It takes one required parameter, the class name of the AR class to wrap,
|
124
|
+
# and optional hash of options.
|
125
|
+
#
|
126
|
+
# Valid options include:
|
127
|
+
# * timestamp_field - Specifies the model field to use as the update
|
128
|
+
# filter. Defaults to 'updated_at'.
|
129
|
+
# * limit - Maximum number of records to return in each page/set.
|
130
|
+
# Defaults to 100. The wrapper will paginate the result via resumption tokens.
|
131
|
+
# Caution: specifying too large a limit will adversely affect performance.
|
132
|
+
#
|
133
|
+
# Mapping from a ActiveRecord object to a specific metadata format follows
|
134
|
+
# this set of rules:
|
135
|
+
#
|
136
|
+
# 1. Does Model#to_{metadata_prefix} exist? If so just return the result.
|
137
|
+
# 2. Does the model provide a map via Model.map_{metadata_prefix}? If so
|
138
|
+
# use the map to generate the xml document.
|
139
|
+
# 3. Loop thru the fields of the metadata format and check to see if the
|
140
|
+
# model responds to either the plural, or singular of the field.
|
141
|
+
#
|
142
|
+
# For maximum control of the xml metadata generated, it's usually best to
|
143
|
+
# provide a 'to_{metadata_prefix}' in the model. If using Builder be sure
|
144
|
+
# not to include any instruct! in the xml object.
|
145
|
+
#
|
146
|
+
# === Explicit creation example
|
147
|
+
#
|
148
|
+
# class Post < ActiveRecord::Base
|
149
|
+
# def to_oai_dc
|
150
|
+
# xml = Builder::XmlMarkup.new
|
151
|
+
# xml.tag!("oai_dc:dc",
|
152
|
+
# 'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
|
153
|
+
# 'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
|
154
|
+
# 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
|
155
|
+
# 'xsi:schemaLocation' =>
|
156
|
+
# %{http://www.openarchives.org/OAI/2.0/oai_dc/
|
157
|
+
# http://www.openarchives.org/OAI/2.0/oai_dc.xsd}) do
|
158
|
+
# xml.tag!('oai_dc:title', title)
|
159
|
+
# xml.tag!('oai_dc:description', text)
|
160
|
+
# xml.tag!('oai_dc:creator', user)
|
161
|
+
# tags.each do |tag|
|
162
|
+
# xml.tag!('oai_dc:subject', tag)
|
163
|
+
# end
|
164
|
+
# end
|
165
|
+
# xml.target!
|
166
|
+
# end
|
167
|
+
# end
|
168
|
+
#
|
169
|
+
# === Mapping Example
|
170
|
+
#
|
171
|
+
# # Extremely contrived mapping
|
172
|
+
# class Post < ActiveRecord::Base
|
173
|
+
# def self.map_oai_dc
|
174
|
+
# {:subject => :tags,
|
175
|
+
# :description => :text,
|
176
|
+
# :creator => :user,
|
177
|
+
#     :contributor => :comments}
|
178
|
+
# end
|
179
|
+
# end
|
180
|
+
#
|
181
|
+
module OAI::Provider
  # Entry point of a repository. Configuration (name, url, record prefix,
  # admin email, deletion support, granularity, source model and metadata
  # formats) lives on the class; instances translate OAI-PMH verbs into
  # Response objects and return their XML.
  class Base
    include OAI::Provider

    class << self
      attr_reader :formats
      attr_accessor :name, :url, :prefix, :email, :delete_support, :granularity, :model

      # Registers a metadata format under its own prefix.
      def register_format(format)
        @formats ||= {}
        @formats[format.prefix] = format
      end

      # True when a format was registered under +prefix+.
      def format_supported?(prefix)
        @formats.key?(prefix)
      end

      # Returns the format registered under +prefix+.
      # Raises OAI::FormatException when there is none.
      def format(prefix)
        registered = @formats[prefix]
        raise OAI::FormatException.new if registered.nil?
        registered
      end

      protected

      # Copies every class-level setting to a new subclass.
      #
      # NOTE(review): values are copied by reference, so the formats hash is
      # shared between a provider class and its subclasses; registering a
      # format on one is visible on the others. Confirm this is intended.
      def inherited(klass)
        instance_variables.each do |iv|
          klass.instance_variable_set(iv, instance_variable_get(iv))
        end
      end

      # DSL-style names for the setters, so a configuration line reads
      # "repository_name 'My repository'".
      alias_method :repository_name,    :name=
      alias_method :repository_url,     :url=
      alias_method :record_prefix,      :prefix=
      alias_method :admin_email,        :email=
      alias_method :deletion_support,   :delete_support=
      alias_method :update_granularity, :granularity=
      alias_method :source_model,       :model=

    end

    # Default configuration of a repository
    Base.repository_name 'Open Archives Initiative Data Provider'
    Base.repository_url 'unknown'
    Base.record_prefix 'oai:localhost'
    Base.admin_email 'nobody@localhost'
    Base.deletion_support OAI::Const::Delete::TRANSIENT
    Base.update_granularity OAI::Const::Granularity::HIGH

    Base.register_format(OAI::Provider::Metadata::DublinCore.instance)

    # Equivalent to '&verb=Identify', returns information about the repository
    def identify(options = {})
      Response::Identify.new(self.class, options).to_xml
    end

    # Equivalent to '&verb=ListSets', returns a list of sets that are supported
    # by the repository or an error if sets are not supported.
    def list_sets(options = {})
      Response::ListSets.new(self.class, options).to_xml
    end

    # Equivalent to '&verb=ListMetadataFormats', returns a list of metadata formats
    # supported by the repository.
    def list_metadata_formats(options = {})
      Response::ListMetadataFormats.new(self.class, options).to_xml
    end

    # Equivalent to '&verb=ListIdentifiers', returns a list of record headers that
    # meet the supplied criteria.
    def list_identifiers(options = {})
      Response::ListIdentifiers.new(self.class, options).to_xml
    end

    # Equivalent to '&verb=ListRecords', returns a list of records that meet the
    # supplied criteria.
    def list_records(options = {})
      Response::ListRecords.new(self.class, options).to_xml
    end

    # Equivalent to '&verb=GetRecord', returns a record matching the required
    # :identifier option
    def get_record(options = {})
      Response::GetRecord.new(self.class, options).to_xml
    end

    # Dispatches a request hash to the method implementing its verb:
    #
    #  xml_response = provider.process_request('verb' => 'ListRecords',
    #                                          'metadataPrefix' => 'oai_dc')
    #
    # If you are implementing a web interface using process_request is the
    # preferred way.
    #
    # The 'verb' and 'url' entries may be given with string or symbol keys;
    # both are removed from +params+ before the remaining entries are passed
    # on as the verb's options. Errors that respond to +code+ are rendered
    # as an error response; anything else is re-raised.
    def process_request(params = {})
      begin

        # Allow the request to pass in a url. Both key forms are removed so
        # neither leaks into the verb options; the string form wins when
        # both carry a value.
        # NOTE(review): this updates the class-level url, so it persists for
        # later requests handled by the same provider class.
        string_url = params.delete('url')
        symbol_url = params.delete(:url)
        requested_url = string_url || symbol_url
        self.class.url = requested_url if requested_url

        verb = params.delete('verb') || params.delete(:verb)

        unless verb && OAI::Const::VERBS.keys.include?(verb)
          raise OAI::VerbException.new
        end

        send(methodize(verb), params)

      rescue => err
        raise err unless err.respond_to?(:code)
        Response::Error.new(self.class, err).to_xml
      end
    end

    # Convert valid OAI-PMH verbs into ruby method calls
    # ("ListRecords" -> "list_records").
    def methodize(verb)
      verb.gsub(/[A-Z]/) { |m| "_#{m.downcase}" }.sub(/^\_/, '')
    end

  end

end
|
data/lib/oai/set.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module OAI

  # Bundles up information about a set retrieved during a
  # ListSets request.
  class Set
    include OAI::XPath
    attr_accessor :name, :spec, :description

    # values:: optional hash with the keys :name, :spec and :description.
    #
    # Raises ArgumentException when any other key is present. The hash is
    # copied first so the caller's hash is left untouched.
    def initialize(values = {})
      remaining = values.dup
      @name = remaining.delete(:name)
      @spec = remaining.delete(:spec)
      @description = remaining.delete(:description)
      raise ArgumentException, "Invalid options" unless remaining.empty?
    end

    # Builds a Set from an XML element: name and spec are the text of the
    # first setName / setSpec descendants, description is the first
    # setDescription node itself.
    def self.parse(element)
      set = new
      set.name = set.xpath(element, './/setName')
      set.spec = set.xpath(element, './/setSpec')
      set.description = set.xpath_first(element, './/setDescription')
      set
    end

    # "name [spec]"
    def to_s
      "#{@name} [#{@spec}]"
    end
  end
end
|
data/lib/oai/xpath.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module OAI
  # XPath helpers that work with either LibXML or REXML nodes. The parser is
  # chosen from the class name of the object handed in, so neither library
  # has to be loaded for this module to be defined.
  module XPath

    # Get all matching nodes as an array; empty when nothing matches or when
    # +doc+ comes from an unrecognised parser.
    def xpath_all(doc, path)
      case parser_type(doc)
      when 'libxml'
        # Run the query once and reuse the result.
        found = doc.find(path)
        return found.to_a if found
      when 'rexml'
        return REXML::XPath.match(doc, path)
      end
      []
    end

    # Get first matching node, or nil.
    def xpath_first(doc, path)
      elements = xpath_all(doc, path)
      elements.nil? ? nil : elements[0]
    end

    # Get text for first matching node, or nil when there is no match.
    def xpath(doc, path)
      el = xpath_first(doc, path)
      return unless el

      case parser_type(doc)
      when 'libxml' then el.content
      when 'rexml'  then el.text
      end
    end

    # Figure out an attribute of +node+. Returns nil for nodes that are
    # neither a REXML::Element nor a LibXML::XML::Node.
    def get_attribute(node, attr_name)
      case node.class.to_s
      when 'REXML::Element'
        return node.attribute(attr_name)
      when 'LibXML::XML::Node'
        # There has been a method shift between libxml 0.5 and 0.7.
        return node.attributes[attr_name] unless defined?(node.property)

        # node.property is being deprecated. We'll eventually remove
        # this trap.
        begin
          return node[attr_name]
        rescue
          return node.property(attr_name)
        end
      end
      nil
    end

    private

    # Figure out what sort of object we should do xpath on: 'libxml',
    # 'rexml', or nil for anything else.
    def parser_type(x)
      case x.class.to_s
      when 'LibXML::XML::Document', 'LibXML::XML::Node', 'LibXML::XML::Node::Set'
        'libxml'
      when 'REXML::Element', 'REXML::Document'
        'rexml'
      end
    end
  end
end
|