oai 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. data/README.md +28 -23
  2. data/Rakefile +14 -40
  3. data/examples/providers/dublin_core.rb +63 -63
  4. data/lib/oai/client.rb +131 -97
  5. data/lib/oai/client/list_identifiers.rb +1 -0
  6. data/lib/oai/client/list_records.rb +6 -5
  7. data/lib/oai/client/list_sets.rb +6 -5
  8. data/lib/oai/client/record.rb +6 -7
  9. data/lib/oai/client/response.rb +7 -4
  10. data/lib/oai/client/resumable.rb +42 -0
  11. data/lib/oai/harvester/shell.rb +40 -41
  12. data/lib/oai/provider.rb +85 -67
  13. data/lib/oai/provider/metadata_format/oai_dc.rb +5 -6
  14. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +23 -25
  15. data/lib/oai/provider/model/activerecord_wrapper.rb +99 -51
  16. data/lib/oai/provider/response.rb +33 -31
  17. data/lib/oai/provider/response/get_record.rb +7 -7
  18. data/lib/oai/provider/response/list_records.rb +5 -4
  19. data/lib/oai/provider/response/record_response.rb +14 -14
  20. data/test/activerecord_provider/config/connection.rb +8 -4
  21. data/test/activerecord_provider/database/{ar_migration.rb → 0001_oaipmh_tables.rb} +17 -12
  22. data/test/activerecord_provider/helpers/providers.rb +2 -3
  23. data/test/activerecord_provider/helpers/set_provider.rb +10 -22
  24. data/test/activerecord_provider/helpers/transactional_test_case.rb +34 -0
  25. data/test/activerecord_provider/models/dc_field.rb +4 -4
  26. data/test/activerecord_provider/models/dc_set.rb +3 -2
  27. data/test/activerecord_provider/models/exclusive_set_dc_field.rb +11 -0
  28. data/test/activerecord_provider/tc_ar_provider.rb +67 -28
  29. data/test/activerecord_provider/tc_ar_sets_provider.rb +104 -18
  30. data/test/activerecord_provider/tc_caching_paging_provider.rb +6 -10
  31. data/test/activerecord_provider/tc_simple_paging_provider.rb +7 -11
  32. data/test/activerecord_provider/test_helper.rb +10 -0
  33. data/test/client/helpers/provider.rb +44 -47
  34. data/test/client/helpers/test_wrapper.rb +4 -16
  35. data/test/client/tc_http_client.rb +90 -2
  36. data/test/client/tc_list_identifiers.rb +22 -3
  37. data/test/client/tc_list_records.rb +17 -4
  38. data/test/client/tc_list_sets.rb +17 -2
  39. data/test/provider/models.rb +32 -30
  40. data/test/provider/tc_exceptions.rb +30 -20
  41. data/test/provider/tc_functional_tokens.rb +11 -6
  42. data/test/provider/tc_provider.rb +58 -24
  43. data/test/provider/tc_resumption_tokens.rb +6 -6
  44. data/test/provider/tc_simple_provider.rb +51 -26
  45. data/test/provider/test_helper.rb +7 -0
  46. metadata +67 -128
  47. data/test/activerecord_provider/config/database.yml +0 -6
  48. data/test/activerecord_provider/database/oaipmhtest +0 -0
@@ -1,9 +1,8 @@
1
1
  module OAI::Provider::Metadata
2
- # = OAI::Metadata::DublinCore
3
- #
2
+
4
3
  # Simple implementation of the Dublin Core metadata format.
5
4
  class DublinCore < Format
6
-
5
+
7
6
  def initialize
8
7
  @prefix = 'oai_dc'
9
8
  @schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
@@ -19,9 +18,9 @@ module OAI::Provider::Metadata
19
18
  'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
20
19
  'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
21
20
  'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
22
- 'xsi:schemaLocation' =>
23
- %{http://www.openarchives.org/OAI/2.0/oai_dc/
24
- http://www.openarchives.org/OAI/2.0/oai_dc.xsd}
21
+ 'xsi:schemaLocation' =>
22
+ %{http://www.openarchives.org/OAI/2.0/oai_dc/
23
+ http://www.openarchives.org/OAI/2.0/oai_dc.xsd}.gsub(/\s+/, ' ')
25
24
  }
26
25
  end
27
26
 
@@ -1,21 +1,21 @@
1
1
  require 'active_record'
2
2
 
3
3
  module OAI::Provider
4
-
4
+
5
5
  # ActiveRecord model class in support of the caching wrapper.
6
6
  class OaiToken < ActiveRecord::Base
7
- has_many :entries, :class_name => 'OaiEntry',
7
+ has_many :entries, :class_name => 'OaiEntry',
8
8
  :order => "record_id", :dependent => :destroy
9
9
 
10
10
  validates_uniqueness_of :token
11
-
11
+
12
12
  # Make sanitize_sql a public method so we can make use of it.
13
13
  public
14
-
14
+
15
15
  def self.sanitize_sql(*arg)
16
16
  super(*arg)
17
17
  end
18
-
18
+
19
19
  def new_record_before_save?
20
20
  @new_record_before_save
21
21
  end
@@ -28,12 +28,10 @@ module OAI::Provider
28
28
 
29
29
  validates_uniqueness_of :record_id, :scope => :oai_token
30
30
  end
31
-
32
- # = OAI::Provider::ActiveRecordCachingWrapper
33
- #
31
+
34
32
  # This class wraps an ActiveRecord model and delegates all of the record
35
33
  # selection/retrieval to the AR model. It accepts options for specifying
36
- # the update timestamp field, a timeout, and a limit. The limit option
34
+ # the update timestamp field, a timeout, and a limit. The limit option
37
35
  # is used for doing pagination with resumption tokens. The timeout is
38
36
  # used to expire old tokens from the cache. Default timeout is 12 hours.
39
37
  #
@@ -43,18 +41,18 @@ module OAI::Provider
43
41
  # If you have an extremely active respository you may want to consider
44
42
  # the caching wrapper. The caching wrapper takes the entire result set
45
43
  # from a request and caches it in another database table, well tables
46
- # actually. So the result returned to the client will always be
44
+ # actually. So the result returned to the client will always be
47
45
  # internally consistent.
48
46
  #
49
47
  class ActiveRecordCachingWrapper < ActiveRecordWrapper
50
-
48
+
51
49
  attr_reader :model, :timestamp_field, :expire
52
-
50
+
53
51
  def initialize(model, options={})
54
52
  @expire = options.delete(:timeout) || 12.hours
55
53
  super(model, options)
56
54
  end
57
-
55
+
58
56
  def find(selector, options={})
59
57
  sweep_cache
60
58
  return next_set(options[:resumption_token]) if options[:resumption_token]
@@ -73,9 +71,9 @@ module OAI::Provider
73
71
  model.find(selector, :conditions => conditions)
74
72
  end
75
73
  end
76
-
77
- protected
78
-
74
+
75
+ protected
76
+
79
77
  def next_set(token_string)
80
78
  raise ResumptionTokenException.new unless @limit
81
79
 
@@ -84,11 +82,11 @@ module OAI::Provider
84
82
 
85
83
  if token.last * @limit + @limit < total
86
84
  select_partial(token)
87
- else
85
+ else
88
86
  select_partial(token).records
89
87
  end
90
88
  end
91
-
89
+
92
90
  # select a subset of the result set, and return it with a
93
91
  # resumption token to get the next subset
94
92
  def select_partial(token)
@@ -101,30 +99,30 @@ module OAI::Provider
101
99
  "#{OaiToken.sanitize_sql(token_conditions(token))}")
102
100
  end
103
101
  end
104
-
102
+
105
103
  oaitoken = OaiToken.find_by_token(token.to_s)
106
104
  raise ResumptionTokenException.new unless oaitoken
107
105
 
108
106
  PartialResult.new(
109
- hydrate_records(oaitoken.entries.find(:all, :limit => @limit,
107
+ hydrate_records(oaitoken.entries.find(:all, :limit => @limit,
110
108
  :offset => token.last * @limit)), token.next(token.last + 1)
111
109
  )
112
110
  end
113
-
111
+
114
112
  def sweep_cache
115
113
  OaiToken.destroy_all(["created_at < ?", Time.now - expire])
116
114
  end
117
-
115
+
118
116
  def hydrate_records(records)
119
117
  model.find(records.collect {|r| r.record_id })
120
118
  end
121
-
119
+
122
120
  def token_conditions(token)
123
121
  sql_conditions token.to_conditions_hash
124
122
  end
125
-
123
+
126
124
  private
127
-
125
+
128
126
  def expires_at(creation)
129
127
  created = Time.parse(creation.strftime("%Y-%m-%d %H:%M:%S"))
130
128
  created.utc + expire
@@ -1,23 +1,22 @@
1
1
  require 'active_record'
2
+
2
3
  module OAI::Provider
3
- # = OAI::Provider::ActiveRecordWrapper
4
- #
5
4
  # This class wraps an ActiveRecord model and delegates all of the record
6
5
  # selection/retrieval to the AR model. It accepts options for specifying
7
- # the update timestamp field, a timeout, and a limit. The limit option
6
+ # the update timestamp field, a timeout, and a limit. The limit option
8
7
  # is used for doing pagination with resumption tokens. The
9
8
  # expiration timeout is ignored, since all necessary information is
10
9
  # encoded in the token.
11
10
  #
12
11
  class ActiveRecordWrapper < Model
13
-
12
+
14
13
  attr_reader :model, :timestamp_field
15
-
14
+
16
15
  def initialize(model, options={})
17
16
  @model = model
18
17
  @timestamp_field = options.delete(:timestamp_field) || 'updated_at'
19
18
  @limit = options.delete(:limit)
20
-
19
+
21
20
  unless options.empty?
22
21
  raise ArgumentError.new(
23
22
  "Unsupported options [#{options.keys.join(', ')}]"
@@ -26,36 +25,39 @@ module OAI::Provider
26
25
  end
27
26
 
28
27
  def earliest
29
- model.find(:first,
30
- :order => "#{timestamp_field} asc").send(timestamp_field)
28
+ earliest_obj = model.find(:first, :order => "#{timestamp_field} asc")
29
+ earliest_obj.nil? ? Time.at(0) : earliest_obj.send(timestamp_field)
31
30
  end
32
-
31
+
33
32
  def latest
34
- model.find(:first,
35
- :order => "#{timestamp_field} desc").send(timestamp_field)
33
+ latest_obj = model.find(:first, :order => "#{timestamp_field} desc")
34
+ latest_obj.nil? ? Time.now : latest_obj.send(timestamp_field)
36
35
  end
37
36
  # A model class is expected to provide a method Model.sets that
38
- # returns all the sets the model supports. See the
39
- # activerecord_provider tests for an example.
37
+ # returns all the sets the model supports. See the
38
+ # activerecord_provider tests for an example.
40
39
  def sets
41
40
  model.sets if model.respond_to?(:sets)
42
41
  end
43
-
42
+
44
43
  def find(selector, options={})
45
- return next_set(options[:resumption_token]) if options[:resumption_token]
44
+ find_scope = find_scope(options)
45
+ return next_set(find_scope,
46
+ options[:resumption_token]) if options[:resumption_token]
46
47
  conditions = sql_conditions(options)
47
48
  if :all == selector
48
- total = model.count(:id, :conditions => conditions)
49
+ total = find_scope.count(:id, :conditions => conditions)
49
50
  if @limit && total > @limit
50
- select_partial(ResumptionToken.new(options.merge({:last => 0})))
51
+ select_partial(find_scope,
52
+ ResumptionToken.new(options.merge({:last => 0})))
51
53
  else
52
- model.find(:all, :conditions => conditions)
54
+ find_scope.find(:all, :conditions => conditions)
53
55
  end
54
56
  else
55
- model.find(selector, :conditions => conditions)
57
+ find_scope.find(selector, :conditions => conditions)
56
58
  end
57
59
  end
58
-
60
+
59
61
  def deleted?(record)
60
62
  if record.respond_to?(:deleted_at)
61
63
  return record.deleted_at
@@ -63,10 +65,10 @@ module OAI::Provider
63
65
  return record.deleted
64
66
  end
65
67
  false
66
- end
67
-
68
+ end
69
+
68
70
  def respond_to?(m, *args)
69
- if m =~ /^map_/
71
+ if m =~ /^map_/
70
72
  model.respond_to?(m, *args)
71
73
  else
72
74
  super
@@ -82,66 +84,112 @@ module OAI::Provider
82
84
  end
83
85
 
84
86
  protected
85
-
87
+
88
+ def find_scope(options)
89
+ return model unless options.key?(:set)
90
+
91
+ # Find the set or return an empty scope
92
+ set = find_set_by_spec(options[:set])
93
+ return model.scoped(:limit => 0) if set.nil?
94
+
95
+ # If the set has a backward relationship, we'll use it
96
+ if set.class.respond_to?(:reflect_on_all_associations)
97
+ set.class.reflect_on_all_associations.each do |assoc|
98
+ return set.send(assoc.name).scoped if assoc.klass == model
99
+ end
100
+ end
101
+
102
+ # Search the attributes for 'set'
103
+ if model.column_names.include?('set')
104
+ # Scope using the set attribute as the spec
105
+ model.scoped(:conditions => {:set => options[:set]})
106
+ else
107
+ # Default to empty set, as we've tried everything else
108
+ model.scoped(:limit => 0)
109
+ end
110
+ end
111
+
112
+ def find_set_by_spec(spec)
113
+ if sets.class == ActiveRecord::Relation
114
+ sets.find_by_spec(spec)
115
+ else
116
+ sets.detect {|set| set.spec == spec}
117
+ end
118
+ end
119
+
86
120
  # Request the next set in this sequence.
87
- def next_set(token_string)
121
+ def next_set(find_scope, token_string)
88
122
  raise OAI::ResumptionTokenException.new unless @limit
89
-
123
+
90
124
  token = ResumptionToken.parse(token_string)
91
- total = model.count(:id, :conditions => token_conditions(token))
92
-
125
+ total = find_scope.count(:id, :conditions => token_conditions(token))
126
+
93
127
  if @limit < total
94
- select_partial(token)
128
+ select_partial(find_scope, token)
95
129
  else # end of result set
96
- model.find(:all,
97
- :conditions => token_conditions(token),
130
+ find_scope.find(:all,
131
+ :conditions => token_conditions(token),
98
132
  :limit => @limit, :order => "#{model.primary_key} asc")
99
133
  end
100
134
  end
101
-
135
+
102
136
  # select a subset of the result set, and return it with a
103
137
  # resumption token to get the next subset
104
- def select_partial(token)
105
- records = model.find(:all,
138
+ def select_partial(find_scope, token)
139
+ records = find_scope.find(:all,
106
140
  :conditions => token_conditions(token),
107
- :limit => @limit,
141
+ :limit => @limit,
108
142
  :order => "#{model.primary_key} asc")
109
143
  raise OAI::ResumptionTokenException.new unless records
110
144
  offset = records.last.send(model.primary_key.to_sym)
111
-
145
+
112
146
  PartialResult.new(records, token.next(offset))
113
147
  end
114
-
148
+
115
149
  # build a sql conditions statement from the content
116
150
  # of a resumption token. It is very important not to
117
151
  # miss any changes as records may change scope as the
118
152
  # harvest is in progress. To avoid loosing any changes
119
- # the last 'id' of the previous set is used as the
153
+ # the last 'id' of the previous set is used as the
120
154
  # filter to the next set.
121
155
  def token_conditions(token)
122
156
  last = token.last
123
157
  sql = sql_conditions token.to_conditions_hash
124
-
158
+
125
159
  return sql if 0 == last
126
160
  # Now add last id constraint
127
- sql[0] << " AND #{model.primary_key} > ?"
128
- sql << last
129
-
161
+ sql.first << " AND #{model.primary_key} > :id"
162
+ sql.last[:id] = last
163
+
130
164
  return sql
131
165
  end
132
-
166
+
133
167
  # build a sql conditions statement from an OAI options hash
134
168
  def sql_conditions(opts)
135
169
  sql = []
136
- sql << "#{timestamp_field} >= ?" << "#{timestamp_field} <= ?"
137
- sql << "set = ?" if opts[:set]
138
- esc_values = [sql.join(" AND ")]
139
- esc_values << Time.parse(opts[:from].to_s).localtime << Time.parse(opts[:until].to_s).localtime.to_s #-- OAI 2.0 hack - UTC fix from record_responce
140
- esc_values << opts[:set] if opts[:set]
141
-
142
- return esc_values
170
+ esc_values = {}
171
+ if opts.has_key?(:from)
172
+ sql << "#{timestamp_field} >= :from"
173
+ esc_values[:from] = parse_to_local(opts[:from])
174
+ end
175
+ if opts.has_key?(:until)
176
+ # Handle databases which store fractions of a second by rounding up
177
+ sql << "#{timestamp_field} < :until"
178
+ esc_values[:until] = parse_to_local(opts[:until]) { |t| t + 1 }
179
+ end
180
+ return [sql.join(" AND "), esc_values]
143
181
  end
144
-
182
+
183
+ private
184
+
185
+ def parse_to_local(time)
186
+ time_obj = Time.parse(time.to_s)
187
+ time_obj = yield(time_obj) if block_given?
188
+ # Convert to same as DB - :local => :getlocal, :utc => :getutc
189
+ tzconv = "get#{model.default_timezone.to_s}".to_sym
190
+ time_obj.send(tzconv).strftime("%Y-%m-%d %H:%M:%S")
191
+ end
192
+
145
193
  end
146
194
  end
147
195
 
@@ -2,119 +2,121 @@ require 'builder' unless defined?(Builder)
2
2
  module OAI
3
3
  module Provider
4
4
  module Response
5
-
5
+
6
6
  class Base
7
7
  attr_reader :provider, :options
8
-
8
+
9
9
  class << self
10
10
  attr_reader :valid_options, :default_options, :required_options
11
11
  def valid_parameters(*args)
12
12
  @valid_options ||= []
13
13
  @valid_options = (@valid_options + args.dup).uniq
14
14
  end
15
-
15
+
16
16
  def default_parameters(options = {})
17
17
  @default_options ||= {}
18
18
  @default_options.merge! options.dup
19
19
  end
20
-
20
+
21
21
  def required_parameters(*args)
22
22
  valid_parameters(*args)
23
23
  @required_options ||= []
24
24
  @required_options = (@required_options + args.dup).uniq
25
25
  end
26
-
27
- end
26
+
27
+ end
28
28
  def initialize(provider, options = {})
29
29
  @provider = provider
30
+ @request_options = options.dup
30
31
  @options = internalize(options)
31
32
  raise OAI::ArgumentException.new unless valid?
32
33
  end
33
34
  def response
34
35
  @builder = Builder::XmlMarkup.new
35
36
  @builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8"
36
- @builder.tag!('OAI-PMH', header) do
37
+ @builder.tag!('OAI-PMH', header) do
37
38
  @builder.responseDate Time.now.utc.xmlschema
38
- #options parameter has been removed here because with it
39
- #the data won't validate against oai validators. Without, it
40
- #validates.
41
- @builder.request(provider.url) #-- OAI 2.0 Hack - removed request options
39
+ @builder.request(provider.url, (@request_options.merge(:verb => verb) unless self.class == Error))
42
40
  yield @builder
43
41
  end
44
42
  end
45
43
  private
46
-
44
+
47
45
  def header
48
- {
46
+ {
49
47
  'xmlns' => "http://www.openarchives.org/OAI/2.0/",
50
48
  'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
51
49
  'xsi:schemaLocation' => %{http://www.openarchives.org/OAI/2.0/
52
- http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd}
50
+ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd}.gsub(/\s+/, ' ')
53
51
  }
54
52
  end
55
53
  def extract_identifier(id)
56
54
  id.sub("#{provider.prefix}/", '')
57
55
  end
58
-
56
+
59
57
  def valid?
60
58
  return true if resumption?
61
-
59
+
62
60
  return true if self.class.valid_options.nil? and options.empty?
63
-
64
- # check if the request includes an argument and there are no valid
61
+
62
+ # check if the request includes an argument and there are no valid
65
63
  # arguments for that verb (Identify, for example).
66
64
  raise OAI::ArgumentException.new if self.class.valid_options.nil? && !options.empty?
67
-
65
+
68
66
  if self.class.required_options
69
67
  return false unless (self.class.required_options - @options.keys).empty?
70
68
  end
71
69
  return false unless (@options.keys - self.class.valid_options).empty?
72
70
  populate_defaults
73
71
  end
74
-
72
+
75
73
  def populate_defaults
76
74
  self.class.default_options.each do |k,v|
77
75
  @options[k] = v.respond_to?(:call) ? v.call(self) : v if not @options[k]
78
76
  end
79
77
  end
80
-
78
+
81
79
  def resumption?
82
- if @options.keys.include?(:resumption_token)
80
+ if @options.keys.include?(:resumption_token)
83
81
  return true if 1 == @options.keys.size
84
82
  raise OAI::ArgumentException.new
85
83
  end
86
84
  end
87
-
85
+
88
86
  # Convert our internal representations back into standard OAI options
89
87
  def externalize(value)
90
88
  value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
91
89
  end
92
-
90
+
93
91
  def parse_date(value)
94
92
  return value if value.respond_to?(:strftime)
95
-
93
+
96
94
  Date.parse(value) # This will raise an exception for badly formatted dates
97
95
  Time.parse(value).utc # -- UTC Bug fix hack 8/08 not in core
98
96
  rescue
99
- raise OAI::ArgumentError.new
97
+ raise OAI::ArgumentError.new
100
98
  end
101
-
99
+
102
100
  def internalize(hash = {})
103
101
  internal = {}
104
102
  hash.keys.each do |key|
105
103
  internal[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = hash[key].dup
106
104
  end
107
-
105
+
108
106
  # Convert date formated strings into internal time values
109
107
  # Convert date formated strings in dates.
110
108
  internal[:from] = parse_date(internal[:from]) if internal[:from]
111
109
  internal[:until] = parse_date(internal[:until]) if internal[:until]
112
-
110
+
113
111
  internal
114
112
  end
115
-
113
+
114
+ def verb
115
+ self.class.to_s.split('::').last
116
+ end
117
+
116
118
  end
117
-
119
+
118
120
  end
119
121
  end
120
122
  end