oai 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/README.md +28 -23
  2. data/Rakefile +14 -40
  3. data/examples/providers/dublin_core.rb +63 -63
  4. data/lib/oai/client.rb +131 -97
  5. data/lib/oai/client/list_identifiers.rb +1 -0
  6. data/lib/oai/client/list_records.rb +6 -5
  7. data/lib/oai/client/list_sets.rb +6 -5
  8. data/lib/oai/client/record.rb +6 -7
  9. data/lib/oai/client/response.rb +7 -4
  10. data/lib/oai/client/resumable.rb +42 -0
  11. data/lib/oai/harvester/shell.rb +40 -41
  12. data/lib/oai/provider.rb +85 -67
  13. data/lib/oai/provider/metadata_format/oai_dc.rb +5 -6
  14. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +23 -25
  15. data/lib/oai/provider/model/activerecord_wrapper.rb +99 -51
  16. data/lib/oai/provider/response.rb +33 -31
  17. data/lib/oai/provider/response/get_record.rb +7 -7
  18. data/lib/oai/provider/response/list_records.rb +5 -4
  19. data/lib/oai/provider/response/record_response.rb +14 -14
  20. data/test/activerecord_provider/config/connection.rb +8 -4
  21. data/test/activerecord_provider/database/{ar_migration.rb → 0001_oaipmh_tables.rb} +17 -12
  22. data/test/activerecord_provider/helpers/providers.rb +2 -3
  23. data/test/activerecord_provider/helpers/set_provider.rb +10 -22
  24. data/test/activerecord_provider/helpers/transactional_test_case.rb +34 -0
  25. data/test/activerecord_provider/models/dc_field.rb +4 -4
  26. data/test/activerecord_provider/models/dc_set.rb +3 -2
  27. data/test/activerecord_provider/models/exclusive_set_dc_field.rb +11 -0
  28. data/test/activerecord_provider/tc_ar_provider.rb +67 -28
  29. data/test/activerecord_provider/tc_ar_sets_provider.rb +104 -18
  30. data/test/activerecord_provider/tc_caching_paging_provider.rb +6 -10
  31. data/test/activerecord_provider/tc_simple_paging_provider.rb +7 -11
  32. data/test/activerecord_provider/test_helper.rb +10 -0
  33. data/test/client/helpers/provider.rb +44 -47
  34. data/test/client/helpers/test_wrapper.rb +4 -16
  35. data/test/client/tc_http_client.rb +90 -2
  36. data/test/client/tc_list_identifiers.rb +22 -3
  37. data/test/client/tc_list_records.rb +17 -4
  38. data/test/client/tc_list_sets.rb +17 -2
  39. data/test/provider/models.rb +32 -30
  40. data/test/provider/tc_exceptions.rb +30 -20
  41. data/test/provider/tc_functional_tokens.rb +11 -6
  42. data/test/provider/tc_provider.rb +58 -24
  43. data/test/provider/tc_resumption_tokens.rb +6 -6
  44. data/test/provider/tc_simple_provider.rb +51 -26
  45. data/test/provider/test_helper.rb +7 -0
  46. metadata +67 -128
  47. data/test/activerecord_provider/config/database.yml +0 -6
  48. data/test/activerecord_provider/database/oaipmhtest +0 -0
@@ -1,9 +1,8 @@
1
1
  module OAI::Provider::Metadata
2
- # = OAI::Metadata::DublinCore
3
- #
2
+
4
3
  # Simple implementation of the Dublin Core metadata format.
5
4
  class DublinCore < Format
6
-
5
+
7
6
  def initialize
8
7
  @prefix = 'oai_dc'
9
8
  @schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
@@ -19,9 +18,9 @@ module OAI::Provider::Metadata
19
18
  'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/",
20
19
  'xmlns:dc' => "http://purl.org/dc/elements/1.1/",
21
20
  'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
22
- 'xsi:schemaLocation' =>
23
- %{http://www.openarchives.org/OAI/2.0/oai_dc/
24
- http://www.openarchives.org/OAI/2.0/oai_dc.xsd}
21
+ 'xsi:schemaLocation' =>
22
+ %{http://www.openarchives.org/OAI/2.0/oai_dc/
23
+ http://www.openarchives.org/OAI/2.0/oai_dc.xsd}.gsub(/\s+/, ' ')
25
24
  }
26
25
  end
27
26
 
@@ -1,21 +1,21 @@
1
1
  require 'active_record'
2
2
 
3
3
  module OAI::Provider
4
-
4
+
5
5
  # ActiveRecord model class in support of the caching wrapper.
6
6
  class OaiToken < ActiveRecord::Base
7
- has_many :entries, :class_name => 'OaiEntry',
7
+ has_many :entries, :class_name => 'OaiEntry',
8
8
  :order => "record_id", :dependent => :destroy
9
9
 
10
10
  validates_uniqueness_of :token
11
-
11
+
12
12
  # Make sanitize_sql a public method so we can make use of it.
13
13
  public
14
-
14
+
15
15
  def self.sanitize_sql(*arg)
16
16
  super(*arg)
17
17
  end
18
-
18
+
19
19
  def new_record_before_save?
20
20
  @new_record_before_save
21
21
  end
@@ -28,12 +28,10 @@ module OAI::Provider
28
28
 
29
29
  validates_uniqueness_of :record_id, :scope => :oai_token
30
30
  end
31
-
32
- # = OAI::Provider::ActiveRecordCachingWrapper
33
- #
31
+
34
32
  # This class wraps an ActiveRecord model and delegates all of the record
35
33
  # selection/retrieval to the AR model. It accepts options for specifying
36
- # the update timestamp field, a timeout, and a limit. The limit option
34
+ # the update timestamp field, a timeout, and a limit. The limit option
37
35
  # is used for doing pagination with resumption tokens. The timeout is
38
36
  # used to expire old tokens from the cache. Default timeout is 12 hours.
39
37
  #
@@ -43,18 +41,18 @@ module OAI::Provider
43
41
  # If you have an extremely active repository you may want to consider
44
42
  # the caching wrapper. The caching wrapper takes the entire result set
45
43
  # from a request and caches it in another database table, well tables
46
- # actually. So the result returned to the client will always be
44
+ # actually. So the result returned to the client will always be
47
45
  # internally consistent.
48
46
  #
49
47
  class ActiveRecordCachingWrapper < ActiveRecordWrapper
50
-
48
+
51
49
  attr_reader :model, :timestamp_field, :expire
52
-
50
+
53
51
  def initialize(model, options={})
54
52
  @expire = options.delete(:timeout) || 12.hours
55
53
  super(model, options)
56
54
  end
57
-
55
+
58
56
  def find(selector, options={})
59
57
  sweep_cache
60
58
  return next_set(options[:resumption_token]) if options[:resumption_token]
@@ -73,9 +71,9 @@ module OAI::Provider
73
71
  model.find(selector, :conditions => conditions)
74
72
  end
75
73
  end
76
-
77
- protected
78
-
74
+
75
+ protected
76
+
79
77
  def next_set(token_string)
80
78
  raise ResumptionTokenException.new unless @limit
81
79
 
@@ -84,11 +82,11 @@ module OAI::Provider
84
82
 
85
83
  if token.last * @limit + @limit < total
86
84
  select_partial(token)
87
- else
85
+ else
88
86
  select_partial(token).records
89
87
  end
90
88
  end
91
-
89
+
92
90
  # select a subset of the result set, and return it with a
93
91
  # resumption token to get the next subset
94
92
  def select_partial(token)
@@ -101,30 +99,30 @@ module OAI::Provider
101
99
  "#{OaiToken.sanitize_sql(token_conditions(token))}")
102
100
  end
103
101
  end
104
-
102
+
105
103
  oaitoken = OaiToken.find_by_token(token.to_s)
106
104
  raise ResumptionTokenException.new unless oaitoken
107
105
 
108
106
  PartialResult.new(
109
- hydrate_records(oaitoken.entries.find(:all, :limit => @limit,
107
+ hydrate_records(oaitoken.entries.find(:all, :limit => @limit,
110
108
  :offset => token.last * @limit)), token.next(token.last + 1)
111
109
  )
112
110
  end
113
-
111
+
114
112
  def sweep_cache
115
113
  OaiToken.destroy_all(["created_at < ?", Time.now - expire])
116
114
  end
117
-
115
+
118
116
  def hydrate_records(records)
119
117
  model.find(records.collect {|r| r.record_id })
120
118
  end
121
-
119
+
122
120
  def token_conditions(token)
123
121
  sql_conditions token.to_conditions_hash
124
122
  end
125
-
123
+
126
124
  private
127
-
125
+
128
126
  def expires_at(creation)
129
127
  created = Time.parse(creation.strftime("%Y-%m-%d %H:%M:%S"))
130
128
  created.utc + expire
@@ -1,23 +1,22 @@
1
1
  require 'active_record'
2
+
2
3
  module OAI::Provider
3
- # = OAI::Provider::ActiveRecordWrapper
4
- #
5
4
  # This class wraps an ActiveRecord model and delegates all of the record
6
5
  # selection/retrieval to the AR model. It accepts options for specifying
7
- # the update timestamp field, a timeout, and a limit. The limit option
6
+ # the update timestamp field, a timeout, and a limit. The limit option
8
7
  # is used for doing pagination with resumption tokens. The
9
8
  # expiration timeout is ignored, since all necessary information is
10
9
  # encoded in the token.
11
10
  #
12
11
  class ActiveRecordWrapper < Model
13
-
12
+
14
13
  attr_reader :model, :timestamp_field
15
-
14
+
16
15
  def initialize(model, options={})
17
16
  @model = model
18
17
  @timestamp_field = options.delete(:timestamp_field) || 'updated_at'
19
18
  @limit = options.delete(:limit)
20
-
19
+
21
20
  unless options.empty?
22
21
  raise ArgumentError.new(
23
22
  "Unsupported options [#{options.keys.join(', ')}]"
@@ -26,36 +25,39 @@ module OAI::Provider
26
25
  end
27
26
 
28
27
  def earliest
29
- model.find(:first,
30
- :order => "#{timestamp_field} asc").send(timestamp_field)
28
+ earliest_obj = model.find(:first, :order => "#{timestamp_field} asc")
29
+ earliest_obj.nil? ? Time.at(0) : earliest_obj.send(timestamp_field)
31
30
  end
32
-
31
+
33
32
  def latest
34
- model.find(:first,
35
- :order => "#{timestamp_field} desc").send(timestamp_field)
33
+ latest_obj = model.find(:first, :order => "#{timestamp_field} desc")
34
+ latest_obj.nil? ? Time.now : latest_obj.send(timestamp_field)
36
35
  end
37
36
  # A model class is expected to provide a method Model.sets that
38
- # returns all the sets the model supports. See the
39
- # activerecord_provider tests for an example.
37
+ # returns all the sets the model supports. See the
38
+ # activerecord_provider tests for an example.
40
39
  def sets
41
40
  model.sets if model.respond_to?(:sets)
42
41
  end
43
-
42
+
44
43
  def find(selector, options={})
45
- return next_set(options[:resumption_token]) if options[:resumption_token]
44
+ find_scope = find_scope(options)
45
+ return next_set(find_scope,
46
+ options[:resumption_token]) if options[:resumption_token]
46
47
  conditions = sql_conditions(options)
47
48
  if :all == selector
48
- total = model.count(:id, :conditions => conditions)
49
+ total = find_scope.count(:id, :conditions => conditions)
49
50
  if @limit && total > @limit
50
- select_partial(ResumptionToken.new(options.merge({:last => 0})))
51
+ select_partial(find_scope,
52
+ ResumptionToken.new(options.merge({:last => 0})))
51
53
  else
52
- model.find(:all, :conditions => conditions)
54
+ find_scope.find(:all, :conditions => conditions)
53
55
  end
54
56
  else
55
- model.find(selector, :conditions => conditions)
57
+ find_scope.find(selector, :conditions => conditions)
56
58
  end
57
59
  end
58
-
60
+
59
61
  def deleted?(record)
60
62
  if record.respond_to?(:deleted_at)
61
63
  return record.deleted_at
@@ -63,10 +65,10 @@ module OAI::Provider
63
65
  return record.deleted
64
66
  end
65
67
  false
66
- end
67
-
68
+ end
69
+
68
70
  def respond_to?(m, *args)
69
- if m =~ /^map_/
71
+ if m =~ /^map_/
70
72
  model.respond_to?(m, *args)
71
73
  else
72
74
  super
@@ -82,66 +84,112 @@ module OAI::Provider
82
84
  end
83
85
 
84
86
  protected
85
-
87
+
88
+ def find_scope(options)
89
+ return model unless options.key?(:set)
90
+
91
+ # Find the set or return an empty scope
92
+ set = find_set_by_spec(options[:set])
93
+ return model.scoped(:limit => 0) if set.nil?
94
+
95
+ # If the set has a backward relationship, we'll use it
96
+ if set.class.respond_to?(:reflect_on_all_associations)
97
+ set.class.reflect_on_all_associations.each do |assoc|
98
+ return set.send(assoc.name).scoped if assoc.klass == model
99
+ end
100
+ end
101
+
102
+ # Search the attributes for 'set'
103
+ if model.column_names.include?('set')
104
+ # Scope using the set attribute as the spec
105
+ model.scoped(:conditions => {:set => options[:set]})
106
+ else
107
+ # Default to empty set, as we've tried everything else
108
+ model.scoped(:limit => 0)
109
+ end
110
+ end
111
+
112
+ def find_set_by_spec(spec)
113
+ if sets.class == ActiveRecord::Relation
114
+ sets.find_by_spec(spec)
115
+ else
116
+ sets.detect {|set| set.spec == spec}
117
+ end
118
+ end
119
+
86
120
  # Request the next set in this sequence.
87
- def next_set(token_string)
121
+ def next_set(find_scope, token_string)
88
122
  raise OAI::ResumptionTokenException.new unless @limit
89
-
123
+
90
124
  token = ResumptionToken.parse(token_string)
91
- total = model.count(:id, :conditions => token_conditions(token))
92
-
125
+ total = find_scope.count(:id, :conditions => token_conditions(token))
126
+
93
127
  if @limit < total
94
- select_partial(token)
128
+ select_partial(find_scope, token)
95
129
  else # end of result set
96
- model.find(:all,
97
- :conditions => token_conditions(token),
130
+ find_scope.find(:all,
131
+ :conditions => token_conditions(token),
98
132
  :limit => @limit, :order => "#{model.primary_key} asc")
99
133
  end
100
134
  end
101
-
135
+
102
136
  # select a subset of the result set, and return it with a
103
137
  # resumption token to get the next subset
104
- def select_partial(token)
105
- records = model.find(:all,
138
+ def select_partial(find_scope, token)
139
+ records = find_scope.find(:all,
106
140
  :conditions => token_conditions(token),
107
- :limit => @limit,
141
+ :limit => @limit,
108
142
  :order => "#{model.primary_key} asc")
109
143
  raise OAI::ResumptionTokenException.new unless records
110
144
  offset = records.last.send(model.primary_key.to_sym)
111
-
145
+
112
146
  PartialResult.new(records, token.next(offset))
113
147
  end
114
-
148
+
115
149
  # build a sql conditions statement from the content
116
150
  # of a resumption token. It is very important not to
117
151
  # miss any changes as records may change scope as the
118
152
  # harvest is in progress. To avoid losing any changes
119
- # the last 'id' of the previous set is used as the
153
+ # the last 'id' of the previous set is used as the
120
154
  # filter to the next set.
121
155
  def token_conditions(token)
122
156
  last = token.last
123
157
  sql = sql_conditions token.to_conditions_hash
124
-
158
+
125
159
  return sql if 0 == last
126
160
  # Now add last id constraint
127
- sql[0] << " AND #{model.primary_key} > ?"
128
- sql << last
129
-
161
+ sql.first << " AND #{model.primary_key} > :id"
162
+ sql.last[:id] = last
163
+
130
164
  return sql
131
165
  end
132
-
166
+
133
167
  # build a sql conditions statement from an OAI options hash
134
168
  def sql_conditions(opts)
135
169
  sql = []
136
- sql << "#{timestamp_field} >= ?" << "#{timestamp_field} <= ?"
137
- sql << "set = ?" if opts[:set]
138
- esc_values = [sql.join(" AND ")]
139
- esc_values << Time.parse(opts[:from].to_s).localtime << Time.parse(opts[:until].to_s).localtime.to_s #-- OAI 2.0 hack - UTC fix from record_responce
140
- esc_values << opts[:set] if opts[:set]
141
-
142
- return esc_values
170
+ esc_values = {}
171
+ if opts.has_key?(:from)
172
+ sql << "#{timestamp_field} >= :from"
173
+ esc_values[:from] = parse_to_local(opts[:from])
174
+ end
175
+ if opts.has_key?(:until)
176
+ # Handle databases which store fractions of a second by rounding up
177
+ sql << "#{timestamp_field} < :until"
178
+ esc_values[:until] = parse_to_local(opts[:until]) { |t| t + 1 }
179
+ end
180
+ return [sql.join(" AND "), esc_values]
143
181
  end
144
-
182
+
183
+ private
184
+
185
+ def parse_to_local(time)
186
+ time_obj = Time.parse(time.to_s)
187
+ time_obj = yield(time_obj) if block_given?
188
+ # Convert to same as DB - :local => :getlocal, :utc => :getutc
189
+ tzconv = "get#{model.default_timezone.to_s}".to_sym
190
+ time_obj.send(tzconv).strftime("%Y-%m-%d %H:%M:%S")
191
+ end
192
+
145
193
  end
146
194
  end
147
195
 
@@ -2,119 +2,121 @@ require 'builder' unless defined?(Builder)
2
2
  module OAI
3
3
  module Provider
4
4
  module Response
5
-
5
+
6
6
  class Base
7
7
  attr_reader :provider, :options
8
-
8
+
9
9
  class << self
10
10
  attr_reader :valid_options, :default_options, :required_options
11
11
  def valid_parameters(*args)
12
12
  @valid_options ||= []
13
13
  @valid_options = (@valid_options + args.dup).uniq
14
14
  end
15
-
15
+
16
16
  def default_parameters(options = {})
17
17
  @default_options ||= {}
18
18
  @default_options.merge! options.dup
19
19
  end
20
-
20
+
21
21
  def required_parameters(*args)
22
22
  valid_parameters(*args)
23
23
  @required_options ||= []
24
24
  @required_options = (@required_options + args.dup).uniq
25
25
  end
26
-
27
- end
26
+
27
+ end
28
28
  def initialize(provider, options = {})
29
29
  @provider = provider
30
+ @request_options = options.dup
30
31
  @options = internalize(options)
31
32
  raise OAI::ArgumentException.new unless valid?
32
33
  end
33
34
  def response
34
35
  @builder = Builder::XmlMarkup.new
35
36
  @builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8"
36
- @builder.tag!('OAI-PMH', header) do
37
+ @builder.tag!('OAI-PMH', header) do
37
38
  @builder.responseDate Time.now.utc.xmlschema
38
- #options parameter has been removed here because with it
39
- #the data won't validate against oai validators. Without, it
40
- #validates.
41
- @builder.request(provider.url) #-- OAI 2.0 Hack - removed request options
39
+ @builder.request(provider.url, (@request_options.merge(:verb => verb) unless self.class == Error))
42
40
  yield @builder
43
41
  end
44
42
  end
45
43
  private
46
-
44
+
47
45
  def header
48
- {
46
+ {
49
47
  'xmlns' => "http://www.openarchives.org/OAI/2.0/",
50
48
  'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
51
49
  'xsi:schemaLocation' => %{http://www.openarchives.org/OAI/2.0/
52
- http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd}
50
+ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd}.gsub(/\s+/, ' ')
53
51
  }
54
52
  end
55
53
  def extract_identifier(id)
56
54
  id.sub("#{provider.prefix}/", '')
57
55
  end
58
-
56
+
59
57
  def valid?
60
58
  return true if resumption?
61
-
59
+
62
60
  return true if self.class.valid_options.nil? and options.empty?
63
-
64
- # check if the request includes an argument and there are no valid
61
+
62
+ # check if the request includes an argument and there are no valid
65
63
  # arguments for that verb (Identify, for example).
66
64
  raise OAI::ArgumentException.new if self.class.valid_options.nil? && !options.empty?
67
-
65
+
68
66
  if self.class.required_options
69
67
  return false unless (self.class.required_options - @options.keys).empty?
70
68
  end
71
69
  return false unless (@options.keys - self.class.valid_options).empty?
72
70
  populate_defaults
73
71
  end
74
-
72
+
75
73
  def populate_defaults
76
74
  self.class.default_options.each do |k,v|
77
75
  @options[k] = v.respond_to?(:call) ? v.call(self) : v if not @options[k]
78
76
  end
79
77
  end
80
-
78
+
81
79
  def resumption?
82
- if @options.keys.include?(:resumption_token)
80
+ if @options.keys.include?(:resumption_token)
83
81
  return true if 1 == @options.keys.size
84
82
  raise OAI::ArgumentException.new
85
83
  end
86
84
  end
87
-
85
+
88
86
  # Convert our internal representations back into standard OAI options
89
87
  def externalize(value)
90
88
  value.to_s.gsub(/_[a-z]/) { |m| m.sub("_", '').capitalize }
91
89
  end
92
-
90
+
93
91
  def parse_date(value)
94
92
  return value if value.respond_to?(:strftime)
95
-
93
+
96
94
  Date.parse(value) # This will raise an exception for badly formatted dates
97
95
  Time.parse(value).utc # -- UTC Bug fix hack 8/08 not in core
98
96
  rescue
99
- raise OAI::ArgumentError.new
97
+ raise OAI::ArgumentError.new
100
98
  end
101
-
99
+
102
100
  def internalize(hash = {})
103
101
  internal = {}
104
102
  hash.keys.each do |key|
105
103
  internal[key.to_s.gsub(/([A-Z])/, '_\1').downcase.intern] = hash[key].dup
106
104
  end
107
-
105
+
108
106
  # Convert date formatted strings into internal time values
109
107
  # Convert date formatted strings into dates.
110
108
  internal[:from] = parse_date(internal[:from]) if internal[:from]
111
109
  internal[:until] = parse_date(internal[:until]) if internal[:until]
112
-
110
+
113
111
  internal
114
112
  end
115
-
113
+
114
+ def verb
115
+ self.class.to_s.split('::').last
116
+ end
117
+
116
118
  end
117
-
119
+
118
120
  end
119
121
  end
120
122
  end