oai 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
@@ -0,0 +1,44 @@
1
+ require 'active_record'
2
+ require 'oai'
3
+ require "config/connection.rb"
4
+
5
# Pull in every model definition found in the sibling ../models directory.
Dir[File.dirname(__FILE__) + "/../models/*.rb"].each { |model_file| require model_file }
8
+
9
# Test provider whose records come from DCField through an
# ActiveRecordWrapper built without any options.
class ARProvider < OAI::Provider::Base
  repository_name('ActiveRecord Based Provider')
  repository_url('http://localhost')
  record_prefix('oai:test')
  source_model(ActiveRecordWrapper.new(DCField))
end
15
+
16
# Test provider over DCField whose wrapper is created with :limit => 25.
# NOTE(review): the limit is presumably the page size that makes the provider
# hand out resumption tokens -- confirm against ActiveRecordWrapper.
class SimpleResumptionProvider < OAI::Provider::Base
  repository_name('ActiveRecord Resumption Provider')
  repository_url('http://localhost')
  record_prefix('oai:test')
  source_model(ActiveRecordWrapper.new(DCField, :limit => 25))
end
22
+
23
# Test provider over DCField that uses ActiveRecordCachingWrapper, created
# with :limit => 25, instead of the plain wrapper.
# NOTE(review): the limit is presumably the page size -- confirm against
# ActiveRecordCachingWrapper.
class CachingResumptionProvider < OAI::Provider::Base
  repository_name('ActiveRecord Caching Resumption Provider')
  repository_url('http://localhost')
  record_prefix('oai:test')
  source_model(ActiveRecordCachingWrapper.new(DCField, :limit => 25))
end
29
+
30
+
31
# Creates and removes the DCField rows the provider tests run against.
class ARLoader
  class << self
    # Reads ../fixtures/dc.yml (relative to this file) and creates one
    # DCField per entry, visiting the entries in sorted key order.
    def load
      fixture_path = File.join(File.dirname(__FILE__), '..', 'fixtures', 'dc.yml')
      rows = YAML.load_file(fixture_path)
      rows.keys.sort.each { |name| DCField.create(rows[name]) }
    end

    # Deletes every DCField row.
    def unload
      DCField.delete_all
    end
  end
end
@@ -0,0 +1,36 @@
1
# Extend ActiveRecordWrapper to support sets.
#
# Relies on +model+ and +sql_conditions+, which are not defined in this class
# and are expected to come from the superclass.
class SetModel < OAI::Provider::ActiveRecordWrapper

  # Return all available sets
  def sets
    DCSet.find(:all)
  end

  # Scope the find to a set relation if we get a set in the options.
  #
  # selector - passed straight through to the underlying finder.
  # opts     - query options; when opts[:set] is truthy it is used as the
  #            spec of the DCSet whose dc_fields are searched, and it is
  #            removed before the remaining options go to sql_conditions.
  #
  # Returns whatever the underlying finder returns.
  def find(selector, opts={})
    scope = model

    if opts[:set]
      # Work on a copy so the caller's options hash keeps its :set entry.
      opts = opts.dup
      # NOTE(review): find_by_spec yields nil for an unknown spec, which
      # makes the next call raise NoMethodError -- same as before this edit.
      scope = DCSet.find_by_spec(opts.delete(:set)).dc_fields
    end

    # The finder call is the same for :all and for any other selector, so no
    # branching on the selector is needed.
    scope.find(selector, :conditions => sql_conditions(opts))
  end

end
30
+
31
# Test provider whose source model is a SetModel around DCField, so that
# requests can be scoped to a set.
class ARSetProvider < OAI::Provider::Base
  repository_name 'ActiveRecord Set Based Provider'
  repository_url 'http://localhost'
  # Must be a method call: writing `record_prefix = ...` here only creates a
  # local variable in the class body and never configures the provider.
  record_prefix 'oai:test'
  source_model SetModel.new(DCField)
end
@@ -0,0 +1,7 @@
1
# ActiveRecord model for the rows served by the test providers.
class DCField < ActiveRecord::Base
  # NOTE(review): presumably renames the inheritance column so a real column
  # with the default name is not treated as a subclass marker -- confirm
  # against the migration.
  set_inheritance_column('DONOTINHERIT')

  # Sets this record belongs to, linked through dc_fields_dc_sets.
  has_and_belongs_to_many(:sets,
                          :join_table => "dc_fields_dc_sets",
                          :foreign_key => "dc_field_id",
                          :class_name => "DCSet")
end
@@ -0,0 +1,6 @@
1
# ActiveRecord model for a set; its member records are DCField rows linked
# through dc_fields_dc_sets.
class DCSet < ActiveRecord::Base
  has_and_belongs_to_many(:dc_fields,
                          :join_table => "dc_fields_dc_sets",
                          :foreign_key => "dc_set_id",
                          :class_name => "DCField")
end
@@ -0,0 +1,3 @@
1
# ActiveRecord model whose +params+ attribute is stored serialized.
class OaiToken < ActiveRecord::Base
  serialize(:params)
end
@@ -0,0 +1,93 @@
1
+ require 'test_helper'
2
+
3
# Runs the provider verbs of ARProvider against the DCField fixture rows.
class ActiveRecordProviderTest < Test::Unit::TestCase

  # Every test gets a new provider and freshly loaded fixtures.
  def setup
    @provider = ARProvider.new
    ARLoader.load
  end

  # Remove the fixture rows again.
  def teardown
    ARLoader.unload
  end

  def test_identify
    assert(@provider.identify =~ /ActiveRecord Based Provider/)
  end

  def test_metadata_formats
    assert_nothing_raised { REXML::Document.new(@provider.list_metadata_formats) }
    prefix = parse(@provider.list_metadata_formats).
      elements['/OAI-PMH/ListMetadataFormats/metadataFormat/metadataPrefix']
    assert(prefix.text == 'oai_dc')
  end

  def test_list_records
    assert_nothing_raised { REXML::Document.new(@provider.list_records) }
    assert_equal 100, child_count(parse(@provider.list_records), 'OAI-PMH/ListRecords')
  end

  def test_list_identifiers
    assert_nothing_raised { REXML::Document.new(@provider.list_identifiers) }
    assert_equal 100, child_count(parse(@provider.list_identifiers), 'OAI-PMH/ListIdentifiers')
  end

  def test_get_record
    assert_nothing_raised { REXML::Document.new(@provider.get_record(:identifier => 'oai:test/1')) }
    doc = parse(@provider.get_record(:identifier => 'oai:test/1'))
    assert_equal 'oai:test/1', doc.elements['OAI-PMH/GetRecord/record/header/identifier'].text
  end

  # A row flagged as deleted is still returned, with status "deleted" on its
  # header.
  def test_deleted
    DCField.update(5, :deleted => true)
    doc = parse(@provider.get_record(:identifier => 'oai:test/5'))
    header = 'OAI-PMH/GetRecord/record/header'
    assert_equal 'oai:test/5', doc.elements["#{header}/identifier"].text
    assert_equal 'deleted', doc.elements[header].attributes["status"]
  end

  def test_from
    stamp_rows("January 1 2005", "id < 90")
    stamp_rows("June 1 2005", "id < 10")

    cutoff = Chronic.parse("January 1 2006")
    doc = parse(@provider.list_records(:from => cutoff))
    assert_equal DCField.find(:all, :conditions => ["updated_at >= ?", cutoff]).size,
                 doc.elements['OAI-PMH/ListRecords'].size

    doc = parse(@provider.list_records(:from => Chronic.parse("May 30 2005")))
    assert_equal 20, child_count(doc, 'OAI-PMH/ListRecords')
  end

  def test_until
    stamp_rows("June 1 2005", "id < 10")

    doc = parse(@provider.list_records(:until => Chronic.parse("June 1 2005")))
    assert_equal 9, child_count(doc, 'OAI-PMH/ListRecords')
  end

  def test_from_and_until
    stamp_rows("June 1 2005")
    stamp_rows("June 15 2005", "id < 50")
    stamp_rows("June 30 2005", "id < 10")

    doc = parse(@provider.list_records(:from => Chronic.parse("June 3 2005"),
                                       :until => Chronic.parse("June 16 2005")))
    assert_equal 40, child_count(doc, 'OAI-PMH/ListRecords')
  end

  private

  # Parse a provider response into a REXML document.
  def parse(xml)
    REXML::Document.new(xml)
  end

  # Number of child nodes under the element found at +path+.
  def child_count(doc, path)
    doc.elements[path].to_a.size
  end

  # Set updated_at to the parsed +date+ on the rows matching +condition+
  # (all rows when no condition is given).
  def stamp_rows(date, condition = nil)
    DCField.update_all(['updated_at = ?', Chronic.parse(date)], condition)
  end

end
@@ -0,0 +1,66 @@
1
+ require 'test_helper'
2
+
3
# Exercises the set support of ARSetProvider: listing the sets and listing
# the records that belong to one set spec.
class ActiveRecordSetProviderTest < Test::Unit::TestCase

  def test_list_sets
    doc = REXML::Document.new(@provider.list_sets)
    sets = doc.elements["/OAI-PMH/ListSets"]
    # assert_equal reports both values on failure, unlike assert(a == b).
    assert_equal 4, sets.size
    assert_equal "Set A", sets[0].elements["//setName"].text
  end

  # Set A holds records 1-10 and 31-40.
  def test_set_a
    doc = REXML::Document.new(@provider.list_records(:set => "A"))
    assert_equal 20, doc.elements['OAI-PMH/ListRecords'].to_a.size
  end

  # Set B holds records 11-20.
  def test_set_b
    doc = REXML::Document.new(@provider.list_records(:set => "B"))
    assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size
  end

  # Set A:B holds records 21-30.
  def test_set_ab
    doc = REXML::Document.new(@provider.list_records(:set => "A:B"))
    assert_equal 10, doc.elements['OAI-PMH/ListRecords'].to_a.size
  end

  # Record 32 is attached to both set A and set C in define_sets.
  def test_record_with_multiple_sets
    assert_equal 2, DCField.find(32).sets.size
  end

  def setup
    @provider = ARSetProvider.new
    ARLoader.load
    define_sets
  end

  def teardown
    ARLoader.unload
    # Clear the join table directly before removing the sets themselves.
    DCSet.connection.execute("delete from dc_fields_dc_sets")
    DCSet.delete_all
  end

  # Create four sets and attach fixture records 1-40 to them.
  def define_sets
    set_a = DCSet.create(:name => "Set A", :spec => "A")
    set_b = DCSet.create(:name => "Set B", :spec => "B")
    # Needs its own spec: a second set with spec "B" would make the lookup
    # by spec in the set-aware model ambiguous.
    set_c = DCSet.create(:name => "Set C", :spec => "C")
    set_ab = DCSet.create(:name => "Set A:B", :spec => "A:B")

    DCField.find((1..10).to_a).each { |record| set_a.dc_fields << record }
    DCField.find((11..20).to_a).each { |record| set_b.dc_fields << record }
    DCField.find((21..30).to_a).each { |record| set_ab.dc_fields << record }

    DCField.find((31..40).to_a).each do |record|
      set_a.dc_fields << record
      set_c.dc_fields << record
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'test_helper'
2
+
3
# Checks that CachingResumptionProvider splits list_records results into
# pages of 25 linked by resumption tokens.
class CachingPagingProviderTest < Test::Unit::TestCase
  include REXML

  # 100 fixture rows: three pages carrying a token, then a last page without.
  def test_full_harvest
    doc = Document.new(@provider.list_records)

    3.times do
      token = doc.elements["/OAI-PMH/resumptionToken"]
      assert_not_nil token
      assert_equal 25, doc.elements["/OAI-PMH/ListRecords"].size
      doc = Document.new(@provider.list_records(:resumption_token => token.text))
    end

    assert_nil doc.elements["/OAI-PMH/resumptionToken"]
    assert_equal 25, doc.elements["/OAI-PMH/ListRecords"].size
  end

  def test_from_and_until
    DCField.update_all(['updated_at = ?', Chronic.parse("September 15 2005")],
      "id <= 25")
    DCField.update_all(['updated_at = ?', Chronic.parse("November 1 2005")],
      "id <= 50 and id > 25")

    # Should return 50 records broken into 2 groups of 25.
    doc = Document.new(
      @provider.list_records(
        :from => Chronic.parse("September 1 2005"),
        :until => Chronic.parse("November 30 2005"))
      )
    assert_equal 25, doc.elements["/OAI-PMH/ListRecords"].size
    # Check for the token before reading its text; otherwise a missing token
    # surfaces as a NoMethodError instead of a failed assertion.
    assert_not_nil doc.elements["/OAI-PMH/resumptionToken"]
    token = doc.elements["/OAI-PMH/resumptionToken"].text
    doc = Document.new(@provider.list_records(:resumption_token => token))
    assert_equal 25, doc.elements["/OAI-PMH/ListRecords"].size
    assert_nil doc.elements["/OAI-PMH/resumptionToken"]
  end

  # Every test gets a new provider and freshly loaded fixtures.
  def setup
    @provider = CachingResumptionProvider.new
    ARLoader.load
  end

  def teardown
    ARLoader.unload
  end

end
@@ -0,0 +1,55 @@
1
+ require 'test_helper'
2
+
3
# Checks that SimpleResumptionProvider pages list_records results and links
# the pages with resumption tokens.
class SimpleResumptionProviderTest < Test::Unit::TestCase
  include REXML

  # Every test gets a new provider and freshly loaded fixtures.
  def setup
    @provider = SimpleResumptionProvider.new
    ARLoader.load
  end

  def teardown
    ARLoader.unload
  end

  # Three pages of 25 carrying a token, then a final page of 25 without one.
  def test_full_harvest
    page = Document.new(@provider.list_records)

    3.times do
      assert_not_nil token_element(page)
      assert_equal 25, record_count(page)
      page = following_page(page)
    end

    assert_nil token_element(page)
    assert_equal 25, record_count(page)
  end

  def test_from_and_until
    DCField.update_all(['updated_at = ?', Chronic.parse("September 15 2005")], "id < 26")
    DCField.update_all(['updated_at = ?', Chronic.parse("November 1 2005")], "id < 51 and id > 25")

    window_start = Chronic.parse("September 1 2005")
    window_end = Chronic.parse("November 30 2005")
    total = DCField.count(:id,
      :conditions => ["updated_at >= ? AND updated_at <= ?", window_start, window_end])

    # Should return 50 records broken into 2 groups of 25.
    page = Document.new(@provider.list_records(:from => window_start, :until => window_end))
    assert_equal total/2, record_count(page)
    assert_not_nil token_element(page)

    page = following_page(page)
    assert_equal total/2, record_count(page)
    assert_nil token_element(page)
  end

  private

  # The resumptionToken element of +doc+, or nil when there is none.
  def token_element(doc)
    doc.elements["/OAI-PMH/resumptionToken"]
  end

  # Number of child nodes under ListRecords.
  def record_count(doc)
    doc.elements["/OAI-PMH/ListRecords"].to_a.size
  end

  # Request and parse the page that the token in +doc+ points to.
  def following_page(doc)
    Document.new(@provider.list_records(:resumption_token => token_element(doc).text))
  end

end
@@ -0,0 +1,4 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/helpers/providers'
4
+ require File.dirname(__FILE__) + '/helpers/set_provider'
@@ -0,0 +1,68 @@
1
+ require 'webrick'
2
+ require File.dirname(__FILE__) + '/../../provider/models'
3
+
4
# Provider served to the client tests; its source model is ComplexModel.new(100).
class ComplexProvider < OAI::Provider::Base
  repository_name('Complex Provider')
  repository_url('http://localhost')
  record_prefix('oai:test')
  source_model(ComplexModel.new(100))
end
10
+
11
# WEBrick servlet that answers requests through a ComplexProvider, plus
# class-level helpers that run it on a background thread for the client tests.
class ProviderServer < WEBrick::HTTPServlet::AbstractServlet
  @@server = nil
  # Initialised up front: reading an unset class variable raises NameError,
  # which `stop` would hit whenever `start` failed before spawning the thread
  # (and, inside `wrap`, that NameError would hide the real failure).
  @@thread = nil

  def initialize(server)
    super(server)
    @provider = ComplexProvider.new
  end

  # Pass the request's query to the provider and send its result back as
  # text/xml. On any StandardError the error and its backtrace are printed
  # and the backtrace is returned with status 500.
  def do_GET(req, res)
    res.body = @provider.process_request(req.query)
    res.status = 200
    res['Content-Type'] = 'text/xml'
  rescue => err
    puts err
    puts err.backtrace.join("\n")
    res.body = err.backtrace.join("\n")
    res.status = 500
  end

  # Start a server on 127.0.0.1:+port+ in a background thread with this
  # servlet mounted at /oai. Does nothing when a server is already set up.
  def self.start(port)
    return if @@server

    @@server = WEBrick::HTTPServer.new(
      :BindAddress => '127.0.0.1',
      :Logger => WEBrick::Log.new('/dev/null'),
      :AccessLog => [],
      :Port => port)
    @@server.mount("/oai", ProviderServer)

    # Guarded because `stop` clears @@server while this handler stays installed.
    trap("INT") { @@server.shutdown if @@server }
    @@thread = Thread.new { @@server.start }
    puts "Starting Webrick/Provider on port[#{port}]"
  end

  # Stop the background server, if any, and forget it so that `start` can
  # bring up a new one later.
  def self.stop
    puts "Stopping Webrick/Provider"
    @@server.shutdown if @@server
    if @@thread
      @@thread.exit
      # Bounded wait for the server thread to finish before a later `start`.
      @@thread.join(5)
    end
    @@server = nil
    @@thread = nil
  end

  # Run the given block while the server is up on +port+; the server is
  # stopped afterwards even if the block raises.
  def self.wrap(port = 3333)
    start(port)

    # Wait for startup
    sleep 2

    yield
  ensure
    stop
  end

end