oai_talia 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/README +81 -0
  2. data/Rakefile +127 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai/client/get_record.rb +15 -0
  7. data/lib/oai/client/header.rb +18 -0
  8. data/lib/oai/client/identify.rb +30 -0
  9. data/lib/oai/client/list_identifiers.rb +12 -0
  10. data/lib/oai/client/list_metadata_formats.rb +12 -0
  11. data/lib/oai/client/list_records.rb +21 -0
  12. data/lib/oai/client/list_sets.rb +19 -0
  13. data/lib/oai/client/metadata_format.rb +12 -0
  14. data/lib/oai/client/record.rb +26 -0
  15. data/lib/oai/client/response.rb +35 -0
  16. data/lib/oai/client.rb +301 -0
  17. data/lib/oai/constants.rb +34 -0
  18. data/lib/oai/exception.rb +75 -0
  19. data/lib/oai/harvester/config.rb +41 -0
  20. data/lib/oai/harvester/harvest.rb +150 -0
  21. data/lib/oai/harvester/logging.rb +70 -0
  22. data/lib/oai/harvester/mailer.rb +17 -0
  23. data/lib/oai/harvester/shell.rb +338 -0
  24. data/lib/oai/harvester.rb +39 -0
  25. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  26. data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
  27. data/lib/oai/provider/metadata_format.rb +143 -0
  28. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
  29. data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
  30. data/lib/oai/provider/model.rb +74 -0
  31. data/lib/oai/provider/partial_result.rb +18 -0
  32. data/lib/oai/provider/response/error.rb +16 -0
  33. data/lib/oai/provider/response/get_record.rb +26 -0
  34. data/lib/oai/provider/response/identify.rb +25 -0
  35. data/lib/oai/provider/response/list_identifiers.rb +35 -0
  36. data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
  37. data/lib/oai/provider/response/list_records.rb +34 -0
  38. data/lib/oai/provider/response/list_sets.rb +23 -0
  39. data/lib/oai/provider/response/record_response.rb +70 -0
  40. data/lib/oai/provider/response.rb +161 -0
  41. data/lib/oai/provider/resumption_token.rb +106 -0
  42. data/lib/oai/provider.rb +304 -0
  43. data/lib/oai/set.rb +29 -0
  44. data/lib/oai/xpath.rb +75 -0
  45. data/lib/oai.rb +8 -0
  46. data/lib/test.rb +25 -0
  47. data/test/activerecord_provider/config/connection.rb +5 -0
  48. data/test/activerecord_provider/config/database.yml +6 -0
  49. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  50. data/test/activerecord_provider/database/oaipmhtest +0 -0
  51. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  52. data/test/activerecord_provider/helpers/providers.rb +44 -0
  53. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  54. data/test/activerecord_provider/models/dc_field.rb +7 -0
  55. data/test/activerecord_provider/models/dc_set.rb +6 -0
  56. data/test/activerecord_provider/models/oai_token.rb +3 -0
  57. data/test/activerecord_provider/tc_ar_provider.rb +113 -0
  58. data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
  59. data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
  60. data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
  61. data/test/activerecord_provider/test_helper.rb +4 -0
  62. data/test/client/helpers/provider.rb +68 -0
  63. data/test/client/helpers/test_wrapper.rb +11 -0
  64. data/test/client/tc_exception.rb +36 -0
  65. data/test/client/tc_get_record.rb +37 -0
  66. data/test/client/tc_identify.rb +13 -0
  67. data/test/client/tc_libxml.rb +61 -0
  68. data/test/client/tc_list_identifiers.rb +52 -0
  69. data/test/client/tc_list_metadata_formats.rb +18 -0
  70. data/test/client/tc_list_records.rb +13 -0
  71. data/test/client/tc_list_sets.rb +19 -0
  72. data/test/client/tc_low_resolution_dates.rb +14 -0
  73. data/test/client/tc_utf8_escaping.rb +11 -0
  74. data/test/client/tc_xpath.rb +26 -0
  75. data/test/client/test_helper.rb +5 -0
  76. data/test/provider/models.rb +234 -0
  77. data/test/provider/tc_exceptions.rb +96 -0
  78. data/test/provider/tc_functional_tokens.rb +43 -0
  79. data/test/provider/tc_provider.rb +71 -0
  80. data/test/provider/tc_resumption_tokens.rb +46 -0
  81. data/test/provider/tc_simple_provider.rb +92 -0
  82. data/test/provider/test_helper.rb +36 -0
  83. data/test/test.xml +22 -0
  84. metadata +181 -0
data/README ADDED
@@ -0,0 +1,81 @@
1
+ = ruby-oai
2
+
3
+ == DESCRIPTION
4
+
5
+ ruby-oai is an Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH[http://openarchives.org])
6
+ library for Ruby. If you're not familiar with OAI-PMH[http://openarchives.org] it is the most used
7
+ protocol for sharing metadata between digital library repositories.
8
+
9
+ The OAI-PMH[http://openarchives.org] spec defines six verbs (Identify, ListIdentifiers, ListRecords,
10
+ GetRecord, ListSets, ListMetadataFormats) used for discovery and sharing of
11
+ metadata.
12
+
13
+ The ruby-oai gem includes a client library, a server/provider library and
14
+ an interactive harvesting shell.
15
+
16
+ === client
17
+
18
+ The OAI client library is used for harvesting metadata from repositories.
19
+ For example to initiate a ListRecords request to pubmed you can:
20
+
21
+ require 'oai'
22
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
23
+ for record in client.list_records
24
+ puts record.metadata
25
+ end
26
+
27
+ See OAI::Client for more details
28
+
29
+ === provider
30
+
31
+ The OAI provider library handles serving local content to other clients.
32
+
33
+ Setting up a simple provider:
34
+
35
+ class MyProvider < OAI::Provider::Base
36
+ repository_name 'My little OAI provider'
37
+ repository_url 'http://localhost/provider'
38
+ record_prefix 'oai:localhost'
39
+ admin_email 'root@localhost' # String or Array
40
+ source_model MyModel.new # Subclass of OAI::Provider::Model
41
+ end
42
+
43
+ See OAI::Provider for more details
44
+
45
+ === interactive harvester
46
+
47
+ The OAI-PMH[http://openarchives.org] client shell allows OAI Harvesting to be configured in
48
+ an interactive manner. Typing 'oai' on the command line starts the
49
+ shell.
50
+
51
+ After initial configuration, the shell can be used to manage harvesting
52
+ operations.
53
+
54
+ See OAI::Harvester::Shell for more details
55
+
56
+ == INSTALLATION
57
+
58
+ Normally the best way to install oai is from rubyforge using the gem
59
+ command line tool:
60
+
61
+ % gem install oai
62
+
63
+ If you're reading this you've presumably got the tarball or zip distribution.
64
+ So you'll need to:
65
+
66
+ % rake package
67
+ % gem install pkg/oai-x.y.z.gem
68
+
69
+ Where x.y.z is the version of the gem that was generated.
70
+
71
+ == TODO
72
+
73
+ * consolidate response classes used by provider and client
74
+ * automatic validation of metadata schemas
75
+ * email the authors with your suggestions
76
+
77
+ == AUTHORS
78
+
79
+ - Ed Summers <ehs@pobox.com>
80
+ - William Groppe <will.groppe@gmail.com>
81
+ - Terry Reese <terry.reese@oregonstate.edu>
data/Rakefile ADDED
@@ -0,0 +1,127 @@
1
+ RUBY_OAI_VERSION = '0.0.12'
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/testtask'
6
+ require 'rake/rdoctask'
7
+ require 'rake/packagetask'
8
+ require 'rake/gempackagetask'
9
+
10
+ task :default => ["test"]
11
+
12
+ task :test => ["test:client", "test:provider"]
13
+
14
+ begin
15
+ require 'jeweler'
16
+ Jeweler::Tasks.new do |s|
17
+ s.name = "oai_talia"
18
+ s.summary = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)"
19
+ s.email = "ghub@limitedcreativity.org"
20
+ s.homepage = "http://trac.talia.discovery-project.eu/"
21
+ s.description = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH). Fork of the original version by Ed Summers, aims for best standards compatibility (test with http://re.cs.uct.ac.za/)"
22
+ s.required_ruby_version = '>= 1.8.6'
23
+ s.authors = ["Ed Summers", "Daniel Hahn"]
24
+ s.homepage = 'http://github.com/net7/ruby-oai-talia/'
25
+ s.platform = Gem::Platform::RUBY
26
+ s.require_path = 'lib'
27
+ s.autorequire = 'oai'
28
+ s.has_rdoc = true
29
+ s.bindir = 'bin'
30
+ s.executables = 'oai'
31
+
32
+ s.add_dependency('builder', '>=2.0.0')
33
+
34
+ s.files = %w(README Rakefile) +
35
+ Dir.glob("{bin,test,lib}/**/*") +
36
+ Dir.glob("examples/**/*.rb")
37
+ end
38
+ Jeweler::GemcutterTasks.new
39
+ rescue LoadError
40
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
41
+ end
42
+
43
# Test tasks: one Rake::TestTask per test suite plus an rcov coverage report.
namespace :test do
  Rake::TestTask.new('client') do |t|
    t.libs += ['lib', 'test/client']
    t.pattern = 'test/client/tc_*.rb'
    t.verbose = true
  end

  Rake::TestTask.new('provider') do |t|
    t.libs += ['lib', 'test/provider']
    t.pattern = 'test/provider/tc_*.rb'
    t.verbose = true
  end

  desc "Active Record base Provider Tests"
  Rake::TestTask.new('activerecord_provider') do |t|
    t.libs += ['lib', 'test/activerecord_provider']
    t.pattern = 'test/activerecord_provider/tc_*.rb'
    t.verbose = true
  end

  desc 'Measures test coverage'
  # borrowed from here: http://clarkware.com/cgi/blosxom/2007/01/05#RcovRakeTask
  task :coverage do
    # "coverage" is a directory (its index.html is opened below), so it has to
    # be removed recursively; rm_f would silently leave the old report behind.
    rm_rf "coverage"
    rm_f "coverage.data"
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/provider test/provider/tc_*.rb")
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/client test/client/tc_*.rb")
    # RUBY_PLATFORM replaces the PLATFORM constant, which newer Rubies no
    # longer define.
    system("open coverage/index.html") if RUBY_PLATFORM =~ /darwin/
  end
end
74
+
75
+ desc "Run all unit tests"
76
+ task :test => ['test:client', 'test:provider', 'test:activerecord_provider']
77
+
78
+ task 'test:activerecord_provider' => :create_database
79
+
80
+ task :environment do
81
+ unless defined? OAI_PATH
82
+ OAI_PATH = File.dirname(__FILE__) + '/lib/oai'
83
+ $LOAD_PATH << OAI_PATH
84
+ $LOAD_PATH << File.dirname(__FILE__) + '/test'
85
+ end
86
+ end
87
+
88
+ task :drop_database => :environment do
89
+ %w{rubygems active_record yaml}.each { |lib| require lib }
90
+ require 'activerecord_provider/database/ar_migration'
91
+ require 'activerecord_provider/config/connection'
92
+ begin
93
+ OAIPMHTables.down
94
+ rescue
95
+ end
96
+ end
97
+
98
+ task :create_database => :drop_database do
99
+ OAIPMHTables.up
100
+ end
101
+
102
+ task :load_fixtures => :create_database do
103
+ require 'test/activerecord_provider/models/dc_field'
104
+ fixtures = YAML.load_file(
105
+ File.join('test', 'activerecord_provider', 'fixtures', 'dc.yml')
106
+ )
107
+ fixtures.keys.sort.each do |key|
108
+ DCField.create(fixtures[key])
109
+ end
110
+ end
111
+
112
+ Rake::RDocTask.new('doc') do |rd|
113
+ rd.rdoc_files.include("lib/**/*.rb", "README")
114
+ rd.main = 'README'
115
+ rd.rdoc_dir = 'doc'
116
+ end
117
+
118
+ begin
119
+ require 'gokdok'
120
+ Gokdok::Dokker.new do |gd|
121
+ gd.remote_path = ''
122
+ gd.rdoc_task = :doc
123
+ gd.doc_home = 'doc'
124
+ end
125
+ rescue LoadError
126
+ puts "Gokdoc not available. Install it with: gem install gokdok"
127
+ end
data/bin/oai ADDED
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env ruby -rubygems
2
+ #
3
+ # Created by William Groppe on 2006-11-05.
4
+ # Copyright (c) 2006. All rights reserved.
5
+
6
+ require 'optparse'
7
+
8
+ DIRECTORY_LAYOUT = "%Y/%m".freeze
9
+
10
+ require 'oai/harvester'
11
+
12
+ include OAI::Harvester
13
+
14
+ conf = OAI::Harvester::Config.load
15
+
16
+ startup = :interactive
17
+
18
+ rexml = false
19
+
20
# Command line definition for the harvester shell.
#
# -D / --daemon   sets startup to :daemon (non-interactive run)
# -R / --rexml    sets rexml to true, which skips loading libxml later on
# -? / --help     prints the usage text and exits
# -v / --version  prints the version of the loaded gem and exits
opts = OptionParser.new do |parser|
  parser.banner = "Usage: oai ..."
  parser.define_head "#{File.basename($0)}, a OAI harvester shell."
  parser.separator ""
  parser.separator "Options:"

  parser.on("-D", "--daemon", "Non-interactive mode, to be called via scheduler") { startup = :daemon }
  parser.on("-R", "--rexml", "Use rexml even if libxml is available") { rexml = true }
  parser.on("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  # Another typical switch to print the version.
  parser.on_tail("-v", "--version", "Show version") do
    # Only add the accessor when this RubyGems does not already expose it, so
    # an existing Gem.loaded_specs implementation is never overridden.
    unless Gem.respond_to?(:loaded_specs)
      class << Gem; attr_accessor :loaded_specs; end
    end
    specs = Gem.loaded_specs || {}
    # The gem is packaged as 'oai_talia'; 'oai' is kept as a fallback for the
    # original gem name. Print a placeholder instead of failing on nil when
    # the library was not loaded through RubyGems.
    spec = specs['oai_talia'] || specs['oai']
    puts(spec ? spec.version : 'unknown')
    exit
  end
end
40
+
41
+ begin
42
+ opts.parse! ARGV
43
+ rescue
44
+ puts opts
45
+ exit
46
+ end
47
+
48
+ unless rexml
49
+ begin # Try to load libxml to speed up harvesting
50
+ require 'xml/libxml'
51
+ rescue LoadError
52
+ end
53
+ end
54
+
55
+ case startup
56
+ when :interactive
57
+ shell = Shell.new(conf)
58
+ shell.start
59
+ when :daemon
60
+ if conf.storage
61
+ harvest = Harvest.new(conf)
62
+ harvest.start(harvestable_sites(conf))
63
+ else
64
+ puts "Missing or corrupt configuration file, cannot continue."
65
+ exit(-1)
66
+ end
67
+ end
68
+
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Created by William Groppe on 2007-02-01.
4
+ #
5
+ # Simple file based Model. Basically just serves a directory of xml files to the
6
+ # Provider.
7
+ #
8
# Reopens the core File class so that plain File objects can be handed to the
# provider as records.
class File
  # Record identifier: the base name of the file's path.
  def id
    File.basename(path)
  end

  # Returns the whole file content, used as the record's oai_dc metadata.
  #
  # The stream is rewound first so that every call returns the complete
  # document; reading from the current position would return an empty string
  # once the end of the file had been reached by an earlier call.
  def to_oai_dc
    rewind
    read
  end
end
17
+
18
# Simple file based model: every *.xml file of one directory is a record.
class FileModel < OAI::Provider::Model
  include OAI::Provider

  # directory - path of the directory holding the *.xml files
  #             (defaults to 'data').
  def initialize(directory = 'data')
    # nil specifies no partial results aka resumption tokens, and 'mtime' is the
    # method that the provider will call for determining the timestamp
    super(nil, 'mtime')
    @directory = directory
  end

  # Modification time of the oldest *.xml file, as a UTC xmlschema string.
  # The directory is expected to contain at least one *.xml file.
  def earliest
    oldest = xml_paths.min { |a, b| File.mtime(a) <=> File.mtime(b) }
    File.mtime(oldest).utc.xmlschema
  end

  # Modification time of the newest *.xml file, as a UTC xmlschema string.
  # The directory is expected to contain at least one *.xml file.
  def latest
    newest = xml_paths.max { |a, b| File.mtime(a) <=> File.mtime(b) }
    File.mtime(newest).utc.xmlschema
  end

  # This model does not define any sets.
  def sets
    nil
  end

  # Looks up records.
  #
  # selector - :all for every record, otherwise the id (file base name) of a
  #            single record; nil/false yields nil.
  # opts     - optional :from / :until time bounds applied to the file
  #            modification time when selector is :all. A missing bound means
  #            "unbounded" on that side.
  #
  # Returns an Array of open File objects sorted by path for :all, a single
  # File for an existing id, or nil when nothing matches.
  def find(selector, opts = {})
    return nil unless selector

    if selector == :all
      matching = xml_paths.sort.select { |path| mtime_in_range?(path, opts) }
      matching.map { |path| File.new(path) }
    else
      # Ids are file base names (see File#id), so directory components are
      # dropped to keep the lookup inside @directory.
      path = File.join(@directory, File.basename(selector.to_s))
      File.file?(path) ? File.new(path) : nil
    end
  end

  private

  # Paths of all *.xml files directly inside the model directory.
  def xml_paths
    Dir["#{@directory}/*.xml"]
  end

  # True when the file's UTC mtime lies within the given :from / :until bounds.
  def mtime_in_range?(path, opts)
    stamp = File.mtime(path).utc
    return false if opts[:from] && stamp < opts[:from]
    return false if opts[:until] && stamp > opts[:until]
    true
  end
end
58
+
59
+ # == Example Usage:
60
+ # class FileProvider < OAI::Provider::Base
61
+ # repository_name 'XML File Provider'
62
+ # source_model FileModel.new('/tmp')
63
+ # end