oai_talia 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. data/README +81 -0
  2. data/Rakefile +127 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai/client/get_record.rb +15 -0
  7. data/lib/oai/client/header.rb +18 -0
  8. data/lib/oai/client/identify.rb +30 -0
  9. data/lib/oai/client/list_identifiers.rb +12 -0
  10. data/lib/oai/client/list_metadata_formats.rb +12 -0
  11. data/lib/oai/client/list_records.rb +21 -0
  12. data/lib/oai/client/list_sets.rb +19 -0
  13. data/lib/oai/client/metadata_format.rb +12 -0
  14. data/lib/oai/client/record.rb +26 -0
  15. data/lib/oai/client/response.rb +35 -0
  16. data/lib/oai/client.rb +301 -0
  17. data/lib/oai/constants.rb +34 -0
  18. data/lib/oai/exception.rb +75 -0
  19. data/lib/oai/harvester/config.rb +41 -0
  20. data/lib/oai/harvester/harvest.rb +150 -0
  21. data/lib/oai/harvester/logging.rb +70 -0
  22. data/lib/oai/harvester/mailer.rb +17 -0
  23. data/lib/oai/harvester/shell.rb +338 -0
  24. data/lib/oai/harvester.rb +39 -0
  25. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  26. data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
  27. data/lib/oai/provider/metadata_format.rb +143 -0
  28. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
  29. data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
  30. data/lib/oai/provider/model.rb +74 -0
  31. data/lib/oai/provider/partial_result.rb +18 -0
  32. data/lib/oai/provider/response/error.rb +16 -0
  33. data/lib/oai/provider/response/get_record.rb +26 -0
  34. data/lib/oai/provider/response/identify.rb +25 -0
  35. data/lib/oai/provider/response/list_identifiers.rb +35 -0
  36. data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
  37. data/lib/oai/provider/response/list_records.rb +34 -0
  38. data/lib/oai/provider/response/list_sets.rb +23 -0
  39. data/lib/oai/provider/response/record_response.rb +70 -0
  40. data/lib/oai/provider/response.rb +161 -0
  41. data/lib/oai/provider/resumption_token.rb +106 -0
  42. data/lib/oai/provider.rb +304 -0
  43. data/lib/oai/set.rb +29 -0
  44. data/lib/oai/xpath.rb +75 -0
  45. data/lib/oai.rb +8 -0
  46. data/lib/test.rb +25 -0
  47. data/test/activerecord_provider/config/connection.rb +5 -0
  48. data/test/activerecord_provider/config/database.yml +6 -0
  49. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  50. data/test/activerecord_provider/database/oaipmhtest +0 -0
  51. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  52. data/test/activerecord_provider/helpers/providers.rb +44 -0
  53. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  54. data/test/activerecord_provider/models/dc_field.rb +7 -0
  55. data/test/activerecord_provider/models/dc_set.rb +6 -0
  56. data/test/activerecord_provider/models/oai_token.rb +3 -0
  57. data/test/activerecord_provider/tc_ar_provider.rb +113 -0
  58. data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
  59. data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
  60. data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
  61. data/test/activerecord_provider/test_helper.rb +4 -0
  62. data/test/client/helpers/provider.rb +68 -0
  63. data/test/client/helpers/test_wrapper.rb +11 -0
  64. data/test/client/tc_exception.rb +36 -0
  65. data/test/client/tc_get_record.rb +37 -0
  66. data/test/client/tc_identify.rb +13 -0
  67. data/test/client/tc_libxml.rb +61 -0
  68. data/test/client/tc_list_identifiers.rb +52 -0
  69. data/test/client/tc_list_metadata_formats.rb +18 -0
  70. data/test/client/tc_list_records.rb +13 -0
  71. data/test/client/tc_list_sets.rb +19 -0
  72. data/test/client/tc_low_resolution_dates.rb +14 -0
  73. data/test/client/tc_utf8_escaping.rb +11 -0
  74. data/test/client/tc_xpath.rb +26 -0
  75. data/test/client/test_helper.rb +5 -0
  76. data/test/provider/models.rb +234 -0
  77. data/test/provider/tc_exceptions.rb +96 -0
  78. data/test/provider/tc_functional_tokens.rb +43 -0
  79. data/test/provider/tc_provider.rb +71 -0
  80. data/test/provider/tc_resumption_tokens.rb +46 -0
  81. data/test/provider/tc_simple_provider.rb +92 -0
  82. data/test/provider/test_helper.rb +36 -0
  83. data/test/test.xml +22 -0
  84. metadata +181 -0
data/README ADDED
@@ -0,0 +1,81 @@
1
+ = ruby-oai
2
+
3
+ == DESCRIPTION
4
+
5
+ ruby-oai is an Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH[http://openarchives.org])
6
+ library for Ruby. If you're not familiar with OAI-PMH[http://openarchives.org], it is the most widely used
7
+ protocol for sharing metadata between digital library repositories.
8
+
9
+ The OAI-PMH[http://openarchives.org] spec defines six verbs (Identify, ListIdentifiers, ListRecords,
10
+ GetRecord, ListSets, ListMetadataFormats) used for discovery and sharing of
11
+ metadata.
12
+
13
+ The ruby-oai gem includes a client library, a server/provider library and
14
+ an interactive harvesting shell.
15
+
16
+ === client
17
+
18
+ The OAI client library is used for harvesting metadata from repositories.
19
+ For example, to issue a ListRecords request to PubMed Central you can write:
20
+
21
+ require 'oai'
22
+ client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
23
+ for record in client.list_records
24
+ puts record.metadata
25
+ end
26
+
27
+ See OAI::Client for more details
28
+
29
+ === provider
30
+
31
+ The OAI provider library handles serving local content to other clients.
32
+
33
+ Setting up a simple provider:
34
+
35
+ class MyProvider < OAI::Provider::Base
36
+ repository_name 'My little OAI provider'
37
+ repository_url 'http://localhost/provider'
38
+ record_prefix 'oai:localhost'
39
+ admin_email 'root@localhost' # String or Array
40
+ source_model MyModel.new # Subclass of OAI::Provider::Model
41
+ end
42
+
43
+ See OAI::Provider for more details
44
+
45
+ === interactive harvester
46
+
47
+ The OAI-PMH[http://openarchives.org] client shell allows OAI Harvesting to be configured in
48
+ an interactive manner. Typing 'oai' on the command line starts the
49
+ shell.
50
+
51
+ After initial configuration, the shell can be used to manage harvesting
52
+ operations.
53
+
54
+ See OAI::Harvester::Shell for more details
55
+
56
+ == INSTALLATION
57
+
58
+ Normally the best way to install oai is from rubyforge using the gem
59
+ command line tool:
60
+
61
+ % gem install oai
62
+
63
+ If you're reading this you've presumably got the tarball or zip distribution.
64
+ So you'll need to:
65
+
66
+ % rake package
67
+ % gem install pkg/oai-x.y.z.gem
68
+
69
+ Where x.y.z is the version of the gem that was generated.
70
+
71
+ == TODO
72
+
73
+ * consolidate response classes used by provider and client
74
+ * automatic validation of metadata schemas
75
+ * email the authors with your suggestions
76
+
77
+ == AUTHORS
78
+
79
+ - Ed Summers <ehs@pobox.com>
80
+ - William Groppe <will.groppe@gmail.com>
81
+ - Terry Reese <terry.reese@oregonstate.edu>
data/Rakefile ADDED
@@ -0,0 +1,127 @@
1
+ RUBY_OAI_VERSION = '0.0.12'
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/testtask'
6
+ require 'rake/rdoctask'
7
+ require 'rake/packagetask'
8
+ require 'rake/gempackagetask'
9
+
10
+ task :default => ["test"]
11
+
12
+ task :test => ["test:client", "test:provider"]
13
+
14
+ begin
15
+ require 'jeweler'
16
+ Jeweler::Tasks.new do |s|
17
+ s.name = "oai_talia"
18
+ s.summary = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)"
19
+ s.email = "ghub@limitedcreativity.org"
20
+ s.homepage = "http://trac.talia.discovery-project.eu/"
21
+ s.description = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH). Fork of the original version by Ed Summers, aims for best standards compatibility (test with http://re.cs.uct.ac.za/)"
22
+ s.required_ruby_version = '>= 1.8.6'
23
+ s.authors = ["Ed Summers", "Daniel Hahn"]
24
+ s.homepage = 'http://github.com/net7/ruby-oai-talia/'
25
+ s.platform = Gem::Platform::RUBY
26
+ s.require_path = 'lib'
27
+ s.autorequire = 'oai'
28
+ s.has_rdoc = true
29
+ s.bindir = 'bin'
30
+ s.executables = 'oai'
31
+
32
+ s.add_dependency('builder', '>=2.0.0')
33
+
34
+ s.files = %w(README Rakefile) +
35
+ Dir.glob("{bin,test,lib}/**/*") +
36
+ Dir.glob("examples/**/*.rb")
37
+ end
38
+ Jeweler::GemcutterTasks.new
39
+ rescue LoadError
40
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
41
+ end
42
+
43
+ namespace :test do
44
+ Rake::TestTask.new('client') do |t|
45
+ t.libs += ['lib', 'test/client']
46
+ t.pattern = 'test/client/tc_*.rb'
47
+ t.verbose = true
48
+ end
49
+
50
+ Rake::TestTask.new('provider') do |t|
51
+ t.libs += ['lib', 'test/provider']
52
+ t.pattern = 'test/provider/tc_*.rb'
53
+ t.verbose = true
54
+ end
55
+
56
+ desc "Active Record base Provider Tests"
57
+ Rake::TestTask.new('activerecord_provider') do |t|
58
+ t.libs += ['lib', 'test/activerecord_provider']
59
+ t.pattern = 'test/activerecord_provider/tc_*.rb'
60
+ t.verbose = true
61
+ end
62
+
63
+ desc 'Measures test coverage'
64
+ # borrowed from here: http://clarkware.com/cgi/blosxom/2007/01/05#RcovRakeTask
65
+ task :coverage do
66
+ rm_f "coverage"
67
+ rm_f "coverage.data"
68
+ system("rcov --aggregate coverage.data --text-summary -Ilib:test/provider test/provider/tc_*.rb")
69
+ system("rcov --aggregate coverage.data --text-summary -Ilib:test/client test/client/tc_*.rb")
70
+ system("open coverage/index.html") if PLATFORM['darwin']
71
+ end
72
+
73
+ end
74
+
75
+ desc "Run all unit tests"
76
+ task :test => ['test:client', 'test:provider', 'test:activerecord_provider']
77
+
78
+ task 'test:activerecord_provider' => :create_database
79
+
80
+ task :environment do
81
+ unless defined? OAI_PATH
82
+ OAI_PATH = File.dirname(__FILE__) + '/lib/oai'
83
+ $LOAD_PATH << OAI_PATH
84
+ $LOAD_PATH << File.dirname(__FILE__) + '/test'
85
+ end
86
+ end
87
+
88
+ task :drop_database => :environment do
89
+ %w{rubygems active_record yaml}.each { |lib| require lib }
90
+ require 'activerecord_provider/database/ar_migration'
91
+ require 'activerecord_provider/config/connection'
92
+ begin
93
+ OAIPMHTables.down
94
+ rescue
95
+ end
96
+ end
97
+
98
+ task :create_database => :drop_database do
99
+ OAIPMHTables.up
100
+ end
101
+
102
+ task :load_fixtures => :create_database do
103
+ require 'test/activerecord_provider/models/dc_field'
104
+ fixtures = YAML.load_file(
105
+ File.join('test', 'activerecord_provider', 'fixtures', 'dc.yml')
106
+ )
107
+ fixtures.keys.sort.each do |key|
108
+ DCField.create(fixtures[key])
109
+ end
110
+ end
111
+
112
+ Rake::RDocTask.new('doc') do |rd|
113
+ rd.rdoc_files.include("lib/**/*.rb", "README")
114
+ rd.main = 'README'
115
+ rd.rdoc_dir = 'doc'
116
+ end
117
+
118
+ begin
119
+ require 'gokdok'
120
+ Gokdok::Dokker.new do |gd|
121
+ gd.remote_path = ''
122
+ gd.rdoc_task = :doc
123
+ gd.doc_home = 'doc'
124
+ end
125
+ rescue LoadError
126
+ puts "Gokdoc not available. Install it with: gem install gokdok"
127
+ end
data/bin/oai ADDED
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env ruby -rubygems
2
+ #
3
+ # Created by William Groppe on 2006-11-05.
4
+ # Copyright (c) 2006. All rights reserved.
5
+
6
+ require 'optparse'
7
+
8
+ DIRECTORY_LAYOUT = "%Y/%m".freeze
9
+
10
+ require 'oai/harvester'
11
+
12
+ include OAI::Harvester
13
+
14
+ conf = OAI::Harvester::Config.load
15
+
16
+ startup = :interactive
17
+
18
+ rexml = false
19
+
20
+ opts = OptionParser.new do |opts|
21
+ opts.banner = "Usage: oai ..."
22
+ opts.define_head "#{File.basename($0)}, a OAI harvester shell."
23
+ opts.separator ""
24
+ opts.separator "Options:"
25
+
26
+ opts.on("-D", "--daemon", "Non-interactive mode, to be called via scheduler") { startup = :daemon }
27
+ opts.on("-R", "--rexml", "Use rexml even if libxml is available") { rexml = true }
28
+ opts.on("-?", "--help", "Show this message") do
29
+ puts opts
30
+ exit
31
+ end
32
+
33
+ # Another typical switch to print the version.
34
+ opts.on_tail("-v", "--version", "Show version") do
35
+ class << Gem; attr_accessor :loaded_specs; end
36
+ puts Gem.loaded_specs['oai'].version
37
+ exit
38
+ end
39
+ end
40
+
41
+ begin
42
+ opts.parse! ARGV
43
+ rescue
44
+ puts opts
45
+ exit
46
+ end
47
+
48
+ unless rexml
49
+ begin # Try to load libxml to speed up harvesting
50
+ require 'xml/libxml'
51
+ rescue LoadError
52
+ end
53
+ end
54
+
55
+ case startup
56
+ when :interactive
57
+ shell = Shell.new(conf)
58
+ shell.start
59
+ when :daemon
60
+ if conf.storage
61
+ harvest = Harvest.new(conf)
62
+ harvest.start(harvestable_sites(conf))
63
+ else
64
+ puts "Missing or corrupt configuration file, cannot continue."
65
+ exit(-1)
66
+ end
67
+ end
68
+
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Created by William Groppe on 2007-02-01.
4
+ #
5
+ # Simple file-based Model. Basically just serves a directory of XML files to the
6
+ # Provider.
7
+ #
8
+ class File
9
+ def id
10
+ File.basename(self.path)
11
+ end
12
+
13
+ def to_oai_dc
14
+ self.read
15
+ end
16
+ end
17
+
18
+ class FileModel < OAI::Provider::Model
19
+ include OAI::Provider
20
+
21
+ def initialize(directory = 'data')
22
+ # nil specifies no partial results aka resumption tokens, and 'mtime' is the
23
+ # method that the provider will call for determining the timestamp
24
+ super(nil, 'mtime')
25
+ @directory = directory
26
+ end
27
+
28
+ def earliest
29
+ e = Dir["#{@directory}/*.xml"].min { |a,b| File.stat(a).mtime <=> File.stat(b).mtime }
30
+ File.stat(e).mtime.utc.xmlschema
31
+ end
32
+
33
+ def latest
34
+ e = Dir["#{@directory}/*.xml"].max { |a,b| File.stat(a).mtime <=> File.stat(b).mtime }
35
+ File.stat(e).mtime.utc.xmlschema
36
+ end
37
+
38
+ def sets
39
+ nil
40
+ end
41
+
42
+ def find(selector, opts={})
43
+ return nil unless selector
44
+
45
+ case selector
46
+ when :all
47
+ records = Dir["#{@directory}/*.xml"].sort.collect do |file|
48
+ File.new(file) unless File.stat(file).mtime.utc < opts[:from] or
49
+ File.stat(file).mtime.utc > opts[:until]
50
+ end
51
+ records
52
+ else
53
+ Find.find("#{@directory}/#{selector}") rescue nil
54
+ end
55
+ end
56
+
57
+ end
58
+
59
+ # == Example Usage:
60
+ # class FileProvider < OAI::Provider::Base
61
+ # repository_name 'XML File Provider'
62
+ # source_model FileModel.new('/tmp')
63
+ # end