oai_talia 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +81 -0
- data/Rakefile +127 -0
- data/bin/oai +68 -0
- data/examples/models/file_model.rb +63 -0
- data/examples/providers/dublin_core.rb +474 -0
- data/lib/oai/client/get_record.rb +15 -0
- data/lib/oai/client/header.rb +18 -0
- data/lib/oai/client/identify.rb +30 -0
- data/lib/oai/client/list_identifiers.rb +12 -0
- data/lib/oai/client/list_metadata_formats.rb +12 -0
- data/lib/oai/client/list_records.rb +21 -0
- data/lib/oai/client/list_sets.rb +19 -0
- data/lib/oai/client/metadata_format.rb +12 -0
- data/lib/oai/client/record.rb +26 -0
- data/lib/oai/client/response.rb +35 -0
- data/lib/oai/client.rb +301 -0
- data/lib/oai/constants.rb +34 -0
- data/lib/oai/exception.rb +75 -0
- data/lib/oai/harvester/config.rb +41 -0
- data/lib/oai/harvester/harvest.rb +150 -0
- data/lib/oai/harvester/logging.rb +70 -0
- data/lib/oai/harvester/mailer.rb +17 -0
- data/lib/oai/harvester/shell.rb +338 -0
- data/lib/oai/harvester.rb +39 -0
- data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
- data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
- data/lib/oai/provider/metadata_format.rb +143 -0
- data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
- data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
- data/lib/oai/provider/model.rb +74 -0
- data/lib/oai/provider/partial_result.rb +18 -0
- data/lib/oai/provider/response/error.rb +16 -0
- data/lib/oai/provider/response/get_record.rb +26 -0
- data/lib/oai/provider/response/identify.rb +25 -0
- data/lib/oai/provider/response/list_identifiers.rb +35 -0
- data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
- data/lib/oai/provider/response/list_records.rb +34 -0
- data/lib/oai/provider/response/list_sets.rb +23 -0
- data/lib/oai/provider/response/record_response.rb +70 -0
- data/lib/oai/provider/response.rb +161 -0
- data/lib/oai/provider/resumption_token.rb +106 -0
- data/lib/oai/provider.rb +304 -0
- data/lib/oai/set.rb +29 -0
- data/lib/oai/xpath.rb +75 -0
- data/lib/oai.rb +8 -0
- data/lib/test.rb +25 -0
- data/test/activerecord_provider/config/connection.rb +5 -0
- data/test/activerecord_provider/config/database.yml +6 -0
- data/test/activerecord_provider/database/ar_migration.rb +59 -0
- data/test/activerecord_provider/database/oaipmhtest +0 -0
- data/test/activerecord_provider/fixtures/dc.yml +1501 -0
- data/test/activerecord_provider/helpers/providers.rb +44 -0
- data/test/activerecord_provider/helpers/set_provider.rb +36 -0
- data/test/activerecord_provider/models/dc_field.rb +7 -0
- data/test/activerecord_provider/models/dc_set.rb +6 -0
- data/test/activerecord_provider/models/oai_token.rb +3 -0
- data/test/activerecord_provider/tc_ar_provider.rb +113 -0
- data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
- data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
- data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
- data/test/activerecord_provider/test_helper.rb +4 -0
- data/test/client/helpers/provider.rb +68 -0
- data/test/client/helpers/test_wrapper.rb +11 -0
- data/test/client/tc_exception.rb +36 -0
- data/test/client/tc_get_record.rb +37 -0
- data/test/client/tc_identify.rb +13 -0
- data/test/client/tc_libxml.rb +61 -0
- data/test/client/tc_list_identifiers.rb +52 -0
- data/test/client/tc_list_metadata_formats.rb +18 -0
- data/test/client/tc_list_records.rb +13 -0
- data/test/client/tc_list_sets.rb +19 -0
- data/test/client/tc_low_resolution_dates.rb +14 -0
- data/test/client/tc_utf8_escaping.rb +11 -0
- data/test/client/tc_xpath.rb +26 -0
- data/test/client/test_helper.rb +5 -0
- data/test/provider/models.rb +234 -0
- data/test/provider/tc_exceptions.rb +96 -0
- data/test/provider/tc_functional_tokens.rb +43 -0
- data/test/provider/tc_provider.rb +71 -0
- data/test/provider/tc_resumption_tokens.rb +46 -0
- data/test/provider/tc_simple_provider.rb +92 -0
- data/test/provider/test_helper.rb +36 -0
- data/test/test.xml +22 -0
- metadata +181 -0
data/README
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
= ruby-oai
|
2
|
+
|
3
|
+
== DESCRIPTION
|
4
|
+
|
5
|
+
ruby-oai is an Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH[http://openarchives.org])
|
6
|
+
library for Ruby. If you're not familiar with OAI-PMH[http://openarchives.org] it is the most used
|
7
|
+
protocol for sharing metadata between digital library repositories.
|
8
|
+
|
9
|
+
The OAI-PMH[http://openarchives.org] spec defines six verbs (Identify, ListIdentifiers, ListRecords,
|
10
|
+
GetRecord, ListSets, ListMetadataFormats) used for discovery and sharing of
|
11
|
+
metadata.
|
12
|
+
|
13
|
+
The ruby-oai gem includes a client library, a server/provider library and
|
14
|
+
an interactive harvesting shell.
|
15
|
+
|
16
|
+
=== client
|
17
|
+
|
18
|
+
The OAI client library is used for harvesting metadata from repositories.
|
19
|
+
For example to initiate a ListRecords request to pubmed you can:
|
20
|
+
|
21
|
+
require 'oai'
|
22
|
+
client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
|
23
|
+
for record in client.list_records
|
24
|
+
puts record.metadata
|
25
|
+
end
|
26
|
+
|
27
|
+
See OAI::Client for more details
|
28
|
+
|
29
|
+
=== provider
|
30
|
+
|
31
|
+
The OAI provider library handles serving local content to other clients.
|
32
|
+
|
33
|
+
Setting up a simple provider:
|
34
|
+
|
35
|
+
class MyProvider < OAI::Provider::Base
|
36
|
+
repository_name 'My little OAI provider'
|
37
|
+
repository_url 'http://localhost/provider'
|
38
|
+
record_prefix 'oai:localhost'
|
39
|
+
admin_email 'root@localhost' # String or Array
|
40
|
+
source_model MyModel.new # Subclass of OAI::Provider::Model
|
41
|
+
end
|
42
|
+
|
43
|
+
See OAI::Provider for more details
|
44
|
+
|
45
|
+
=== interactive harvester
|
46
|
+
|
47
|
+
The OAI-PMH[http://openarchives.org] client shell allows OAI Harvesting to be configured in
|
48
|
+
an interactive manner. Typing 'oai' on the command line starts the
|
49
|
+
shell.
|
50
|
+
|
51
|
+
After initial configuration, the shell can be used to manage harvesting
|
52
|
+
operations.
|
53
|
+
|
54
|
+
See OAI::Harvester::Shell for more details
|
55
|
+
|
56
|
+
== INSTALLATION
|
57
|
+
|
58
|
+
Normally the best way to install oai is from rubyforge using the gem
|
59
|
+
command line tool:
|
60
|
+
|
61
|
+
% gem install oai
|
62
|
+
|
63
|
+
If you're reading this you've presumably got the tarball or zip distribution.
|
64
|
+
So you'll need to:
|
65
|
+
|
66
|
+
% rake package
|
67
|
+
% gem install pkg/oai-x.y.z.gem
|
68
|
+
|
69
|
+
Where x.y.z is the version of the gem that was generated.
|
70
|
+
|
71
|
+
== TODO
|
72
|
+
|
73
|
+
* consolidate response classes used by provider and client
|
74
|
+
* automatic validation of metadata schemas
|
75
|
+
* email the authors with your suggestions
|
76
|
+
|
77
|
+
== AUTHORS
|
78
|
+
|
79
|
+
- Ed Summers <ehs@pobox.com>
|
80
|
+
- William Groppe <will.groppe@gmail.com>
|
81
|
+
- Terry Reese <terry.reese@oregonstate.edu>
|
data/Rakefile
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
RUBY_OAI_VERSION = '0.0.12'
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'rake/rdoctask'
|
7
|
+
require 'rake/packagetask'
|
8
|
+
require 'rake/gempackagetask'
|
9
|
+
|
10
|
+
task :default => ["test"]
|
11
|
+
|
12
|
+
task :test => ["test:client", "test:provider"]
|
13
|
+
|
14
|
+
# Gem packaging tasks. They are only defined when the jeweler gem can be
# loaded; otherwise a hint is printed and the rest of the Rakefile still works.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "oai_talia"
    s.summary = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)"
    s.email = "ghub@limitedcreativity.org"
    # The homepage used to be assigned twice in this block; only the later
    # assignment (the GitHub repository) ever took effect, so it is the one kept.
    s.homepage = 'http://github.com/net7/ruby-oai-talia/'
    s.description = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH). Fork of the original version by Ed Summers, aims for best standards compatibility (test with http://re.cs.uct.ac.za/)"
    s.required_ruby_version = '>= 1.8.6'
    s.authors = ["Ed Summers", "Daniel Hahn"]
    s.platform = Gem::Platform::RUBY
    s.require_path = 'lib'
    s.autorequire = 'oai'
    s.has_rdoc = true
    s.bindir = 'bin'
    s.executables = 'oai'

    s.add_dependency('builder', '>=2.0.0')

    s.files = %w(README Rakefile) +
      Dir.glob("{bin,test,lib}/**/*") +
      Dir.glob("examples/**/*.rb")
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end
|
42
|
+
|
43
|
+
# Test tasks, one per test suite directory, plus an rcov based coverage run.
namespace :test do
  Rake::TestTask.new('client') do |t|
    t.libs += ['lib', 'test/client']
    t.pattern = 'test/client/tc_*.rb'
    t.verbose = true
  end

  Rake::TestTask.new('provider') do |t|
    t.libs += ['lib', 'test/provider']
    t.pattern = 'test/provider/tc_*.rb'
    t.verbose = true
  end

  desc "Active Record base Provider Tests"
  Rake::TestTask.new('activerecord_provider') do |t|
    t.libs += ['lib', 'test/activerecord_provider']
    t.pattern = 'test/activerecord_provider/tc_*.rb'
    t.verbose = true
  end

  desc 'Measures test coverage'
  # borrowed from here: http://clarkware.com/cgi/blosxom/2007/01/05#RcovRakeTask
  task :coverage do
    # "coverage" is a directory (its index.html is opened below), so it needs
    # a recursive remove; rm_f silently leaves directories in place.
    rm_rf "coverage"
    rm_f "coverage.data"
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/provider test/provider/tc_*.rb")
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/client test/client/tc_*.rb")
    # RUBY_PLATFORM replaces the PLATFORM constant, which no longer exists on
    # Ruby 1.9 and later.
    system("open coverage/index.html") if RUBY_PLATFORM['darwin']
  end

end
|
74
|
+
|
75
|
+
desc "Run all unit tests"
|
76
|
+
task :test => ['test:client', 'test:provider', 'test:activerecord_provider']
|
77
|
+
|
78
|
+
task 'test:activerecord_provider' => :create_database
|
79
|
+
|
80
|
+
# Adds the library directory and the test tree to the load path. Guarded by
# the OAI_PATH constant so the paths are only appended once.
task :environment do
  unless defined? OAI_PATH
    OAI_PATH = File.dirname(__FILE__) + '/lib/oai'
    $LOAD_PATH << OAI_PATH
    $LOAD_PATH << File.dirname(__FILE__) + '/test'
  end
end

# Loads ActiveRecord plus the test migration and connection, then drops the
# test tables.
task :drop_database => :environment do
  %w{rubygems active_record yaml}.each { |lib| require lib }
  require 'activerecord_provider/database/ar_migration'
  require 'activerecord_provider/config/connection'
  begin
    OAIPMHTables.down
  rescue
    # Deliberately ignored: dropping is best effort (presumably the tables
    # do not exist on a first run -- confirm against the migration).
  end
end

# Recreates the test tables from scratch.
task :create_database => :drop_database do
  OAIPMHTables.up
end

# Fills the freshly created tables with the Dublin Core fixtures, inserting
# the records in fixture-key order.
task :load_fixtures => :create_database do
  # Resolved through the test directory that :environment put on the load
  # path, like the requires in :drop_database, instead of relying on the
  # current working directory being searched.
  require 'activerecord_provider/models/dc_field'
  fixtures = YAML.load_file(
    File.join(File.dirname(__FILE__), 'test', 'activerecord_provider', 'fixtures', 'dc.yml')
  )
  fixtures.keys.sort.each do |key|
    DCField.create(fixtures[key])
  end
end
|
111
|
+
|
112
|
+
# Generates the RDoc documentation for the library and the README into doc/.
Rake::RDocTask.new('doc') do |rd|
  rd.rdoc_files.include("lib/**/*.rb", "README")
  rd.main = 'README'
  rd.rdoc_dir = 'doc'
end

# Optional documentation publishing through the gokdok gem, built on top of
# the :doc task above. Skipped with a hint when gokdok is not installed.
begin
  require 'gokdok'
  Gokdok::Dokker.new do |gd|
    gd.remote_path = ''
    gd.rdoc_task = :doc
    gd.doc_home = 'doc'
  end
rescue LoadError
  puts "Gokdok not available. Install it with: gem install gokdok"
end
|
data/bin/oai
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
#  Created by William Groppe on 2006-11-05.
#  Copyright (c) 2006. All rights reserved.
#
#  Command line entry point of the OAI harvester: starts the interactive
#  shell by default, or runs a harvest directly when called with --daemon.

# Loaded explicitly rather than through a "-rubygems" interpreter switch in
# the shebang line: env(1) on Linux does not split shebang arguments, and the
# switch itself is gone in Ruby 2.5 and later.
require 'rubygems'
require 'optparse'

DIRECTORY_LAYOUT = "%Y/%m".freeze

require 'oai/harvester'

include OAI::Harvester

conf = OAI::Harvester::Config.load

startup = :interactive

rexml = false

parser = OptionParser.new do |opts|
  opts.banner = "Usage: oai ..."
  opts.define_head "#{File.basename($0)}, a OAI harvester shell."
  opts.separator ""
  opts.separator "Options:"

  opts.on("-D", "--daemon", "Non-interactive mode, to be called via scheduler") { startup = :daemon }
  opts.on("-R", "--rexml", "Use rexml even if libxml is available") { rexml = true }
  opts.on("-?", "--help", "Show this message") do
    puts opts
    exit
  end

  # Another typical switch to print the version.
  opts.on_tail("-v", "--version", "Show version") do
    # Very old RubyGems releases do not expose loaded_specs publicly.
    unless Gem.respond_to?(:loaded_specs)
      class << Gem; attr_accessor :loaded_specs; end
    end
    specs = Gem.loaded_specs || {}
    # This package is published as "oai_talia"; "oai" is kept as a fallback
    # for installations of the gem under its original name.
    spec = specs['oai_talia'] || specs['oai']
    puts(spec ? spec.version : 'unknown')
    exit
  end
end

begin
  parser.parse! ARGV
rescue OptionParser::ParseError => e
  # Say what was wrong, show the usage and signal failure to the caller.
  $stderr.puts e.message
  puts parser
  exit(1)
end

unless rexml
  begin # Try to load libxml to speed up harvesting
    require 'xml/libxml'
  rescue LoadError
    # libxml is optional; nothing else is loaded in its place here.
  end
end

case startup
when :interactive
  shell = Shell.new(conf)
  shell.start
when :daemon
  if conf.storage
    harvest = Harvest.new(conf)
    # harvestable_sites is not defined in this script; presumably it comes
    # from the OAI::Harvester module included above.
    harvest.start(harvestable_sites(conf))
  else
    puts "Missing or corrupt configuration file, cannot continue."
    exit(-1)
  end
end
|
68
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Created by William Groppe on 2007-02-01.
|
4
|
+
#
|
5
|
+
# Simple file based Model. Basically just serves a directory of xml files to the
|
6
|
+
# Provider.
|
7
|
+
#
|
8
|
+
# Reopens the core File class so that an open file can be handed to the
# provider as a record: it gains an identifier and an oai_dc serialisation.
class File
  # Returns the record identifier, which is the base name of the file's path.
  def id
    File.basename(path)
  end

  # Returns the complete content of the file as the oai_dc metadata.
  #
  # The stream is rewound first so that repeated calls return the same
  # document instead of an empty string once the file has been read.
  def to_oai_dc
    begin
      rewind
    rescue SystemCallError
      # Not seekable (e.g. a pipe): fall back to reading from the current
      # position, which is what this method did before.
    end
    read
  end
end
|
17
|
+
|
18
|
+
# File based model: serves the "*.xml" files found directly inside one
# directory as records, using each file's modification time as its timestamp.
class FileModel < OAI::Provider::Model
  include OAI::Provider

  # directory - the directory holding the xml files (defaults to 'data').
  def initialize(directory = 'data')
    # nil specifies no partial results aka resumption tokens, and 'mtime' is the
    # method that the provider will call for determining the timestamp
    super(nil, 'mtime')
    @directory = directory
  end

  # Returns the oldest modification time among the xml files, in UTC and
  # XML schema format. Raises when the directory holds no xml file.
  def earliest
    oldest = xml_files.min { |a, b| File.stat(a).mtime <=> File.stat(b).mtime }
    File.stat(oldest).mtime.utc.xmlschema
  end

  # Returns the newest modification time among the xml files, in UTC and
  # XML schema format. Raises when the directory holds no xml file.
  def latest
    newest = xml_files.max { |a, b| File.stat(a).mtime <=> File.stat(b).mtime }
    File.stat(newest).mtime.utc.xmlschema
  end

  # This model does not define any sets.
  def sets
    nil
  end

  # Looks up records.
  #
  # selector - :all for every record, otherwise the identifier of one record
  #            (the file's base name, see File#id in this file).
  # opts     - for :all, optional :from and :until bounds that are compared
  #            with the files' UTC modification times; a missing bound
  #            leaves that side open.
  #
  # Returns an Array of open File objects sorted by path for :all, a single
  # open File for an identifier, or nil when the selector is nil or no such
  # file exists.
  def find(selector, opts={})
    return nil unless selector

    case selector
    when :all
      from, to = opts[:from], opts[:until]
      # Filter first, then open: the result holds no nil placeholders for
      # files outside the requested range.
      matching = xml_files.sort.select do |file|
        stamp = File.stat(file).mtime.utc
        (from.nil? || stamp >= from) && (to.nil? || stamp <= to)
      end
      matching.collect { |file| File.new(file) }
    else
      # The selector is reduced to its base name so that a crafted
      # identifier cannot reach files outside the served directory.
      path = File.join(@directory, File.basename(selector.to_s))
      File.file?(path) ? File.new(path) : nil
    end
  end

  private

  # Paths of the xml files located directly inside the served directory.
  def xml_files
    Dir["#{@directory}/*.xml"]
  end

end
|
58
|
+
|
59
|
+
# == Example Usage:
|
60
|
+
# class FileProvider < OAI::Provider::Base
|
61
|
+
# repository_name 'XML File Provider'
|
62
|
+
# source_model FileModel.new('/tmp')
|
63
|
+
# end
|