oai_talia 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/README +81 -0
- data/Rakefile +127 -0
- data/bin/oai +68 -0
- data/examples/models/file_model.rb +63 -0
- data/examples/providers/dublin_core.rb +474 -0
- data/lib/oai/client/get_record.rb +15 -0
- data/lib/oai/client/header.rb +18 -0
- data/lib/oai/client/identify.rb +30 -0
- data/lib/oai/client/list_identifiers.rb +12 -0
- data/lib/oai/client/list_metadata_formats.rb +12 -0
- data/lib/oai/client/list_records.rb +21 -0
- data/lib/oai/client/list_sets.rb +19 -0
- data/lib/oai/client/metadata_format.rb +12 -0
- data/lib/oai/client/record.rb +26 -0
- data/lib/oai/client/response.rb +35 -0
- data/lib/oai/client.rb +301 -0
- data/lib/oai/constants.rb +34 -0
- data/lib/oai/exception.rb +75 -0
- data/lib/oai/harvester/config.rb +41 -0
- data/lib/oai/harvester/harvest.rb +150 -0
- data/lib/oai/harvester/logging.rb +70 -0
- data/lib/oai/harvester/mailer.rb +17 -0
- data/lib/oai/harvester/shell.rb +338 -0
- data/lib/oai/harvester.rb +39 -0
- data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
- data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
- data/lib/oai/provider/metadata_format.rb +143 -0
- data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
- data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
- data/lib/oai/provider/model.rb +74 -0
- data/lib/oai/provider/partial_result.rb +18 -0
- data/lib/oai/provider/response/error.rb +16 -0
- data/lib/oai/provider/response/get_record.rb +26 -0
- data/lib/oai/provider/response/identify.rb +25 -0
- data/lib/oai/provider/response/list_identifiers.rb +35 -0
- data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
- data/lib/oai/provider/response/list_records.rb +34 -0
- data/lib/oai/provider/response/list_sets.rb +23 -0
- data/lib/oai/provider/response/record_response.rb +70 -0
- data/lib/oai/provider/response.rb +161 -0
- data/lib/oai/provider/resumption_token.rb +106 -0
- data/lib/oai/provider.rb +304 -0
- data/lib/oai/set.rb +29 -0
- data/lib/oai/xpath.rb +75 -0
- data/lib/oai.rb +8 -0
- data/lib/test.rb +25 -0
- data/test/activerecord_provider/config/connection.rb +5 -0
- data/test/activerecord_provider/config/database.yml +6 -0
- data/test/activerecord_provider/database/ar_migration.rb +59 -0
- data/test/activerecord_provider/database/oaipmhtest +0 -0
- data/test/activerecord_provider/fixtures/dc.yml +1501 -0
- data/test/activerecord_provider/helpers/providers.rb +44 -0
- data/test/activerecord_provider/helpers/set_provider.rb +36 -0
- data/test/activerecord_provider/models/dc_field.rb +7 -0
- data/test/activerecord_provider/models/dc_set.rb +6 -0
- data/test/activerecord_provider/models/oai_token.rb +3 -0
- data/test/activerecord_provider/tc_ar_provider.rb +113 -0
- data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
- data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
- data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
- data/test/activerecord_provider/test_helper.rb +4 -0
- data/test/client/helpers/provider.rb +68 -0
- data/test/client/helpers/test_wrapper.rb +11 -0
- data/test/client/tc_exception.rb +36 -0
- data/test/client/tc_get_record.rb +37 -0
- data/test/client/tc_identify.rb +13 -0
- data/test/client/tc_libxml.rb +61 -0
- data/test/client/tc_list_identifiers.rb +52 -0
- data/test/client/tc_list_metadata_formats.rb +18 -0
- data/test/client/tc_list_records.rb +13 -0
- data/test/client/tc_list_sets.rb +19 -0
- data/test/client/tc_low_resolution_dates.rb +14 -0
- data/test/client/tc_utf8_escaping.rb +11 -0
- data/test/client/tc_xpath.rb +26 -0
- data/test/client/test_helper.rb +5 -0
- data/test/provider/models.rb +234 -0
- data/test/provider/tc_exceptions.rb +96 -0
- data/test/provider/tc_functional_tokens.rb +43 -0
- data/test/provider/tc_provider.rb +71 -0
- data/test/provider/tc_resumption_tokens.rb +46 -0
- data/test/provider/tc_simple_provider.rb +92 -0
- data/test/provider/test_helper.rb +36 -0
- data/test/test.xml +22 -0
- metadata +181 -0
data/README
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
= ruby-oai
|
2
|
+
|
3
|
+
== DESCRIPTION
|
4
|
+
|
5
|
+
ruby-oai is an Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH[http://openarchives.org])
|
6
|
+
library for Ruby. If you're not familiar with OAI-PMH[http://openarchives.org] it is the most used
|
7
|
+
protocol for sharing metadata between digital library repositories.
|
8
|
+
|
9
|
+
The OAI-PMH[http://openarchives.org] spec defines six verbs (Identify, ListIdentifiers, ListRecords,
|
10
|
+
GetRecord, ListSets, ListMetadataFormats) used for discovery and sharing of
|
11
|
+
metadata.
|
12
|
+
|
13
|
+
The ruby-oai gem includes a client library, a server/provider library and
|
14
|
+
an interactive harvesting shell.
|
15
|
+
|
16
|
+
=== client
|
17
|
+
|
18
|
+
The OAI client library is used for harvesting metadata from repositories.
|
19
|
+
For example to initiate a ListRecords request to pubmed you can:
|
20
|
+
|
21
|
+
require 'oai'
|
22
|
+
client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
|
23
|
+
for record in client.list_records
|
24
|
+
puts record.metadata
|
25
|
+
end
|
26
|
+
|
27
|
+
See OAI::Client for more details
|
28
|
+
|
29
|
+
=== provider
|
30
|
+
|
31
|
+
The OAI provider library handles serving local content to other clients.
|
32
|
+
|
33
|
+
Setting up a simple provider:
|
34
|
+
|
35
|
+
class MyProvider < OAI::Provider::Base
|
36
|
+
repository_name 'My little OAI provider'
|
37
|
+
repository_url 'http://localhost/provider'
|
38
|
+
record_prefix 'oai:localhost'
|
39
|
+
admin_email 'root@localhost' # String or Array
|
40
|
+
source_model MyModel.new # Subclass of OAI::Provider::Model
|
41
|
+
end
|
42
|
+
|
43
|
+
See OAI::Provider for more details
|
44
|
+
|
45
|
+
=== interactive harvester
|
46
|
+
|
47
|
+
The OAI-PMH[http://openarchives.org] client shell allows OAI Harvesting to be configured in
|
48
|
+
an interactive manner. Typing 'oai' on the command line starts the
|
49
|
+
shell.
|
50
|
+
|
51
|
+
After initial configuration, the shell can be used to manage harvesting
|
52
|
+
operations.
|
53
|
+
|
54
|
+
See OAI::Harvester::Shell for more details
|
55
|
+
|
56
|
+
== INSTALLATION
|
57
|
+
|
58
|
+
Normally the best way to install oai is from rubyforge using the gem
|
59
|
+
command line tool:
|
60
|
+
|
61
|
+
% gem install oai
|
62
|
+
|
63
|
+
If you're reading this you've presumably got the tarball or zip distribution.
|
64
|
+
So you'll need to:
|
65
|
+
|
66
|
+
% rake package
|
67
|
+
% gem install pkg/oai-x.y.z.gem
|
68
|
+
|
69
|
+
Where x.y.z is the version of the gem that was generated.
|
70
|
+
|
71
|
+
== TODO
|
72
|
+
|
73
|
+
* consolidate response classes used by provider and client
|
74
|
+
* automatic validation of metadata schemas
|
75
|
+
* email the authors with your suggestions
|
76
|
+
|
77
|
+
== AUTHORS
|
78
|
+
|
79
|
+
- Ed Summers <ehs@pobox.com>
|
80
|
+
- William Groppe <will.groppe@gmail.com>
|
81
|
+
- Terry Reese <terry.reese@oregonstate.edu>
|
data/Rakefile
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# Build, test and documentation tasks for the oai_talia gem.
RUBY_OAI_VERSION = '0.0.12'

require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

task :default => ["test"]

# Gem packaging is optional: without jeweler the remaining tasks still work.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "oai_talia"
    s.summary = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH)"
    s.email = "ghub@limitedcreativity.org"
    s.description = "A ruby library for working with the Open Archive Initiative Protocol for Metadata Harvesting (OAI-PMH). Fork of the original version by Ed Summers, aims for best standards compatibility (test with http://re.cs.uct.ac.za/)"
    s.required_ruby_version = '>= 1.8.6'
    s.authors = ["Ed Summers", "Daniel Hahn"]
    # The homepage used to be assigned twice; only this (last) value ever
    # took effect, so the earlier dead assignment was dropped.
    s.homepage = 'http://github.com/net7/ruby-oai-talia/'
    s.platform = Gem::Platform::RUBY
    s.require_path = 'lib'
    s.autorequire = 'oai'
    s.has_rdoc = true
    s.bindir = 'bin'
    s.executables = 'oai'

    s.add_dependency('builder', '>=2.0.0')

    s.files = %w(README Rakefile) +
      Dir.glob("{bin,test,lib}/**/*") +
      Dir.glob("examples/**/*.rb")
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

namespace :test do
  Rake::TestTask.new('client') do |t|
    t.libs += ['lib', 'test/client']
    t.pattern = 'test/client/tc_*.rb'
    t.verbose = true
  end

  Rake::TestTask.new('provider') do |t|
    t.libs += ['lib', 'test/provider']
    t.pattern = 'test/provider/tc_*.rb'
    t.verbose = true
  end

  desc "Active Record base Provider Tests"
  Rake::TestTask.new('activerecord_provider') do |t|
    t.libs += ['lib', 'test/activerecord_provider']
    t.pattern = 'test/activerecord_provider/tc_*.rb'
    t.verbose = true
  end

  desc 'Measures test coverage'
  # borrowed from here: http://clarkware.com/cgi/blosxom/2007/01/05#RcovRakeTask
  task :coverage do
    # "coverage" is a directory (see coverage/index.html below), so it has to
    # be removed recursively; rm_f cannot delete a directory.
    rm_rf "coverage"
    rm_f "coverage.data"
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/provider test/provider/tc_*.rb")
    system("rcov --aggregate coverage.data --text-summary -Ilib:test/client test/client/tc_*.rb")
    # RUBY_PLATFORM replaces the PLATFORM constant, which is gone in Ruby 1.9+.
    system("open coverage/index.html") if RUBY_PLATFORM['darwin']
  end
end

# A second, redundant declaration of :test (client + provider only) used to
# precede this one; Rake merges prerequisites, so this single list is equivalent.
desc "Run all unit tests"
task :test => ['test:client', 'test:provider', 'test:activerecord_provider']

task 'test:activerecord_provider' => :create_database

# Puts lib/oai and test/ on the load path (once) for the database tasks below.
task :environment do
  unless defined? OAI_PATH
    OAI_PATH = File.dirname(__FILE__) + '/lib/oai'
    $LOAD_PATH << OAI_PATH
    $LOAD_PATH << File.dirname(__FILE__) + '/test'
  end
end

task :drop_database => :environment do
  %w{rubygems active_record yaml}.each { |lib| require lib }
  require 'activerecord_provider/database/ar_migration'
  require 'activerecord_provider/config/connection'
  begin
    OAIPMHTables.down
  rescue
    # Deliberately best-effort: errors from the down migration are ignored
    # (presumably the tables may simply not exist yet on a fresh checkout).
  end
end

task :create_database => :drop_database do
  OAIPMHTables.up
end

task :load_fixtures => :create_database do
  # Resolved through the test/ directory that :environment puts on the load
  # path, like the requires in :drop_database, rather than relying on "."
  # being in $LOAD_PATH.
  require 'activerecord_provider/models/dc_field'
  fixtures = YAML.load_file(
    File.join('test', 'activerecord_provider', 'fixtures', 'dc.yml')
  )
  # Create the records in a stable (sorted-key) order.
  fixtures.keys.sort.each do |key|
    DCField.create(fixtures[key])
  end
end

Rake::RDocTask.new('doc') do |rd|
  rd.rdoc_files.include("lib/**/*.rb", "README")
  rd.main = 'README'
  rd.rdoc_dir = 'doc'
end

# Documentation publishing is optional, like the jeweler tasks above.
begin
  require 'gokdok'
  Gokdok::Dokker.new do |gd|
    gd.remote_path = ''
    gd.rdoc_task = :doc
    gd.doc_home = 'doc'
  end
rescue LoadError
  puts "Gokdok not available. Install it with: gem install gokdok"
end
|
data/bin/oai
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
#  Created by William Groppe on 2006-11-05.
#  Copyright (c) 2006. All rights reserved.
#
# Command line entry point of the OAI harvester: starts the interactive shell
# by default, or runs a non-interactive harvest when called with --daemon.

# Loaded explicitly instead of through "-rubygems" on the shebang line: env(1)
# on Linux passes "ruby -rubygems" as one argument, and the ubygems shim behind
# that switch no longer exists in recent Ruby versions.
require 'rubygems'
require 'optparse'

DIRECTORY_LAYOUT = "%Y/%m".freeze

require 'oai/harvester'

include OAI::Harvester

conf = OAI::Harvester::Config.load

startup = :interactive

rexml = false

opts = OptionParser.new do |parser|
  parser.banner = "Usage: oai ..."
  parser.define_head "#{File.basename($0)}, a OAI harvester shell."
  parser.separator ""
  parser.separator "Options:"

  parser.on("-D", "--daemon", "Non-interactive mode, to be called via scheduler") { startup = :daemon }
  parser.on("-R", "--rexml", "Use rexml even if libxml is available") { rexml = true }
  parser.on("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  # Another typical switch to print the version.
  parser.on_tail("-v", "--version", "Show version") do
    # This fork is packaged as "oai_talia"; fall back to the upstream gem name
    # so the switch also works when the original "oai" gem is the one loaded.
    spec = Gem.loaded_specs['oai_talia'] || Gem.loaded_specs['oai']
    puts(spec ? spec.version : 'unknown')
    exit
  end
end

begin
  opts.parse! ARGV
rescue OptionParser::ParseError => e
  # Say what was wrong, show the usage, and signal failure to the caller
  # (daemon mode is meant to be run from a scheduler).
  puts e.message
  puts opts
  exit(1)
end

unless rexml
  begin # Try to load libxml to speed up harvesting
    require 'xml/libxml'
  rescue LoadError
    # libxml is optional; harvesting continues without it.
  end
end

case startup
when :interactive
  shell = Shell.new(conf)
  shell.start
when :daemon
  if conf.storage
    harvest = Harvest.new(conf)
    harvest.start(harvestable_sites(conf))
  else
    puts "Missing or corrupt configuration file, cannot continue."
    exit(-1)
  end
end
|
68
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Created by William Groppe on 2007-02-01.
|
4
|
+
#
|
5
|
+
# Simple file based Model. Basically just serves a directory of xml files to the
|
6
|
+
# Provider.
|
7
|
+
#
|
8
|
+
# Reopens Ruby's core File class so that an open file can be handed to the
# provider directly as a record.
class File
  # Record identifier: the file's name without its directory part.
  def id
    File.basename(path)
  end

  # Returns the complete contents of the file.
  #
  # The handle is rewound first so that the full document is returned on every
  # call; a bare +read+ leaves the position at end-of-file and would yield an
  # empty string from the second call on.
  # NOTE(review): presumably invoked by the provider to serialize the record
  # in the oai_dc format - confirm against OAI::Provider.
  def to_oai_dc
    rewind
    read
  end
end
|
17
|
+
|
18
|
+
# Model that exposes the "*.xml" files of a single directory as records.
# Each record is an open File; its timestamp is the file's modification time.
class FileModel < OAI::Provider::Model
  include OAI::Provider

  # directory:: path of the directory holding the XML files (default 'data').
  def initialize(directory = 'data')
    # nil specifies no partial results aka resumption tokens, and 'mtime' is the
    # method that the provider will call for determining the timestamp
    super(nil, 'mtime')
    @directory = directory
  end

  # Oldest modification time among the records, as a UTC XML Schema string.
  def earliest
    format_timestamp(record_mtimes.min)
  end

  # Newest modification time among the records, as a UTC XML Schema string.
  def latest
    format_timestamp(record_mtimes.max)
  end

  # This model does not define any sets.
  def sets
    nil
  end

  # Looks up records.
  #
  # selector:: +:all+ for every record, otherwise the name of a single file
  #            inside the directory. A nil/false selector yields nil.
  # opts::     for +:all+, optional +:from+ / +:until+ bounds compared against
  #            each file's UTC modification time; a missing bound is not applied.
  #
  # Returns an Array of open File objects for +:all+ (sorted by path, files
  # outside the bounds omitted), or a single File - nil when no such file
  # exists - for any other selector.
  def find(selector, opts = {})
    return nil unless selector

    if selector == :all
      lower = opts[:from]
      upper = opts[:until]
      wanted = record_paths.select do |path|
        stamp = File.mtime(path).utc
        (lower.nil? || stamp >= lower) && (upper.nil? || stamp <= upper)
      end
      wanted.collect { |path| File.new(path) }
    else
      # Only the base name is used, so a selector such as "../../etc/passwd"
      # cannot reach outside the served directory. Record ids are base names
      # (see File#id), so legitimate lookups are unaffected.
      path = File.join(@directory, File.basename(selector.to_s))
      File.file?(path) ? File.new(path) : nil
    end
  end

  private

  # Sorted paths of all XML files directly inside the directory.
  def record_paths
    Dir["#{@directory}/*.xml"].sort
  end

  # Modification times of all records.
  def record_mtimes
    record_paths.collect { |path| File.mtime(path) }
  end

  # Formats +time+ as a UTC XML Schema timestamp.
  # NOTE(review): with no records there is no timestamp to report; the current
  # time is used instead of raising a TypeError - confirm this is an acceptable
  # value for an empty repository.
  def format_timestamp(time)
    (time || Time.now).utc.xmlschema
  end
end
|
58
|
+
|
59
|
+
# == Example Usage:
|
60
|
+
# class FileProvider < OAI::Provider::Base
|
61
|
+
# repository_name 'XML File Provider'
|
62
|
+
# source_model FileModel.new('/tmp')
|
63
|
+
# end
|