oai_talia 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +81 -0
- data/Rakefile +127 -0
- data/bin/oai +68 -0
- data/examples/models/file_model.rb +63 -0
- data/examples/providers/dublin_core.rb +474 -0
- data/lib/oai/client/get_record.rb +15 -0
- data/lib/oai/client/header.rb +18 -0
- data/lib/oai/client/identify.rb +30 -0
- data/lib/oai/client/list_identifiers.rb +12 -0
- data/lib/oai/client/list_metadata_formats.rb +12 -0
- data/lib/oai/client/list_records.rb +21 -0
- data/lib/oai/client/list_sets.rb +19 -0
- data/lib/oai/client/metadata_format.rb +12 -0
- data/lib/oai/client/record.rb +26 -0
- data/lib/oai/client/response.rb +35 -0
- data/lib/oai/client.rb +301 -0
- data/lib/oai/constants.rb +34 -0
- data/lib/oai/exception.rb +75 -0
- data/lib/oai/harvester/config.rb +41 -0
- data/lib/oai/harvester/harvest.rb +150 -0
- data/lib/oai/harvester/logging.rb +70 -0
- data/lib/oai/harvester/mailer.rb +17 -0
- data/lib/oai/harvester/shell.rb +338 -0
- data/lib/oai/harvester.rb +39 -0
- data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
- data/lib/oai/provider/metadata_format/oai_europeana.rb +38 -0
- data/lib/oai/provider/metadata_format.rb +143 -0
- data/lib/oai/provider/model/activerecord_caching_wrapper.rb +134 -0
- data/lib/oai/provider/model/activerecord_wrapper.rb +139 -0
- data/lib/oai/provider/model.rb +74 -0
- data/lib/oai/provider/partial_result.rb +18 -0
- data/lib/oai/provider/response/error.rb +16 -0
- data/lib/oai/provider/response/get_record.rb +26 -0
- data/lib/oai/provider/response/identify.rb +25 -0
- data/lib/oai/provider/response/list_identifiers.rb +35 -0
- data/lib/oai/provider/response/list_metadata_formats.rb +34 -0
- data/lib/oai/provider/response/list_records.rb +34 -0
- data/lib/oai/provider/response/list_sets.rb +23 -0
- data/lib/oai/provider/response/record_response.rb +70 -0
- data/lib/oai/provider/response.rb +161 -0
- data/lib/oai/provider/resumption_token.rb +106 -0
- data/lib/oai/provider.rb +304 -0
- data/lib/oai/set.rb +29 -0
- data/lib/oai/xpath.rb +75 -0
- data/lib/oai.rb +8 -0
- data/lib/test.rb +25 -0
- data/test/activerecord_provider/config/connection.rb +5 -0
- data/test/activerecord_provider/config/database.yml +6 -0
- data/test/activerecord_provider/database/ar_migration.rb +59 -0
- data/test/activerecord_provider/database/oaipmhtest +0 -0
- data/test/activerecord_provider/fixtures/dc.yml +1501 -0
- data/test/activerecord_provider/helpers/providers.rb +44 -0
- data/test/activerecord_provider/helpers/set_provider.rb +36 -0
- data/test/activerecord_provider/models/dc_field.rb +7 -0
- data/test/activerecord_provider/models/dc_set.rb +6 -0
- data/test/activerecord_provider/models/oai_token.rb +3 -0
- data/test/activerecord_provider/tc_ar_provider.rb +113 -0
- data/test/activerecord_provider/tc_ar_sets_provider.rb +72 -0
- data/test/activerecord_provider/tc_caching_paging_provider.rb +55 -0
- data/test/activerecord_provider/tc_simple_paging_provider.rb +57 -0
- data/test/activerecord_provider/test_helper.rb +4 -0
- data/test/client/helpers/provider.rb +68 -0
- data/test/client/helpers/test_wrapper.rb +11 -0
- data/test/client/tc_exception.rb +36 -0
- data/test/client/tc_get_record.rb +37 -0
- data/test/client/tc_identify.rb +13 -0
- data/test/client/tc_libxml.rb +61 -0
- data/test/client/tc_list_identifiers.rb +52 -0
- data/test/client/tc_list_metadata_formats.rb +18 -0
- data/test/client/tc_list_records.rb +13 -0
- data/test/client/tc_list_sets.rb +19 -0
- data/test/client/tc_low_resolution_dates.rb +14 -0
- data/test/client/tc_utf8_escaping.rb +11 -0
- data/test/client/tc_xpath.rb +26 -0
- data/test/client/test_helper.rb +5 -0
- data/test/provider/models.rb +234 -0
- data/test/provider/tc_exceptions.rb +96 -0
- data/test/provider/tc_functional_tokens.rb +43 -0
- data/test/provider/tc_provider.rb +71 -0
- data/test/provider/tc_resumption_tokens.rb +46 -0
- data/test/provider/tc_simple_provider.rb +92 -0
- data/test/provider/test_helper.rb +36 -0
- data/test/test.xml +22 -0
- metadata +181 -0
@@ -0,0 +1,338 @@
|
|
1
|
+
module OAI
  module Harvester
    # = OAI::Harvester::Shell
    #
    # An OAI-PMH client shell allowing OAI harvesting to be configured in
    # an interactive manner.  Typing 'oai' on the command line starts the
    # shell.  The first time the shell is run it will prompt for the following
    # configuration details:
    # 1. A storage directory for all harvested records.  Harvests will be
    #    stored under this directory in a directory structure based on the
    #    date of the harvest.
    # 2. A log file directory.
    # 3. Email address(es) for sending daily harvesting activity reports.
    # 4. Network address of the SMTP server for sending mail.
    #
    # After the initial configuration, new harvest sites can be added by using
    # the 'new' command.  Sites are identified via a nickname assigned by the
    # user.  After choosing a nickname, provide the URL of a harvestable site,
    # and the shell will prompt you for the rest of the configuration
    # information.
    #
    # The shell automatically pulls down the list of sets in the repository and
    # the supported metadata prefixes, making it very simple to set up harvests.
    #
    class Shell
      include Readline

      # The only words accepted at the prompt.  Input is dispatched with
      # +send+, so restricting it to this list keeps typed text from reaching
      # arbitrary private or Kernel methods.
      COMMANDS = %w(help harvest config list info new remove edit).freeze

      # Separator between items of a user-typed list (commas and/or whitespace).
      LIST_SEPARATOR = /[,\s]+/

      # How many "Permanently Redirected" hops #verify follows before giving up.
      MAX_REDIRECTS = 5

      # config - the harvester configuration object.  It must respond to
      #          storage, sites, email, mail_server, logfile (with writers)
      #          and save.
      def initialize(config)
        @conf = config
        @conf.sites ||= {} # Initialize the sites hash if there isn't one
      end

      # Runs the interactive loop until the user types 'exit' or the input
      # reaches end-of-file.  When no storage directory is configured yet the
      # first-time setup is run before the prompt appears.
      def start
        unless @conf.storage
          banner "Entering first-time setup"
          config
          setup_cron
        end
        puts "type 'help' for help"
        loop do
          input = readline("oai> ", true)
          break if input.nil? # EOF (e.g. Ctrl-D): leave instead of spinning
          input = input.strip
          break if 'exit' == input
          next if input.empty?
          dispatch(input)
        end
      end

      private

      # Runs one line of user input.  The first word selects the command and
      # the remaining words are handed over as a single space-joined string.
      # Errors raised by a command are reported and never leave the loop.
      def dispatch(input)
        command, *rest = input.split
        unless COMMANDS.include?(command)
          puts "Not a recognized command. Type 'help' for clues."
          return
        end
        args = rest.empty? ? [] : [rest.join(" ")]
        arity = method(command).arity
        if arity >= 0 && arity != args.size
          puts "Wrong number of arguments for '#{command}'. Type 'help' for clues."
          return
        end
        send(command, *args)
      rescue
        puts "An error occurred:"
        puts $!
        puts $!.backtrace.join("\n")
      end

      def help
        banner "Commands:"
        puts "\tharvest site [date] - Harvest site(s) manually"
        puts "\tconfig - Configure harvester"
        puts "\tlist <config> - List known providers or configuration"
        puts "\tinfo [site[, site]] - Show information about a provider."
        puts "\tnew - Add a new provider site to harvester"
        puts "\tremove [site] - Remove a provider site from harvester"
        puts "\tedit [site] - Change settings for a provider site"
        puts "\texit - Exit the harvester shell.\n\n"
      end

      # Harvests a single configured site.
      #
      # options - "site" or "site date"; everything after the site nickname
      #           is parsed with Time.parse and converted to UTC.
      def harvest(options = nil)
        site, *date = options.to_s.split
        unless @conf.sites.key?(site)
          puts "Unknown repository: '#{site}'"
          puts # blank line
          return
        end

        banner "Harvesting '#{site}'"
        from = nil
        unless date.empty?
          begin
            from = Time.parse(date.join(' ')).utc
          rescue ArgumentError, NoMethodError
            # Time.parse raises ArgumentError for unparseable input.
            puts "Couldn't parse the date supplied"
            return
          end
        end
        Harvest.new(@conf, @conf.storage, from).start(site, true)
        puts "done"
        puts # blank line
      end

      # Lists the configured site nicknames, or the harvester settings when
      # called with 'config'.
      def list(args = nil)
        if 'config' == args
          banner "Current Configuration"
          list_config
        else
          banner "Configured Repositories"
          @conf.sites.keys.each { |nickname| puts nickname }
        end
        puts # blank line
      end

      # Shows the stored settings of the named sites.
      #
      # args - nicknames separated by commas and/or whitespace.  Without any
      #        nickname every configured site is shown.
      def info(args = nil)
        banner "Provider Site Information"
        nicknames = args.to_s.split(LIST_SEPARATOR).reject { |n| n.empty? }
        if nicknames.empty?
          @conf.sites.each_key { |site| print_site(site) }
        else
          nicknames.each do |site|
            if @conf.sites.key?(site)
              print_site(site)
            else
              puts "#{site} doesn't appear to be configured, use list to see configured repositories."
            end
          end
        end
        puts
      end

      def new
        banner "Define New Harvesting Site"
        store_site(form)
      end

      def edit(name)
        banner "Edit Harvesting Site"
        store_site(form(name))
      end

      # Saves the [name, site] pair returned by #form.  An aborted form
      # returns nil, in which case nothing is stored or saved.
      def store_site(result)
        return unless result
        name, site = result
        @conf.sites[name] = site
        @conf.save
      end

      def remove(site)
        unless @conf.sites.key?(site)
          puts "Unknown repository: '#{site}'"
          return
        end
        answer = readline("Remove #{site}? (Y/N): ")
        if 'Y' == answer.to_s.strip.upcase # nil answer (EOF) counts as "no"
          @conf.sites.delete(site)
          @conf.save
          puts "#{site} removed"
        end
      end

      # Interactive form used by 'new' and 'edit'.
      #
      # name - nickname of the site to edit; when nil a new, unused nickname
      #        is asked for first.
      #
      # Returns [name, site] where site is a hash with the 'url', 'prefix',
      # 'set' and 'period' keys filled in, or nil when the form was aborted.
      # The hash stored in the configuration is never modified here; the
      # caller decides whether to store the result.
      #
      # Example provider URL: http://oai.getty.edu:80/oaicat/OAIHandler
      def form(name = nil)
        name = prompt_for_nickname unless name
        # Work on a copy so an aborted edit leaves the stored settings alone.
        site = (@conf.sites[name] || {}).dup

        # URL
        url = prompt("url", site['url'])
        until (verified = verify(url))
          puts "Trouble contacting provider, bad url?"
          url = prompt("url", site['url'])
        end
        site['url'] = verified

        # Metadata formats
        formats = metadata(site['url'])
        report "Repository supports [#{formats.join(', ')}] metadata formats."
        prefix = prompt("prefix", site['prefix'])
        prefix = prompt("prefix", site['prefix']) until formats.include?(prefix)
        site['prefix'] = prefix

        # Sets
        available = ['all']
        begin
          available.concat sets(site['url'])
        rescue
          available = nil # set listing failed; fall back to harvesting everything
        end
        if available
          default_set = available.include?(site['set']) ? site['set'] : 'all'
          report "Repository supports [#{available.join(', ')}] metadata sets."
          set = prompt("set", default_set)
          set = prompt("set", default_set) until available.include?(set)
          site['set'] = set
        else
          site['set'] = 'all'
        end

        # Period
        default_period = site['period'] || "daily"
        period = expand_period(prompt("period", default_period))
        until Config::PERIODS.include?(period)
          puts "Must be daily, weekly, or monthly"
          period = expand_period(prompt("period", default_period))
        end
        site['period'] = period

        return [name, site]
      rescue
        # Reached when #prompt hits end-of-file or the provider cannot be
        # queried.
        puts "Problem adding/updating provider, aborting. (#{$!})"
        nil
      end

      # Asks for a nickname until a non-empty, unused one is given.
      def prompt_for_nickname
        name = prompt("nickname")
        while name.nil? || @conf.sites.key?(name)
          puts(name.nil? ? "A nickname is required." : "Nickname already in use, choose another.")
          name = prompt("nickname")
        end
        name
      end

      # Interactive harvester configuration.  The settings are applied and
      # saved together once every prompt has been answered.
      def config
        directory = prompt("storage directory", @conf.storage)
        until directory_acceptable(directory)
          directory = prompt("storage directory", @conf.storage)
        end

        current_email = @conf.email ? Array(@conf.email).join(', ') : nil
        email = parse_emails(prompt("email", current_email))

        mail_server = prompt("mail server", @conf.mail_server)

        logfile = prompt("log file(s) directory", @conf.logfile)
        until directory_acceptable(logfile)
          logfile = prompt("log file(s) directory", @conf.logfile)
        end

        @conf.email = email
        @conf.mail_server = mail_server
        @conf.storage = directory
        @conf.logfile = logfile
        @conf.save
      rescue
        # #prompt raises at end-of-file and save may fail; report it rather
        # than letting the user believe the settings were stored.
        puts "Configuration not saved. (#{$!})"
        nil
      end

      # Prints "key: value" with the key right-aligned in a column of width
      # split.
      def display(key, value, split = 40)
        puts "#{key.to_s.rjust(split)}: #{value}"
      end

      # Prints str preceded by a blank line and underlined with dashes.
      def banner(str)
        puts "\n#{str}"
        puts "-" * str.size
      end

      def report(str)
        puts "\n#{str}\n"
      end

      def indent(number)
        print "\t" * number
      end

      # Reads one line from the user.
      #
      # text    - label shown in the prompt
      # default - value shown in brackets and returned for an empty answer
      # split   - minimum prompt width; shorter prompts are padded on the left
      #
      # Raises RuntimeError when the input reaches end-of-file.
      def prompt(text, default = nil, split = 20)
        prompt_text = "#{text} [#{default}]: "
        print " " * (split - prompt_text.size) if prompt_text.size < split
        value = readline(prompt_text, true)
        raise RuntimeError.new("Exit loop") unless value
        value.empty? ? default : value
      end

      # Checks that url answers an Identify request.
      #
      # A "Permanently Redirected to [...]" error is followed to the new
      # location, at most redirects_left times.
      #
      # Returns the url that finally answered, or nil when none did.
      def verify(url, redirects_left = MAX_REDIRECTS)
        client = OAI::Client.new(url, :redirects => false)
        identify = client.identify
        puts "Repository name \"#{identify.repository_name}\""
        url
      rescue => error
        if error.to_s =~ /^Permanently Redirected to \[(.*)\?.*\]/
          target = $1
          if redirects_left > 0
            report "Provider redirected to: #{target}"
            verify(target, redirects_left - 1)
          else
            puts "Error selecting repository: too many redirects"
            nil
          end
        else
          puts "Error selecting repository: #{error}"
          nil
        end
      end

      # Returns the metadata prefixes listed by the provider at url.
      def metadata(url)
        OAI::Client.new(url).list_metadata_formats.to_a.map { |format| format.prefix }
      end

      # Returns the set specs listed by the provider at url.
      def sets(url)
        OAI::Client.new(url).list_sets.to_a.map { |set| set.spec }
      end

      # True when dir is an existing, writable directory; otherwise prints a
      # notice and returns false.
      def directory_acceptable(dir)
        return true if dir && File.directory?(dir) && File.writable?(dir)
        puts "Directory doesn't exist, or isn't writtable."
        false
      end

      # Expands an abbreviation such as "w" to the first entry of
      # Config::PERIODS that starts with it.  Returns nil when nothing
      # matches.  The input is compared literally, never used as a pattern.
      def expand_period(str)
        return nil if str.nil? || str.empty?
        return str if Config::PERIODS.include?(str)
        Config::PERIODS.find { |period| period.start_with?(str) }
      end

      # Splits a comma and/or whitespace separated list of addresses into an
      # array without blank entries.  Returns nil for nil input.
      def parse_emails(emails)
        return nil unless emails
        emails.split(LIST_SEPARATOR).reject { |address| address.empty? }
      end

      def list_config
        display("storage directory", @conf.storage, 20)
        display("email", Array(@conf.email).join(', '), 20) if @conf.email
        display("mail server", @conf.mail_server, 20) if @conf.mail_server
        display("log location", @conf.logfile, 20) if @conf.logfile
      end

      def list_sites
        banner "Sites"
        @conf.sites.each_key { |site| print_site(site) }
      end

      # Prints the nickname followed by each stored setting of the site.
      def print_site(site)
        puts site
        settings = @conf.sites[site]
        settings.each { |k, v| display(k, v, 15) } if settings
      end

      def setup_cron
        banner "Scheduling Automatic Harvesting"
        puts "To activate automatic harvesting you must add an entry to"
        puts "your scheduler. Linux/Mac OS X users should add the following"
        puts "entry to their crontabs:\n\n"
        puts "0 0 * * * #{$0} -D\n\n"
        puts "Windows users should use WinAt to schedule"
        puts "#{$0} to run every night.\n\n\n"
      end

    end

  end
end
|
338
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
require 'net/smtp'
|
3
|
+
require 'yaml'
|
4
|
+
require 'tempfile'
|
5
|
+
require 'logger'
|
6
|
+
require 'fileutils'
|
7
|
+
require 'ostruct'
|
8
|
+
require 'readline'
|
9
|
+
require 'chronic'
|
10
|
+
require 'socket'
|
11
|
+
|
12
|
+
require 'oai/client'
|
13
|
+
require 'oai/harvester/config'
|
14
|
+
require 'oai/harvester/harvest'
|
15
|
+
require 'oai/harvester/logging'
|
16
|
+
require 'oai/harvester/mailer'
|
17
|
+
require 'oai/harvester/shell'
|
18
|
+
|
19
|
+
# Returns the nicknames of every site in conf.sites that is due for a
# harvest, as decided by needs_updating from the site's 'period' and 'last'
# entries.  Returns an empty array when no sites are configured.
def harvestable_sites(conf)
  configured = conf.sites
  return [] unless configured

  due = configured.select do |_nickname, settings|
    needs_updating(settings['period'], settings['last'])
  end
  due.map { |nickname, _settings| nickname }
end
|
26
|
+
|
27
|
+
# Decides whether a site is due for another harvest.
#
# period - 'daily', 'weekly' or 'monthly'; any other value is never due
# last   - time of the previous harvest, or nil when there has been none
#
# A site that was never harvested is always due.  Otherwise it is due once
# more than the period's threshold in seconds has elapsed; the thresholds
# are slightly under a full day (86400), week (604800) and 30 days (2592000).
def needs_updating(period, last)
  return true if last.nil?

  threshold = { 'daily' => 86000, 'weekly' => 604000, 'monthly' => 2591000 }[period]
  threshold ? Time.now - last > threshold : false
end
|
39
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module OAI::Provider::Metadata
  # = OAI::Provider::Metadata::DublinCore
  #
  # Format definition for unqualified Dublin Core, selected with the
  # 'oai_dc' metadata prefix.  It only supplies the prefix, schema,
  # namespace and field list; the inherited Format#encode does the rest.
  class DublinCore < Format

    def initialize
      @prefix, @element_namespace = 'oai_dc', 'dc'
      @schema    = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
      @namespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
      @fields    = %w(title creator subject description publisher
                      contributor date type format identifier
                      source language relation coverage rights).map { |name| name.to_sym }
    end

    # Attributes placed on the root element of every encoded record:
    # the namespace declarations and the schema location.
    def header_specification
      {
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' =>
          %{http://www.openarchives.org/OAI/2.0/oai_dc/
            http://www.openarchives.org/OAI/2.0/oai_dc.xsd}
      }
    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module OAI::Provider::Metadata
  # = OAI::Provider::Metadata::Europeana
  #
  # Simple implementation of the Europeana metadata format, selected with
  # the 'ese' metadata prefix.  Unlike DublinCore the field list is a hash:
  # each key is the XML namespace prefix under which the listed fields are
  # emitted.
  class Europeana < Format

    def initialize
      @prefix = 'ese'
      # NOTE(review): schema and namespace are the same values oai_dc uses;
      # confirm whether the Europeana schema location was intended here.
      @schema = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
      @namespace = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
      @element_namespace = 'ese'
      @fields = {
        :dc => [:title, :creator, :subject, :description, :publisher,
                :contributor, :date, :type, :format, :identifier,
                :source, :language, :relation, :coverage, :rights],
        # Always an array: the encoder iterates over each value, and a bare
        # Symbol cannot be turned into a list with #to_a on Ruby 1.9+.
        :dcterms => [:provenance],
        :ese => [:userTag, :unstored, :object, :language, :provider,
                 :type, :uri, :year, :hasObject, :country]
      }
    end

    # Attributes placed on the root element of every encoded record:
    # the namespace declarations and the schema locations.
    def header_specification
      {
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:ese' => 'http://europeana.eu/terms/',
        # The dc: prefix carries the Dublin Core element set, which lives
        # in elements/1.1/ (the namespace the schemaLocation below pairs
        # with dc.xsd); terms/ belongs to dcterms: only.
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:dcterms' => 'http://purl.org/dc/terms/',
        'xmlns:europeana' => 'http://europeana.eu/terms/',
        'xmlns:local' => 'http://metadata.cerl.org/oai/namespace/local/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' =>
          %{http://purl.org/dc/elements/1.1/
            http://dublincore.org/schemas/xmls/qdc/dc.xsd http://purl.org/dc/terms/
            http://dublincore.org/schemas/xmls/qdc/dcterms.xsd}
      }
    end

  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module OAI::Provider::Metadata
|
4
|
+
# == Metadata Base Class
#
# Format is the base class from which all other format classes should
# inherit.  Format classes provide mapping of record fields into XML.
#
# * prefix - contains the metadata_prefix used to select the format
# * schema - location of the xml schema
# * namespace - location of the namespace document
# * element_namespace - the namespace portion of the XML elements
# * fields - the fields in this metadata format: either an array of field
#   names (all emitted under element_namespace) or a hash mapping a
#   namespace prefix to a field name or an array of field names
#
# See OAI::Provider::Metadata::DublinCore for an example.
#
class Format
  include Singleton

  attr_accessor :prefix, :schema, :namespace, :element_namespace, :fields

  # Words whose plural equals the singular.
  UNCOUNTABLE = %w(equipment information rice money species series fish sheep).freeze

  # Words with an irregular plural.
  IRREGULAR = {
    'person' => 'people', 'man' => 'men', 'child' => 'children', 'sex' => 'sexes',
    'move' => 'moves', 'cow' => 'kine'
  }.freeze

  # Pluralization rules, listed from most general to most specific.
  PLURAL_RULES = [
    [/$/, 's'],
    [/s$/i, 's'],
    [/(ax|test)is$/i, '\1es'],
    [/(octop|vir)us$/i, '\1i'],
    [/(alias|status)$/i, '\1es'],
    [/(bu)s$/i, '\1ses'],
    [/(buffal|tomat)o$/i, '\1oes'],
    [/([ti])um$/i, '\1a'],
    [/sis$/i, 'ses'],
    [/(?:([^f])fe|([lr])f)$/i, '\1\2ves'],
    [/(hive)$/i, '\1s'],
    [/([^aeiouy]|qu)y$/i, '\1ies'],
    [/(x|ch|ss|sh)$/i, '\1es'],
    [/(matr|vert|ind)(?:ix|ex)$/i, '\1ices'],
    [/([m|l])ouse$/i, '\1ice'],
    [/^(ox)$/i, '\1en'],
    [/(quiz)$/i, '\1zes']
  ].freeze

  # Provided a model, and a record belonging to that model, this method
  # will return an xml representation of the record.  This is the method
  # that should be extended if you need to create more complex xml
  # representations.
  #
  # A record that responds to "to_<prefix>" is asked to encode itself.
  # Otherwise one element is written per field value, using the optional
  # field-to-method mapping the model returns from "map_<prefix>".
  def encode(model, record)
    return record.send("to_#{prefix}") if record.respond_to?("to_#{prefix}")

    xml = Builder::XmlMarkup.new
    map = model.respond_to?("map_#{prefix}") ? model.send("map_#{prefix}") : {}
    xml.tag!("#{prefix}:#{element_namespace}", header_specification) do
      if fields.is_a?(Array)
        fields.each { |field| encode_field(xml, element_namespace, field, record, map) }
      elsif fields.is_a?(Hash)
        fields.each_pair do |namespace_prefix, names|
          # Array() accepts both a list of names and a single bare name.
          Array(names).each { |field| encode_field(xml, namespace_prefix, field, record, map) }
        end
      end
    end
    xml.target!
  end

  private

  # Writes the element(s) for one field: one element per value when the
  # record yields a collection, a single element otherwise.
  def encode_field(xml, namespace_prefix, field, record, map)
    values = value_for(field, record, map)
    if values.respond_to?(:each)
      values.each { |value| xml.tag! "#{namespace_prefix}:#{field}", value }
    else
      xml.tag! "#{namespace_prefix}:#{field}", values
    end
  end

  # We try a bunch of different methods to get the data from the model.
  #
  # 1. Check if the model defines a field mapping for the field of
  #    interest.
  # 2. Try calling the pluralized name method on the record.
  # 3. Try calling the singular name method on the record.
  #
  # Returns an empty array when the record responds to neither.
  def value_for(field, record, map)
    method = map[field] ? map[field].to_s : field.to_s
    plural = pluralize(method)

    if record.respond_to?(plural)
      record.send plural
    elsif record.respond_to?(method)
      # Calling a method named "type" used to print a deprecation warning,
      # so warnings are silenced around the call.
      silence_warnings { record.send method }
    else
      []
    end
  end

  # Subclasses must override
  def header_specification
    raise NotImplementedError.new
  end

  # Shamelessly lifted from ActiveSupport.  Thanks Rails community!
  def pluralize(word)
    # Use ActiveSupport's pluralization if it's available.
    return word.pluralize if word.respond_to?(:pluralize)

    # Otherwise use our own simple pluralization rules.
    result = word.to_s.dup
    return result if UNCOUNTABLE.include?(result)
    return IRREGULAR[result] if IRREGULAR.key?(result)

    # The rules are listed general-to-specific, so they have to be tried
    # last-to-first; going forward the catch-all "append s" rule would
    # always match and no other rule could ever apply.
    rules.reverse_each { |(rule, replacement)| break if result.sub!(rule, replacement) }
    result
  end

  def rules
    PLURAL_RULES
  end

  # Runs the block with $VERBOSE switched off and restores it afterwards.
  def silence_warnings
    old_verbose, $VERBOSE = $VERBOSE, nil
    yield
  ensure
    $VERBOSE = old_verbose
  end

end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
Dir.glob(File.dirname(__FILE__) + '/metadata_format/*.rb').each {|lib| require lib}
|