oai 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/README +80 -0
  2. data/Rakefile +113 -0
  3. data/bin/oai +68 -0
  4. data/examples/models/file_model.rb +63 -0
  5. data/examples/providers/dublin_core.rb +474 -0
  6. data/lib/oai.rb +7 -13
  7. data/lib/oai/client.rb +133 -83
  8. data/lib/oai/{get_record.rb → client/get_record.rb} +0 -0
  9. data/lib/oai/{header.rb → client/header.rb} +2 -2
  10. data/lib/oai/{identify.rb → client/identify.rb} +0 -0
  11. data/lib/oai/{list_identifiers.rb → client/list_identifiers.rb} +0 -0
  12. data/lib/oai/{list_metadata_formats.rb → client/list_metadata_formats.rb} +0 -0
  13. data/lib/oai/{list_records.rb → client/list_records.rb} +0 -0
  14. data/lib/oai/{list_sets.rb → client/list_sets.rb} +1 -1
  15. data/lib/oai/{metadata_format.rb → client/metadata_format.rb} +0 -0
  16. data/lib/oai/{record.rb → client/record.rb} +0 -0
  17. data/lib/oai/{response.rb → client/response.rb} +1 -1
  18. data/lib/oai/constants.rb +34 -0
  19. data/lib/oai/exception.rb +72 -1
  20. data/lib/oai/harvester.rb +38 -0
  21. data/lib/oai/harvester/config.rb +41 -0
  22. data/lib/oai/harvester/harvest.rb +144 -0
  23. data/lib/oai/harvester/logging.rb +70 -0
  24. data/lib/oai/harvester/mailer.rb +17 -0
  25. data/lib/oai/harvester/shell.rb +334 -0
  26. data/lib/oai/provider.rb +300 -0
  27. data/lib/oai/provider/metadata_format.rb +72 -0
  28. data/lib/oai/provider/metadata_format/oai_dc.rb +29 -0
  29. data/lib/oai/provider/model.rb +71 -0
  30. data/lib/oai/provider/model/activerecord_caching_wrapper.rb +135 -0
  31. data/lib/oai/provider/model/activerecord_wrapper.rb +136 -0
  32. data/lib/oai/provider/partial_result.rb +18 -0
  33. data/lib/oai/provider/response.rb +119 -0
  34. data/lib/oai/provider/response/error.rb +16 -0
  35. data/lib/oai/provider/response/get_record.rb +32 -0
  36. data/lib/oai/provider/response/identify.rb +24 -0
  37. data/lib/oai/provider/response/list_identifiers.rb +29 -0
  38. data/lib/oai/provider/response/list_metadata_formats.rb +21 -0
  39. data/lib/oai/provider/response/list_records.rb +32 -0
  40. data/lib/oai/provider/response/list_sets.rb +23 -0
  41. data/lib/oai/provider/response/record_response.rb +68 -0
  42. data/lib/oai/provider/resumption_token.rb +106 -0
  43. data/lib/oai/set.rb +14 -5
  44. data/test/activerecord_provider/config/connection.rb +5 -0
  45. data/test/activerecord_provider/config/database.yml +6 -0
  46. data/test/activerecord_provider/database/ar_migration.rb +59 -0
  47. data/test/activerecord_provider/database/oaipmhtest +0 -0
  48. data/test/activerecord_provider/fixtures/dc.yml +1501 -0
  49. data/test/activerecord_provider/helpers/providers.rb +44 -0
  50. data/test/activerecord_provider/helpers/set_provider.rb +36 -0
  51. data/test/activerecord_provider/models/dc_field.rb +7 -0
  52. data/test/activerecord_provider/models/dc_set.rb +6 -0
  53. data/test/activerecord_provider/models/oai_token.rb +3 -0
  54. data/test/activerecord_provider/tc_ar_provider.rb +93 -0
  55. data/test/activerecord_provider/tc_ar_sets_provider.rb +66 -0
  56. data/test/activerecord_provider/tc_caching_paging_provider.rb +53 -0
  57. data/test/activerecord_provider/tc_simple_paging_provider.rb +55 -0
  58. data/test/activerecord_provider/test_helper.rb +4 -0
  59. data/test/client/helpers/provider.rb +68 -0
  60. data/test/client/helpers/test_wrapper.rb +11 -0
  61. data/test/client/tc_exception.rb +36 -0
  62. data/test/{tc_get_record.rb → client/tc_get_record.rb} +11 -7
  63. data/test/client/tc_identify.rb +13 -0
  64. data/test/{tc_libxml.rb → client/tc_libxml.rb} +20 -10
  65. data/test/{tc_list_identifiers.rb → client/tc_list_identifiers.rb} +10 -8
  66. data/test/{tc_list_metadata_formats.rb → client/tc_list_metadata_formats.rb} +4 -1
  67. data/test/{tc_list_records.rb → client/tc_list_records.rb} +4 -1
  68. data/test/{tc_list_sets.rb → client/tc_list_sets.rb} +4 -2
  69. data/test/{tc_xpath.rb → client/tc_xpath.rb} +1 -1
  70. data/test/client/test_helper.rb +5 -0
  71. data/test/provider/models.rb +230 -0
  72. data/test/provider/tc_exceptions.rb +63 -0
  73. data/test/provider/tc_functional_tokens.rb +42 -0
  74. data/test/provider/tc_provider.rb +69 -0
  75. data/test/provider/tc_resumption_tokens.rb +46 -0
  76. data/test/provider/tc_simple_provider.rb +85 -0
  77. data/test/provider/test_helper.rb +36 -0
  78. metadata +123 -27
  79. data/test/tc_exception.rb +0 -38
  80. data/test/tc_identify.rb +0 -8
@@ -0,0 +1,38 @@
1
+ require 'zlib'
2
+ require 'net/smtp'
3
+ require 'yaml'
4
+ require 'tempfile'
5
+ require 'logger'
6
+ require 'fileutils'
7
+ require 'ostruct'
8
+ require 'readline'
9
+ require 'chronic'
10
+ require 'socket'
11
+
12
+ require 'oai/harvester/config'
13
+ require 'oai/harvester/harvest'
14
+ require 'oai/harvester/logging'
15
+ require 'oai/harvester/mailer'
16
+ require 'oai/harvester/shell'
17
+
18
# Returns the nicknames of the configured sites that are due for a harvest.
#
# conf - configuration object whose +sites+ is nil or a collection of
#        [nickname, settings] pairs; each settings object is read with
#        the 'period' and 'last' keys.
#
# A site is included when needs_updating(period, last) is truthy.
# Returns an empty array when no sites are configured.
def harvestable_sites(conf)
  configured = conf.sites
  return [] unless configured

  due = configured.select do |_nickname, settings|
    needs_updating(settings['period'], settings['last'])
  end
  due.map { |nickname, _settings| nickname }
end
25
+
26
# Decides whether a site harvested at +last+ is due again for +period+.
#
# period - 'daily', 'weekly' or 'monthly'; any other value is never due
#          once a harvest has happened.
# last   - Time of the previous harvest, or nil if never harvested.
#
# The thresholds are in seconds and sit slightly below a full day
# (86,400), week (604,800) and 30 days (2,592,000) -- presumably to
# tolerate small scheduling drift; confirm before changing them.
#
# Returns true or false.
def needs_updating(period, last)
  return true if last.nil?

  minimum_age = {
    'daily'   => 86000,
    'weekly'  => 604000,
    'monthly' => 2591000
  }[period]
  return false unless minimum_age

  Time.now - last > minimum_age
end
38
+
@@ -0,0 +1,41 @@
1
+ #
2
+ # Created by William Groppe on 2006-11-05.
3
+ # Copyright (c) 2006. All rights reserved.
4
+
5
module OAI
  module Harvester

    # Granularity string compared against a provider's reported granularity
    # by the harvester; providers reporting it only accept day-level dates.
    LOW_RESOLUTION = "YYYY-MM-DD"

    # Harvester configuration backed by a YAML file.
    #
    # Settings are plain OpenStruct attributes (e.g. +storage+, +sites+),
    # so unset values read as nil.
    class Config < OpenStruct

      PERIODS = %w(daily weekly monthly)
      GLOBAL = "/etc/oai/harvester.yml"

      # Loads the configuration from the file chosen by find_config.
      #
      # Returns an empty Config when no location can be determined or the
      # file does not exist yet.
      def self.load
        path = find_config
        return new unless path && File.exist?(path)
        new(read_yaml(path) || {})
      end

      # Writes all settings to the file chosen by find_config.
      #
      # Raises RuntimeError when no configuration location can be
      # determined (neither HOME nor APPDATA is set).
      def save
        path = Config.find_config
        unless path
          raise "Unable to locate a configuration file: neither HOME nor APPDATA is set"
        end
        # File.open rather than Kernel#open, which treats a leading "|" as
        # a command to run.
        File.open(path, 'w') do |out|
          YAML.dump(to_h, out)
        end
      end

      # Shamelessly lifted from Camping
      #
      # Returns the path of the configuration file, creating the per-user
      # directory when needed, or nil when neither HOME nor APPDATA is set.
      # Stays public because #save calls it with an explicit receiver.
      def self.find_config
        if (home = ENV['HOME']) # POSIX
          return GLOBAL if File.exist?(GLOBAL) && File.writable?(GLOBAL)
          FileUtils.mkdir_p File.join(home, '.oai')
          File.join(home, '.oai/harvester.yml')
        elsif (home = ENV['APPDATA']) # MSWIN
          FileUtils.mkdir_p File.join(home, 'oai')
          File.join(home, 'oai/harvester.yml')
        end
      end

      # Reads the YAML configuration at +path+.
      #
      # #save writes Symbol keys and Time values, which newer YAML loaders
      # reject unless explicitly permitted. Older loaders do not accept
      # these options and raise ArgumentError, so fall back to a plain load.
      def self.read_yaml(path)
        YAML.load_file(path, :permitted_classes => [Symbol, Time], :aliases => true)
      rescue ArgumentError
        YAML.load_file(path)
      end
      private_class_method :read_yaml

    end
  end
end
@@ -0,0 +1,144 @@
1
+ #
2
+ # Created by William Groppe on 2006-11-03.
3
+
4
+ module OAI
5
+ module Harvester
6
+
7
+ class Harvest
8
+
9
# Sets up a harvest run.
#
# config    - configuration object; loaded via Config.load when omitted.
# directory - storage directory; defaults to the configuration's +storage+.
# date      - optional "from" date applied to every site in this run. The
#             object passed in is frozen.
#
# The parser name is 'libxml' when XML::Document is defined, otherwise
# 'rexml'.
def initialize(config = nil, directory = nil, date = nil)
  @config = config || Config.load
  @directory = directory || @config.storage
  @from = date.freeze
  @parser = if defined?(XML::Document) then 'libxml' else 'rexml' end
end
16
+
17
# Harvests the given sites and always saves the configuration afterwards,
# even when a harvest raises.
#
# sites       - a site nickname, a list of nicknames, or nil to harvest
#               every configured site. A single nickname is accepted
#               because a bare String has no +each+.
# interactive - when true, progress is printed while harvesting.
def start(sites = nil, interactive = false)
  @interactive = interactive
  unless sites
    configured = @config.sites
    sites = configured ? configured.keys : []
  end
  begin
    Array(sites).each do |site|
      harvest(site)
    end
  ensure
    @config.save
  end
end
28
+
29
+ private
30
+
31
# Harvests one configured site into the storage directory and records the
# harvest time as the site's 'last' value.
#
# The lower date bound is, in order of preference: the date given to the
# constructor, the site's previous harvest time (from build_options_hash),
# or the repository's earliest datestamp. Previously the stored harvest
# time was always overwritten by the earliest datestamp.
#
# Errors are re-raised, except that in interactive mode an error whose
# +code+ is "noRecordsMatch" is reported as "No new records available".
# (The original condition used +not+, which binds looser than +||+ and so
# inverted this rule.)
def harvest(site)
  harvest_time = Time.now.utc
  opts = build_options_hash(@config.sites[site])
  opts[:until] = harvest_time.xmlschema
  opts[:from] = @from || opts[:from] || earliest(opts[:url])

  opts.delete(:set) if 'all' == opts[:set]

  begin
    # Connect, and download
    file, _records = call(opts.delete(:url), opts)

    # Move document to storage directory
    dir = File.join(@directory, date_based_directory(harvest_time))
    FileUtils.mkdir_p dir
    name = "#{site}-#{filename(Time.parse(opts[:from]), harvest_time)}.xml.gz"
    FileUtils.mv(file.path, File.join(dir, name))
    @config.sites[site]['last'] = harvest_time
  rescue => e
    raise unless e.respond_to?(:code)
    raise unless @interactive && "noRecordsMatch" == e.code
    puts "No new records available"
  end
end
62
+
63
# Downloads all records matching +opts+ from the provider at +url+ into a
# gzip-compressed temporary file.
#
# url  - base URL of the provider.
# opts - ListRecords options (:from, :until, :set, :metadata_prefix);
#        the hash passed in is not modified.
#
# When the provider reports day-level granularity, :from and :until are
# truncated to dates. Resumption tokens are followed until the provider
# returns none or an empty one.
#
# Returns [tempfile, record_count]; the tempfile is closed.
def call(url, opts)
  # Preserve original options
  options = opts.dup

  records = 0
  client = OAI::Client.new(url, :parser => @parser)
  provider_config = client.identify

  if Harvester::LOW_RESOLUTION == provider_config.granularity
    options[:from] = Time.parse(options[:from]).strftime("%Y-%m-%d")
    options[:until] = Time.parse(options[:until]).strftime("%Y-%m-%d")
  end

  file = Tempfile.new('oai_data')
  gz = Zlib::GzipWriter.new(file)
  begin
    # No space is allowed between "<?" and "xml" in an XML declaration.
    gz << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    gz << "<records>"

    response = client.list_records(options)
    loop do
      get_records(response.doc).each do |rec|
        gz << rec
        records += 1
      end
      puts "#{records} records retrieved" if @interactive

      # Get a full response by iterating with the resumption tokens.
      # Not very Ruby like. Should fix OAI::Client to handle resumption
      # tokens internally.
      token = response.resumption_token
      break if token.nil? || token.empty?

      puts "\nresumption token recieved, continuing" if @interactive
      response = client.list_records(:resumption_token => token)
    end

    gz << "</records>"
  ensure
    gz.close
    file.close
  end

  [file, records]
end
111
+
112
# Returns the record nodes of a ListRecords response document as an array.
#
# doc - parsed response document; must answer +find+ with an XPath string.
#       NOTE(review): this looks like the libxml document API -- confirm
#       it also behaves as intended when the 'rexml' parser is selected.
def get_records(doc)
  matches = doc.find("/OAI-PMH/ListRecords/record")
  matches.to_a
end
115
+
116
# Translates a site's stored settings into request options.
#
# site - settings read with the 'url', 'set', 'last' and 'prefix' keys.
#
# Returns a hash that always has :url, plus :set, :from (the 'last' time
# as a UTC xmlschema string) and :metadata_prefix when those settings are
# present.
def build_options_hash(site)
  set, last, prefix = site['set'], site['last'], site['prefix']

  options = { :url => site['url'] }
  options[:set] = set if set
  options[:from] = last.utc.xmlschema if last
  options[:metadata_prefix] = prefix if prefix
  options
end
123
+
124
# Returns the storage sub-directory for a harvest made at +time+, formatted
# with the DIRECTORY_LAYOUT strftime pattern (defined outside this file).
def date_based_directory(time)
  layout = DIRECTORY_LAYOUT
  time.strftime(layout)
end
127
+
128
# Builds the base name of a harvest file from its date range.
#
# Returns "<from date>_til_<until date>_at_<until time>", with dates as
# YYYY-MM-DD and the time as HH-MM-SS.
def filename(from_time, until_time)
  day = "%Y-%m-%d"
  [
    from_time.strftime(day),
    "_til_",
    until_time.strftime(day),
    "_at_",
    until_time.strftime('%H-%M-%S')
  ].join
end
133
+
134
+ # Get earliest timestamp from repository
135
# Asks the provider at +url+ for its earliest datestamp (via Identify) and
# returns it as a UTC xmlschema string.
def earliest(url)
  datestamp = OAI::Client.new(url).identify.earliest_datestamp
  Time.parse(datestamp).utc.xmlschema
end
140
+
141
+ end
142
+
143
+ end
144
+ end
@@ -0,0 +1,70 @@
1
+ # Reopen Harvest and add logging
2
+ module OAI
3
+ module Harvester
4
+
5
+ class Harvest
6
+ alias_method :orig_start, :start
7
+ alias_method :orig_harvest, :harvest
8
+ alias_method :orig_call, :call
9
+ alias_method :orig_init, :initialize
10
+
11
# Wraps the original constructor and adds a summary list and a logger.
#
# With a configured log directory, messages go to "harvester.log" there,
# rotated weekly. Without one, a logger with no output device is used and
# its level is raised to FATAL, so logging calls elsewhere stay valid but
# write nothing. (Previously @logger was nil in that case and the next
# line raised NoMethodError.)
def initialize(config = nil, directory = nil, date = nil)
  orig_init(config, directory, date)
  @summary = []
  if @config.logfile
    @logger = Logger.new(File.join(@config.logfile, "harvester.log"), 'weekly')
  else
    # Turn off logging if no logging directory is specified.
    @logger = Logger.new(nil)
    @logger.level = Logger::FATAL
  end
  @logger.datetime_format = "%Y-%m-%d %H:%M"
end
21
+
22
# Logs the start of a harvest run and delegates to the original +start+.
#
# A regular (non-interactive) run is followed by an attempt to mail the
# collected summary; a failure to send is logged, not raised. An
# interactive run sends no mail.
def start(sites = nil, interactive = false)
  if interactive
    @logger.info { "Starting interactive harvest"}
    orig_start(sites, true)
  else
    @logger.info { "Starting regular harvest" }
    orig_start(sites)
    begin
      OAI::Harvester::Mailer.send(@config.mail_server, @config.email, @summary)
    rescue => mail_error
      @logger.error { "Error sending out summary email: #{mail_error}"}
    end
  end
end
37
+
38
+ private
39
+
40
# Harvests one site through the original +harvest+, logging the outcome
# and adding it to the run summary. No error is re-raised.
#
# An OAI::Exception with code "noRecordsMatch" is recorded as "no new
# records"; any other OAI::Exception or error is recorded as a failure.
# For non-OAI errors the backtrace is logged one frame per line (the
# original joined frames with a literal backslash-n).
def harvest(site)
  @logger.info { "Harvest of '#{site}' starting" }
  @summary << "Harvest of '#{site}' attempted"
  orig_harvest(site)
rescue OAI::Exception => e
  if "noRecordsMatch" == e.code
    @logger.info "No new records available"
    @summary << "'#{site}' had no new records."
  else
    @logger.error { "Harvesting of '#{site}' failed, message: #{e}" }
    @summary << "'#{site}' had an OAI Error! #{e}"
  end
rescue => e
  @logger.error { "Harvesting of '#{site}' failed, message: #{e}" }
  @logger.error { Array(e.backtrace).join("\n") }
  @summary << "'#{site}' had an Error! #{e}"
end
59
+
60
# Logs the request, delegates to the original +call+, then logs the number
# of records retrieved and adds it to the run summary.
#
# Returns [file, records] exactly as the original +call+ produced them.
def call(url, options)
  @logger.info { "fetching: #{url} with options #{options.inspect}" }
  file, records = orig_call(url, options)
  @logger.info { "retrieved #{records} records" }
  @summary.push("Retrieved #{records} records.")
  [file, records]
end
67
+ end
68
+
69
+ end
70
+ end
@@ -0,0 +1,17 @@
1
module OAI
  module Harvester

    # Sends the harvest summary by email.
    class Mailer

      # Mails +message+ to every address in +email+ through +server+.
      #
      # server  - address of the SMTP server.
      # email   - one recipient address or a list of them.
      # message - summary line, or list of lines, for the message body.
      #
      # Each recipient is handed to the SMTP session as its own address;
      # previously all of them were quoted and joined into one string,
      # which is not a valid recipient. The sender is
      # "harvester@<local hostname>".
      #
      # Raises ArgumentError when no recipient is given.
      #
      # Note: as a class method named +send+ this takes the place of
      # Object#send on the Mailer class itself.
      def self.send(server = nil, email = nil, message = nil)
        recipients = Array(email).map { |address| address.to_s.strip }
        recipients = recipients.reject { |address| address.empty? }
        raise ArgumentError, "no recipient addresses given" if recipients.empty?

        sender = "harvester@#{Socket.gethostname}"
        headers = [
          "From: #{sender}",
          "To: #{recipients.join(', ')}",
          "Subject: Harvester Summary"
        ].join("\n")
        msg = "#{headers}\n\n#{Array(message).join("\n")}"

        Net::SMTP.start(server) do |smtp|
          smtp.send_message msg, sender, *recipients
        end
      end

    end

  end
end
@@ -0,0 +1,334 @@
1
+ module OAI
2
+ module Harvester
3
+ # = OAI::Harvester::Shell
4
+ #
5
+ # An OAI-PMH client shell allowing OAI harvesting to be configured in
6
+ # an interactive manner. Typing 'oai' on the command line starts the
7
+ # shell. The first time the shell is run it will prompt for the following
8
+ # configuration details:
9
+ # 1. A storage directory for all harvested records. Harvests will be
10
+ # stored under this directory in a directory structure based on the
11
+ # date of the harvest.
12
+ # 2. A log file directory.
13
+ # 3. Email address(es) for sending daily harvesting activity reports.
14
+ # 4. Network address of the SMTP server for sending mail.
15
+ #
16
+ # After the initial configuration, new harvest sites can be added by using
17
+ # the 'new' command. Sites are identified via nickname assigned by the
18
+ # user. After choosing a nickname, provide the URL of a harvestable site,
19
+ # and the shell will prompt you for the rest of the configuration
20
+ # information.
21
+ #
22
+ # The shell automatically pulls down the list of sets in the repository and
23
+ # the supported metadata prefixes, making it very simple to set up harvests.
24
+ #
25
+ class Shell
26
+ include Readline
27
+
28
# Creates a shell working on +config+.
#
# config - configuration object with a +sites+ accessor; +sites+ is set to
#          an empty hash when it is not set yet.
def initialize(config)
  @conf = config
  # Start with an empty sites hash if the configuration has none.
  @conf.sites = {} unless @conf.sites
end
32
+
33
# Runs the interactive shell until the user types 'exit' or input ends.
#
# When no storage directory is configured yet, first-time setup runs
# before the prompt appears.
#
# Only the commands listed by 'help' are dispatched. Input used to be
# passed straight to +send+, which let a user invoke any method of the
# shell object (for example Kernel#system). End of input (readline
# returning nil) now ends the loop instead of spinning forever, and blank
# lines are ignored.
def start
  unless @conf.storage
    banner "Entering first-time setup"
    config
    setup_cron
  end
  puts "type 'help' for help"

  commands = %w(help harvest config list info new remove edit)
  while (input = readline("oai> ", true))
    cmd = input.split
    next if cmd.empty?
    break if ['exit'] == cmd

    begin
      name = cmd.shift
      raise ArgumentError, "unknown command: #{name}" unless commands.include?(name)
      if cmd.empty?
        self.send(name)
      else
        self.send(name, cmd.join(" "))
      end
    rescue
      puts "Not a recognized command, or bad options. Type 'help' for clues."
      #puts $!
      #puts $!.backtrace.join("\n")
    end
  end
end
55
+
56
+ private
57
+
58
# Prints the list of shell commands.
def help
  banner "Commands:"
  puts [
    "\tharvest site [date] - Harvest site(s) manually",
    "\tconfig - Configure harvester",
    "\tlist <config> - List known providers or configuration",
    "\tinfo [site[, site]] - Show information about a provider.",
    "\tnew - Add a new provider site to harvester",
    "\tremove [site] - Remove a provider site from harvester",
    "\tedit [site] - Change settings for a provider site",
    "\texit - Exit the harvester shell.\n\n"
  ]
end
69
+
70
# Harvests one configured site immediately.
#
# options - "<site> [date]"; everything after the site nickname is parsed
#           by Chronic as the date to harvest from.
#
# Prints a message and does nothing else when the site is unknown or the
# date cannot be parsed. The unknown-site message used to reference an
# undefined variable. The site is handed to the harvester as a
# one-element list.
def harvest(options)
  site, *date = options.split(/\s/)
  if @conf.sites.keys.include?(site)
    banner "Harvesting '#{site}'"
    from = nil
    unless date.empty?
      parsed = Chronic.parse(date.join(' '))
      unless parsed
        puts "Couldn't parse the date supplied"
        return
      end
      from = parsed.utc.xmlschema
    end
    harvester = Harvest.new(@conf, @conf.storage, from)
    harvester.start([site], true)
    puts "done"
  else
    puts "Unknown repository: '#{site}'"
  end
  puts # blank line
end
92
+
93
# Lists the configured repositories, or the harvester configuration when
# +args+ is 'config'. A blank line follows either listing.
def list(args = nil)
  case args
  when 'config'
    banner "Current Configuration"
    list_config
  else
    banner "Configured Repositories"
    @conf.sites.each_key { |nickname| puts nickname }
  end
  puts # blank line
end
105
+
106
# Shows the stored settings of the named provider sites.
#
# args - site nicknames separated by commas, whitespace or '|'; when
#        omitted or blank, every configured site is shown.
#
# Runs of separators count as one, so "a, b" names two sites; it used to
# produce an extra empty name between them.
def info(args = nil)
  banner "Provider Site Information"
  sites = args.to_s.split(/[,\s|]+/).reject { |name| name.empty? }
  sites = @conf.sites.keys if sites.empty?
  sites.each do |site|
    print_site(site)
  end
  puts
end
114
+
115
# Interactively defines a new harvesting site and saves the configuration.
#
# Nothing is stored when the form is aborted; an aborted form returns
# nothing, which used to add a nil => nil entry to the sites.
def new
  banner "Define New Harvesting Site"
  name, site = form
  return unless name && site
  @conf.sites[name] = site
  @conf.save
end
121
+
122
# Interactively changes the settings of the site called +name+ and saves
# the configuration.
#
# Nothing is stored when the form is aborted; an aborted form returns
# nothing, which used to add a nil => nil entry to the sites.
def edit(name)
  banner "Edit Harvesting Site"
  name, site = form(name)
  return unless name && site
  @conf.sites[name] = site
  @conf.save
end
128
+
129
# Removes a site from the configuration after confirmation.
#
# An unknown nickname is reported instead of being announced as removed.
# Any answer other than Y/y (surrounding whitespace ignored), including
# end of input, leaves the site in place.
def remove(site)
  unless @conf.sites.key?(site)
    puts "Unknown repository: '#{site}'"
    return
  end

  answer = readline("Remove #{site}? (Y/N): ")
  return unless answer && 'Y' == answer.strip.upcase

  @conf.sites.delete(site)
  @conf.save
  puts "#{site} removed"
end
136
+
137
+ # http://oai.getty.edu:80/oaicat/OAIHandler
138
# Interactively collects the settings of a harvesting site.
#
# name - nickname of an existing site to edit; when omitted a new, unused
#        nickname is asked for.
#
# Asks in turn for the provider URL (verified by contacting it), the
# metadata prefix (must be one the provider lists), the set (must be
# 'all' or one the provider lists; providers whose set list cannot be
# fetched get 'all' without asking) and the harvest period.
#
# Returns [name, site], or nil after printing a message when anything
# goes wrong or input ends.
#
# Differences from the original:
# * the "nickname already in use" notice called an undefined method;
# * an empty nickname is no longer accepted;
# * the set loop now checks the value just entered, not the stored one;
# * a copy of the stored settings is edited, so an aborted form leaves
#   the configuration untouched.
def form(name = nil)
  unless name
    name = prompt("nickname", nil)
    while name.nil? || @conf.sites.keys.include?(name)
      puts "Nickname already in use, choose another." unless name.nil?
      name = prompt("nickname")
    end
  end
  site = (@conf.sites[name] || {}).dup

  # URL
  url = prompt("url", site['url'])
  until (verified = verify(url))
    puts "Trouble contacting provider, bad url?"
    url = prompt("url", site['url'])
  end
  site['url'] = verified

  # Metadata formats
  formats = metadata(site['url'])
  report "Repository supports [#{formats.join(', ')}] metadata formats."
  prefix = prompt("prefix", site['prefix'])
  until formats.include?(prefix)
    prefix = prompt("prefix", site['prefix'])
  end
  site['prefix'] = prefix

  # Sets
  choices = ['all']
  sets_listed = begin
    choices.concat sets(site['url'])
    true
  rescue
    false
  end
  if sets_listed
    site['set'] = 'all' unless site['set'] # default to all sets
    report "Repository supports [#{choices.join(', ')}] metadata sets."
    set = prompt("set", site['set'])
    until choices.include?(set)
      set = prompt("set", site['set'])
    end
    site['set'] = set
  else
    site['set'] = 'all'
  end

  # Period
  period = expand_period(prompt("period", "daily"))
  until Config::PERIODS.include?(period)
    puts "Must be daily, weekly, or monthly"
    period = expand_period(prompt("period", "daily"))
  end
  site['period'] = period

  [name, site]
rescue => e
  puts "Problem adding/updating provider, aborting. (#{e})"
  nil
end
194
+
195
# Interactively asks for the harvester settings and saves them.
#
# Asks for the storage directory, the report email address(es), the mail
# server and the log directory. Both directories are asked for again
# until directory_acceptable accepts them.
#
# Returns nil after printing a message when input ends or an error
# occurs; the original returned nil silently. Its retry prompt also used
# a different label ("storage directory: ") from the first prompt.
def config
  directory = prompt("storage directory", @conf.storage)
  until directory_acceptable(directory)
    directory = prompt("storage directory", @conf.storage)
  end

  current = @conf.email
  email = current.respond_to?(:join) ? current.join(', ') : nil
  @conf.email = parse_emails(prompt("email", email))

  @conf.mail_server = prompt("mail server", @conf.mail_server)

  logfile = prompt("log file(s) directory", @conf.logfile)
  until directory_acceptable(logfile)
    logfile = prompt("log file(s) directory", @conf.logfile)
  end

  @conf.storage = directory
  @conf.logfile = logfile
  @conf.save
rescue => e
  puts "Configuration aborted. (#{e})"
  nil
end
218
+
219
# Prints "key: value" on one line, padded on the left with spaces so that
# the key ends at column +split+. Keys of +split+ characters or more are
# printed without padding.
def display(key, value, split = 40)
  padding = split - key.size
  print " " * padding if padding > 0
  puts "#{key}: #{value}"
end
223
+
224
+ def banner(str)
225
+ puts "\n#{str}"
226
+ str.size.times { print "-" }
227
+ puts "\n"
228
+ end
229
+
230
# Prints +str+ on its own line, preceded by a blank line.
def report(str)
  puts "\n" + str.to_s + "\n"
end
233
+
234
# Prints +number+ tab characters without a trailing newline.
def indent(number)
  number.times { print "\t" }
end
239
+
240
# Asks the user for a value.
#
# text    - label shown to the user.
# default - value shown in brackets and returned when the user just
#           presses enter.
# split   - width the prompt is padded to (with leading spaces) when the
#           prompt text is shorter.
#
# Returns the text entered, or +default+ for an empty answer.
# Raises RuntimeError ("Exit loop") when input ends.
def prompt(text, default = nil, split = 20)
  label = "#{text} [#{default}]: "
  gap = split - label.size
  print " " * gap if gap > 0

  answer = readline(label, true)
  raise RuntimeError, "Exit loop" unless answer
  answer.empty? ? default : answer
end
247
+
248
# Checks that +url+ answers an OAI Identify request.
#
# url            - provider URL to contact; redirects are not followed by
#                  the client.
# redirects_left - how many "Permanently Redirected" answers may still be
#                  followed by retrying at the new address. This bounds a
#                  recursion that used to be unlimited, so two addresses
#                  redirecting to each other no longer exhaust the stack.
#
# Prints the repository name on success.
# Returns the URL that finally answered, or nil after printing an error.
def verify(url, redirects_left = 5)
  client = OAI::Client.new(url, :redirects => false)
  identify = client.identify
  puts "Repository name \"#{identify.repository_name}\""
  url
rescue => e
  target = e.to_s[/^Permanently Redirected to \[(.*)\?.*\]/, 1]
  if target && redirects_left > 0
    report "Provider redirected to: #{target}"
    verify(target, redirects_left - 1)
  elsif target
    puts "Error selecting repository: too many redirects"
    nil
  else
    puts "Error selecting repository: #{e}"
    nil
  end
end
263
+
264
# Returns the metadata prefixes offered by the provider at +url+, in the
# order the provider lists them.
def metadata(url)
  listing = OAI::Client.new(url).list_metadata_formats
  listing.to_a.map { |format| format.prefix }
end
273
+
274
# Returns the set specs offered by the provider at +url+, in the order the
# provider lists them.
def sets(url)
  listing = OAI::Client.new(url).list_sets
  listing.to_a.map { |set| set.spec }
end
283
+
284
# Tells whether +dir+ names an existing, writable directory.
#
# Prints a notice and returns false otherwise (including for nil). A
# regular file is rejected; the original only checked that the path
# existed, using File.exists?, which newer Rubies no longer provide.
def directory_acceptable(dir)
  return true if dir && File.directory?(dir) && File.writable?(dir)

  puts "Directory doesn't exist, or isn't writtable."
  false
end
291
+
292
# Expands an abbreviated harvest period ('d', 'week', ...) to the full
# name from Config::PERIODS.
#
# Returns the first period that starts with +str+, or nil when none does.
# The input is compared as plain text; it used to be interpolated into a
# regular expression, so input such as "(" raised and "." matched
# everything.
def expand_period(str)
  return str if Config::PERIODS.include?(str)

  prefix = str.to_s
  Config::PERIODS.find { |period| period.start_with?(prefix) }
end
297
+
298
# Splits a user-entered list of email addresses.
#
# emails - addresses separated by commas, whitespace or '|'; nil is
#          passed through.
#
# Returns an array of addresses without empty entries; "a, b" used to
# yield an empty address between the two.
def parse_emails(emails)
  return nil unless emails

  emails.split(/[,\s|]+/).reject { |address| address.empty? }
end
302
+
303
# Prints the harvester settings: always the storage directory, and the
# email addresses, mail server and log location when they are set.
def list_config
  rows = [["storage directory", @conf.storage]]
  rows << ["email", @conf.email.join(', ')] if @conf.email
  rows << ["mail server", @conf.mail_server] if @conf.mail_server
  rows << ["log location", @conf.logfile] if @conf.logfile
  rows.each { |label, value| display(label, value, 20) }
  nil
end
309
+
310
# Prints the stored settings of every configured site under a "Sites"
# heading.
def list_sites
  banner "Sites"
  @conf.sites.each { |nickname, _settings| print_site(nickname) }
end
314
+
315
# Prints a site's nickname followed by each of its stored settings.
#
# An unknown nickname is reported; it used to raise NoMethodError on nil.
def print_site(site)
  settings = @conf.sites[site]
  unless settings
    puts "Unknown repository: '#{site}'"
    return
  end

  puts site
  settings.each { |k, v| display(k, v, 15) }
end
319
+
320
# Prints instructions for scheduling the nightly harvest, using the name
# this program was started with ($0) in the suggested entries.
def setup_cron
  banner "Scheduling Automatic Harvesting"
  puts "To activate automatic harvesting you must add an entry to",
       "your scheduler. Linux/Mac OS X users should add the following",
       "entry to their crontabs:\n\n",
       "0 0 * * * #{$0} -D\n\n",
       "Windows users should use WinAt to schedule",
       "#{$0} to run every night.\n\n\n"
end
329
+
330
+ end
331
+
332
+ end
333
+ end
334
+