mrt-tind-harvester 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mrt-tind-harvester +74 -0
- data/lib/merritt.rb +1 -0
- data/lib/merritt/tind.rb +1 -0
- data/lib/merritt/tind/config.rb +114 -0
- data/lib/merritt/tind/feed.rb +29 -0
- data/lib/merritt/tind/feed_processor.rb +61 -0
- data/lib/merritt/tind/files.rb +66 -0
- data/lib/merritt/tind/harvester.rb +138 -0
- data/lib/merritt/tind/inventory_db.rb +54 -0
- data/lib/merritt/tind/last_harvest.rb +84 -0
- data/lib/merritt/tind/logging.rb +42 -0
- data/lib/merritt/tind/module_info.rb +7 -0
- data/lib/merritt/tind/record.rb +109 -0
- data/lib/merritt/tind/record_processor.rb +90 -0
- data/lib/merritt/tind/times.rb +35 -0
- metadata +298 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a8a12f69bf55200b7d6b02ee260281c3d1d7b050
|
4
|
+
data.tar.gz: 6ea06ac8dbff3b873ef69042512f3d81fb5b2e73
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7fdd4a906e4b22079c5a15f2eab8062dd551e6cc8e3ecf7d8909554b0bdec73ac84f616c12a862f03f854d57e3146880f59f1f82f5d3f7d38c2abce92222d7ee
|
7
|
+
data.tar.gz: dddc4f22729901b865d383dcfb63c8a0a04d9f4921a5c5a137ef6c619eff5886842747a6a5ffdae3e168e31bec189681d3e096fbe22b1cce52b7443d79ea0f5e
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
Dir.chdir(__dir__) { require 'bundler/setup' }
|
4
|
+
|
5
|
+
require 'optparse'
|
6
|
+
require 'optparse/time'
|
7
|
+
require 'ostruct'
|
8
|
+
require 'merritt/tind'
|
9
|
+
|
10
|
+
# Help text for each command-line switch. The frozen OpenStruct exposes one
# reader per switch: USAGE.CONFIG, USAGE.DRY_RUN, USAGE.FROM, USAGE.UNTIL
# and USAGE.HELP.
usage_descriptions = {
  CONFIG: 'path to configuration file (required)'.freeze,
  DRY_RUN: 'dry run (harvest, but do not submit or update last_harvest)'.freeze,
  FROM: 'start date/time (inclusive) for selective harvesting'.freeze,
  UNTIL: 'end date/time (inclusive) for selective harvesting'.freeze,
  HELP: 'print help and exit'
}
USAGE = OpenStruct.new(usage_descriptions).freeze
|
17
|
+
|
18
|
+
# Command-line wrapper around Merritt::TIND::Harvester: parses the switches
# described in USAGE, builds a harvester from the configuration file and
# processes the feed.
class TINDHarvesterApp
  # @return [OpenStruct] values collected while parsing the command line
  #   (config_file, from_time, until_time, dry_run, help)
  attr_reader :options

  def initialize
    @options = OpenStruct.new(dry_run: false, help: false)
  end

  # The parser for this application's switches, built on first use and then
  # reused, so that parsing and help output share one instance.
  #
  # @return [OptionParser]
  def option_parser
    @option_parser ||= OptionParser.new do |opts|
      opts.on('-c CONFIG', '--config CONFIG', USAGE.CONFIG) { |config_file| options.config_file = config_file }
      # OptionParser has already coerced DATETIME to a Time; store a UTC copy
      # rather than converting the parsed value in place.
      opts.on('-f', '--from DATETIME', Time, USAGE.FROM) { |from_time| options.from_time = from_time.getutc }
      opts.on('-u', '--until DATETIME', Time, USAGE.UNTIL) { |until_time| options.until_time = until_time.getutc }
      opts.on('-n', '--dry-run', USAGE.DRY_RUN) { options.dry_run = true }
      opts.on('-h', '--help', USAGE.HELP) { options.help = true }
    end
  end

  # Parses the command line and runs the harvest. Any StandardError (including
  # option-parsing errors) is printed to stderr followed by the help text, and
  # the process exits with status 1. With --help, prints the help text and
  # exits with status 0.
  def do_harvest!
    option_parser.parse!

    print_help_and_exit(0) if options.help

    harvester = Merritt::TIND::Harvester.from_file(config_file, dry_run: options.dry_run)
    warn("Starting harvester; logging to #{harvester.log_path}")
    harvester.process_feed!(from_time: options.from_time, until_time: options.until_time)
  rescue StandardError => e
    warn(e)
    print_help_and_exit(1)
  end

  private

  # @return [String] the configuration file path given with --config; exits
  #   with an error message when none was given or the file does not exist
  def config_file
    path = options.config_file
    return path if path && File.exist?(path)

    exit_with_error('No configuration file specified') unless path
    exit_with_error("The specified configuration file does not exist: #{path}")
  end

  # @return [String] the usage summary generated by the option parser
  def help
    option_parser.to_s
  end

  # Prints msg and the help text to stderr, then exits with status 1.
  def exit_with_error(msg)
    warn(msg)
    print_help_and_exit(1)
  end

  # Prints the help text to stderr and exits with the given status.
  def print_help_and_exit(status)
    warn(help)
    exit(status)
  end
end
|
72
|
+
|
73
|
+
# Script entry point: parse the command line and run the harvest.
TINDHarvesterApp.new.do_harvest!
|
data/lib/merritt.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file directly under merritt/, in sorted (stable) order.
Dir.glob(File.expand_path('merritt/*.rb', __dir__)).sort.each { |path| require path }
|
data/lib/merritt/tind.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file directly under tind/, in sorted (stable) order.
Dir.glob(File.expand_path('tind/*.rb', __dir__)).sort.each { |path| require path }
|
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
module Merritt
|
5
|
+
module TIND
|
6
|
+
class Config
|
7
|
+
|
8
|
+
attr_reader :config_h
|
9
|
+
attr_reader :config_path
|
10
|
+
|
11
|
+
def initialize(config_h = nil, config_yml: nil)
|
12
|
+
@config_h = config_h || {}
|
13
|
+
@config_path = Pathname.new(config_yml).realpath if config_yml
|
14
|
+
end
|
15
|
+
|
16
|
+
def oai_base_url
|
17
|
+
oai_config_h['base_url']
|
18
|
+
end
|
19
|
+
|
20
|
+
def oai_set
|
21
|
+
oai_config_h['set']
|
22
|
+
end
|
23
|
+
|
24
|
+
def stop_file_path
|
25
|
+
@stop_file_path ||= begin
|
26
|
+
stop_file = config_h['stop_file']
|
27
|
+
resolve_relative_path(stop_file)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def mrt_collection_ark
|
32
|
+
merritt_config_h['collection_ark']
|
33
|
+
end
|
34
|
+
|
35
|
+
def mrt_ingest_url
|
36
|
+
merritt_config_h['ingest_url']
|
37
|
+
end
|
38
|
+
|
39
|
+
def mrt_ingest_profile
|
40
|
+
merritt_config_h['ingest_profile']
|
41
|
+
end
|
42
|
+
|
43
|
+
def db_config_path
|
44
|
+
@db_config_path ||= begin
|
45
|
+
db = merritt_config_h['database']
|
46
|
+
resolve_relative_path(db)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def log_level
|
51
|
+
log_config_h['level']
|
52
|
+
end
|
53
|
+
|
54
|
+
def log_path
|
55
|
+
@log_path ||= begin
|
56
|
+
lp = log_config_h['file']
|
57
|
+
resolve_relative_path(lp)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def last_harvest_path
|
62
|
+
@last_harvest_path ||= begin
|
63
|
+
lh = config_h['last_harvest']
|
64
|
+
resolve_relative_path(lh)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def oai_config_h
|
71
|
+
config_h['oai'] || {}
|
72
|
+
end
|
73
|
+
|
74
|
+
def merritt_config_h
|
75
|
+
config_h['merritt'] || {}
|
76
|
+
end
|
77
|
+
|
78
|
+
def log_config_h
|
79
|
+
config_h['log'] || {}
|
80
|
+
end
|
81
|
+
|
82
|
+
def resolve_relative_path(filename)
|
83
|
+
return nil unless filename
|
84
|
+
|
85
|
+
pathname = Pathname.new(filename)
|
86
|
+
return pathname if pathname.absolute?
|
87
|
+
return pathname unless config_path
|
88
|
+
|
89
|
+
(config_path.parent + pathname).cleanpath
|
90
|
+
end
|
91
|
+
|
92
|
+
class << self
|
93
|
+
|
94
|
+
def from_file(config_yml)
|
95
|
+
# A missing config.yml is not normal
|
96
|
+
raise ArgumentError, "Can't read config from nil file" unless config_yml
|
97
|
+
raise ArgumentError, "Specified config file #{config_yml} does not exist" unless File.exist?(config_yml)
|
98
|
+
|
99
|
+
config_h = YAML.load_file(config_yml)
|
100
|
+
env_config = config_h[environment]
|
101
|
+
raise ArgumentError, "No configuration for environment '#{environment}' found in #{config_yml}" if env_config.nil? || env_config.empty?
|
102
|
+
|
103
|
+
Config.new(env_config, config_yml: config_yml)
|
104
|
+
end
|
105
|
+
|
106
|
+
def environment
|
107
|
+
%w[HARVESTER_ENV RAILS_ENV RACK_ENV].each { |v| return ENV[v] if ENV[v] }
|
108
|
+
'development'
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'oai/client'
|
2
|
+
|
3
|
+
module Merritt
  module TIND
    # Enumerable view of an OAI response: iterating yields one Record per
    # entry of the wrapped response.
    class Feed
      include Enumerable

      # @param resp [#each] the response to wrap. If it responds to
      #   `resumption_token` and has one, its `full` form is iterated instead.
      def initialize(resp)
        @resp = ensure_full_response(resp)
      end

      # Yields a Record (built with Record.from_oai) for each wrapped entry.
      #
      # @return [Enumerator] when called without a block
      def each(&block)
        return enum_for(:each) unless block

        @resp.each { |oai_record| block.call(Record.from_oai(oai_record)) }
      end

      private

      # Returns `resp.full` only for a response that exposes a non-nil
      # resumption token; anything else is returned unchanged.
      def ensure_full_response(resp)
        has_more_pages = resp.respond_to?(:resumption_token) && resp.resumption_token
        has_more_pages ? resp.full : resp
      end

    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Merritt
  module TIND
    # Runs every record of a feed through a RecordProcessor and keeps track of
    # the resulting last-harvest state, which is written out at the end unless
    # the harvester is in dry-run mode.
    class FeedProcessor

      attr_reader :feed
      attr_reader :harvester
      attr_reader :server

      # @param feed [#each] the records to process
      # @param server [Object, nil] passed through to each RecordProcessor
      # @param harvester [Object] supplies config, log, dry_run? and last_harvest
      def initialize(feed:, server:, harvester:)
        @feed = feed
        @server = server
        @harvester = harvester
      end

      # Processes each record in turn, then logs and persists the updated
      # last-harvest state.
      def process_feed!
        feed.each do |record|
          process_record(record, server)
        end

        indented_yaml = last_harvest_next.to_yaml.gsub(/^/, "\t")
        log.debug("Updating #{config.last_harvest_path}:\n#{indented_yaml}")
        update_last_harvest!
      end

      private

      def config
        harvester.config
      end

      def log
        harvester.log
      end

      def dry_run?
        harvester.dry_run?
      end

      # The state to be written after this run; it starts as a clone of the
      # harvester's current state, or a fresh LastHarvest when there is none.
      def last_harvest_next
        @last_harvest_next ||= begin
          current = harvester.last_harvest
          current ? current.clone : LastHarvest.new
        end
      end

      # Writes the accumulated state, or only logs that it was skipped when
      # running dry.
      def update_last_harvest!
        return log.info("Dry run: #{config.last_harvest_path} not updated") if dry_run?

        last_harvest_next.write_to(config.last_harvest_path)
      end

      # Processes one record and records it as a success; any StandardError
      # raised on the way is logged as a warning and the record is recorded
      # as a failure instead.
      def process_record(r, server)
        processor = RecordProcessor.new(r, harvester, server)
        processor.process_record!
        @last_harvest_next = last_harvest_next.update(success: r)
      rescue StandardError => e
        # TODO: can we identify failures after submission?
        log.warn(e)
        @last_harvest_next = last_harvest_next.update(failure: r)
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'timeout'
|
2
|
+
|
3
|
+
module Merritt
  module TIND
    # File helpers: exclusive `flock` locking, and rotation of a non-empty
    # file to a timestamped name before it is rewritten.
    module Files
      # Maximum time to spend trying to acquire a lock, in seconds
      DEFAULT_TIMEOUT_SECS = 5
      # Pause between attempts (locking, or picking a rotated name), in seconds
      DEFAULT_SLEEP_INTERVAL_SECS = 0.1

      class << self

        # Opens `filename` in append/read mode (creating it if needed), takes
        # an exclusive lock and yields the open file. Afterwards the file is
        # flushed, unlocked and closed, so everything the block wrote is on
        # disk before another process can take the lock.
        #
        # @param filename [String, Pathname] the file to lock
        # @yieldparam f [File] the locked file
        # @return [Object] the block's result
        # @raise [Timeout::Error] if the lock is not acquired within
        #   {DEFAULT_TIMEOUT_SECS} seconds
        def with_lock(filename)
          f = acquire_lock(filename)
          yield f
        ensure
          release_lock(f)
        end

        # Like {with_lock}, but if `filename` already has content it is first
        # renamed to `<filename>-<UTC ISO 8601 timestamp>`, and the block gets
        # a locked handle on a fresh file under the original name.
        #
        # @param filename [String, Pathname] the file to rotate and lock
        # @yieldparam f [File] the locked file to write to
        def rotate_and_lock(filename)
          with_lock(filename) do |f|
            if File.size?(filename)
              rotating(filename) { |f1| yield f1 }
            else
              yield f
            end
          end
        end

        private

        # Flushes pending writes while the lock is still held, then unlocks
        # and closes the handle. Does nothing for nil or already-closed files.
        def release_lock(f)
          return if f.nil? || f.closed?

          begin
            f.flush
            f.flock(File::LOCK_UN)
          ensure
            f.close
          end
        end

        # Renames the current file out of the way, then yields a lock on a new
        # file with the original name.
        def rotating(filename)
          rotate_to = rotated_name(filename)

          File.rename(filename, rotate_to)
          with_lock(filename) { |f| yield f }
        end

        # Picks a name of the form `<filename>-<timestamp>` that is not in use,
        # waiting for the clock to advance if the candidate already exists.
        # Built by interpolation because `+` on a Pathname joins path segments
        # instead of appending a suffix.
        #
        # @return [String]
        def rotated_name(filename)
          loop do
            renamed_file = "#{filename}-#{Time.now.utc.iso8601(3)}"
            return renamed_file unless File.exist?(renamed_file)

            sleep(DEFAULT_SLEEP_INTERVAL_SECS)
          end
        end

        # Opens and exclusively locks `filename`, retrying until the locked
        # handle is still the file that `filename` refers to. Handles that
        # fail the check, or whose locking raised, are closed before retrying
        # or propagating.
        # NOTE(review): the identity check presumably guards against the file
        # being renamed by another process between open and lock — confirm.
        def acquire_lock(filename)
          Timeout.timeout(DEFAULT_TIMEOUT_SECS) do
            loop do
              f = File.open(filename, 'a+')
              acquired = false
              begin
                f.flock(File::LOCK_EX)
                acquired = File.identical?(filename, f)
                return f if acquired
              ensure
                # closing also drops the lock on a handle we are not keeping
                f.close unless acquired
              end

              # we do cover this, but it's called in a subprocess
              # so SimpleCov can't tell we've called it
              # :nocov:
              sleep(DEFAULT_SLEEP_INTERVAL_SECS)
              # :nocov:
            end
          end
        end

      end
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'faraday_middleware'
|
2
|
+
require 'oai/client'
|
3
|
+
|
4
|
+
module Merritt
  module TIND
    # Harvests records from an OAI feed and hands them to a FeedProcessor,
    # using the endpoints, paths and collection named in a Config.
    class Harvester

      # @return [Config] the settings this harvester runs with
      attr_reader :config

      # @param config [Config] harvester settings
      # @param dry_run [Boolean] when true, the feed is processed without
      #   starting an ingest server
      def initialize(config, dry_run: false)
        @config = config
        @dry_run = dry_run

        set_str = config.oai_set ? "'#{config.oai_set}'" : '<nil>'
        log.info("Initializing harvester for base URL #{oai_base_uri}, set #{set_str} => collection #{config.mrt_collection_ark}")
      end

      # Lists records from the OAI endpoint and processes them. Does nothing
      # when the configured stop file exists.
      #
      # @param from_time [Time, nil] start of the range; when nil it is
      #   derived from the last harvest (see {#determine_from_time})
      # @param until_time [Time, nil] end of the range
      def process_feed!(from_time: nil, until_time: nil)
        return if stop_file_present?

        opts = to_oai_opts(from_time, until_time)
        log.info("harvesting #{query_uri(opts)}")
        resp = oai_client.list_records(opts)
        feed = Feed.new(resp)
        return process_feed(feed, nil) if dry_run?

        with_server { |server| process_feed(feed, server) }
      end

      # Starts a one-time ingest server, yields it, and joins it afterwards.
      # If the server could not even be constructed there is nothing to join,
      # and the original error propagates unchanged.
      #
      # @yieldparam server [Mrt::Ingest::OneTimeServer]
      def with_server
        server = Mrt::Ingest::OneTimeServer.new
        server.start_server
        yield server
      ensure
        server.join_server if server
      end

      def dry_run?
        @dry_run
      end

      # @return [Boolean, nil] truthy when a stop file is configured and exists
      def stop_file_present?
        stop_file_path = config.stop_file_path
        stop_file_present = stop_file_path && stop_file_path.exist?
        log.info("Stop file present: #{config.stop_file_path}") if stop_file_present
        stop_file_present
      end

      # @return [LastHarvest] state of the previous harvest
      def last_harvest
        # read this from the file every time
        LastHarvest.from_file(config.last_harvest_path)
      end

      def oai_client
        @oai_client ||= Harvester.oai_client_for(oai_base_uri)
      end

      # @return [URI] the parsed `oai_base_url` setting
      def oai_base_uri
        @oai_base_uri ||= URI.parse(config.oai_base_url)
      end

      def mrt_collection_ark
        config.mrt_collection_ark
      end

      def mrt_ingest_profile
        config.mrt_ingest_profile
      end

      def mrt_inv_db
        @mrt_inv_db ||= InventoryDB.from_file(config.db_config_path)
      end

      def mrt_ingest_client
        # TODO: secure way to get username and password?
        @mrt_ingest_client ||= Mrt::Ingest::Client.new(config.mrt_ingest_url)
      end

      def log
        @log ||= Logging.new_logger(log_path, config.log_level)
      end

      # Chooses the start of the harvest range: the explicit value if given,
      # otherwise the datestamp of the oldest failed record from the last
      # harvest, otherwise that of the newest successful one (possibly nil).
      def determine_from_time(from_time = nil)
        return from_time if from_time

        lh = last_harvest
        lh.oldest_failed_datestamp || lh.newest_success_datestamp
      end

      def log_path
        config.log_path
      end

      private

      def process_feed(feed, server)
        return unless feed

        feed_processor = FeedProcessor.new(feed: feed, server: server, harvester: self)
        feed_processor.process_feed!
      end

      # Builds the request URI shown in the log. Values are form-encoded so
      # that characters such as spaces in a set name cannot make URI merging
      # raise.
      #
      # @param opts [Hash, nil] OAI options (from, until, set)
      # @return [URI]
      def query_uri(opts)
        params = { verb: 'ListRecords' }.merge(opts || {})
        oai_base_uri.merge("?#{URI.encode_www_form(params)}")
      end

      # Converts the time range and configured set into an options hash,
      # leaving out any that are nil.
      def to_oai_opts(from_time, until_time)
        from_time = determine_from_time(from_time)
        from_iso8601, until_iso8601 = Times.iso8601_range(from_time, until_time)
        { from: from_iso8601, until: until_iso8601, set: config.oai_set }.compact
      end

      class << self

        # @param config_yml [String, Pathname] path to the configuration file
        # @param dry_run [Boolean] see {Harvester#initialize}
        # @return [Harvester]
        def from_file(config_yml, dry_run: false)
          config = Config.from_file(config_yml)
          Harvester.new(config, dry_run: dry_run)
        end

        # Builds an OAI client whose HTTP connection retries on 503 (up to 5
        # times) and follows up to 5 redirects.
        def oai_client_for(base_uri)
          # Workaround for https://github.com/code4lib/ruby-oai/issues/45
          http_client = Faraday.new(base_uri) do |conn|
            conn.request(:retry, max: 5, retry_statuses: 503)
            conn.response(:follow_redirects, limit: 5)
            conn.adapter(:net_http)
          end
          OAI::Client.new(base_uri.to_s, http: http_client)
        end
      end

    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'mysql2'
|
2
|
+
|
3
|
+
module Merritt
  module TIND
    # Read access to the inventory database through a Mysql2 connection.
    class InventoryDB

      # TODO: is this right or should we be using erc_where? or both?
      EXISTING_OBJECT_SQL = <<~SQL.freeze
        SELECT o.*
        FROM inv_objects AS o
        JOIN inv_collections_inv_objects AS co
        ON co.inv_object_id = o.id
        JOIN inv_collections AS c
        ON c.id = co.inv_collection_id
        JOIN inv_localids AS li
        ON li.inv_object_ark = o.ark
        WHERE li.local_id = ?
        AND c.ark = ?
        LIMIT 1
      SQL

      # @return [Mysql2::Client] the open connection
      attr_reader :db_connection

      # @param db_config_h [Hash] connection settings, passed straight to
      #   Mysql2::Client.new
      def initialize(db_config_h)
        @db_connection = Mysql2::Client.new(db_config_h)
      end

      class << self
        # Connects using the section of a YAML file named after the current
        # Config.environment.
        #
        # @param db_config_path [String, Pathname] path to the YAML file
        # @return [InventoryDB]
        # @raise [RuntimeError] if the path is nil
        # @raise [ArgumentError] if the file does not exist
        def from_file(db_config_path)
          raise "Can't connect to nil database" unless db_config_path
          raise ArgumentError, "Specified database config #{db_config_path} does not exist" unless File.exist?(db_config_path)

          settings_by_env = YAML.load_file(db_config_path)
          InventoryDB.new(settings_by_env[Config.environment])
        end
      end

      # Looks up the object with the given local ID in the given collection.
      #
      # @param local_id [String] bound to the query's `li.local_id` parameter
      # @param collection_ark [String] bound to the query's `c.ark` parameter
      # @return [OpenStruct, nil] the first matching row, or nil if none
      def find_existing_object(local_id, collection_ark)
        row = existing_object_stmt.execute(local_id, collection_ark).first
        row ? OpenStruct.new(row) : nil
      end

      private

      # The prepared lookup statement, created once per connection.
      def existing_object_stmt
        @existing_object_stmt ||= db_connection.prepare(EXISTING_OBJECT_SQL)
      end

    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Merritt
  module TIND

    # The outcome of a harvest that the next run needs: the oldest record
    # that failed and the newest record that succeeded. It is stored as YAML
    # under the keys 'oldest_failed' and 'newest_success'.
    class LastHarvest

      OLDEST_FAILED = 'oldest_failed'.freeze
      NEWEST_SUCCESS = 'newest_success'.freeze

      attr_reader :oldest_failed
      attr_reader :newest_success

      # @param oldest_failed [Record, nil] the oldest record that failed to submit
      # @param newest_success [Record, nil] the newest record successfully submitted
      def initialize(oldest_failed: nil, newest_success: nil)
        @oldest_failed = oldest_failed
        @newest_success = newest_success
      end

      # @return [Hash] both records as hashes (nil for a missing record)
      def to_h
        {
          OLDEST_FAILED => (oldest_failed && oldest_failed.to_h),
          NEWEST_SUCCESS => (newest_success && newest_success.to_h)
        }
      end

      # @return [String] YAML form of {#to_h}
      def to_yaml
        to_h.to_yaml
      end

      # Writes the YAML form to the given file via Files.rotate_and_lock.
      #
      # @param last_harvest_yml [String, Pathname] the file to write
      def write_to(last_harvest_yml)
        Files.rotate_and_lock(last_harvest_yml) do |f|
          f.write(to_yaml)
        end
      end

      # @return [Object, nil] datestamp of the oldest failed record, if any
      def oldest_failed_datestamp
        oldest_failed && oldest_failed.datestamp
      end

      # @return [Object, nil] datestamp of the newest successful record, if any
      def newest_success_datestamp
        newest_success && newest_success.datestamp
      end

      # Returns a new LastHarvest that also takes the given records into
      # account: the later of `success` and the current newest success, and
      # the earlier of `failure` and the current oldest failure.
      #
      # @param success [Record, nil] a record that was processed successfully
      # @param failure [Record, nil] a record that failed
      # @return [LastHarvest]
      def update(success: nil, failure: nil)
        LastHarvest.new(
          newest_success: Record.later(success, newest_success),
          oldest_failed: Record.earlier(failure, oldest_failed)
        )
      end

      private

      # Copies the wrapped records too, so a dup does not share them.
      def initialize_dup(source)
        @newest_success = source.newest_success && source.newest_success.dup
        @oldest_failed = source.oldest_failed && source.oldest_failed.dup
      end

      # Copies the wrapped records too, so a clone does not share them.
      def initialize_clone(source)
        @newest_success = source.newest_success && source.newest_success.clone
        @oldest_failed = source.oldest_failed && source.oldest_failed.clone
      end

      class << self
        # Reads the state from a YAML file.
        #
        # @param last_harvest_yml [String, Pathname, nil] the file to read
        # @return [LastHarvest] the stored state, or an empty one when the
        #   path is nil, the file does not exist, or the file is empty
        def from_file(last_harvest_yml)
          return from_hash(YAML.load_file(last_harvest_yml)) if last_harvest_yml && File.exist?(last_harvest_yml)

          # A missing last_harvest.yml is normal
          LastHarvest.new
        end

        # @param h [Hash, nil, false] parsed YAML; an empty YAML document
        #   loads as a falsy value and yields an empty LastHarvest
        # @return [LastHarvest]
        def from_hash(h)
          return LastHarvest.new unless h

          LastHarvest.new(
            oldest_failed: Record.from_hash(h[OLDEST_FAILED]),
            newest_success: Record.from_hash(h[NEWEST_SUCCESS])
          )
        end

      end

    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
module Merritt
  module TIND
    # Factory for the harvester's loggers: tab-separated lines with an
    # ISO 8601 timestamp, severity and message.
    module Logging
      # Passed to Logger.new as its second (shift_age) argument
      NUM_LOG_FILES = 10
      DEFAULT_LOG_LEVEL = Logger::DEBUG

      class << self
        # Formatter callback with the signature Logger expects; the third
        # argument (program name) is ignored.
        #
        # @return [String] `<iso8601 time>\t<severity>\t<message>\n`
        def fmt_log(severity, datetime, _, msg)
          [datetime.iso8601, severity, msg].join("\t") + "\n"
        end

        # Creates a logger. When the device is a path rather than an IO-like
        # object, the parent directory is created if it does not exist, and
        # that is reported as the logger's first message(s).
        #
        # @param log_dev [IO, String, Pathname, nil] log device; nil means STDERR
        # @param log_level [Object, nil] logger level; nil means debug
        # @return [Logger]
        def new_logger(log_dev = nil, log_level = nil)
          device = log_dev || STDERR
          level = log_level || DEFAULT_LOG_LEVEL

          new_dirs = ensure_log_dir(device)
          Logger.new(device, NUM_LOG_FILES, level: level, formatter: Logging.method(:fmt_log)).tap do |logger|
            new_dirs.each { |d| logger.info("Created log directory #{d}") } if new_dirs
          end
        end

        private

        def io_like?(log_dev)
          # This is how Ruby's Logger identifies an IO-like log device
          %i[write close].all? { |m| log_dev.respond_to?(m) }
        end

        # Creates the parent directory of a path-like log device when needed.
        #
        # @return [Array, nil] the result of FileUtils.mkdir_p when the
        #   directory had to be created, nil otherwise
        def ensure_log_dir(log_dev)
          return if io_like?(log_dev)

          # assume it's a string or a pathname
          log_dir = Pathname.new(log_dev).parent
          log_dir.exist? ? nil : FileUtils.mkdir_p(log_dir)
        end
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'oai/client'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
module Merritt
  module TIND
    # A harvested record: its OAI identifier and datestamp plus, optionally,
    # the OAI metadata element from which Dublin Core values are read.
    class Record
      IDENTIFIER = 'identifier'.freeze
      DATESTAMP = 'datestamp'.freeze

      attr_reader :identifier
      attr_reader :datestamp
      attr_reader :metadata

      # @param identifier [String] the record identifier
      # @param datestamp [Time, nil] the record datestamp
      # @param oai_metadata [Object, nil] metadata element searched with REXML::XPath
      def initialize(identifier:, datestamp:, oai_metadata: nil)
        @identifier = identifier
        @datestamp = datestamp
        @metadata = oai_metadata
      end

      # @return [Hash] values for the 'what', 'where', 'when', 'when/created'
      #   and 'when/modified' keys; the creation date is the first dc:date,
      #   or the datestamp when there is none
      def erc
        # TODO: something smarter when we know the real requirements
        created = dc_dates.first || datestamp
        {
          'what' => identifier,
          'where' => local_id,
          'when' => created,
          'when/created' => created,
          'when/modified' => datestamp
        }
      end

      # @return [Array<String>] text of every dc:identifier in the metadata
      def dc_identifiers
        @dc_identifiers ||= dc_texts('.//dc:identifier')
      end

      # @return [Array<Time>] every dc:date in the metadata, parsed with Time.parse
      def dc_dates
        @dc_dates ||= dc_texts('.//dc:date').map { |text| Time.parse(text) }
      end

      # @return [Array<String>] text of every dc:title in the metadata
      def dc_titles
        @dc_titles ||= dc_texts('.//dc:title')
      end

      # @return [Array<String>] text of every dc:creator in the metadata
      def dc_creators
        @dc_creators ||= dc_texts('.//dc:creator')
      end

      # @return [URI, nil] the first dc:identifier that starts with 'http'
      #   and ends with 'jpg', parsed as a URI
      def content_uri
        @content_uri ||= begin
          # TODO: something smarter when we know the real requirements
          jpg_url = dc_identifiers.find { |dc_id| dc_id.start_with?('http') && dc_id.end_with?('jpg') }
          jpg_url && URI.parse(jpg_url)
        end
      end

      # @return [String] the first dc:identifier, or the record identifier
      #   when there is none
      def local_id
        # TODO: something smarter when we know the real requirements
        dc_identifiers.first || identifier
      end

      # @return [Hash] identifier and datestamp under string keys
      def to_h
        { IDENTIFIER => identifier, DATESTAMP => datestamp }
      end

      private

      # Text content of every metadata node matching the XPath expression.
      def dc_texts(xpath)
        REXML::XPath.match(metadata, xpath).map(&:text)
      end

      class << self

        # @return [Record, nil] whichever record has the later datestamp;
        #   r2 when they compare equal; the other one when either is nil
        def later(r1, r2)
          return r1 if r2.nil?
          return r2 if r1.nil?

          (r1.datestamp <=> r2.datestamp) > 0 ? r1 : r2
        end

        # @return [Record, nil] whichever record has the earlier datestamp;
        #   r2 when they compare equal; the other one when either is nil
        def earlier(r1, r2)
          return r1 if r2.nil?
          return r2 if r1.nil?

          (r1.datestamp <=> r2.datestamp) < 0 ? r1 : r2
        end

        # @param h [Hash, nil] a hash as produced by {Record#to_h}
        # @return [Record, nil] nil when h is nil or false
        def from_hash(h)
          h ? Record.new(identifier: h[IDENTIFIER], datestamp: h[DATESTAMP]) : nil
        end

        # Constructs a new {Record} wrapping the specified record.
        #
        # @param oai_record [OAI::Record] An OAI record as returned by `OAI::Client`
        # @raise [ArgumentError] if oai_record is nil
        def from_oai(oai_record)
          raise ArgumentError, "can't parse nil record" unless oai_record

          header = oai_record.header
          raw_datestamp = header.datestamp
          Record.new(
            identifier: header.identifier,
            datestamp: raw_datestamp && Time.parse(raw_datestamp),
            oai_metadata: oai_record.metadata
          )
        end

      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'mrt/ingest'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Merritt
  module TIND
    # Decides whether one harvested record needs to be (re)submitted and, if
    # so, submits it to ingest.
    class RecordProcessor

      USER_AGENT = 'Merritt/TIND Harvester'.freeze

      attr_reader :record
      attr_reader :harvester
      attr_reader :server

      # @param record [Record] the record to process
      # @param harvester [Harvester] supplies the log, inventory DB, ingest
      #   client, ingest profile, collection ARK and dry-run flag
      # @param server [Object, nil] handed to the ingest object as its server
      def initialize(record, harvester, server)
        @record = record
        @harvester = harvester
        @server = server
      end

      # Skips the record when the inventory already has an object for it that
      # is at least as new as the record's datestamp; otherwise logs it and,
      # unless this is a dry run, submits it.
      #
      # @return [true]
      def process_record!
        return true if already_up_to_date?

        log.info("Processing record: #{local_id} (content: #{content_uri})")
        return true if harvester.dry_run?

        submit_to_ingest!
      end

      private

      # Adds the content URI to the ingest object, starts the ingest and logs
      # the batch that was queued.
      def submit_to_ingest!
        ingest_object.add_component(content_uri)
        response = ingest_object.start_ingest(ingest_client, ingest_profile, USER_AGENT)
        log.info("Batch #{response.batch_id} queued at #{response.submission_date}")
        true # TODO: is there anything in the response that might cause us to return false?
      end

      def already_up_to_date?
        @already_up_to_date ||= existing_object && existing_object.modified >= record.datestamp
      end

      # The inventory object for this record, or false when there is none.
      # false (rather than nil) marks "already looked up, nothing found", so
      # the database is queried only once.
      def existing_object
        @existing_object = (find_existing_object || false) if @existing_object.nil?
        @existing_object
      end

      def find_existing_object
        inv_db.find_existing_object(local_id, collection_ark)
      end

      def inv_db
        harvester.mrt_inv_db
      end

      def local_id
        record.local_id
      end

      def content_uri
        record.content_uri
      end

      def collection_ark
        harvester.mrt_collection_ark
      end

      def ingest_client
        harvester.mrt_ingest_client
      end

      def ingest_profile
        harvester.mrt_ingest_profile
      end

      def log
        harvester.log
      end

      def ingest_object
        @ingest_object ||= Mrt::Ingest::IObject.new(
          erc: record.erc,
          server: server,
          local_identifier: record.local_id
        )
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Merritt
  module TIND
    # Helpers for turning a from/until pair of times into UTC ISO 8601 strings.
    module Times
      class << self
        # @param from_time [Time, nil] start of the range
        # @param until_time [Time, nil] end of the range
        # @return [Array<String, nil>] `[from, until]` as UTC ISO 8601 strings,
        #   with nil kept for a missing bound
        # @raise [RangeError] if both bounds are given and from is after until
        # @raise [ArgumentError] if a bound is neither falsy nor responds to `utc`
        def iso8601_range(from_time, until_time)
          valid_range(from_time, until_time).map { |time| time && time.iso8601 }
        end

        private

        # Converts both bounds to UTC and checks that they are in order.
        def valid_range(from_time, until_time)
          from_utc = utc_or_nil(from_time)
          until_utc = utc_or_nil(until_time)

          out_of_order = from_utc && until_utc && from_utc > until_utc
          raise RangeError, "from_time #{from_utc} must be <= until_time #{until_utc}" if out_of_order

          [from_utc, until_utc]
        end

        # @return the result of `time.utc`, or nil for a falsy value
        def utc_or_nil(time)
          if time.respond_to?(:utc)
            time.utc
          elsif time
            raise ArgumentError, "time #{time} does not appear to be a Time"
          end
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,298 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mrt-tind-harvester
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Moles
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-05-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mrt-ingest
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.0.5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.5
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mysql2
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.4.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: oai
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rest-client
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: capistrano
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.4'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.4'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: capistrano-bundler
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.1'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.1'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: database_cleaner
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.5'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.5'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: factory_bot
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '4.11'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '4.11'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rake
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '12.0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '12.0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rspec
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '3.8'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '3.8'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: rubocop
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.68'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0.68'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: rubocop-rspec
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '1.33'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '1.33'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: simplecov
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0.16'
|
202
|
+
type: :development
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0.16'
|
209
|
+
- !ruby/object:Gem::Dependency
|
210
|
+
name: simplecov-console
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
212
|
+
requirements:
|
213
|
+
- - "~>"
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0.4'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
requirements:
|
220
|
+
- - "~>"
|
221
|
+
- !ruby/object:Gem::Version
|
222
|
+
version: '0.4'
|
223
|
+
- !ruby/object:Gem::Dependency
|
224
|
+
name: standalone_migrations
|
225
|
+
requirement: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - "~>"
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: '5.2'
|
230
|
+
type: :development
|
231
|
+
prerelease: false
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - "~>"
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '5.2'
|
237
|
+
- !ruby/object:Gem::Dependency
|
238
|
+
name: webmock
|
239
|
+
requirement: !ruby/object:Gem::Requirement
|
240
|
+
requirements:
|
241
|
+
- - "~>"
|
242
|
+
- !ruby/object:Gem::Version
|
243
|
+
version: '3.5'
|
244
|
+
type: :development
|
245
|
+
prerelease: false
|
246
|
+
version_requirements: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - "~>"
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: '3.5'
|
251
|
+
description: Harvests TIND OAI-PMH feed to identify files for ingest into Merritt
|
252
|
+
email:
|
253
|
+
- david.moles@ucop.edu
|
254
|
+
executables:
|
255
|
+
- mrt-tind-harvester
|
256
|
+
extensions: []
|
257
|
+
extra_rdoc_files: []
|
258
|
+
files:
|
259
|
+
- bin/mrt-tind-harvester
|
260
|
+
- lib/merritt.rb
|
261
|
+
- lib/merritt/tind.rb
|
262
|
+
- lib/merritt/tind/config.rb
|
263
|
+
- lib/merritt/tind/feed.rb
|
264
|
+
- lib/merritt/tind/feed_processor.rb
|
265
|
+
- lib/merritt/tind/files.rb
|
266
|
+
- lib/merritt/tind/harvester.rb
|
267
|
+
- lib/merritt/tind/inventory_db.rb
|
268
|
+
- lib/merritt/tind/last_harvest.rb
|
269
|
+
- lib/merritt/tind/logging.rb
|
270
|
+
- lib/merritt/tind/module_info.rb
|
271
|
+
- lib/merritt/tind/record.rb
|
272
|
+
- lib/merritt/tind/record_processor.rb
|
273
|
+
- lib/merritt/tind/times.rb
|
274
|
+
homepage: https://github.com/CDLUC3/mrt-tind-harvester
|
275
|
+
licenses:
|
276
|
+
- MIT
|
277
|
+
metadata: {}
|
278
|
+
post_install_message:
|
279
|
+
rdoc_options: []
|
280
|
+
require_paths:
|
281
|
+
- lib
|
282
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
283
|
+
requirements:
|
284
|
+
- - "~>"
|
285
|
+
- !ruby/object:Gem::Version
|
286
|
+
version: '2.4'
|
287
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
288
|
+
requirements:
|
289
|
+
- - ">="
|
290
|
+
- !ruby/object:Gem::Version
|
291
|
+
version: '0'
|
292
|
+
requirements: []
|
293
|
+
rubyforge_project:
|
294
|
+
rubygems_version: 2.6.14.1
|
295
|
+
signing_key:
|
296
|
+
specification_version: 4
|
297
|
+
summary: TIND harvester for Merritt
|
298
|
+
test_files: []
|