mrt-tind-harvester 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/mrt-tind-harvester +74 -0
- data/lib/merritt.rb +1 -0
- data/lib/merritt/tind.rb +1 -0
- data/lib/merritt/tind/config.rb +114 -0
- data/lib/merritt/tind/feed.rb +29 -0
- data/lib/merritt/tind/feed_processor.rb +61 -0
- data/lib/merritt/tind/files.rb +66 -0
- data/lib/merritt/tind/harvester.rb +138 -0
- data/lib/merritt/tind/inventory_db.rb +54 -0
- data/lib/merritt/tind/last_harvest.rb +84 -0
- data/lib/merritt/tind/logging.rb +42 -0
- data/lib/merritt/tind/module_info.rb +7 -0
- data/lib/merritt/tind/record.rb +109 -0
- data/lib/merritt/tind/record_processor.rb +90 -0
- data/lib/merritt/tind/times.rb +35 -0
- metadata +298 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: a8a12f69bf55200b7d6b02ee260281c3d1d7b050
|
|
4
|
+
data.tar.gz: 6ea06ac8dbff3b873ef69042512f3d81fb5b2e73
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7fdd4a906e4b22079c5a15f2eab8062dd551e6cc8e3ecf7d8909554b0bdec73ac84f616c12a862f03f854d57e3146880f59f1f82f5d3f7d38c2abce92222d7ee
|
|
7
|
+
data.tar.gz: dddc4f22729901b865d383dcfb63c8a0a04d9f4921a5c5a137ef6c619eff5886842747a6a5ffdae3e168e31bec189681d3e096fbe22b1cce52b7443d79ea0f5e
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
Dir.chdir(__dir__) { require 'bundler/setup' }
|
|
4
|
+
|
|
5
|
+
require 'optparse'
|
|
6
|
+
require 'optparse/time'
|
|
7
|
+
require 'ostruct'
|
|
8
|
+
require 'merritt/tind'
|
|
9
|
+
|
|
10
|
+
# Help text for each command-line option; TINDHarvesterApp#option_parser
# reads these by name (e.g. `USAGE.CONFIG`). Every entry, and the struct
# itself, is frozen.
USAGE = OpenStruct.new(
  CONFIG: 'path to configuration file (required)'.freeze,
  DRY_RUN: 'dry run (harvest, but do not submit or update last_harvest)'.freeze,
  FROM: 'start date/time (inclusive) for selective harvesting'.freeze,
  UNTIL: 'end date/time (inclusive) for selective harvesting'.freeze,
  HELP: 'print help and exit'.freeze
).freeze
|
|
17
|
+
|
|
18
|
+
# Command-line application wrapping Merritt::TIND::Harvester: parses the
# options, loads the harvester from a configuration file, and runs a
# single harvest.
class TINDHarvesterApp
  # @return [OpenStruct] option values; `dry_run` and `help` start out
  #   false, the rest are set by the parser as flags are encountered
  attr_reader :options

  def initialize
    @options = OpenStruct.new(dry_run: false, help: false)
  end

  # The parser that stores each recognized flag on {#options}. Built once
  # and reused, so parsing and help output share one instance.
  #
  # @return [OptionParser]
  def option_parser
    @option_parser ||= OptionParser.new do |opts|
      opts.on('-c CONFIG', '--config CONFIG', USAGE.CONFIG) { |config_file| options.config_file = config_file }
      opts.on('-f', '--from DATETIME', Time, USAGE.FROM) { |from_time| options.from_time = to_utc_time(from_time) }
      opts.on('-u', '--until DATETIME', Time, USAGE.UNTIL) { |until_time| options.until_time = to_utc_time(until_time) }
      opts.on('-n', '--dry-run', USAGE.DRY_RUN) { options.dry_run = true }
      opts.on('-h', '--help', USAGE.HELP) { options.help = true }
    end
  end

  # Parses ARGV and runs the harvest. Any StandardError (including option
  # parsing errors) is printed to stderr followed by the help text, and the
  # process exits with status 1. The `exit` calls made along the way raise
  # SystemExit, which is not a StandardError and so is not caught here.
  def do_harvest!
    option_parser.parse!

    print_help_and_exit(0) if options.help

    harvester = Merritt::TIND::Harvester.from_file(config_file, dry_run: options.dry_run)
    warn("Starting harvester; logging to #{harvester.log_path}")
    harvester.process_feed!(from_time: options.from_time, until_time: options.until_time)
  rescue StandardError => e
    warn(e)
    print_help_and_exit(1)
  end

  private

  # Converts a parsed option time to UTC without modifying the original.
  #
  # @param time [Time, nil]
  # @return [Time, nil] a new Time in UTC, or nil if `time` is nil
  def to_utc_time(time)
    time && time.getutc
  end

  # @return [String] the configuration file path from the options; exits
  #   with an error message if it was not given or does not exist
  def config_file
    path = options.config_file
    exit_with_error('No configuration file specified') unless path
    exit_with_error("The specified configuration file does not exist: #{path}") unless File.exist?(path)

    path
  end

  # @return [String] the option summary
  def help
    option_parser.to_s
  end

  # Prints `msg` and the help text to stderr, then exits with status 1.
  def exit_with_error(msg)
    warn(msg)
    print_help_and_exit(1)
  end

  # Prints the help text to stderr, then exits with the given status.
  def print_help_and_exit(status)
    warn(help)
    exit(status)
  end
end
|
|
72
|
+
|
|
73
|
+
# Script entry point: build the application and run one harvest.
TINDHarvesterApp.new.do_harvest!
|
data/lib/merritt.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Require every Ruby file directly under the `merritt` directory next to this file, in sorted path order.
Dir.glob(File.expand_path('merritt/*.rb', __dir__)).sort.each(&method(:require))
|
data/lib/merritt/tind.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Require every Ruby file directly under the `tind` directory next to this file, in sorted path order.
Dir.glob(File.expand_path('tind/*.rb', __dir__)).sort.each(&method(:require))
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
require 'yaml'
|
|
3
|
+
|
|
4
|
+
module Merritt
|
|
5
|
+
module TIND
|
|
6
|
+
class Config
|
|
7
|
+
|
|
8
|
+
attr_reader :config_h
|
|
9
|
+
attr_reader :config_path
|
|
10
|
+
|
|
11
|
+
def initialize(config_h = nil, config_yml: nil)
|
|
12
|
+
@config_h = config_h || {}
|
|
13
|
+
@config_path = Pathname.new(config_yml).realpath if config_yml
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def oai_base_url
|
|
17
|
+
oai_config_h['base_url']
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def oai_set
|
|
21
|
+
oai_config_h['set']
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def stop_file_path
|
|
25
|
+
@stop_file_path ||= begin
|
|
26
|
+
stop_file = config_h['stop_file']
|
|
27
|
+
resolve_relative_path(stop_file)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def mrt_collection_ark
|
|
32
|
+
merritt_config_h['collection_ark']
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def mrt_ingest_url
|
|
36
|
+
merritt_config_h['ingest_url']
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def mrt_ingest_profile
|
|
40
|
+
merritt_config_h['ingest_profile']
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def db_config_path
|
|
44
|
+
@db_config_path ||= begin
|
|
45
|
+
db = merritt_config_h['database']
|
|
46
|
+
resolve_relative_path(db)
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def log_level
|
|
51
|
+
log_config_h['level']
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def log_path
|
|
55
|
+
@log_path ||= begin
|
|
56
|
+
lp = log_config_h['file']
|
|
57
|
+
resolve_relative_path(lp)
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def last_harvest_path
|
|
62
|
+
@last_harvest_path ||= begin
|
|
63
|
+
lh = config_h['last_harvest']
|
|
64
|
+
resolve_relative_path(lh)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
def oai_config_h
|
|
71
|
+
config_h['oai'] || {}
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def merritt_config_h
|
|
75
|
+
config_h['merritt'] || {}
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def log_config_h
|
|
79
|
+
config_h['log'] || {}
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def resolve_relative_path(filename)
|
|
83
|
+
return nil unless filename
|
|
84
|
+
|
|
85
|
+
pathname = Pathname.new(filename)
|
|
86
|
+
return pathname if pathname.absolute?
|
|
87
|
+
return pathname unless config_path
|
|
88
|
+
|
|
89
|
+
(config_path.parent + pathname).cleanpath
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
class << self
|
|
93
|
+
|
|
94
|
+
def from_file(config_yml)
|
|
95
|
+
# A missing config.yml is not normal
|
|
96
|
+
raise ArgumentError, "Can't read config from nil file" unless config_yml
|
|
97
|
+
raise ArgumentError, "Specified config file #{config_yml} does not exist" unless File.exist?(config_yml)
|
|
98
|
+
|
|
99
|
+
config_h = YAML.load_file(config_yml)
|
|
100
|
+
env_config = config_h[environment]
|
|
101
|
+
raise ArgumentError, "No configuration for environment '#{environment}' found in #{config_yml}" if env_config.nil? || env_config.empty?
|
|
102
|
+
|
|
103
|
+
Config.new(env_config, config_yml: config_yml)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def environment
|
|
107
|
+
%w[HARVESTER_ENV RAILS_ENV RACK_ENV].each { |v| return ENV[v] if ENV[v] }
|
|
108
|
+
'development'
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'oai/client'
|
|
2
|
+
|
|
3
|
+
module Merritt
  module TIND
    # Enumerable wrapper around a ListRecords response that yields a
    # {Record} for each OAI record in it.
    class Feed
      include Enumerable

      # @param resp [#each] the response; if it reports a resumption token,
      #   its `full` form is used instead
      def initialize(resp)
        @resp = ensure_full_response(resp)
      end

      # Yields a {Record} built from each OAI record in the response.
      #
      # @return [Enumerator] if no block is given
      def each(&block)
        return to_enum(:each) unless block

        @resp.each do |oai_record|
          block.call(Record.from_oai(oai_record))
        end
      end

      private

      # Returns `resp.full` when `resp` has a resumption token; otherwise
      # returns `resp` itself (either it is already wrapped and has no
      # `resumption_token` method, or there is nothing to paginate).
      def ensure_full_response(resp)
        paginated = resp.respond_to?(:resumption_token) && resp.resumption_token
        paginated ? resp.full : resp
      end

    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
module Merritt
  module TIND
    # Runs every record of a feed through a {RecordProcessor}, tracking the
    # outcome of each in a {LastHarvest} that is written out at the end
    # (unless this is a dry run).
    class FeedProcessor

      attr_reader :feed, :harvester, :server

      # @param feed [#each] the records to process
      # @param server [Object, nil] passed through to each {RecordProcessor}
      # @param harvester [Harvester] supplies config, log, dry-run flag and last harvest
      def initialize(feed:, server:, harvester:)
        @feed = feed
        @server = server
        @harvester = harvester
      end

      # Processes each record in turn, then logs and stores the updated
      # last-harvest state.
      def process_feed!
        feed.each do |record|
          process_record(record, server)
        end

        target = config.last_harvest_path
        indented_yaml = last_harvest_next.to_yaml.gsub(/^/, "\t")
        log.debug("Updating #{target}:\n#{indented_yaml}")
        update_last_harvest!
      end

      private

      def config
        harvester.config
      end

      def log
        harvester.log
      end

      def dry_run?
        harvester.dry_run?
      end

      # The last-harvest state being accumulated for this run; starts as a
      # clone of the harvester's current state, or a blank one if it has none.
      def last_harvest_next
        return @last_harvest_next if @last_harvest_next

        previous = harvester.last_harvest
        @last_harvest_next = previous ? previous.clone : LastHarvest.new
      end

      # Writes the accumulated state to the configured path, or only logs
      # that it was skipped when this is a dry run.
      def update_last_harvest!
        return log.info("Dry run: #{config.last_harvest_path} not updated") if dry_run?

        last_harvest_next.write_to(config.last_harvest_path)
      end

      # Processes one record, recording it as a success, or as a failure
      # (after logging a warning) if anything raised a StandardError.
      def process_record(record, server)
        processor = RecordProcessor.new(record, harvester, server)
        processor.process_record!
        @last_harvest_next = last_harvest_next.update(success: record)
      rescue StandardError => e
        # TODO: can we identify failures after submission?
        log.warn(e)
        @last_harvest_next = last_harvest_next.update(failure: record)
      end
    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require 'timeout'
|
|
2
|
+
|
|
3
|
+
module Merritt
  module TIND
    # Helpers for writing a file under an exclusive `flock`, optionally
    # renaming ("rotating") the existing file out of the way first.
    module Files
      # Maximum time, in seconds, to spend acquiring a lock
      DEFAULT_TIMEOUT_SECS = 5
      # Pause, in seconds, between retries
      DEFAULT_SLEEP_INTERVAL_SECS = 0.1

      class << self

        # Opens `filename` (mode `a+`, so it is created if missing), takes an
        # exclusive lock on it and yields the open file. When the block
        # finishes, normally or not, the file is closed.
        #
        # @param filename [String] path of the file to lock
        # @yieldparam f [File] the locked file
        # @return the value of the block
        # @raise [Timeout::Error] if the lock can't be acquired within
        #   {DEFAULT_TIMEOUT_SECS} seconds
        def with_lock(filename)
          f = acquire_lock(filename)
          yield f
        ensure
          # Closing flushes buffered writes and releases the lock together, so
          # no other process can take the lock while our data is still sitting
          # in a buffer; it also frees the file descriptor.
          f.close if f && !f.closed?
        end

        # Like {with_lock}, but if the file already has content it is first
        # renamed to a timestamped name, and the block is given a fresh,
        # locked file at the original path instead.
        #
        # @param filename [String] path of the file to write
        # @yieldparam f [File] the locked file to write to
        def rotate_and_lock(filename)
          with_lock(filename) do |f|
            if File.size?(filename)
              rotating(filename) { |f1| yield f1 }
            else
              yield f
            end
          end
        end

        private

        # Renames `filename` to a free timestamped name, then yields a new
        # locked file at the original path.
        def rotating(filename)
          rotate_to = rotated_name(filename)

          File.rename(filename, rotate_to)
          with_lock(filename) { |f| yield f }
        end

        # @return [String] `filename` plus a millisecond UTC timestamp suffix,
        #   retrying until the name is not already taken
        def rotated_name(filename)
          loop do
            renamed_file = filename + '-' + Time.now.utc.iso8601(3)
            return renamed_file unless File.exist?(renamed_file)

            sleep(DEFAULT_SLEEP_INTERVAL_SECS)
          end
        end

        # Opens and exclusively locks `filename`. After locking, checks that
        # the path still refers to the file that was opened; if not (it was
        # replaced while we waited), the stale handle is closed and the whole
        # sequence is retried.
        #
        # @return [File] the open, locked file
        # @raise [Timeout::Error] after {DEFAULT_TIMEOUT_SECS} seconds
        def acquire_lock(filename)
          Timeout.timeout(DEFAULT_TIMEOUT_SECS) do
            loop do
              f = File.open(filename, 'a+')
              acquired = false
              begin
                f.flock(File::LOCK_EX)
                acquired = File.identical?(filename, f)
                return f if acquired
              ensure
                # Runs on retry and on timeout alike: closing the handle we
                # are not going to return releases any lock it holds
                f.close unless acquired
              end

              # we do cover this, but it's called in a subprocess
              # so SimpleCov can't tell we've called it
              # :nocov:
              sleep(DEFAULT_SLEEP_INTERVAL_SECS)
              # :nocov:
            end
          end
        end

      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
require 'faraday_middleware'
|
|
2
|
+
require 'oai/client'
|
|
3
|
+
|
|
4
|
+
module Merritt
  module TIND
    # Harvests records from the configured OAI-PMH feed and hands them to a
    # {FeedProcessor} for processing.
    class Harvester

      # @return [Config] the configuration this harvester was built from
      attr_reader :config

      # @param config [Config] the harvester configuration
      # @param dry_run [Boolean] when true, the feed is processed without
      #   starting an ingest server
      def initialize(config, dry_run: false)
        @config = config
        @dry_run = dry_run

        set_str = config.oai_set ? "'#{config.oai_set}'" : '<nil>'
        log.info("Initializing harvester for base URL #{oai_base_uri}, set #{set_str} => collection #{config.mrt_collection_ark}")
      end

      # Lists records from the OAI feed and processes them. Does nothing if
      # the configured stop file is present.
      #
      # @param from_time [Time, nil] start of the range; when nil it is taken
      #   from the last harvest (see {#determine_from_time})
      # @param until_time [Time, nil] end of the range
      def process_feed!(from_time: nil, until_time: nil)
        return if stop_file_present?

        opts = to_oai_opts(from_time, until_time)
        log.info("harvesting #{query_uri(opts)}")
        resp = oai_client.list_records(opts)
        feed = Feed.new(resp)
        return process_feed(feed, nil) if dry_run?

        with_server { |server| process_feed(feed, server) }
      end

      # Starts a one-time ingest server, yields it, and joins it afterwards
      # whether or not the block raised.
      #
      # @yieldparam server [Mrt::Ingest::OneTimeServer]
      def with_server
        server = Mrt::Ingest::OneTimeServer.new
        server.start_server
        yield server
      ensure
        # `server` is still nil if the constructor itself raised; calling
        # join_server on nil would replace that error with a NoMethodError
        server.join_server if server
      end

      # @return [Boolean] the dry_run flag given at construction
      def dry_run?
        @dry_run
      end

      # @return [Boolean, nil] truthy if a stop file is configured and exists
      #   (in which case that is also logged)
      def stop_file_present?
        stop_file_path = config.stop_file_path
        stop_file_present = stop_file_path && stop_file_path.exist?
        log.info("Stop file present: #{config.stop_file_path}") if stop_file_present
        stop_file_present
      end

      # @return [LastHarvest] the last-harvest state
      def last_harvest
        # read this from the file every time
        LastHarvest.from_file(config.last_harvest_path)
      end

      def oai_client
        @oai_client ||= Harvester.oai_client_for(oai_base_uri)
      end

      # @return [URI] the parsed `oai_base_url` setting
      def oai_base_uri
        @oai_base_uri ||= URI.parse(config.oai_base_url)
      end

      def mrt_collection_ark
        config.mrt_collection_ark
      end

      def mrt_ingest_profile
        config.mrt_ingest_profile
      end

      def mrt_inv_db
        @mrt_inv_db ||= InventoryDB.from_file(config.db_config_path)
      end

      def mrt_ingest_client
        # TODO: secure way to get username and password?
        @mrt_ingest_client ||= Mrt::Ingest::Client.new(config.mrt_ingest_url)
      end

      def log
        @log ||= Logging.new_logger(log_path, config.log_level)
      end

      # Chooses the start of the harvest range: the explicit `from_time` if
      # given; otherwise the datestamp of the oldest failed record from the
      # last harvest; otherwise that of the newest successful one.
      #
      # @param from_time [Time, nil]
      # @return [Time, nil]
      def determine_from_time(from_time = nil)
        return from_time if from_time

        lh = last_harvest
        oldest_failed = lh.oldest_failed_datestamp
        return oldest_failed if oldest_failed

        lh.newest_success_datestamp
      end

      def log_path
        config.log_path
      end

      private

      def process_feed(feed, server)
        return unless feed

        feed_processor = FeedProcessor.new(feed: feed, server: server, harvester: self)
        feed_processor.process_feed!
      end

      # Builds the request URL shown in the log. OAI-PMH names the request
      # type with the `verb` argument, so the query starts `verb=ListRecords`.
      # Values are appended as given, without URL encoding.
      #
      # @param opts [Hash, nil] the ListRecords arguments
      # @return [URI]
      def query_uri(opts)
        params = ['verb=ListRecords']
        opts.each { |k, v| params << "#{k}=#{v}" } if opts
        oai_base_uri.merge("?#{params.join('&')}")
      end

      # @return [Hash] the `from`, `until` and `set` arguments, omitting any that are nil
      def to_oai_opts(from_time, until_time)
        from_time = determine_from_time(from_time)
        from_iso8601, until_iso8601 = Times.iso8601_range(from_time, until_time)
        { from: from_iso8601, until: until_iso8601, set: config.oai_set }.compact
      end

      class << self

        # @param config_yml [String, Pathname] path to the configuration file
        # @param dry_run [Boolean]
        # @return [Harvester]
        def from_file(config_yml, dry_run: false)
          config = Config.from_file(config_yml)
          Harvester.new(config, dry_run: dry_run)
        end

        # Builds an OAI client whose HTTP connection retries on 503 (up to 5
        # times) and follows redirects (up to 5).
        def oai_client_for(base_uri)
          # Workaround for https://github.com/code4lib/ruby-oai/issues/45
          http_client = Faraday.new(base_uri) do |conn|
            conn.request(:retry, max: 5, retry_statuses: 503)
            conn.response(:follow_redirects, limit: 5)
            conn.adapter(:net_http)
          end
          OAI::Client.new(base_uri.to_s, http: http_client)
        end
      end

    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'mysql2'
|
|
2
|
+
|
|
3
|
+
module Merritt
  module TIND
    # Looks up existing objects in the inventory database over a MySQL
    # connection.
    class InventoryDB

      # @return [Mysql2::Client] the database connection
      attr_reader :db_connection

      # @param db_config_h [Hash] connection options, passed to Mysql2::Client.new
      def initialize(db_config_h)
        @db_connection = Mysql2::Client.new(db_config_h)
      end

      class << self
        # Connects using the section of the YAML file at `db_config_path`
        # for the current environment (see Config.environment).
        #
        # @param db_config_path [String, Pathname] path to the database configuration
        # @return [InventoryDB]
        # @raise [RuntimeError] if `db_config_path` is nil
        # @raise [ArgumentError] if the file does not exist, or has no section
        #   for the current environment
        def from_file(db_config_path)
          raise "Can't connect to nil database" unless db_config_path
          raise ArgumentError, "Specified database config #{db_config_path} does not exist" unless File.exist?(db_config_path)

          env = Config.environment
          db_config = YAML.load_file(db_config_path)
          env_db_config = db_config.is_a?(Hash) ? db_config[env] : nil
          # Fail here with a clear message rather than handing nil to the client
          raise ArgumentError, "No database configuration for environment '#{env}' found in #{db_config_path}" unless env_db_config

          InventoryDB.new(env_db_config)
        end
      end

      # Finds an object with the given local ID in the given collection.
      #
      # @param local_id [String] the local identifier
      # @param collection_ark [String] the ARK of the collection
      # @return [OpenStruct, nil] the first matching row of `inv_objects`,
      #   or nil if there is none
      def find_existing_object(local_id, collection_ark)
        result = existing_object_stmt.execute(local_id, collection_ark).first
        return nil unless result

        OpenStruct.new(result)
      end

      private

      # TODO: is this right or should we be using erc_where? or both?
      EXISTING_OBJECT_SQL = <<~SQL.freeze
        SELECT o.*
        FROM inv_objects AS o
        JOIN inv_collections_inv_objects AS co
        ON co.inv_object_id = o.id
        JOIN inv_collections AS c
        ON c.id = co.inv_collection_id
        JOIN inv_localids AS li
        ON li.inv_object_ark = o.ark
        WHERE li.local_id = ?
        AND c.ark = ?
        LIMIT 1
      SQL

      # The prepared statement for {EXISTING_OBJECT_SQL}, prepared once per connection.
      def existing_object_stmt
        @existing_object_stmt ||= db_connection.prepare(EXISTING_OBJECT_SQL)
      end

    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
|
|
3
|
+
module Merritt
  module TIND

    # State carried from one harvest to the next: the oldest record that
    # failed and the newest record that succeeded. Instances are not
    # modified by {#update}; it returns a new instance instead.
    class LastHarvest

      OLDEST_FAILED = 'oldest_failed'.freeze
      NEWEST_SUCCESS = 'newest_success'.freeze

      attr_reader :oldest_failed
      attr_reader :newest_success

      # @param oldest_failed [Record, nil] the oldest record that failed to submit
      # @param newest_success [Record, nil] the newest record successfully submitted
      def initialize(oldest_failed: nil, newest_success: nil)
        @oldest_failed = oldest_failed
        @newest_success = newest_success
      end

      # @return [Hash] both records as hashes (or nil), keyed by
      #   {OLDEST_FAILED} and {NEWEST_SUCCESS}
      def to_h
        {
          OLDEST_FAILED => (oldest_failed && oldest_failed.to_h),
          NEWEST_SUCCESS => (newest_success && newest_success.to_h)
        }
      end

      # @return [String] the YAML form of {#to_h}
      def to_yaml
        to_h.to_yaml
      end

      # Writes the YAML form to `last_harvest_yml` via Files.rotate_and_lock.
      #
      # @param last_harvest_yml [String, Pathname] the file to write
      def write_to(last_harvest_yml)
        Files.rotate_and_lock(last_harvest_yml) do |f|
          f.write(to_yaml)
        end
      end

      # @return [Time, nil] datestamp of the oldest failed record, if any
      def oldest_failed_datestamp
        oldest_failed && oldest_failed.datestamp
      end

      # @return [Time, nil] datestamp of the newest successful record, if any
      def newest_success_datestamp
        newest_success && newest_success.datestamp
      end

      # @param success [Record, nil] a record that was processed successfully
      # @param failure [Record, nil] a record that failed
      # @return [LastHarvest] a new instance whose newest success / oldest
      #   failure take the given records into account
      def update(success: nil, failure: nil)
        LastHarvest.new(
          newest_success: Record.later(success, newest_success),
          oldest_failed: Record.earlier(failure, oldest_failed)
        )
      end

      private

      # Copies the records too, so a dup does not share them with its source.
      def initialize_dup(source)
        @newest_success = source.newest_success && source.newest_success.dup
        @oldest_failed = source.oldest_failed && source.oldest_failed.dup
      end

      # Copies the records too, so a clone does not share them with its source.
      def initialize_clone(source)
        @newest_success = source.newest_success && source.newest_success.clone
        @oldest_failed = source.oldest_failed && source.oldest_failed.clone
      end

      class << self
        # @param last_harvest_yml [String, Pathname, nil] the file to read
        # @return [LastHarvest] the state read from the file, or a blank
        #   state if the path is nil or the file does not exist
        def from_file(last_harvest_yml)
          return from_hash(load_yaml(last_harvest_yml)) if last_harvest_yml && File.exist?(last_harvest_yml)

          # A missing last_harvest.yml is normal
          LastHarvest.new
        end

        # @param h [Hash, nil, false] the parsed YAML document
        # @return [LastHarvest]
        def from_hash(h)
          # An empty YAML file parses to nil or false rather than a hash;
          # treat that like a missing file
          return LastHarvest.new unless h

          LastHarvest.new(
            oldest_failed: Record.from_hash(h[OLDEST_FAILED]),
            newest_success: Record.from_hash(h[NEWEST_SUCCESS])
          )
        end

        private

        # Parses the state file, which stores datestamps as YAML timestamps.
        # From Psych 4 on, `YAML.load_file` is a safe load that refuses to
        # build Time objects, so the unrestricted loader is used where it
        # exists; on older versions `load_file` already is that loader.
        # NOTE(review): this is only appropriate because the file is the
        # harvester's own state; don't reuse it for untrusted input.
        def load_yaml(path)
          return YAML.unsafe_load_file(path) if YAML.respond_to?(:unsafe_load_file)

          YAML.load_file(path)
        end

      end

    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'logger'
|
|
2
|
+
require 'time'
|
|
3
|
+
|
|
4
|
+
module Merritt
  module TIND
    # Factory for the harvester's loggers, which write tab-separated lines
    # of ISO 8601 timestamp, severity and message.
    module Logging
      # Passed to Logger.new as its second argument (`shift_age`)
      NUM_LOG_FILES = 10
      # Level used when none is specified
      DEFAULT_LOG_LEVEL = Logger::DEBUG

      class << self
        # Logger formatter; the third (program name) argument is ignored.
        #
        # @return [String] `"<iso8601 time>\t<severity>\t<message>\n"`
        def fmt_log(severity, datetime, _, msg)
          "#{datetime.iso8601}\t#{severity}\t#{msg}\n"
        end

        # Creates a logger. If `log_dev` is a path whose parent directory
        # does not exist, the directory is created first and that is noted
        # in the new log.
        #
        # @param log_dev [IO, String, Pathname, nil] where to log; defaults to STDERR
        # @param log_level [Object, nil] the level; defaults to {DEFAULT_LOG_LEVEL}
        # @return [Logger]
        def new_logger(log_dev = nil, log_level = nil)
          log_dev ||= STDERR
          log_level ||= DEFAULT_LOG_LEVEL

          created_log_dir = ensure_log_dir(log_dev)
          logger = Logger.new(log_dev, NUM_LOG_FILES, level: log_level, formatter: Logging.method(:fmt_log))
          created_log_dir.each { |d| logger.info("Created log directory #{d}") } if created_log_dir
          logger
        end

        private

        def io_like?(log_dev)
          # This is how Ruby's Logger identifies an IO-like log device
          log_dev.respond_to?(:write) && log_dev.respond_to?(:close)
        end

        # @return [Array, nil] the result of FileUtils.mkdir_p if the parent
        #   directory had to be created; nil if `log_dev` is IO-like or the
        #   directory already exists
        def ensure_log_dir(log_dev)
          return if io_like?(log_dev)

          # assume it's a string or a pathname
          log_dir = Pathname.new(log_dev).parent
          FileUtils.mkdir_p(log_dir) unless log_dir.exist?
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require 'oai/client'
|
|
2
|
+
require 'time'
|
|
3
|
+
|
|
4
|
+
module Merritt
  module TIND
    # A harvested record: its identifier and datestamp, plus (when built
    # from an OAI record) that record's metadata, from which the Dublin
    # Core values are read.
    class Record
      IDENTIFIER = 'identifier'.freeze
      DATESTAMP = 'datestamp'.freeze

      attr_reader :identifier, :datestamp, :metadata

      # @param identifier [String] the record identifier
      # @param datestamp [Time, nil] the record datestamp
      # @param oai_metadata [Object, nil] the metadata of the OAI record
      def initialize(identifier:, datestamp:, oai_metadata: nil)
        @identifier = identifier
        @datestamp = datestamp
        @metadata = oai_metadata
      end

      # @return [Hash] the what/where/when values for this record
      def erc
        # TODO: something smarter when we know the real requirements
        where = local_id
        created = dc_dates.first || datestamp
        {
          'what' => identifier,
          'where' => where,
          'when' => created,
          'when/created' => created,
          'when/modified' => datestamp
        }
      end

      # @return [Array<String>] text of each dc:identifier in the metadata
      def dc_identifiers
        @dc_identifiers ||= dc_text('identifier')
      end

      # @return [Array<Time>] each dc:date in the metadata, parsed
      def dc_dates
        @dc_dates ||= dc_text('date').map { |text| Time.parse(text) }
      end

      # @return [Array<String>] text of each dc:title in the metadata
      def dc_titles
        @dc_titles ||= dc_text('title')
      end

      # @return [Array<String>] text of each dc:creator in the metadata
      def dc_creators
        @dc_creators ||= dc_text('creator')
      end

      # @return [URI, nil] the first dc:identifier that starts with 'http'
      #   and ends with 'jpg', parsed; nil if there is none
      def content_uri
        return @content_uri if @content_uri

        # TODO: something smarter when we know the real requirements
        jpg_url = dc_identifiers.find do |candidate|
          candidate.start_with?('http') && candidate.end_with?('jpg')
        end
        @content_uri = jpg_url && URI.parse(jpg_url)
      end

      # @return [String] the first dc:identifier, or else the record identifier
      def local_id
        # TODO: something smarter when we know the real requirements
        dc_identifiers.first || identifier
      end

      # @return [Hash] identifier and datestamp, keyed by {IDENTIFIER} and {DATESTAMP}
      def to_h
        { IDENTIFIER => identifier, DATESTAMP => datestamp }
      end

      class << self

        # @return [Record, nil] whichever record has the later datestamp
        #   (`r2` on a tie); if one is nil, the other
        def later(r1, r2)
          return r1 if r2.nil?
          return r2 if r1.nil?

          (r1.datestamp <=> r2.datestamp).positive? ? r1 : r2
        end

        # @return [Record, nil] whichever record has the earlier datestamp
        #   (`r2` on a tie); if one is nil, the other
        def earlier(r1, r2)
          return r1 if r2.nil?
          return r2 if r1.nil?

          (r1.datestamp <=> r2.datestamp).negative? ? r1 : r2
        end

        # @param h [Hash, nil] a hash as produced by {Record#to_h}
        # @return [Record, nil] nil when `h` is nil/false
        def from_hash(h)
          h ? Record.new(identifier: h[IDENTIFIER], datestamp: h[DATESTAMP]) : nil
        end

        # Constructs a new {Record} wrapping the specified record.
        #
        # @param oai_record [OAI::Record] An OAI record as returned by `OAI::Client`
        # @raise [ArgumentError] if `oai_record` is nil
        def from_oai(oai_record)
          raise ArgumentError, "can't parse nil record" unless oai_record

          header = oai_record.header
          raw_datestamp = header.datestamp
          Record.new(
            identifier: header.identifier,
            datestamp: raw_datestamp && Time.parse(raw_datestamp),
            oai_metadata: oai_record.metadata
          )
        end

      end

      private

      # @return [Array<String>] text of every `dc:<element_name>` element under the metadata
      def dc_text(element_name)
        REXML::XPath.match(metadata, ".//dc:#{element_name}").map(&:text)
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
require 'mrt/ingest'
|
|
2
|
+
require 'ostruct'
|
|
3
|
+
|
|
4
|
+
module Merritt
  module TIND
    # Processes a single harvested record: skips it if the inventory
    # database already has an object at least as recent, and otherwise
    # submits it for ingest (unless this is a dry run).
    class RecordProcessor

      USER_AGENT = 'Merritt/TIND Harvester'.freeze

      attr_reader :record
      attr_reader :harvester
      attr_reader :server

      # @param record [Record] the record to process
      # @param harvester [Harvester] supplies the log, database, ingest client and settings
      # @param server [Object, nil] passed to the ingest object as its server
      def initialize(record, harvester, server)
        @record = record
        @harvester = harvester
        @server = server
      end

      # @return [true] if the record is already up to date, if this is a
      #   dry run, or once the record has been submitted
      def process_record!
        return true if already_up_to_date?

        log.info("Processing record: #{local_id} (content: #{content_uri})")
        return true if harvester.dry_run?

        submit_to_ingest!
      end

      private

      # Adds the record's content to the ingest object, starts the ingest,
      # and logs the resulting batch.
      def submit_to_ingest!
        ingest_object.add_component(content_uri)
        response = ingest_object.start_ingest(ingest_client, ingest_profile, USER_AGENT)
        log.info("Batch #{response.batch_id} queued at #{response.submission_date}")
        true # TODO: is there anything in the response that might cause us to return false?
      end

      # @return [Boolean] truthy if an existing object was found and its
      #   `modified` value is not older than the record's datestamp
      def already_up_to_date?
        @already_up_to_date ||= existing_object && existing_object.modified >= record.datestamp
      end

      # The matching object from the inventory database. A failed lookup is
      # cached as false (not nil), so the database is queried only once.
      def existing_object
        @existing_object = (find_existing_object || false) if @existing_object.nil?
        @existing_object
      end

      def find_existing_object
        inv_db.find_existing_object(local_id, collection_ark)
      end

      def inv_db
        harvester.mrt_inv_db
      end

      def local_id
        record.local_id
      end

      def content_uri
        record.content_uri
      end

      def collection_ark
        harvester.mrt_collection_ark
      end

      def ingest_client
        harvester.mrt_ingest_client
      end

      def ingest_profile
        harvester.mrt_ingest_profile
      end

      def log
        harvester.log
      end

      # The ingest object for this record, built once from its ERC values
      # and local identifier.
      def ingest_object
        @ingest_object ||= begin
          Mrt::Ingest::IObject.new(
            erc: record.erc,
            server: server,
            local_identifier: record.local_id
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
|
|
3
|
+
module Merritt
  module TIND
    # Validates and formats the from/until time range of a harvest.
    module Times
      class << self
        # Converts both ends of a range to UTC ISO 8601 strings. Neither
        # argument is modified.
        #
        # @param from_time [Time, nil] start of the range
        # @param until_time [Time, nil] end of the range
        # @return [Array(String, String)] the two times in ISO 8601 form;
        #   an element is nil where the corresponding argument was
        # @raise [RangeError] if both are given and `from_time` is after `until_time`
        # @raise [ArgumentError] if an argument is neither nil/false nor convertible to UTC
        def iso8601_range(from_time, until_time)
          from_utc, until_utc = valid_range(from_time, until_time)
          [
            from_utc && from_utc.iso8601,
            until_utc && until_utc.iso8601
          ]
        end

        private

        # @return [Array] both times in UTC, after checking their order
        def valid_range(from_time, until_time)
          from_utc, until_utc = [from_time, until_time].map { |t| utc_or_nil(t) }
          if from_utc && until_utc && from_utc > until_utc
            raise RangeError, "from_time #{from_utc} must be <= until_time #{until_utc}"
          end

          [from_utc, until_utc]
        end

        # @return [Time, nil] `time` in UTC, or nil if `time` is nil/false
        def utc_or_nil(time)
          return unless time
          # Time#utc converts the receiver in place; getutc returns a new
          # object, leaving the caller's Time in its original zone
          return time.getutc if time.respond_to?(:getutc)
          return time.utc if time.respond_to?(:utc)

          raise ArgumentError, "time #{time} does not appear to be a Time"
        end
      end
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: mrt-tind-harvester
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- David Moles
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2019-05-23 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: mrt-ingest
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.0.5
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 0.0.5
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: mysql2
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 0.4.0
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 0.4.0
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: nokogiri
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.10'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.10'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: oai
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0.4'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0.4'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: rest-client
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '2.0'
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '2.0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: capistrano
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '3.4'
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '3.4'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: capistrano-bundler
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '1.1'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '1.1'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: database_cleaner
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '1.5'
|
|
118
|
+
type: :development
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - "~>"
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '1.5'
|
|
125
|
+
- !ruby/object:Gem::Dependency
|
|
126
|
+
name: factory_bot
|
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - "~>"
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: '4.11'
|
|
132
|
+
type: :development
|
|
133
|
+
prerelease: false
|
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - "~>"
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: '4.11'
|
|
139
|
+
- !ruby/object:Gem::Dependency
|
|
140
|
+
name: rake
|
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - "~>"
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: '12.0'
|
|
146
|
+
type: :development
|
|
147
|
+
prerelease: false
|
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
+
requirements:
|
|
150
|
+
- - "~>"
|
|
151
|
+
- !ruby/object:Gem::Version
|
|
152
|
+
version: '12.0'
|
|
153
|
+
- !ruby/object:Gem::Dependency
|
|
154
|
+
name: rspec
|
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
|
156
|
+
requirements:
|
|
157
|
+
- - "~>"
|
|
158
|
+
- !ruby/object:Gem::Version
|
|
159
|
+
version: '3.8'
|
|
160
|
+
type: :development
|
|
161
|
+
prerelease: false
|
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
163
|
+
requirements:
|
|
164
|
+
- - "~>"
|
|
165
|
+
- !ruby/object:Gem::Version
|
|
166
|
+
version: '3.8'
|
|
167
|
+
- !ruby/object:Gem::Dependency
|
|
168
|
+
name: rubocop
|
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
|
170
|
+
requirements:
|
|
171
|
+
- - "~>"
|
|
172
|
+
- !ruby/object:Gem::Version
|
|
173
|
+
version: '0.68'
|
|
174
|
+
type: :development
|
|
175
|
+
prerelease: false
|
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
177
|
+
requirements:
|
|
178
|
+
- - "~>"
|
|
179
|
+
- !ruby/object:Gem::Version
|
|
180
|
+
version: '0.68'
|
|
181
|
+
- !ruby/object:Gem::Dependency
|
|
182
|
+
name: rubocop-rspec
|
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
|
184
|
+
requirements:
|
|
185
|
+
- - "~>"
|
|
186
|
+
- !ruby/object:Gem::Version
|
|
187
|
+
version: '1.33'
|
|
188
|
+
type: :development
|
|
189
|
+
prerelease: false
|
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
191
|
+
requirements:
|
|
192
|
+
- - "~>"
|
|
193
|
+
- !ruby/object:Gem::Version
|
|
194
|
+
version: '1.33'
|
|
195
|
+
- !ruby/object:Gem::Dependency
|
|
196
|
+
name: simplecov
|
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
|
198
|
+
requirements:
|
|
199
|
+
- - "~>"
|
|
200
|
+
- !ruby/object:Gem::Version
|
|
201
|
+
version: '0.16'
|
|
202
|
+
type: :development
|
|
203
|
+
prerelease: false
|
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
205
|
+
requirements:
|
|
206
|
+
- - "~>"
|
|
207
|
+
- !ruby/object:Gem::Version
|
|
208
|
+
version: '0.16'
|
|
209
|
+
- !ruby/object:Gem::Dependency
|
|
210
|
+
name: simplecov-console
|
|
211
|
+
requirement: !ruby/object:Gem::Requirement
|
|
212
|
+
requirements:
|
|
213
|
+
- - "~>"
|
|
214
|
+
- !ruby/object:Gem::Version
|
|
215
|
+
version: '0.4'
|
|
216
|
+
type: :development
|
|
217
|
+
prerelease: false
|
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
219
|
+
requirements:
|
|
220
|
+
- - "~>"
|
|
221
|
+
- !ruby/object:Gem::Version
|
|
222
|
+
version: '0.4'
|
|
223
|
+
- !ruby/object:Gem::Dependency
|
|
224
|
+
name: standalone_migrations
|
|
225
|
+
requirement: !ruby/object:Gem::Requirement
|
|
226
|
+
requirements:
|
|
227
|
+
- - "~>"
|
|
228
|
+
- !ruby/object:Gem::Version
|
|
229
|
+
version: '5.2'
|
|
230
|
+
type: :development
|
|
231
|
+
prerelease: false
|
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
233
|
+
requirements:
|
|
234
|
+
- - "~>"
|
|
235
|
+
- !ruby/object:Gem::Version
|
|
236
|
+
version: '5.2'
|
|
237
|
+
- !ruby/object:Gem::Dependency
|
|
238
|
+
name: webmock
|
|
239
|
+
requirement: !ruby/object:Gem::Requirement
|
|
240
|
+
requirements:
|
|
241
|
+
- - "~>"
|
|
242
|
+
- !ruby/object:Gem::Version
|
|
243
|
+
version: '3.5'
|
|
244
|
+
type: :development
|
|
245
|
+
prerelease: false
|
|
246
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
247
|
+
requirements:
|
|
248
|
+
- - "~>"
|
|
249
|
+
- !ruby/object:Gem::Version
|
|
250
|
+
version: '3.5'
|
|
251
|
+
description: Harvests TIND OAI-PMH feed to identify files for ingest into Merritt
|
|
252
|
+
email:
|
|
253
|
+
- david.moles@ucop.edu
|
|
254
|
+
executables:
|
|
255
|
+
- mrt-tind-harvester
|
|
256
|
+
extensions: []
|
|
257
|
+
extra_rdoc_files: []
|
|
258
|
+
files:
|
|
259
|
+
- bin/mrt-tind-harvester
|
|
260
|
+
- lib/merritt.rb
|
|
261
|
+
- lib/merritt/tind.rb
|
|
262
|
+
- lib/merritt/tind/config.rb
|
|
263
|
+
- lib/merritt/tind/feed.rb
|
|
264
|
+
- lib/merritt/tind/feed_processor.rb
|
|
265
|
+
- lib/merritt/tind/files.rb
|
|
266
|
+
- lib/merritt/tind/harvester.rb
|
|
267
|
+
- lib/merritt/tind/inventory_db.rb
|
|
268
|
+
- lib/merritt/tind/last_harvest.rb
|
|
269
|
+
- lib/merritt/tind/logging.rb
|
|
270
|
+
- lib/merritt/tind/module_info.rb
|
|
271
|
+
- lib/merritt/tind/record.rb
|
|
272
|
+
- lib/merritt/tind/record_processor.rb
|
|
273
|
+
- lib/merritt/tind/times.rb
|
|
274
|
+
homepage: https://github.com/CDLUC3/mrt-tind-harvester
|
|
275
|
+
licenses:
|
|
276
|
+
- MIT
|
|
277
|
+
metadata: {}
|
|
278
|
+
post_install_message:
|
|
279
|
+
rdoc_options: []
|
|
280
|
+
require_paths:
|
|
281
|
+
- lib
|
|
282
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
283
|
+
requirements:
|
|
284
|
+
- - "~>"
|
|
285
|
+
- !ruby/object:Gem::Version
|
|
286
|
+
version: '2.4'
|
|
287
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
288
|
+
requirements:
|
|
289
|
+
- - ">="
|
|
290
|
+
- !ruby/object:Gem::Version
|
|
291
|
+
version: '0'
|
|
292
|
+
requirements: []
|
|
293
|
+
rubyforge_project:
|
|
294
|
+
rubygems_version: 2.6.14.1
|
|
295
|
+
signing_key:
|
|
296
|
+
specification_version: 4
|
|
297
|
+
summary: TIND harvester for Merritt
|
|
298
|
+
test_files: []
|