pushmi_pullyu 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +2 -0
- data/.github/PULL_REQUEST_TEMPLATE +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.rubocop.yml +20 -1
- data/.travis.yml +2 -2
- data/CHANGELOG.md +16 -0
- data/Dangerfile +7 -3
- data/README.md +4 -5
- data/examples/pushmi_pullyu.yml +7 -10
- data/lib/pushmi_pullyu.rb +10 -17
- data/lib/pushmi_pullyu/aip.rb +9 -6
- data/lib/pushmi_pullyu/aip/downloader.rb +133 -180
- data/lib/pushmi_pullyu/cli.rb +34 -16
- data/lib/pushmi_pullyu/logging.rb +1 -1
- data/lib/pushmi_pullyu/preservation_queue.rb +1 -0
- data/lib/pushmi_pullyu/swift_depositer.rb +1 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/pushmi_pullyu.gemspec +15 -14
- metadata +82 -49
- data/lib/pushmi_pullyu/aip/fedora_fetcher.rb +0 -65
- data/lib/pushmi_pullyu/aip/file_list_creator.rb +0 -118
- data/lib/pushmi_pullyu/aip/owner_email_editor.rb +0 -62
- data/lib/pushmi_pullyu/aip/user.rb +0 -2
@@ -1,65 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
# Downloads one object (or a sub-resource of it) from the Fedora repository
# described by PushmiPullyu.options[:fedora] and writes the response body to disk.
class PushmiPullyu::AIP::FedoraFetcher

  # Raised when Fedora answers with anything other than success, or with
  # 404 for a download that was not flagged as optional.
  class FedoraFetchError < StandardError; end

  RDF_FORMAT = 'text/rdf+n3'.freeze

  # noid - identifier of the object; its first eight characters are used to
  #        build the pairtree portion of the URL.
  def initialize(noid)
    @noid = noid
  end

  # Returns the URL of the object, with url_extra (when given) appended verbatim.
  def object_url(url_extra = nil)
    url = "#{PushmiPullyu.options[:fedora][:url]}#{base_path}/#{pairtree}"
    url += url_extra if url_extra
    url
  end

  # Fetches the object and writes it to download_path.
  #
  # download_path         - file to (over)write, opened in binary mode
  # url_extra             - suffix appended to the object URL
  # optional              - when true a 404 returns false instead of raising
  # is_rdf                - when true the request asks for RDF_FORMAT
  # should_add_user_email - when true the body is passed through
  #                         OwnerEmailEditor before being written
  #
  # Returns true on success, false on an optional 404.
  # Raises FedoraFetchError for a non-optional 404 or any other non-success response.
  def download_object(download_path, url_extra: nil,
                      optional: false, is_rdf: false,
                      should_add_user_email: false)
    uri = URI(object_url(url_extra))
    response = fetch(uri, is_rdf)

    case response
    when Net::HTTPSuccess
      body = if should_add_user_email
               PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
             else
               response.body
             end
      # Block form guarantees the handle is closed even if the write fails.
      File.open(download_path, 'wb') { |file| file.write(body) }
      true
    when Net::HTTPNotFound
      raise FedoraFetchError, failure_message(uri, response) unless optional

      false
    else
      raise FedoraFetchError, failure_message(uri, response)
    end
  end

  private

  # Performs the authenticated GET and returns the Net::HTTPResponse.
  def fetch(uri, is_rdf)
    request = Net::HTTP::Get.new(uri)
    request.basic_auth(PushmiPullyu.options[:fedora][:user],
                       PushmiPullyu.options[:fedora][:password])
    request['Accept'] = RDF_FORMAT if is_rdf

    # Without use_ssl an https:// URL would be contacted in plain HTTP.
    Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(request)
    end
  end

  # Human-readable description of a failed request for the error message.
  def failure_message(uri, response)
    "GET #{uri} returned #{response.code} #{response.message}"
  end

  def pairtree
    "#{@noid[0..1]}/#{@noid[2..3]}/#{@noid[4..5]}/#{@noid[6..7]}/#{@noid}"
  end

  def base_path
    PushmiPullyu.options[:fedora][:base_path]
  end

end
|
@@ -1,118 +0,0 @@
|
|
1
|
-
require 'rdf'
|
2
|
-
require 'rdf/n3'
|
3
|
-
require 'rest-client'
|
4
|
-
|
5
|
-
# Walks the proxy chain (first/next/prev/last) of a list source loaded from
# Fedora, validates it against the known file set uuids, and writes the
# ordered uuids to an XML file.
class PushmiPullyu::AIP::FileListCreator

  IANA = 'http://www.iana.org/assignments/relation/'.freeze
  PREDICATES = {
    proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
    first: RDF::URI(IANA + 'first'),
    last: RDF::URI(IANA + 'last'),
    prev: RDF::URI(IANA + 'prev'),
    next: RDF::URI(IANA + 'next'),
    has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
  }.freeze

  # Every validation failure raised below has its own class, so a failed
  # check is reported as itself rather than as a NameError.
  class NoProxyURIFound < StandardError; end
  class NoFirstProxyFound < StandardError; end
  class NoLastProxyFound < StandardError; end
  class FirstProxyHasPrev < StandardError; end
  class LastProxyHasNext < StandardError; end
  class NextPreviousProxyMismatch < StandardError; end
  class ProxyCountIncorrect < StandardError; end
  class LastProxyFailsValidation < StandardError; end
  class ListSourceFileSetMismatch < StandardError; end

  # list_source_uri - URI of the list source resource
  # output_xml_file - path of the XML file written by #run
  # file_set_uuids  - the known file set uuids, used for validation
  def initialize(list_source_uri, output_xml_file, file_set_uuids)
    @uri = RDF::URI(list_source_uri)
    # Separate copy carrying the credentials; @uri stays clean because it is
    # used as a query subject and in error messages.
    @auth_uri = RDF::URI(list_source_uri)
    @auth_uri.user = PushmiPullyu.options[:fedora][:user]
    @auth_uri.password = PushmiPullyu.options[:fedora][:password]
    @output_file = output_xml_file

    # These are the known fileset uuids, used for validation
    @file_set_uuids = file_set_uuids
  end

  # Extracts the ordered uuids, checks that they are exactly the known file
  # set uuids (order ignored), then writes the output file.
  #
  # Raises ListSourceFileSetMismatch when the two sets differ, plus anything
  # raised by #extract_list_source_uuids.
  def run
    extract_list_source_uuids
    raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort

    write_output_file
  end

  # Loads the graph and fills @list_source_uuids by following the chain from
  # the first proxy, verifying each next/prev link on the way.
  def extract_list_source_uuids
    # Note: raises IOError if can't find
    #       raises RDF::ReaderError if can't parse
    @graph = RDF::Graph.load(@auth_uri, validate: true)
    @list_source_uuids = []

    # Fetch first FileSet in list source
    this_proxy = find_first_proxy

    # The bound stops a cyclic chain from looping forever; one entry too many
    # is allowed through so the count check below can report it.
    while @list_source_uuids.count <= num_proxies
      @list_source_uuids << uuid_from_proxy(this_proxy)
      next_proxy = find_next_proxy(this_proxy)

      break if next_proxy.nil?

      raise NextPreviousProxyMismatch, @uri.to_s if this_proxy != find_prev_proxy(next_proxy)

      this_proxy = next_proxy
    end

    raise ProxyCountIncorrect, @uri.to_s if @list_source_uuids.count != num_proxies
    raise LastProxyFailsValidation, @uri.to_s if this_proxy != find_last_proxy
  end

  # Number of hasPart statements on the list source (memoized).
  def num_proxies
    @num_proxies ||= @graph.query({ subject: @uri, predicate: PREDICATES[:has_part] }).count
  end

  # Returns the last path segment of the proxy's proxyFor object.
  # Raises NoProxyURIFound when the proxy has no proxyFor statement.
  def uuid_from_proxy(proxy_uri)
    target = first_object(proxy_uri, :proxy_for)
    raise NoProxyURIFound, proxy_uri.to_s if target.nil?

    target.to_s.split('/').last
  end

  # Returns the proxy the list source names as first.
  # Raises NoFirstProxyFound when there is none, FirstProxyHasPrev when it
  # has a predecessor.
  def find_first_proxy
    first_uri = first_object(@uri, :first)
    raise NoFirstProxyFound, @uri.to_s if first_uri.nil?
    # Validate that the first proxy doesn't have a previous one
    raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)

    first_uri
  end

  # Returns the proxy the list source names as last.
  # Raises NoLastProxyFound when there is none, LastProxyHasNext when it has
  # a successor.
  def find_last_proxy
    last_uri = first_object(@uri, :last)
    raise NoLastProxyFound, @uri.to_s if last_uri.nil?
    # Validate that the last proxy doesn't have a next one
    raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)

    last_uri
  end

  # Returns the proxy following proxy_uri, or nil at the end of the chain.
  def find_next_proxy(proxy_uri)
    first_object(proxy_uri, :next)
  end

  # Returns the proxy preceding proxy_uri, or nil at the start of the chain.
  def find_prev_proxy(proxy_uri)
    first_object(proxy_uri, :prev)
  end

  # Writes the uuids, in list order, as a <file_order> XML document.
  def write_output_file
    File.open(@output_file, 'w') do |file|
      file.write("<file_order>\n")
      @list_source_uuids.each do |uuid|
        file.write("  <uuid>#{uuid}</uuid>\n")
      end
      file.write("</file_order>\n")
    end
  end

  private

  # Object of the first statement matching subject and the named predicate,
  # or nil when nothing matches. The pattern is passed as an explicit hash so
  # it is always a positional argument, never keyword arguments.
  def first_object(subject, predicate_name)
    @graph.query({ subject: subject, predicate: PREDICATES[predicate_name] }) do |statement|
      return statement.object
    end
    nil
  end

end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
# Rewrites an N3 document so that every statement with the bibo:owner
# predicate carries the owning user's email (looked up through
# PushmiPullyu::AIP::User) instead of the stored owner value.
class PushmiPullyu::AIP::OwnerEmailEditor

  OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze

  # Raised by #run when the document contains no owner statement at all.
  class NoOwnerPredicate < StandardError; end

  # rdf_string - the N3 document to edit
  def initialize(rdf_string)
    @document = rdf_string
  end

  # Returns the rewritten N3 document as a string.
  # Raises NoOwnerPredicate when no owner statement was found.
  def run
    ensure_database_connection

    prefixes = read_prefixes
    owner_replaced = false
    new_body = RDF::N3::Writer.buffer(prefixes: prefixes) do |writer|
      RDF::N3::Reader.new(@document) do |reader|
        reader.each_statement do |statement|
          if statement.predicate == OWNER_PREDICATE
            # The owner object is converted with to_i and used as the User id.
            user = PushmiPullyu::AIP::User.find(statement.object.to_i)
            writer << [statement.subject, statement.predicate, user.email]
            owner_replaced = true
          else
            writer << statement
          end
        end
      end
    end
    return new_body if owner_replaced

    raise NoOwnerPredicate
  end

  private

  # Read once to load prefixes (the @things at the top of an n3 file).
  # The statements are consumed first, then the prefixes are read back.
  def read_prefixes
    prefixes = nil
    RDF::N3::Reader.new(@document) do |reader|
      reader.each_statement { |_statement| }
      prefixes = reader.prefixes
    end
    prefixes
  end

  def ensure_database_connection
    return if ActiveRecord::Base.connected?

    ActiveRecord::Base.establish_connection(database_configuration)
  end

  # Config either from URL, or with more granular options (the latter taking precedence)
  def database_configuration
    options = PushmiPullyu.options[:database]
    defaults = url_defaults(options[:url])
    {
      # NOTE(review): the option key is spelled :adaptor; kept as-is because
      # existing configuration files may rely on it.
      adapter: options[:adaptor] || defaults[:adapter],
      host: options[:host] || defaults[:host],
      database: options[:database] || defaults[:database],
      username: options[:username] || defaults[:username],
      password: options[:password] || defaults[:password],
      encoding: options[:encoding] || defaults[:encoding],
      pool: options[:pool] || defaults[:pool]
    }
  end

  # Settings derived from the database URL. Returns an empty hash when no URL
  # is configured, so a configuration made only of granular options works.
  def url_defaults(url)
    return {} if url.nil?

    uri = URI.parse(url)
    params = CGI.parse(uri.query || '')
    {
      adapter: uri.scheme,
      host: uri.host,
      # path is nil for opaque URIs, hence the to_s before splitting
      database: uri.path.to_s.split('/')[1].to_s,
      username: uri.user,
      password: uri.password,
      encoding: params['encoding'].to_a.first,
      pool: params['pool'].to_a.first
    }
  end

end
|