pushmi_pullyu 1.0.3 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +2 -0
- data/.github/PULL_REQUEST_TEMPLATE +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.rubocop.yml +20 -1
- data/.travis.yml +2 -2
- data/CHANGELOG.md +18 -0
- data/Dangerfile +7 -3
- data/README.md +4 -5
- data/examples/pushmi_pullyu.yml +6 -6
- data/lib/pushmi_pullyu.rb +10 -17
- data/lib/pushmi_pullyu/aip.rb +9 -6
- data/lib/pushmi_pullyu/aip/downloader.rb +133 -180
- data/lib/pushmi_pullyu/cli.rb +34 -26
- data/lib/pushmi_pullyu/logging.rb +1 -1
- data/lib/pushmi_pullyu/preservation_queue.rb +1 -0
- data/lib/pushmi_pullyu/swift_depositer.rb +1 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/pushmi_pullyu.gemspec +14 -13
- metadata +72 -45
- data/lib/pushmi_pullyu/aip/fedora_fetcher.rb +0 -65
- data/lib/pushmi_pullyu/aip/file_list_creator.rb +0 -118
- data/lib/pushmi_pullyu/aip/owner_email_editor.rb +0 -38
- data/lib/pushmi_pullyu/aip/user.rb +0 -2
@@ -1,65 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
# Downloads a single object from Fedora over HTTP and writes it to disk.
# The object is addressed by its NOID, expanded into a pairtree path beneath
# the configured Fedora URL and base path.
class PushmiPullyu::AIP::FedoraFetcher

  # Raised when Fedora does not answer with a success status (a 404 is
  # tolerated only for downloads flagged as optional).
  class FedoraFetchError < StandardError; end

  RDF_FORMAT = 'text/rdf+n3'.freeze

  # noid - identifier of the object; its first eight characters are used to
  #        build the pairtree path.
  def initialize(noid)
    @noid = noid
  end

  # Builds the URL of the object, optionally followed by +url_extra+
  # (appended verbatim, no separator is inserted).
  def object_url(url_extra = nil)
    url = "#{PushmiPullyu.options[:fedora][:url]}#{base_path}/#{pairtree}"
    url += url_extra if url_extra
    url
  end

  # Fetches the object and writes the response body to +download_path+.
  #
  # download_path         - file to (over)write, opened in binary mode
  # url_extra             - suffix appended to the object URL
  # optional              - when true, a 404 returns false instead of raising
  # is_rdf                - when true, asks for RDF_FORMAT via the Accept header
  # should_add_user_email - when true, the body is passed through
  #                         OwnerEmailEditor before being written
  #
  # Returns true on success, false on an optional 404.
  # Raises FedoraFetchError on any other response.
  def download_object(download_path, url_extra: nil,
                      optional: false, is_rdf: false,
                      should_add_user_email: false)

    uri = URI(object_url(url_extra))
    response = perform_get(uri, is_rdf)

    case response
    when Net::HTTPSuccess
      # Transform the body before touching the filesystem so that a failure
      # in the editor does not leave an empty file behind.
      body = if should_add_user_email
               PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
             else
               response.body
             end
      # Block form guarantees the handle is closed even if the write raises.
      File.open(download_path, 'wb') { |file| file.write(body) }
      true
    when Net::HTTPNotFound
      raise FedoraFetchError, failure_message(uri, response) unless optional

      false
    else
      raise FedoraFetchError, failure_message(uri, response)
    end
  end

  private

  # Issues an authenticated GET for +uri+ and returns the Net::HTTP response.
  def perform_get(uri, is_rdf)
    request = Net::HTTP::Get.new(uri)
    request.basic_auth(PushmiPullyu.options[:fedora][:user],
                       PushmiPullyu.options[:fedora][:password])
    request['Accept'] = RDF_FORMAT if is_rdf

    # Net::HTTP.start speaks plain HTTP unless use_ssl is set, so an https://
    # Fedora URL needs TLS switched on explicitly.
    Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
      http.request(request)
    end
  end

  # Human-readable description of a failed fetch for the raised error.
  def failure_message(uri, response)
    "#{response.code} #{response.message} fetching #{uri}"
  end

  # Pairtree path: the first four character pairs of the NOID as nested
  # directories, followed by the full NOID.
  def pairtree
    "#{@noid[0..1]}/#{@noid[2..3]}/#{@noid[4..5]}/#{@noid[6..7]}/#{@noid}"
  end

  def base_path
    PushmiPullyu.options[:fedora][:base_path]
  end

end
|
@@ -1,118 +0,0 @@
|
|
1
|
-
require 'rdf'
|
2
|
-
require 'rdf/n3'
|
3
|
-
require 'rest-client'
|
4
|
-
|
5
|
-
# Reads a list source from Fedora as RDF, walks its chain of proxies
# (linked through the IANA first/next/prev/last relations) and writes the
# UUIDs they point at, in list order, to an XML file.
class PushmiPullyu::AIP::FileListCreator

  IANA = 'http://www.iana.org/assignments/relation/'.freeze
  PREDICATES = {
    proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
    first: RDF::URI("#{IANA}first"),
    last: RDF::URI("#{IANA}last"),
    prev: RDF::URI("#{IANA}prev"),
    next: RDF::URI("#{IANA}next"),
    has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
  }.freeze

  # Every validation failure has its own error class. Several of these were
  # raised without ever being defined, which turned the intended error into
  # a NameError at the raise site.
  class NoProxyURIFound < StandardError; end
  class NoFirstProxyFound < StandardError; end
  class NoLastProxyFound < StandardError; end
  class FirstProxyHasPrev < StandardError; end
  class LastProxyHasNext < StandardError; end
  class NextPreviousProxyMismatch < StandardError; end
  class ProxyCountIncorrect < StandardError; end
  class LastProxyFailsValidation < StandardError; end
  class ListSourceFileSetMismatch < StandardError; end

  # list_source_uri - URI of the list source to load
  # output_xml_file - path of the XML file written by #run
  # file_set_uuids  - the known fileset uuids, used for validation
  def initialize(list_source_uri, output_xml_file, file_set_uuids)
    @uri = RDF::URI(list_source_uri)
    # A second copy of the URI carries the Fedora credentials and is only
    # used for loading; @uri stays credential-free and is what gets queried
    # and reported in error messages.
    @auth_uri = RDF::URI(list_source_uri)
    @auth_uri.user = PushmiPullyu.options[:fedora][:user]
    @auth_uri.password = PushmiPullyu.options[:fedora][:password]
    @output_file = output_xml_file

    @file_set_uuids = file_set_uuids
  end

  # Extracts the ordered UUIDs, checks that they are exactly the known
  # fileset UUIDs (order ignored), and writes the output file.
  #
  # Raises ListSourceFileSetMismatch when the two sets differ, plus anything
  # raised by #extract_list_source_uuids.
  def run
    extract_list_source_uuids
    raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort

    write_output_file
  end

  # Loads the graph and fills @list_source_uuids by following next links
  # from the first proxy, validating the chain along the way.
  #
  # Note: raises IOError if can't find
  #       raises RDF::ReaderError if can't parse
  def extract_list_source_uuids
    @graph = RDF::Graph.load(@auth_uri, validate: true)
    @list_source_uuids = []

    # Fetch first FileSet in list source
    this_proxy = find_first_proxy

    # The bound lets the walk collect at most one UUID more than expected,
    # so a cyclic or over-long chain terminates and is then reported by the
    # count check below.
    while @list_source_uuids.count <= num_proxies
      @list_source_uuids << uuid_from_proxy(this_proxy)
      next_proxy = find_next_proxy(this_proxy)

      break if next_proxy.nil?

      # Each next link must be mirrored by a prev link pointing back.
      raise NextPreviousProxyMismatch, @uri.to_s if this_proxy != find_prev_proxy(next_proxy)

      this_proxy = next_proxy
    end

    raise ProxyCountIncorrect, @uri.to_s if @list_source_uuids.count != num_proxies
    raise LastProxyFailsValidation, @uri.to_s if this_proxy != find_last_proxy
  end

  # Number of hasPart statements on the list source (memoized).
  def num_proxies
    @num_proxies ||= @graph.query(subject: @uri, predicate: PREDICATES[:has_part]).count
  end

  # Returns the last path segment of the resource the proxy stands for.
  # Raises NoProxyURIFound when the proxy has no proxyFor statement.
  def uuid_from_proxy(proxy_uri)
    target = first_object(proxy_uri, PREDICATES[:proxy_for])
    raise NoProxyURIFound, proxy_uri.to_s unless target

    target.to_s.split('/').last
  end

  # Returns the proxy the list source names as first.
  # Raises NoFirstProxyFound when there is none, FirstProxyHasPrev when that
  # proxy itself has a prev link.
  def find_first_proxy
    first_uri = first_object(@uri, PREDICATES[:first])
    raise NoFirstProxyFound, @uri.to_s unless first_uri
    # Validate that the first proxy doesn't have a previous one
    raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)

    first_uri
  end

  # Returns the proxy the list source names as last.
  # Raises NoLastProxyFound when there is none, LastProxyHasNext when that
  # proxy itself has a next link.
  def find_last_proxy
    last_uri = first_object(@uri, PREDICATES[:last])
    raise NoLastProxyFound, @uri.to_s unless last_uri
    # Validate that the last proxy doesn't have a next one
    raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)

    last_uri
  end

  # Returns the proxy following +proxy_uri+, or nil at the end of the chain.
  def find_next_proxy(proxy_uri)
    first_object(proxy_uri, PREDICATES[:next])
  end

  # Returns the proxy preceding +proxy_uri+, or nil at the start of the chain.
  def find_prev_proxy(proxy_uri)
    first_object(proxy_uri, PREDICATES[:prev])
  end

  # Writes the collected UUIDs, one <uuid> element each, inside a
  # <file_order> root element. Values are written verbatim.
  def write_output_file
    File.open(@output_file, 'w') do |file|
      file.write("<file_order>\n")
      @list_source_uuids.each do |uuid|
        file.write("  <uuid>#{uuid}</uuid>\n")
      end
      file.write("</file_order>\n")
    end
  end

  private

  # Object of the first statement in the graph matching +subject+ and
  # +predicate+, or nil when no statement matches.
  def first_object(subject, predicate)
    @graph.query(subject: subject, predicate: predicate) do |statement|
      return statement.object
    end
    nil
  end

end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
# Rewrites an N3 document so that every statement using the bibo:owner
# predicate carries the owning user's email as its object. The user is
# looked up through PushmiPullyu::AIP::User.find with the integer value of
# the original object.
class PushmiPullyu::AIP::OwnerEmailEditor

  OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze

  # Raised by #run when the document contains no owner statement at all.
  class NoOwnerPredicate < StandardError; end

  # rdf_string - the N3 document to edit, as a string
  def initialize(rdf_string)
    @document = rdf_string
  end

  # Returns the rewritten document as an N3 string.
  # Raises NoOwnerPredicate when no owner statement was found to rewrite.
  def run
    owner_rewritten = false

    rewritten = RDF::N3::Writer.buffer(prefixes: document_prefixes) do |writer|
      RDF::N3::Reader.new(@document) do |reader|
        reader.each_statement do |statement|
          # Anything that is not an owner statement is copied through as is.
          unless statement.predicate == OWNER_PREDICATE
            writer << statement
            next
          end

          user = PushmiPullyu::AIP::User.find(statement.object.to_i)
          writer << [statement.subject, statement.predicate, user.email]
          owner_rewritten = true
        end
      end
    end

    raise NoOwnerPredicate unless owner_rewritten

    rewritten
  end

  private

  # Reads the document once, discarding the statements, purely to collect
  # the prefixes (the @things at the top of an n3 file) for the writer.
  def document_prefixes
    collected = nil
    RDF::N3::Reader.new(@document) do |reader|
      reader.each_statement { |_statement| }
      collected = reader.prefixes
    end
    collected
  end

end
|