pushmi_pullyu 1.0.6 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +2 -0
- data/.github/PULL_REQUEST_TEMPLATE +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.rubocop.yml +20 -1
- data/.travis.yml +2 -2
- data/CHANGELOG.md +16 -0
- data/Dangerfile +7 -3
- data/README.md +4 -5
- data/examples/pushmi_pullyu.yml +6 -6
- data/lib/pushmi_pullyu.rb +10 -17
- data/lib/pushmi_pullyu/aip.rb +9 -6
- data/lib/pushmi_pullyu/aip/downloader.rb +132 -180
- data/lib/pushmi_pullyu/cli.rb +23 -19
- data/lib/pushmi_pullyu/logging.rb +1 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/pushmi_pullyu.gemspec +11 -11
- metadata +46 -39
- data/lib/pushmi_pullyu/aip/fedora_fetcher.rb +0 -66
- data/lib/pushmi_pullyu/aip/file_list_creator.rb +0 -121
- data/lib/pushmi_pullyu/aip/owner_email_editor.rb +0 -50
- data/lib/pushmi_pullyu/aip/user.rb +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8596ed87a287d7708b92c6a3538dcff1f976ddd1b2dc0171b30011bcfe62276b
|
4
|
+
data.tar.gz: 9babcb9576246ee00e581056361731b324e27d02c49814af9b26346fbd269120
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d031eef30ea6828c6fac6c094ab040e914fce8094298249f56ebf2b44d8820c22fd3de6ff98abc546e482f278351b6838419d9b09a90cdd9be6b50576896afc
|
7
|
+
data.tar.gz: 858a1d2f2a91a7ee9f9c0257391aa601316afa8fb07c09e883b817ec7b90879f5fdeda9eca34b375aeea5c1ed2c9b06b13c1212a03e30c538d2942a2e4976dca
|
data/.github/CODEOWNERS
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
services:
|
13
|
+
redis:
|
14
|
+
image: redis:alpine
|
15
|
+
ports: ["6379:6379"]
|
16
|
+
steps:
|
17
|
+
- uses: actions/checkout@v2
|
18
|
+
- name: Set up Ruby
|
19
|
+
uses: ruby/setup-ruby@v1
|
20
|
+
with:
|
21
|
+
ruby-version: 2.6.6
|
22
|
+
- name: Install dependencies
|
23
|
+
run: bundle install
|
24
|
+
- name: Lint with RuboCop
|
25
|
+
run: bundle exec rubocop --parallel
|
26
|
+
- name: Run Danger
|
27
|
+
env:
|
28
|
+
DANGER_GITHUB_API_TOKEN: ${{ secrets.DANGER_GITHUB_API_TOKEN }}
|
29
|
+
run: bundle exec danger
|
30
|
+
- name: Run tests
|
31
|
+
run: bundle exec rake spec
|
data/.rubocop.yml
CHANGED
@@ -10,8 +10,9 @@ AllCops:
|
|
10
10
|
Exclude:
|
11
11
|
- 'tmp/**/*'
|
12
12
|
- 'vendor/**/*'
|
13
|
+
- 'Dangerfile'
|
13
14
|
ExtraDetails: true
|
14
|
-
TargetRubyVersion: 2.
|
15
|
+
TargetRubyVersion: 2.5
|
15
16
|
|
16
17
|
# readability is Actually Good
|
17
18
|
Layout/EmptyLinesAroundClassBody:
|
@@ -59,6 +60,15 @@ Style/ClassAndModuleChildren:
|
|
59
60
|
Style/Documentation:
|
60
61
|
Enabled: false
|
61
62
|
|
63
|
+
Style/HashEachMethods:
|
64
|
+
Enabled: true
|
65
|
+
|
66
|
+
Style/HashTransformKeys:
|
67
|
+
Enabled: true
|
68
|
+
|
69
|
+
Style/HashTransformValues:
|
70
|
+
Enabled: true
|
71
|
+
|
62
72
|
Naming/FileName:
|
63
73
|
Exclude:
|
64
74
|
- Dangerfile
|
@@ -88,3 +98,12 @@ RSpec/MultipleExpectations:
|
|
88
98
|
|
89
99
|
RSpec/DescribedClass:
|
90
100
|
EnforcedStyle: explicit
|
101
|
+
|
102
|
+
Lint/RaiseException:
|
103
|
+
Enabled: true
|
104
|
+
|
105
|
+
Lint/StructNewOverride:
|
106
|
+
Enabled: true
|
107
|
+
|
108
|
+
RSpec/MultipleMemoizedHelpers:
|
109
|
+
Enabled: false
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to the PushmiPullyu project will be documented in this file.
|
3
|
+
|
4
|
+
PushmiPullyu is a Ruby application, whose primary job is to manage the flow of content from [Jupiter](https://github.com/ualbertalib/jupiter/) into Swift for preservation.
|
5
|
+
|
6
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
7
|
+
and releases in PushmiPullyu adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
8
|
+
|
9
|
+
## [Unreleased]
|
10
|
+
|
11
|
+
## [2.0.0] - 2020-12-14
|
12
|
+
|
13
|
+
### Removed
|
14
|
+
- Data output for original_file information
|
15
|
+
|
16
|
+
## [1.0.6] - 2018-11-29
|
data/Dangerfile
CHANGED
@@ -13,9 +13,13 @@ end
|
|
13
13
|
# just leaving a title
|
14
14
|
warn('Please add a detailed summary in the description.') if github.pr_body.length < 5
|
15
15
|
|
16
|
-
#
|
17
|
-
|
18
|
-
|
16
|
+
# Let people say that this isn't worth a CHANGELOG entry in the PR if they choose
|
17
|
+
declared_trivial = (github.pr_title + github.pr_body).include?('#trivial') || !has_app_changes
|
18
|
+
|
19
|
+
if !git.modified_files.include?('CHANGELOG.md') && !declared_trivial
|
20
|
+
error_message = "Please include a CHANGELOG entry. \nYou can find it at " \
|
21
|
+
'[CHANGELOG.md](https://github.com/ualbertalib/pushmi_pullyu/blob/master/CHANGELOG.md).'
|
22
|
+
fail(error_message, sticky: false)
|
19
23
|
end
|
20
24
|
|
21
25
|
# Warn when there is a big PR
|
data/README.md
CHANGED
@@ -5,22 +5,21 @@
|
|
5
5
|
</p>
|
6
6
|
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/pushmi_pullyu.svg)](https://rubygems.org/gems/pushmi_pullyu)
|
8
|
-
[![
|
9
|
-
[![Build Status](https://travis-ci.org/ualbertalib/pushmi_pullyu.svg?branch=master)](https://travis-ci.org/ualbertalib/pushmi_pullyu)
|
8
|
+
[![Github Build Status](https://github.com/ualbertalib/pushmi_pullyu/workflows/CI/badge.svg)](https://github.com/ualbertalib/pushmi_pullyu/actions)
|
10
9
|
[![Coverage Status](https://coveralls.io/repos/github/ualbertalib/pushmi_pullyu/badge.svg?branch=master)](https://coveralls.io/github/ualbertalib/pushmi_pullyu?branch=master)
|
11
10
|
|
12
11
|
PushmiPullyu is a Ruby application, running behind the firewall that protects our Swift environment.
|
13
12
|
|
14
|
-
Its primary job is to manage the flow of content from
|
13
|
+
Its primary job is to manage the flow of content from Jupiter into Swift for preservation.
|
15
14
|
|
16
15
|
![System Infrastructure Diagram](docs/images/system-infrastructure-diagram.png)
|
17
16
|
|
18
17
|
## Workflow
|
19
18
|
|
20
|
-
1. Any save (create or update) on a Item/Thesis in ERA/Jupiter will trigger an after save callback that will push the item's unique identifier (UUID
|
19
|
+
1. Any save (create or update) on an Item/Thesis in ERA/Jupiter will trigger an after-save callback that will push the item's unique identifier (UUID) into a Queue.
|
21
20
|
2. The queue (Redis) is set up to be a unique set (which only allows one item's UUID to be included in the queue at a single time), and ordered by priority from First In, First Out (FIFO).
|
22
21
|
3. PushmiPullyu will then monitor the queue. After a certain wait period has passed since an element has been on the queue, PushmiPullyu will then retrieve the elements off the queue and begin to process the preservation event.
|
23
|
-
4. All the
|
22
|
+
4. All the Item/Thesis information and data required for preservation are retrieved from Jupiter using multiple REST calls to Jupiter's AIP API.
|
24
23
|
5. An Archival Information Package (AIP) is created from the item's information. It is then bagged and tarred.
|
25
24
|
6. The AIP tar is then uploaded to Swift via a REST call.
|
26
25
|
7. On a successful Swift upload, an entry is added for this preservation event to the preservation event logs.
|
data/examples/pushmi_pullyu.yml
CHANGED
@@ -21,12 +21,6 @@ minimum_age: 0
|
|
21
21
|
redis:
|
22
22
|
url: redis://localhost:6379
|
23
23
|
|
24
|
-
fedora:
|
25
|
-
url: http://localhost:8080/fcrepo/rest
|
26
|
-
user: fedoraAdmin
|
27
|
-
password: fedoraAdmin
|
28
|
-
base_path: /dev
|
29
|
-
|
30
24
|
database:
|
31
25
|
url: postgresql://jupiter:mysecretpassword@127.0.0.1/jupiter_development
|
32
26
|
|
@@ -44,3 +38,9 @@ rollbar:
|
|
44
38
|
token: 'abc123xyz'
|
45
39
|
proxy_host: 'your_proxy_host_url'
|
46
40
|
proxy_port: '80'
|
41
|
+
|
42
|
+
jupiter:
|
43
|
+
user: jupiter@ualberta.ca
|
44
|
+
api_key: 5042c4ad-6d22-486d-bc63-2b9e5b9a630a
|
45
|
+
jupiter_url: http://localhost:3000/
|
46
|
+
aip_api_path: aip/v1
|
data/lib/pushmi_pullyu.rb
CHANGED
@@ -1,22 +1,15 @@
|
|
1
|
-
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
2
|
-
# (This fixes a weird NameError bug when using the nested compact syntax
|
3
|
-
# defining modules/classes like `module PushmiPullyu::Logging`)
|
4
|
-
require 'pushmi_pullyu/version'
|
1
|
+
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
2
|
+
# module. (This fixes a weird NameError bug when using the nested compact syntax
|
3
|
+
# for defining modules/classes like `module PushmiPullyu::Logging`)
|
5
4
|
|
5
|
+
require 'pushmi_pullyu/version'
|
6
6
|
require 'pushmi_pullyu/logging'
|
7
|
-
|
8
7
|
require 'pushmi_pullyu/aip'
|
9
8
|
require 'pushmi_pullyu/aip/creator'
|
10
9
|
require 'pushmi_pullyu/aip/downloader'
|
11
|
-
require 'pushmi_pullyu/aip/fedora_fetcher'
|
12
|
-
require 'pushmi_pullyu/aip/file_list_creator'
|
13
|
-
require 'pushmi_pullyu/aip/owner_email_editor'
|
14
|
-
require 'active_record'
|
15
|
-
require 'pushmi_pullyu/aip/user'
|
16
10
|
require 'pushmi_pullyu/cli'
|
17
11
|
require 'pushmi_pullyu/preservation_queue'
|
18
12
|
require 'pushmi_pullyu/swift_depositer'
|
19
|
-
|
20
13
|
require 'active_support'
|
21
14
|
require 'active_support/core_ext'
|
22
15
|
|
@@ -36,12 +29,6 @@ module PushmiPullyu
|
|
36
29
|
redis: {
|
37
30
|
url: 'redis://localhost:6379'
|
38
31
|
},
|
39
|
-
fedora: {
|
40
|
-
url: 'http://localhost:8080/fcrepo/rest',
|
41
|
-
user: 'fedoraAdmin',
|
42
|
-
password: 'fedoraAdmin',
|
43
|
-
base_path: '/dev'
|
44
|
-
},
|
45
32
|
swift: {
|
46
33
|
tenant: 'tester',
|
47
34
|
username: 'test:tester',
|
@@ -58,6 +45,12 @@ module PushmiPullyu
|
|
58
45
|
pool: ENV['RAILS_MAX_THREADS'] || 5,
|
59
46
|
url: ENV['DATABASE_URL'] || ENV['JUPITER_DATABASE_URL'] || 'postgresql://jupiter:mysecretpassword@127.0.0.1',
|
60
47
|
database: 'jupiter_development'
|
48
|
+
},
|
49
|
+
jupiter: {
|
50
|
+
user: ENV['JUPITER_USER'],
|
51
|
+
api_key: ENV['JUPITER_API_KEY'],
|
52
|
+
jupiter_url: ENV['JUPITER_URL'] || 'http://localhost:3000/',
|
53
|
+
aip_api_path: ENV['JUPITER_AIP_API_PATH'] || 'aip/v1'
|
61
54
|
}
|
62
55
|
}.freeze
|
63
56
|
|
data/lib/pushmi_pullyu/aip.rb
CHANGED
@@ -1,17 +1,20 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'uuid'
|
2
3
|
|
3
4
|
module PushmiPullyu::AIP
|
4
|
-
class
|
5
|
+
class EntityInvalid < StandardError; end
|
5
6
|
module_function
|
6
7
|
|
7
|
-
def create(
|
8
|
-
raise
|
8
|
+
def create(entity)
|
9
|
+
raise EntityInvalid if entity.nil? ||
|
10
|
+
UUID.validate(entity[:uuid]) != true ||
|
11
|
+
entity[:type].blank?
|
9
12
|
|
10
|
-
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{
|
13
|
+
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{entity[:uuid]}"
|
11
14
|
aip_filename = "#{aip_directory}.tar"
|
12
15
|
|
13
|
-
PushmiPullyu::AIP::Downloader.new(
|
14
|
-
PushmiPullyu::AIP::Creator.new(
|
16
|
+
PushmiPullyu::AIP::Downloader.new(entity, aip_directory).run
|
17
|
+
PushmiPullyu::AIP::Creator.new(entity[:uuid], aip_directory, aip_filename).run
|
15
18
|
|
16
19
|
yield aip_filename, aip_directory
|
17
20
|
|
@@ -2,261 +2,213 @@ require 'fileutils'
|
|
2
2
|
require 'ostruct'
|
3
3
|
require 'rdf'
|
4
4
|
require 'rdf/n3'
|
5
|
+
require 'net/http'
|
6
|
+
require 'uri'
|
7
|
+
require 'digest'
|
5
8
|
|
6
|
-
# Download all of the metadata/datastreams and associated data
|
7
|
-
# related to an object
|
9
|
+
# Download all of the metadata/datastreams and associated data related to an object
|
8
10
|
class PushmiPullyu::AIP::Downloader
|
9
11
|
|
10
12
|
PREDICATE_URIS = {
|
11
13
|
filename: 'http://purl.org/dc/terms/title',
|
12
14
|
member_files: 'http://pcdm.org/models#hasFile',
|
13
15
|
member_file_sets: 'http://pcdm.org/models#hasMember',
|
14
|
-
original_file: 'http://pcdm.org/use#OriginalFile',
|
15
16
|
type: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
|
16
17
|
}.freeze
|
17
18
|
|
18
|
-
class
|
19
|
-
class
|
20
|
-
class
|
21
|
-
class NoOriginalFile < StandardError; end
|
19
|
+
class JupiterDownloadError < StandardError; end
|
20
|
+
class JupiterCopyError < StandardError; end
|
21
|
+
class JupiterAuthenticationError < StandardError; end
|
22
22
|
|
23
|
-
def initialize(
|
24
|
-
@
|
23
|
+
def initialize(entity, aip_directory)
|
24
|
+
@entity = entity
|
25
|
+
@entity_identifier = "[#{entity[:type]} - #{entity[:uuid]}]".freeze
|
25
26
|
@aip_directory = aip_directory
|
26
27
|
end
|
27
28
|
|
28
29
|
def run
|
29
|
-
|
30
|
+
PushmiPullyu.logger.info("#{@entity_identifier}: Retreiving data from Jupiter ...")
|
30
31
|
|
31
|
-
|
32
|
+
authenticate_http_calls
|
33
|
+
make_directories
|
32
34
|
|
33
35
|
# Main object metadata
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
download_and_log(
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
member_files(file_set_uuid).each do |file_path|
|
52
|
-
path_spec = OpenStruct.new(
|
53
|
-
remote: "/files/#{file_path}/fcr:metadata",
|
54
|
-
# Note: local file gets clobbered on each download until it finds the right one
|
55
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/original_file_metadata.n3",
|
56
|
-
optional: true
|
57
|
-
)
|
58
|
-
download_and_log(path_spec, file_set_downloader)
|
59
|
-
if original_file?(path_spec.local)
|
60
|
-
original_file_remote_base = "/files/#{file_path}"
|
61
|
-
break
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
raise NoOriginalFile unless original_file_remote_base.present?
|
66
|
-
|
67
|
-
[:content, :fixity].each do |item|
|
68
|
-
path_spec = file_aip_paths(file_set_uuid, original_file_remote_base)[item]
|
69
|
-
download_and_log(path_spec, file_set_downloader)
|
70
|
-
end
|
36
|
+
download_and_log(object_aip_paths[:main_object_remote],
|
37
|
+
object_aip_paths[:main_object_local])
|
38
|
+
download_and_log(object_aip_paths[:file_sets_remote],
|
39
|
+
object_aip_paths[:file_sets_local])
|
40
|
+
|
41
|
+
# Get file paths for processing
|
42
|
+
file_paths = get_file_paths(object_aip_paths[:file_paths_remote])
|
43
|
+
|
44
|
+
file_paths[:files].each do |file_path|
|
45
|
+
file_uuid = file_path[:file_uuid]
|
46
|
+
make_file_set_directories(file_uuid)
|
47
|
+
copy_and_log(file_uuid, file_path)
|
48
|
+
file_aip_path = file_aip_paths(file_uuid)
|
49
|
+
download_and_log(file_aip_path[:fixity_remote],
|
50
|
+
file_aip_path[:fixity_local])
|
51
|
+
download_and_log(file_aip_path[:file_set_remote],
|
52
|
+
file_aip_path[:file_set_local])
|
71
53
|
end
|
72
54
|
end
|
73
55
|
|
74
56
|
private
|
75
57
|
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
58
|
+
def copy_and_log(file_uuid, file_path)
|
59
|
+
remote = file_path[:file_path]
|
60
|
+
remote_checksum = file_path[:file_checksum]
|
61
|
+
files_path = file_set_dirs(file_uuid)[:files]
|
62
|
+
output_file = "#{files_path}/#{file_path[:file_name]}"
|
63
|
+
log_downloading(remote, output_file)
|
64
|
+
FileUtils.copy_file(remote, output_file)
|
80
65
|
|
81
|
-
|
82
|
-
|
66
|
+
is_success = File.exist?(output_file) &&
|
67
|
+
File.size(remote) == File.size(output_file) &&
|
68
|
+
compare_md5(output_file, remote_checksum)
|
83
69
|
|
84
|
-
is_success = fedora_fetcher.download_object(output_file,
|
85
|
-
url_extra: path_spec.remote,
|
86
|
-
optional: path_spec.optional,
|
87
|
-
is_rdf: is_rdf,
|
88
|
-
should_add_user_email: should_add_user_email)
|
89
70
|
log_saved(is_success, output_file)
|
71
|
+
|
72
|
+
raise JupiterCopyError unless is_success
|
73
|
+
end
|
74
|
+
|
75
|
+
def compare_md5(local, remote_checksum)
|
76
|
+
local_md5 = Digest::MD5.file local
|
77
|
+
local_md5.base64digest == remote_checksum
|
78
|
+
end
|
79
|
+
|
80
|
+
def authenticate_http_calls
|
81
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
82
|
+
@http = Net::HTTP.new(@uri.host, @uri.port)
|
83
|
+
request = Net::HTTP::Post.new(@uri.request_uri + 'auth/system')
|
84
|
+
request.set_form_data(
|
85
|
+
email: PushmiPullyu.options[:jupiter][:user],
|
86
|
+
api_key: PushmiPullyu.options[:jupiter][:api_key]
|
87
|
+
)
|
88
|
+
response = @http.request(request)
|
89
|
+
# If we cannot find the set-cookie header then the session was not set
|
90
|
+
raise JupiterAuthenticationError if response.response['set-cookie'].nil?
|
91
|
+
|
92
|
+
@cookies = response.response['set-cookie']
|
93
|
+
end
|
94
|
+
|
95
|
+
def download_and_log(remote, local)
|
96
|
+
log_downloading(remote, local)
|
97
|
+
|
98
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
99
|
+
request = Net::HTTP::Get.new(@uri.request_uri + remote)
|
100
|
+
# add previously stored cookies
|
101
|
+
request['Cookie'] = @cookies
|
102
|
+
|
103
|
+
response = @http.request(request)
|
104
|
+
is_success = if response.is_a?(Net::HTTPSuccess)
|
105
|
+
File.open(local, 'wb') do |file|
|
106
|
+
file.write(response.body)
|
107
|
+
end
|
108
|
+
# Response was a success and the file was saved to local
|
109
|
+
File.exist? local
|
110
|
+
end
|
111
|
+
|
112
|
+
log_saved(is_success, local)
|
113
|
+
raise JupiterDownloadError unless is_success
|
114
|
+
end
|
115
|
+
|
116
|
+
def get_file_paths(url)
|
117
|
+
request = Net::HTTP::Get.new(@uri.request_uri + url)
|
118
|
+
# add previously stored cookies
|
119
|
+
request['Cookie'] = @cookies
|
120
|
+
|
121
|
+
response = @http.request(request)
|
122
|
+
|
123
|
+
JSON.parse(response.body, symbolize_names: true)
|
90
124
|
end
|
91
125
|
|
92
|
-
def
|
93
|
-
|
94
|
-
|
95
|
-
"#{@noid}: #{output_file} -- creating from #{url} ...")
|
96
|
-
PushmiPullyu::AIP::FileListCreator.new(url, output_file, member_file_set_uuids).run
|
97
|
-
PushmiPullyu::Logging.log_aip_activity(@aip_directory,
|
98
|
-
"#{@noid}: #{output_file} -- created")
|
126
|
+
def object_uri
|
127
|
+
aip_api_url = PushmiPullyu.options[:jupiter][:aip_api_path]
|
128
|
+
@object_uri ||= "#{aip_api_url}/#{@entity[:type]}/#{@entity[:uuid]}"
|
99
129
|
end
|
100
130
|
|
101
131
|
### Logging
|
102
132
|
|
103
|
-
def
|
104
|
-
message = "#{@
|
133
|
+
def log_downloading(url, output_file)
|
134
|
+
message = "#{@entity_identifier}: #{output_file} -- Downloading from #{url} ..."
|
105
135
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
106
136
|
end
|
107
137
|
|
108
138
|
def log_saved(is_success, output_file)
|
109
|
-
message = "#{@
|
139
|
+
message = "#{@entity_identifier}: #{output_file} -- #{is_success ? 'Saved' : 'Failed'}"
|
110
140
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
111
141
|
end
|
112
142
|
|
113
143
|
### Directories
|
114
144
|
|
115
145
|
def aip_dirs
|
116
|
-
@aip_dirs ||=
|
146
|
+
@aip_dirs ||= {
|
117
147
|
objects: "#{@aip_directory}/data/objects",
|
118
148
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
119
149
|
files: "#{@aip_directory}/data/objects/files",
|
120
150
|
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
121
151
|
logs: "#{@aip_directory}/data/logs",
|
122
152
|
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
123
|
-
|
153
|
+
}
|
124
154
|
end
|
125
155
|
|
126
156
|
def file_set_dirs(file_set_uuid)
|
127
157
|
@file_set_dirs ||= {}
|
128
|
-
@file_set_dirs[file_set_uuid] ||=
|
129
|
-
metadata: "#{aip_dirs
|
130
|
-
files: "#{aip_dirs
|
131
|
-
logs: "#{aip_dirs
|
132
|
-
|
158
|
+
@file_set_dirs[file_set_uuid] ||= {
|
159
|
+
metadata: "#{aip_dirs[:files_metadata]}/#{file_set_uuid}",
|
160
|
+
files: "#{aip_dirs[:files]}/#{file_set_uuid}",
|
161
|
+
logs: "#{aip_dirs[:file_logs]}/#{file_set_uuid}"
|
162
|
+
}
|
133
163
|
end
|
134
164
|
|
135
165
|
def make_directories
|
166
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories ...")
|
136
167
|
clean_directories
|
137
|
-
|
138
|
-
aip_dirs.to_h.each_value do |path|
|
168
|
+
aip_dirs.each_value do |path|
|
139
169
|
FileUtils.mkdir_p(path)
|
140
170
|
end
|
141
|
-
PushmiPullyu.logger.debug("#{@
|
171
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories done")
|
142
172
|
end
|
143
173
|
|
144
174
|
def make_file_set_directories(file_set_uuid)
|
145
|
-
PushmiPullyu.logger.debug("#{@
|
146
|
-
file_set_dirs(file_set_uuid).
|
175
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories ...")
|
176
|
+
file_set_dirs(file_set_uuid).each_value do |path|
|
147
177
|
FileUtils.mkdir_p(path)
|
148
178
|
end
|
149
|
-
PushmiPullyu.logger.debug("#{@
|
179
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories done")
|
150
180
|
end
|
151
181
|
|
152
182
|
def clean_directories
|
153
183
|
return unless File.exist?(@aip_directory)
|
154
184
|
|
155
|
-
PushmiPullyu.logger.debug("#{@
|
185
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Nuking directories ...")
|
156
186
|
FileUtils.rm_rf(@aip_directory)
|
157
187
|
end
|
158
188
|
|
159
189
|
### Files
|
160
190
|
|
161
191
|
def object_aip_paths
|
162
|
-
@object_aip_paths ||=
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
local: "#{aip_dirs.files_metadata}/file_order.xml"
|
176
|
-
)
|
177
|
-
).freeze
|
178
|
-
end
|
179
|
-
|
180
|
-
def file_set_aip_paths(file_set_uuid)
|
181
|
-
@file_set_aip_paths ||= {}
|
182
|
-
@file_set_aip_paths[file_set_uuid] ||= OpenStruct.new(
|
183
|
-
main_object: OpenStruct.new(
|
184
|
-
remote: nil, # Base file_set path
|
185
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/file_set_metadata.n3",
|
186
|
-
should_add_user_email: true,
|
187
|
-
optional: false
|
188
|
-
)
|
189
|
-
).freeze
|
190
|
-
end
|
191
|
-
|
192
|
-
def file_aip_paths(file_set_uuid, original_file_remote_base)
|
192
|
+
@object_aip_paths ||= {
|
193
|
+
# Base path
|
194
|
+
main_object_remote: object_uri,
|
195
|
+
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
196
|
+
file_sets_remote: "#{object_uri}/filesets",
|
197
|
+
file_sets_local: "#{aip_dirs[:files_metadata]}/file_order.xml",
|
198
|
+
# This is downloaded for processing but not saved
|
199
|
+
file_paths_remote: "#{object_uri}/file_paths"
|
200
|
+
}.freeze
|
201
|
+
end
|
202
|
+
|
203
|
+
def file_aip_paths(file_set_uuid)
|
204
|
+
file_set_paths = file_set_dirs(file_set_uuid)
|
193
205
|
@file_aip_paths ||= {}
|
194
|
-
@file_aip_paths[file_set_uuid] ||=
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
fixity: OpenStruct.new(
|
201
|
-
remote: "#{original_file_remote_base}/fcr:fixity",
|
202
|
-
local: "#{file_set_dirs(file_set_uuid)[:logs]}/content_fixity_report.n3",
|
203
|
-
optional: false
|
204
|
-
)
|
205
|
-
).freeze
|
206
|
-
end
|
207
|
-
|
208
|
-
def member_file_set_uuids
|
209
|
-
@member_file_set_uuids ||= []
|
210
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
211
|
-
|
212
|
-
member_file_set_predicate = RDF::URI(PREDICATE_URIS[:member_file_sets])
|
213
|
-
|
214
|
-
graph = RDF::Graph.load(object_aip_paths.main_object.local)
|
215
|
-
|
216
|
-
graph.query(predicate: member_file_set_predicate) do |results|
|
217
|
-
# Get uuid from end of fedora path
|
218
|
-
@member_file_set_uuids << results.object.to_s.split('/').last
|
219
|
-
end
|
220
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
221
|
-
|
222
|
-
raise NoFileSets
|
223
|
-
end
|
224
|
-
|
225
|
-
def file_set_filename(file_set_uuid)
|
226
|
-
filename_predicate = RDF::URI(PREDICATE_URIS[:filename])
|
227
|
-
|
228
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
229
|
-
|
230
|
-
graph.query(predicate: filename_predicate) do |results|
|
231
|
-
return "#{file_set_dirs(file_set_uuid).files}/#{results.object}"
|
232
|
-
end
|
233
|
-
|
234
|
-
raise NoContentFilename
|
235
|
-
end
|
236
|
-
|
237
|
-
def member_files(file_set_uuid)
|
238
|
-
member_file_predicate = RDF::URI(PREDICATE_URIS[:member_files])
|
239
|
-
|
240
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
241
|
-
|
242
|
-
member_files = []
|
243
|
-
graph.query(predicate: member_file_predicate) do |results|
|
244
|
-
# Get uuid from end of fedora path
|
245
|
-
member_files << results.object.to_s.split('/').last
|
246
|
-
end
|
247
|
-
return member_files if member_files.present?
|
248
|
-
|
249
|
-
raise NoMemberFiles
|
250
|
-
end
|
251
|
-
|
252
|
-
def original_file?(metadata_filename)
|
253
|
-
type_predicate = RDF::URI(PREDICATE_URIS[:type])
|
254
|
-
original_file_uri = RDF::URI(PREDICATE_URIS[:original_file])
|
255
|
-
graph = RDF::Graph.load(metadata_filename)
|
256
|
-
graph.query(predicate: type_predicate) do |results|
|
257
|
-
return true if results.object == original_file_uri
|
258
|
-
end
|
259
|
-
false
|
206
|
+
@file_aip_paths[file_set_uuid] ||= {
|
207
|
+
fixity_remote: "#{object_uri}/filesets/#{file_set_uuid}/fixity",
|
208
|
+
fixity_local: "#{file_set_paths[:logs]}/content_fixity_report.n3",
|
209
|
+
file_set_remote: "#{object_uri}/filesets/#{file_set_uuid}",
|
210
|
+
file_set_local: "#{file_set_paths[:metadata]}/file_set_metadata.n3"
|
211
|
+
}.freeze
|
260
212
|
end
|
261
213
|
|
262
214
|
end
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
@@ -4,6 +4,7 @@ require 'optparse'
|
|
4
4
|
require 'rollbar'
|
5
5
|
require 'singleton'
|
6
6
|
require 'yaml'
|
7
|
+
require 'json'
|
7
8
|
|
8
9
|
# CLI runner
|
9
10
|
class PushmiPullyu::CLI
|
@@ -64,8 +65,8 @@ class PushmiPullyu::CLI
|
|
64
65
|
# add a filter after Rollbar has built the error payload but before it is delivered to the API,
|
65
66
|
# in order to strip sensitive information out of certain error messages
|
66
67
|
exception_message_transformer = proc do |payload|
|
67
|
-
clean_message = payload[:exception][:message].sub(/http:\/\/.+:.+@(.+)\/
|
68
|
-
"http://\1/
|
68
|
+
clean_message = payload[:exception][:message].sub(/http:\/\/.+:.+@(.+)\/aip\/v1\/(.*)/,
|
69
|
+
"http://\1/aip/v1/\2")
|
69
70
|
payload[:exception][:message] = clean_message
|
70
71
|
payload[:message] = clean_message
|
71
72
|
end
|
@@ -181,26 +182,29 @@ class PushmiPullyu::CLI
|
|
181
182
|
end
|
182
183
|
|
183
184
|
def run_preservation_cycle
|
184
|
-
|
185
|
-
|
185
|
+
entity_json = JSON.parse(queue.wait_next_item)
|
186
|
+
entity = {
|
187
|
+
type: entity_json['type'],
|
188
|
+
uuid: entity_json['uuid']
|
189
|
+
}
|
190
|
+
return unless entity[:type].present? && entity[:uuid].present?
|
186
191
|
|
187
192
|
# add additional information about the error context to errors that occur while processing this item.
|
188
|
-
Rollbar.scoped(
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
end
|
197
|
-
# rubocop:disable Lint/RescueException
|
198
|
-
rescue Exception => e
|
199
|
-
Rollbar.error(e)
|
200
|
-
logger.error(e)
|
201
|
-
# TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
|
202
|
-
# move on to the next item.
|
193
|
+
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
194
|
+
# Download AIP from Jupiter, bag and tar AIP directory and cleanup after
|
195
|
+
# block code
|
196
|
+
PushmiPullyu::AIP.create(entity) do |aip_filename, aip_directory|
|
197
|
+
# Push tarred AIP to swift API
|
198
|
+
deposited_file = swift.deposit_file(aip_filename, options[:swift][:container])
|
199
|
+
# Log successful preservation event to the log files
|
200
|
+
PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
|
203
201
|
end
|
202
|
+
# rubocop:disable Lint/RescueException
|
203
|
+
rescue Exception => e
|
204
|
+
Rollbar.error(e)
|
205
|
+
logger.error(e)
|
206
|
+
# TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
|
207
|
+
# move on to the next item.
|
204
208
|
# rubocop:enable Lint/RescueException
|
205
209
|
end
|
206
210
|
end
|
@@ -48,7 +48,7 @@ module PushmiPullyu::Logging
|
|
48
48
|
|
49
49
|
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
|
50
50
|
"Here are the details of this preservation event:\n"\
|
51
|
-
"\
|
51
|
+
"\tUUID: '#{deposited_file.name}'\n"\
|
52
52
|
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
|
53
53
|
"\tAIP Checksum: '#{deposited_file.etag}'\n"\
|
54
54
|
"\tMetadata: #{deposited_file.metadata}\n"\
|
data/pushmi_pullyu.gemspec
CHANGED
@@ -5,10 +5,10 @@ require 'pushmi_pullyu/version'
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = 'pushmi_pullyu'
|
7
7
|
spec.version = PushmiPullyu::VERSION
|
8
|
-
spec.authors = ['Shane Murnaghan']
|
9
|
-
spec.email = ['murnagha@ualberta.ca']
|
8
|
+
spec.authors = ['Shane Murnaghan', 'Omar Rodriguez-Arenas']
|
9
|
+
spec.email = ['murnagha@ualberta.ca', 'orodrigu@ualberta.ca']
|
10
10
|
|
11
|
-
spec.summary = 'Ruby application to manage flow of content from
|
11
|
+
spec.summary = 'Ruby application to manage flow of content from Jupiter into Swift for preservation'
|
12
12
|
spec.homepage = 'https://github.com/ualbertalib/pushmi_pullyu'
|
13
13
|
spec.license = 'MIT'
|
14
14
|
|
@@ -19,10 +19,9 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.required_ruby_version = '>= 2.
|
22
|
+
spec.required_ruby_version = '>= 2.5'
|
23
23
|
|
24
|
-
spec.add_runtime_dependency '
|
25
|
-
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
24
|
+
spec.add_runtime_dependency 'activesupport', '>= 5', '< 7'
|
26
25
|
spec.add_runtime_dependency 'bagit', '~> 0.4'
|
27
26
|
spec.add_runtime_dependency 'connection_pool', '~> 2.2'
|
28
27
|
spec.add_runtime_dependency 'daemons', '~> 1.2', '>= 1.2.4'
|
@@ -33,18 +32,19 @@ Gem::Specification.new do |spec|
|
|
33
32
|
spec.add_runtime_dependency 'rdf-n3', '>= 1.99', '< 4.0'
|
34
33
|
spec.add_runtime_dependency 'redis', '>= 3.3', '< 5.0'
|
35
34
|
spec.add_runtime_dependency 'rest-client', '>= 1.8', '< 3.0'
|
36
|
-
spec.add_runtime_dependency 'rollbar', '
|
35
|
+
spec.add_runtime_dependency 'rollbar', '>= 2.18', '< 4.0'
|
37
36
|
|
38
|
-
spec.add_development_dependency 'bundler', '~>
|
37
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
39
38
|
spec.add_development_dependency 'coveralls', '~> 0.8'
|
40
|
-
spec.add_development_dependency 'danger', '~>
|
39
|
+
spec.add_development_dependency 'danger', '~> 8.0'
|
41
40
|
spec.add_development_dependency 'pry', '~> 0.10', '>= 0.10.4'
|
42
41
|
spec.add_development_dependency 'pry-byebug', '~> 3.6'
|
43
|
-
spec.add_development_dependency 'rake', '~>
|
42
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
44
43
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
45
44
|
spec.add_development_dependency 'rubocop', '~> 0.51'
|
46
45
|
spec.add_development_dependency 'rubocop-rspec', '~> 1.10'
|
47
46
|
spec.add_development_dependency 'timecop', '~> 0.8'
|
48
|
-
spec.add_development_dependency '
|
47
|
+
spec.add_development_dependency 'uuid', '~> 2.3.9'
|
48
|
+
spec.add_development_dependency 'vcr', '~> 5.0'
|
49
49
|
spec.add_development_dependency 'webmock', '~> 3.3'
|
50
50
|
end
|
metadata
CHANGED
@@ -1,49 +1,36 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pushmi_pullyu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shane Murnaghan
|
8
|
+
- Omar Rodriguez-Arenas
|
8
9
|
autorequire:
|
9
10
|
bindir: exe
|
10
11
|
cert_chain: []
|
11
|
-
date:
|
12
|
+
date: 2021-01-29 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
+
name: activesupport
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
16
17
|
requirements:
|
17
18
|
- - ">="
|
18
19
|
- !ruby/object:Gem::Version
|
19
|
-
version: 5
|
20
|
+
version: '5'
|
20
21
|
- - "<"
|
21
22
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
23
|
+
version: '7'
|
23
24
|
type: :runtime
|
24
25
|
prerelease: false
|
25
26
|
version_requirements: !ruby/object:Gem::Requirement
|
26
27
|
requirements:
|
27
28
|
- - ">="
|
28
29
|
- !ruby/object:Gem::Version
|
29
|
-
version: 5
|
30
|
+
version: '5'
|
30
31
|
- - "<"
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: activesupport
|
35
|
-
requirement: !ruby/object:Gem::Requirement
|
36
|
-
requirements:
|
37
|
-
- - "~>"
|
38
|
-
- !ruby/object:Gem::Version
|
39
|
-
version: '5.0'
|
40
|
-
type: :runtime
|
41
|
-
prerelease: false
|
42
|
-
version_requirements: !ruby/object:Gem::Requirement
|
43
|
-
requirements:
|
44
|
-
- - "~>"
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: '5.0'
|
33
|
+
version: '7'
|
47
34
|
- !ruby/object:Gem::Dependency
|
48
35
|
name: bagit
|
49
36
|
requirement: !ruby/object:Gem::Requirement
|
@@ -230,30 +217,36 @@ dependencies:
|
|
230
217
|
name: rollbar
|
231
218
|
requirement: !ruby/object:Gem::Requirement
|
232
219
|
requirements:
|
233
|
-
- - "
|
220
|
+
- - ">="
|
234
221
|
- !ruby/object:Gem::Version
|
235
222
|
version: '2.18'
|
223
|
+
- - "<"
|
224
|
+
- !ruby/object:Gem::Version
|
225
|
+
version: '4.0'
|
236
226
|
type: :runtime
|
237
227
|
prerelease: false
|
238
228
|
version_requirements: !ruby/object:Gem::Requirement
|
239
229
|
requirements:
|
240
|
-
- - "
|
230
|
+
- - ">="
|
241
231
|
- !ruby/object:Gem::Version
|
242
232
|
version: '2.18'
|
233
|
+
- - "<"
|
234
|
+
- !ruby/object:Gem::Version
|
235
|
+
version: '4.0'
|
243
236
|
- !ruby/object:Gem::Dependency
|
244
237
|
name: bundler
|
245
238
|
requirement: !ruby/object:Gem::Requirement
|
246
239
|
requirements:
|
247
240
|
- - "~>"
|
248
241
|
- !ruby/object:Gem::Version
|
249
|
-
version: '
|
242
|
+
version: '2.0'
|
250
243
|
type: :development
|
251
244
|
prerelease: false
|
252
245
|
version_requirements: !ruby/object:Gem::Requirement
|
253
246
|
requirements:
|
254
247
|
- - "~>"
|
255
248
|
- !ruby/object:Gem::Version
|
256
|
-
version: '
|
249
|
+
version: '2.0'
|
257
250
|
- !ruby/object:Gem::Dependency
|
258
251
|
name: coveralls
|
259
252
|
requirement: !ruby/object:Gem::Requirement
|
@@ -274,14 +267,14 @@ dependencies:
|
|
274
267
|
requirements:
|
275
268
|
- - "~>"
|
276
269
|
- !ruby/object:Gem::Version
|
277
|
-
version: '
|
270
|
+
version: '8.0'
|
278
271
|
type: :development
|
279
272
|
prerelease: false
|
280
273
|
version_requirements: !ruby/object:Gem::Requirement
|
281
274
|
requirements:
|
282
275
|
- - "~>"
|
283
276
|
- !ruby/object:Gem::Version
|
284
|
-
version: '
|
277
|
+
version: '8.0'
|
285
278
|
- !ruby/object:Gem::Dependency
|
286
279
|
name: pry
|
287
280
|
requirement: !ruby/object:Gem::Requirement
|
@@ -322,14 +315,14 @@ dependencies:
|
|
322
315
|
requirements:
|
323
316
|
- - "~>"
|
324
317
|
- !ruby/object:Gem::Version
|
325
|
-
version: '
|
318
|
+
version: '13.0'
|
326
319
|
type: :development
|
327
320
|
prerelease: false
|
328
321
|
version_requirements: !ruby/object:Gem::Requirement
|
329
322
|
requirements:
|
330
323
|
- - "~>"
|
331
324
|
- !ruby/object:Gem::Version
|
332
|
-
version: '
|
325
|
+
version: '13.0'
|
333
326
|
- !ruby/object:Gem::Dependency
|
334
327
|
name: rspec
|
335
328
|
requirement: !ruby/object:Gem::Requirement
|
@@ -386,20 +379,34 @@ dependencies:
|
|
386
379
|
- - "~>"
|
387
380
|
- !ruby/object:Gem::Version
|
388
381
|
version: '0.8'
|
382
|
+
- !ruby/object:Gem::Dependency
|
383
|
+
name: uuid
|
384
|
+
requirement: !ruby/object:Gem::Requirement
|
385
|
+
requirements:
|
386
|
+
- - "~>"
|
387
|
+
- !ruby/object:Gem::Version
|
388
|
+
version: 2.3.9
|
389
|
+
type: :development
|
390
|
+
prerelease: false
|
391
|
+
version_requirements: !ruby/object:Gem::Requirement
|
392
|
+
requirements:
|
393
|
+
- - "~>"
|
394
|
+
- !ruby/object:Gem::Version
|
395
|
+
version: 2.3.9
|
389
396
|
- !ruby/object:Gem::Dependency
|
390
397
|
name: vcr
|
391
398
|
requirement: !ruby/object:Gem::Requirement
|
392
399
|
requirements:
|
393
400
|
- - "~>"
|
394
401
|
- !ruby/object:Gem::Version
|
395
|
-
version: '
|
402
|
+
version: '5.0'
|
396
403
|
type: :development
|
397
404
|
prerelease: false
|
398
405
|
version_requirements: !ruby/object:Gem::Requirement
|
399
406
|
requirements:
|
400
407
|
- - "~>"
|
401
408
|
- !ruby/object:Gem::Version
|
402
|
-
version: '
|
409
|
+
version: '5.0'
|
403
410
|
- !ruby/object:Gem::Dependency
|
404
411
|
name: webmock
|
405
412
|
requirement: !ruby/object:Gem::Requirement
|
@@ -417,6 +424,7 @@ dependencies:
|
|
417
424
|
description:
|
418
425
|
email:
|
419
426
|
- murnagha@ualberta.ca
|
427
|
+
- orodrigu@ualberta.ca
|
420
428
|
executables:
|
421
429
|
- pushmi_pullyu
|
422
430
|
extensions: []
|
@@ -424,11 +432,15 @@ extra_rdoc_files: []
|
|
424
432
|
files:
|
425
433
|
- ".coveralls.yml"
|
426
434
|
- ".editorconfig"
|
435
|
+
- ".github/CODEOWNERS"
|
436
|
+
- ".github/PULL_REQUEST_TEMPLATE"
|
437
|
+
- ".github/workflows/ruby.yml"
|
427
438
|
- ".gitignore"
|
428
439
|
- ".hound.yml"
|
429
440
|
- ".rspec"
|
430
441
|
- ".rubocop.yml"
|
431
442
|
- ".travis.yml"
|
443
|
+
- CHANGELOG.md
|
432
444
|
- Dangerfile
|
433
445
|
- Gemfile
|
434
446
|
- LICENSE.txt
|
@@ -445,10 +457,6 @@ files:
|
|
445
457
|
- lib/pushmi_pullyu/aip.rb
|
446
458
|
- lib/pushmi_pullyu/aip/creator.rb
|
447
459
|
- lib/pushmi_pullyu/aip/downloader.rb
|
448
|
-
- lib/pushmi_pullyu/aip/fedora_fetcher.rb
|
449
|
-
- lib/pushmi_pullyu/aip/file_list_creator.rb
|
450
|
-
- lib/pushmi_pullyu/aip/owner_email_editor.rb
|
451
|
-
- lib/pushmi_pullyu/aip/user.rb
|
452
460
|
- lib/pushmi_pullyu/cli.rb
|
453
461
|
- lib/pushmi_pullyu/logging.rb
|
454
462
|
- lib/pushmi_pullyu/preservation_queue.rb
|
@@ -470,16 +478,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
470
478
|
requirements:
|
471
479
|
- - ">="
|
472
480
|
- !ruby/object:Gem::Version
|
473
|
-
version: 2.
|
481
|
+
version: '2.5'
|
474
482
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
475
483
|
requirements:
|
476
484
|
- - ">="
|
477
485
|
- !ruby/object:Gem::Version
|
478
486
|
version: '0'
|
479
487
|
requirements: []
|
480
|
-
|
481
|
-
rubygems_version: 2.6.11
|
488
|
+
rubygems_version: 3.0.3
|
482
489
|
signing_key:
|
483
490
|
specification_version: 4
|
484
|
-
summary: Ruby application to manage flow of content from
|
491
|
+
summary: Ruby application to manage flow of content from Jupiter into Swift for preservation
|
485
492
|
test_files: []
|
@@ -1,66 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
class PushmiPullyu::AIP::FedoraFetcher
|
4
|
-
|
5
|
-
class FedoraFetchError < StandardError; end
|
6
|
-
|
7
|
-
RDF_FORMAT = 'text/rdf+n3'.freeze
|
8
|
-
|
9
|
-
def initialize(noid)
|
10
|
-
@noid = noid
|
11
|
-
end
|
12
|
-
|
13
|
-
def object_url(url_extra = nil)
|
14
|
-
url = "#{PushmiPullyu.options[:fedora][:url]}#{base_path}/#{pairtree}"
|
15
|
-
url += url_extra if url_extra
|
16
|
-
url
|
17
|
-
end
|
18
|
-
|
19
|
-
# Return true on success, raise an error otherwise
|
20
|
-
# (or use 'optional' to return false on 404)
|
21
|
-
def download_object(download_path, url_extra: nil,
|
22
|
-
optional: false, is_rdf: false,
|
23
|
-
should_add_user_email: false)
|
24
|
-
|
25
|
-
uri = URI(object_url(url_extra))
|
26
|
-
|
27
|
-
request = Net::HTTP::Get.new(uri)
|
28
|
-
request.basic_auth(PushmiPullyu.options[:fedora][:user],
|
29
|
-
PushmiPullyu.options[:fedora][:password])
|
30
|
-
|
31
|
-
request['Accept'] = RDF_FORMAT if is_rdf
|
32
|
-
|
33
|
-
response = Net::HTTP.start(uri.hostname, uri.port) do |http|
|
34
|
-
http.request(request)
|
35
|
-
end
|
36
|
-
|
37
|
-
if response.is_a?(Net::HTTPSuccess)
|
38
|
-
body = if should_add_user_email
|
39
|
-
PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
|
40
|
-
else
|
41
|
-
response.body
|
42
|
-
end
|
43
|
-
file = File.open(download_path, 'wb')
|
44
|
-
file.write(body)
|
45
|
-
file.close
|
46
|
-
return true
|
47
|
-
elsif response.is_a?(Net::HTTPNotFound)
|
48
|
-
raise FedoraFetchError unless optional
|
49
|
-
|
50
|
-
return false
|
51
|
-
else
|
52
|
-
raise FedoraFetchError
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def pairtree
|
59
|
-
"#{@noid[0..1]}/#{@noid[2..3]}/#{@noid[4..5]}/#{@noid[6..7]}/#{@noid}"
|
60
|
-
end
|
61
|
-
|
62
|
-
def base_path
|
63
|
-
PushmiPullyu.options[:fedora][:base_path]
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
@@ -1,121 +0,0 @@
|
|
1
|
-
require 'rdf'
|
2
|
-
require 'rdf/n3'
|
3
|
-
require 'rest-client'
|
4
|
-
|
5
|
-
class PushmiPullyu::AIP::FileListCreator
|
6
|
-
|
7
|
-
IANA = 'http://www.iana.org/assignments/relation/'.freeze
|
8
|
-
PREDICATES = {
|
9
|
-
proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
|
10
|
-
first: RDF::URI(IANA + 'first'),
|
11
|
-
last: RDF::URI(IANA + 'last'),
|
12
|
-
prev: RDF::URI(IANA + 'prev'),
|
13
|
-
next: RDF::URI(IANA + 'next'),
|
14
|
-
has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
|
15
|
-
}.freeze
|
16
|
-
|
17
|
-
class NoProxyURIFound < StandardError; end
|
18
|
-
class NoFirstProxyFound < StandardError; end
|
19
|
-
class FirstProxyHasPrev < StandardError; end
|
20
|
-
class ListSourceFileSetMismatch < StandardError; end
|
21
|
-
|
22
|
-
def initialize(list_source_uri, output_xml_file, file_set_uuids)
|
23
|
-
@uri = RDF::URI(list_source_uri)
|
24
|
-
@auth_uri = RDF::URI(list_source_uri)
|
25
|
-
@auth_uri.user = PushmiPullyu.options[:fedora][:user]
|
26
|
-
@auth_uri.password = PushmiPullyu.options[:fedora][:password]
|
27
|
-
@output_file = output_xml_file
|
28
|
-
|
29
|
-
# These are the known fileset uuids, used for validation
|
30
|
-
@file_set_uuids = file_set_uuids
|
31
|
-
end
|
32
|
-
|
33
|
-
def run
|
34
|
-
extract_list_source_uuids
|
35
|
-
raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort
|
36
|
-
|
37
|
-
write_output_file
|
38
|
-
end
|
39
|
-
|
40
|
-
def extract_list_source_uuids
|
41
|
-
# Note: raises IOError if can't find
|
42
|
-
# raises RDF::ReaderError if can't parse
|
43
|
-
@graph = RDF::Graph.load(@auth_uri, validate: true)
|
44
|
-
@list_source_uuids = []
|
45
|
-
|
46
|
-
# Fetch first FileSet in list source
|
47
|
-
this_proxy = find_first_proxy
|
48
|
-
|
49
|
-
while @list_source_uuids.count <= num_proxies
|
50
|
-
@list_source_uuids << uuid_from_proxy(this_proxy)
|
51
|
-
next_proxy = find_next_proxy(this_proxy)
|
52
|
-
|
53
|
-
break if next_proxy.nil?
|
54
|
-
|
55
|
-
raise NextPreviousProxyMismatch if this_proxy != find_prev_proxy(next_proxy)
|
56
|
-
|
57
|
-
this_proxy = next_proxy
|
58
|
-
end
|
59
|
-
|
60
|
-
raise ProxyCountIncorrect if @list_source_uuids.count != num_proxies
|
61
|
-
raise LastProxyFailsValidation if this_proxy != find_last_proxy
|
62
|
-
end
|
63
|
-
|
64
|
-
def num_proxies
|
65
|
-
@num_proxies ||= @graph.query(subject: @uri, predicate: PREDICATES[:has_part]).count
|
66
|
-
end
|
67
|
-
|
68
|
-
def uuid_from_proxy(proxy_uri)
|
69
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:proxy_for]) do |statement|
|
70
|
-
return statement.object.to_s.split('/').last
|
71
|
-
end
|
72
|
-
raise NoProxyURIFound, proxy_uri.to_s
|
73
|
-
end
|
74
|
-
|
75
|
-
def find_first_proxy
|
76
|
-
@graph.query(subject: @uri, predicate: PREDICATES[:first]) do |statement|
|
77
|
-
first_uri = statement.object
|
78
|
-
# Validate that the first proxy doesn't have a previous one
|
79
|
-
raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)
|
80
|
-
|
81
|
-
return first_uri
|
82
|
-
end
|
83
|
-
raise NoFirstProxyFound, @uri.to_s
|
84
|
-
end
|
85
|
-
|
86
|
-
def find_last_proxy
|
87
|
-
@graph.query(subject: @uri, predicate: PREDICATES[:last]) do |statement|
|
88
|
-
last_uri = statement.object
|
89
|
-
# Validate that the last proxy doesn't have a next one
|
90
|
-
raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)
|
91
|
-
|
92
|
-
return last_uri
|
93
|
-
end
|
94
|
-
raise LastProxyFound, @uri.to_s
|
95
|
-
end
|
96
|
-
|
97
|
-
def find_next_proxy(proxy_uri)
|
98
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:next]) do |statement|
|
99
|
-
return statement.object
|
100
|
-
end
|
101
|
-
nil
|
102
|
-
end
|
103
|
-
|
104
|
-
def find_prev_proxy(proxy_uri)
|
105
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:prev]) do |statement|
|
106
|
-
return statement.object
|
107
|
-
end
|
108
|
-
nil
|
109
|
-
end
|
110
|
-
|
111
|
-
def write_output_file
|
112
|
-
File.open(@output_file, 'w') do |file|
|
113
|
-
file.write("<file_order>\n")
|
114
|
-
@list_source_uuids.each do |uuid|
|
115
|
-
file.write(" <uuid>#{uuid}</uuid>\n")
|
116
|
-
end
|
117
|
-
file.write("</file_order>\n")
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
class PushmiPullyu::AIP::OwnerEmailEditor
|
4
|
-
|
5
|
-
OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze
|
6
|
-
|
7
|
-
class NoOwnerPredicate < StandardError; end
|
8
|
-
|
9
|
-
def initialize(rdf_string)
|
10
|
-
@document = rdf_string
|
11
|
-
end
|
12
|
-
|
13
|
-
def run
|
14
|
-
setup_db_connection
|
15
|
-
is_modified = false
|
16
|
-
prefixes = nil
|
17
|
-
# Read once to load prefixes (the @things at the top of an n3 file)
|
18
|
-
RDF::N3::Reader.new(input = @document) do |reader|
|
19
|
-
reader.each_statement { |_statement| }
|
20
|
-
prefixes = reader.prefixes
|
21
|
-
end
|
22
|
-
new_body = RDF::N3::Writer.buffer(prefixes: prefixes) do |writer|
|
23
|
-
RDF::N3::Reader.new(input = @document) do |reader|
|
24
|
-
reader.each_statement do |statement|
|
25
|
-
if statement.predicate == OWNER_PREDICATE
|
26
|
-
user = PushmiPullyu::AIP::User.find(statement.object.to_i)
|
27
|
-
writer << [statement.subject, statement.predicate, user.email]
|
28
|
-
is_modified = true
|
29
|
-
else
|
30
|
-
writer << statement
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
return new_body if is_modified
|
36
|
-
|
37
|
-
raise NoOwnerPredicate
|
38
|
-
end
|
39
|
-
|
40
|
-
private
|
41
|
-
|
42
|
-
def setup_db_connection
|
43
|
-
ActiveRecord::Base.establish_connection(database_configuration)
|
44
|
-
end
|
45
|
-
|
46
|
-
def database_configuration
|
47
|
-
PushmiPullyu.options[:database][:url]
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|