pushmi_pullyu 1.0.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +2 -0
- data/.github/PULL_REQUEST_TEMPLATE +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.rubocop.yml +20 -1
- data/.travis.yml +2 -2
- data/CHANGELOG.md +16 -0
- data/Dangerfile +7 -3
- data/README.md +4 -5
- data/examples/pushmi_pullyu.yml +6 -6
- data/lib/pushmi_pullyu.rb +10 -17
- data/lib/pushmi_pullyu/aip.rb +9 -6
- data/lib/pushmi_pullyu/aip/downloader.rb +132 -180
- data/lib/pushmi_pullyu/cli.rb +23 -19
- data/lib/pushmi_pullyu/logging.rb +1 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/pushmi_pullyu.gemspec +11 -11
- metadata +46 -39
- data/lib/pushmi_pullyu/aip/fedora_fetcher.rb +0 -66
- data/lib/pushmi_pullyu/aip/file_list_creator.rb +0 -121
- data/lib/pushmi_pullyu/aip/owner_email_editor.rb +0 -50
- data/lib/pushmi_pullyu/aip/user.rb +0 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 8596ed87a287d7708b92c6a3538dcff1f976ddd1b2dc0171b30011bcfe62276b
|
|
4
|
+
data.tar.gz: 9babcb9576246ee00e581056361731b324e27d02c49814af9b26346fbd269120
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0d031eef30ea6828c6fac6c094ab040e914fce8094298249f56ebf2b44d8820c22fd3de6ff98abc546e482f278351b6838419d9b09a90cdd9be6b50576896afc
|
|
7
|
+
data.tar.gz: 858a1d2f2a91a7ee9f9c0257391aa601316afa8fb07c09e883b817ec7b90879f5fdeda9eca34b375aeea5c1ed2c9b06b13c1212a03e30c538d2942a2e4976dca
|
data/.github/CODEOWNERS
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ master ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ master ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
services:
|
|
13
|
+
redis:
|
|
14
|
+
image: redis:alpine
|
|
15
|
+
ports: ["6379:6379"]
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v2
|
|
18
|
+
- name: Set up Ruby
|
|
19
|
+
uses: ruby/setup-ruby@v1
|
|
20
|
+
with:
|
|
21
|
+
ruby-version: 2.6.6
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: bundle install
|
|
24
|
+
- name: Lint with RuboCop
|
|
25
|
+
run: bundle exec rubocop --parallel
|
|
26
|
+
- name: Run Danger
|
|
27
|
+
env:
|
|
28
|
+
DANGER_GITHUB_API_TOKEN: ${{ secrets.DANGER_GITHUB_API_TOKEN }}
|
|
29
|
+
run: bundle exec danger
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: bundle exec rake spec
|
data/.rubocop.yml
CHANGED
|
@@ -10,8 +10,9 @@ AllCops:
|
|
|
10
10
|
Exclude:
|
|
11
11
|
- 'tmp/**/*'
|
|
12
12
|
- 'vendor/**/*'
|
|
13
|
+
- 'Dangerfile'
|
|
13
14
|
ExtraDetails: true
|
|
14
|
-
TargetRubyVersion: 2.
|
|
15
|
+
TargetRubyVersion: 2.5
|
|
15
16
|
|
|
16
17
|
# readability is Actually Good
|
|
17
18
|
Layout/EmptyLinesAroundClassBody:
|
|
@@ -59,6 +60,15 @@ Style/ClassAndModuleChildren:
|
|
|
59
60
|
Style/Documentation:
|
|
60
61
|
Enabled: false
|
|
61
62
|
|
|
63
|
+
Style/HashEachMethods:
|
|
64
|
+
Enabled: true
|
|
65
|
+
|
|
66
|
+
Style/HashTransformKeys:
|
|
67
|
+
Enabled: true
|
|
68
|
+
|
|
69
|
+
Style/HashTransformValues:
|
|
70
|
+
Enabled: true
|
|
71
|
+
|
|
62
72
|
Naming/FileName:
|
|
63
73
|
Exclude:
|
|
64
74
|
- Dangerfile
|
|
@@ -88,3 +98,12 @@ RSpec/MultipleExpectations:
|
|
|
88
98
|
|
|
89
99
|
RSpec/DescribedClass:
|
|
90
100
|
EnforcedStyle: explicit
|
|
101
|
+
|
|
102
|
+
Lint/RaiseException:
|
|
103
|
+
Enabled: true
|
|
104
|
+
|
|
105
|
+
Lint/StructNewOverride:
|
|
106
|
+
Enabled: true
|
|
107
|
+
|
|
108
|
+
RSpec/MultipleMemoizedHelpers:
|
|
109
|
+
Enabled: false
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
All notable changes to the PushmiPullyu project will be documented in this file.
|
|
3
|
+
|
|
4
|
+
PushmiPullyu is a Ruby application, whose primary job is to manage the flow of content from [Jupiter](https://github.com/ualbertalib/jupiter/) into Swift for preservation.
|
|
5
|
+
|
|
6
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
7
|
+
and releases in PushmiPullyu adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
8
|
+
|
|
9
|
+
## [Unreleased]
|
|
10
|
+
|
|
11
|
+
## [2.0.0] - 2020-12-14
|
|
12
|
+
|
|
13
|
+
### Removed
|
|
14
|
+
- Data output for original_file information
|
|
15
|
+
|
|
16
|
+
## [1.0.6] - 2018-11-29
|
data/Dangerfile
CHANGED
|
@@ -13,9 +13,13 @@ end
|
|
|
13
13
|
# just leaving a title
|
|
14
14
|
warn('Please add a detailed summary in the description.') if github.pr_body.length < 5
|
|
15
15
|
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
# Let people say that this isn't worth a CHANGELOG entry in the PR if they choose
|
|
17
|
+
declared_trivial = (github.pr_title + github.pr_body).include?('#trivial') || !has_app_changes
|
|
18
|
+
|
|
19
|
+
if !git.modified_files.include?('CHANGELOG.md') && !declared_trivial
|
|
20
|
+
error_message = "Please include a CHANGELOG entry. \nYou can find it at " \
|
|
21
|
+
'[CHANGELOG.md](https://github.com/ualbertalib/pushmi_pullyu/blob/master/CHANGELOG.md).'
|
|
22
|
+
fail(error_message, sticky: false)
|
|
19
23
|
end
|
|
20
24
|
|
|
21
25
|
# Warn when there is a big PR
|
data/README.md
CHANGED
|
@@ -5,22 +5,21 @@
|
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
7
|
[](https://rubygems.org/gems/pushmi_pullyu)
|
|
8
|
-
[](https://travis-ci.org/ualbertalib/pushmi_pullyu)
|
|
8
|
+
[](https://github.com/ualbertalib/pushmi_pullyu/actions)
|
|
10
9
|
[](https://coveralls.io/github/ualbertalib/pushmi_pullyu?branch=master)
|
|
11
10
|
|
|
12
11
|
PushmiPullyu is a Ruby application, running behind the firewall that protects our Swift environment.
|
|
13
12
|
|
|
14
|
-
Its primary job is to manage the flow of content from
|
|
13
|
+
Its primary job is to manage the flow of content from Jupiter into Swift for preservation.
|
|
15
14
|
|
|
16
15
|

|
|
17
16
|
|
|
18
17
|
## Workflow
|
|
19
18
|
|
|
20
|
-
1. Any save (create or update) on a Item/Thesis in ERA/Jupiter will trigger an after save callback that will push the item's unique identifier (UUID
|
|
19
|
+
1. Any save (create or update) on an Item/Thesis in ERA/Jupiter will trigger an after save callback that will push the item's unique identifier (UUID) into a Queue.
|
|
21
20
|
2. The queue (Redis) is set up to be a unique set (which only allows one item's UUID to be included in the queue at a single time), and ordered by priority from First In, First Out (FIFO).
|
|
22
21
|
3. PushmiPullyu will then monitor the queue. After a certain wait period has passed since an element has been on the queue, PushmiPullyu will then retrieve the elements off the queue and begin to process the preservation event.
|
|
23
|
-
4. All the
|
|
22
|
+
4. All the Item/Thesis information and data required for preservation are retrieved from Jupiter using multiple REST calls to Jupiter's AIP API.
|
|
24
23
|
5. An Archival Information Package (AIP) is created from the item's information. It is then bagged and tarred.
|
|
25
24
|
6. The AIP tar is then uploaded to Swift via a REST call.
|
|
26
25
|
7. On a successful Swift upload, an entry is added for this preservation event to the preservation event logs.
|
data/examples/pushmi_pullyu.yml
CHANGED
|
@@ -21,12 +21,6 @@ minimum_age: 0
|
|
|
21
21
|
redis:
|
|
22
22
|
url: redis://localhost:6379
|
|
23
23
|
|
|
24
|
-
fedora:
|
|
25
|
-
url: http://localhost:8080/fcrepo/rest
|
|
26
|
-
user: fedoraAdmin
|
|
27
|
-
password: fedoraAdmin
|
|
28
|
-
base_path: /dev
|
|
29
|
-
|
|
30
24
|
database:
|
|
31
25
|
url: postgresql://jupiter:mysecretpassword@127.0.0.1/jupiter_development
|
|
32
26
|
|
|
@@ -44,3 +38,9 @@ rollbar:
|
|
|
44
38
|
token: 'abc123xyz'
|
|
45
39
|
proxy_host: 'your_proxy_host_url'
|
|
46
40
|
proxy_port: '80'
|
|
41
|
+
|
|
42
|
+
jupiter:
|
|
43
|
+
user: jupiter@ualberta.ca
|
|
44
|
+
api_key: 5042c4ad-6d22-486d-bc63-2b9e5b9a630a
|
|
45
|
+
jupiter_url: http://localhost:3000/
|
|
46
|
+
aip_api_path: aip/v1
|
data/lib/pushmi_pullyu.rb
CHANGED
|
@@ -1,22 +1,15 @@
|
|
|
1
|
-
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
|
2
|
-
# (This fixes a weird NameError bug when using the nested compact syntax
|
|
3
|
-
# defining modules/classes like `module PushmiPullyu::Logging`)
|
|
4
|
-
require 'pushmi_pullyu/version'
|
|
1
|
+
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
|
2
|
+
# module. (This fixes a weird NameError bug when using the nested compact syntax
|
|
3
|
+
# for defining modules/classes like `module PushmiPullyu::Logging`)
|
|
5
4
|
|
|
5
|
+
require 'pushmi_pullyu/version'
|
|
6
6
|
require 'pushmi_pullyu/logging'
|
|
7
|
-
|
|
8
7
|
require 'pushmi_pullyu/aip'
|
|
9
8
|
require 'pushmi_pullyu/aip/creator'
|
|
10
9
|
require 'pushmi_pullyu/aip/downloader'
|
|
11
|
-
require 'pushmi_pullyu/aip/fedora_fetcher'
|
|
12
|
-
require 'pushmi_pullyu/aip/file_list_creator'
|
|
13
|
-
require 'pushmi_pullyu/aip/owner_email_editor'
|
|
14
|
-
require 'active_record'
|
|
15
|
-
require 'pushmi_pullyu/aip/user'
|
|
16
10
|
require 'pushmi_pullyu/cli'
|
|
17
11
|
require 'pushmi_pullyu/preservation_queue'
|
|
18
12
|
require 'pushmi_pullyu/swift_depositer'
|
|
19
|
-
|
|
20
13
|
require 'active_support'
|
|
21
14
|
require 'active_support/core_ext'
|
|
22
15
|
|
|
@@ -36,12 +29,6 @@ module PushmiPullyu
|
|
|
36
29
|
redis: {
|
|
37
30
|
url: 'redis://localhost:6379'
|
|
38
31
|
},
|
|
39
|
-
fedora: {
|
|
40
|
-
url: 'http://localhost:8080/fcrepo/rest',
|
|
41
|
-
user: 'fedoraAdmin',
|
|
42
|
-
password: 'fedoraAdmin',
|
|
43
|
-
base_path: '/dev'
|
|
44
|
-
},
|
|
45
32
|
swift: {
|
|
46
33
|
tenant: 'tester',
|
|
47
34
|
username: 'test:tester',
|
|
@@ -58,6 +45,12 @@ module PushmiPullyu
|
|
|
58
45
|
pool: ENV['RAILS_MAX_THREADS'] || 5,
|
|
59
46
|
url: ENV['DATABASE_URL'] || ENV['JUPITER_DATABASE_URL'] || 'postgresql://jupiter:mysecretpassword@127.0.0.1',
|
|
60
47
|
database: 'jupiter_development'
|
|
48
|
+
},
|
|
49
|
+
jupiter: {
|
|
50
|
+
user: ENV['JUPITER_USER'],
|
|
51
|
+
api_key: ENV['JUPITER_API_KEY'],
|
|
52
|
+
jupiter_url: ENV['JUPITER_URL'] || 'http://localhost:3000/',
|
|
53
|
+
aip_api_path: ENV['JUPITER_AIP_API_PATH'] || 'aip/v1'
|
|
61
54
|
}
|
|
62
55
|
}.freeze
|
|
63
56
|
|
data/lib/pushmi_pullyu/aip.rb
CHANGED
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
require 'fileutils'
|
|
2
|
+
require 'uuid'
|
|
2
3
|
|
|
3
4
|
module PushmiPullyu::AIP
|
|
4
|
-
class
|
|
5
|
+
class EntityInvalid < StandardError; end
|
|
5
6
|
module_function
|
|
6
7
|
|
|
7
|
-
def create(
|
|
8
|
-
raise
|
|
8
|
+
def create(entity)
|
|
9
|
+
raise EntityInvalid if entity.nil? ||
|
|
10
|
+
UUID.validate(entity[:uuid]) != true ||
|
|
11
|
+
entity[:type].blank?
|
|
9
12
|
|
|
10
|
-
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{
|
|
13
|
+
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{entity[:uuid]}"
|
|
11
14
|
aip_filename = "#{aip_directory}.tar"
|
|
12
15
|
|
|
13
|
-
PushmiPullyu::AIP::Downloader.new(
|
|
14
|
-
PushmiPullyu::AIP::Creator.new(
|
|
16
|
+
PushmiPullyu::AIP::Downloader.new(entity, aip_directory).run
|
|
17
|
+
PushmiPullyu::AIP::Creator.new(entity[:uuid], aip_directory, aip_filename).run
|
|
15
18
|
|
|
16
19
|
yield aip_filename, aip_directory
|
|
17
20
|
|
|
@@ -2,261 +2,213 @@ require 'fileutils'
|
|
|
2
2
|
require 'ostruct'
|
|
3
3
|
require 'rdf'
|
|
4
4
|
require 'rdf/n3'
|
|
5
|
+
require 'net/http'
|
|
6
|
+
require 'uri'
|
|
7
|
+
require 'digest'
|
|
5
8
|
|
|
6
|
-
# Download all of the metadata/datastreams and associated data
|
|
7
|
-
# related to an object
|
|
9
|
+
# Download all of the metadata/datastreams and associated data related to an object
|
|
8
10
|
class PushmiPullyu::AIP::Downloader
|
|
9
11
|
|
|
10
12
|
PREDICATE_URIS = {
|
|
11
13
|
filename: 'http://purl.org/dc/terms/title',
|
|
12
14
|
member_files: 'http://pcdm.org/models#hasFile',
|
|
13
15
|
member_file_sets: 'http://pcdm.org/models#hasMember',
|
|
14
|
-
original_file: 'http://pcdm.org/use#OriginalFile',
|
|
15
16
|
type: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
|
|
16
17
|
}.freeze
|
|
17
18
|
|
|
18
|
-
class
|
|
19
|
-
class
|
|
20
|
-
class
|
|
21
|
-
class NoOriginalFile < StandardError; end
|
|
19
|
+
class JupiterDownloadError < StandardError; end
|
|
20
|
+
class JupiterCopyError < StandardError; end
|
|
21
|
+
class JupiterAuthenticationError < StandardError; end
|
|
22
22
|
|
|
23
|
-
def initialize(
|
|
24
|
-
@
|
|
23
|
+
def initialize(entity, aip_directory)
|
|
24
|
+
@entity = entity
|
|
25
|
+
@entity_identifier = "[#{entity[:type]} - #{entity[:uuid]}]".freeze
|
|
25
26
|
@aip_directory = aip_directory
|
|
26
27
|
end
|
|
27
28
|
|
|
28
29
|
def run
|
|
29
|
-
|
|
30
|
+
PushmiPullyu.logger.info("#{@entity_identifier}: Retreiving data from Jupiter ...")
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
authenticate_http_calls
|
|
33
|
+
make_directories
|
|
32
34
|
|
|
33
35
|
# Main object metadata
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
download_and_log(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
member_files(file_set_uuid).each do |file_path|
|
|
52
|
-
path_spec = OpenStruct.new(
|
|
53
|
-
remote: "/files/#{file_path}/fcr:metadata",
|
|
54
|
-
# Note: local file gets clobbered on each download until it finds the right one
|
|
55
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/original_file_metadata.n3",
|
|
56
|
-
optional: true
|
|
57
|
-
)
|
|
58
|
-
download_and_log(path_spec, file_set_downloader)
|
|
59
|
-
if original_file?(path_spec.local)
|
|
60
|
-
original_file_remote_base = "/files/#{file_path}"
|
|
61
|
-
break
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
raise NoOriginalFile unless original_file_remote_base.present?
|
|
66
|
-
|
|
67
|
-
[:content, :fixity].each do |item|
|
|
68
|
-
path_spec = file_aip_paths(file_set_uuid, original_file_remote_base)[item]
|
|
69
|
-
download_and_log(path_spec, file_set_downloader)
|
|
70
|
-
end
|
|
36
|
+
download_and_log(object_aip_paths[:main_object_remote],
|
|
37
|
+
object_aip_paths[:main_object_local])
|
|
38
|
+
download_and_log(object_aip_paths[:file_sets_remote],
|
|
39
|
+
object_aip_paths[:file_sets_local])
|
|
40
|
+
|
|
41
|
+
# Get file paths for processing
|
|
42
|
+
file_paths = get_file_paths(object_aip_paths[:file_paths_remote])
|
|
43
|
+
|
|
44
|
+
file_paths[:files].each do |file_path|
|
|
45
|
+
file_uuid = file_path[:file_uuid]
|
|
46
|
+
make_file_set_directories(file_uuid)
|
|
47
|
+
copy_and_log(file_uuid, file_path)
|
|
48
|
+
file_aip_path = file_aip_paths(file_uuid)
|
|
49
|
+
download_and_log(file_aip_path[:fixity_remote],
|
|
50
|
+
file_aip_path[:fixity_local])
|
|
51
|
+
download_and_log(file_aip_path[:file_set_remote],
|
|
52
|
+
file_aip_path[:file_set_local])
|
|
71
53
|
end
|
|
72
54
|
end
|
|
73
55
|
|
|
74
56
|
private
|
|
75
57
|
|
|
76
|
-
def
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
58
|
+
def copy_and_log(file_uuid, file_path)
|
|
59
|
+
remote = file_path[:file_path]
|
|
60
|
+
remote_checksum = file_path[:file_checksum]
|
|
61
|
+
files_path = file_set_dirs(file_uuid)[:files]
|
|
62
|
+
output_file = "#{files_path}/#{file_path[:file_name]}"
|
|
63
|
+
log_downloading(remote, output_file)
|
|
64
|
+
FileUtils.copy_file(remote, output_file)
|
|
80
65
|
|
|
81
|
-
|
|
82
|
-
|
|
66
|
+
is_success = File.exist?(output_file) &&
|
|
67
|
+
File.size(remote) == File.size(output_file) &&
|
|
68
|
+
compare_md5(output_file, remote_checksum)
|
|
83
69
|
|
|
84
|
-
is_success = fedora_fetcher.download_object(output_file,
|
|
85
|
-
url_extra: path_spec.remote,
|
|
86
|
-
optional: path_spec.optional,
|
|
87
|
-
is_rdf: is_rdf,
|
|
88
|
-
should_add_user_email: should_add_user_email)
|
|
89
70
|
log_saved(is_success, output_file)
|
|
71
|
+
|
|
72
|
+
raise JupiterCopyError unless is_success
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def compare_md5(local, remote_checksum)
|
|
76
|
+
local_md5 = Digest::MD5.file local
|
|
77
|
+
local_md5.base64digest == remote_checksum
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def authenticate_http_calls
|
|
81
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
|
82
|
+
@http = Net::HTTP.new(@uri.host, @uri.port)
|
|
83
|
+
request = Net::HTTP::Post.new(@uri.request_uri + 'auth/system')
|
|
84
|
+
request.set_form_data(
|
|
85
|
+
email: PushmiPullyu.options[:jupiter][:user],
|
|
86
|
+
api_key: PushmiPullyu.options[:jupiter][:api_key]
|
|
87
|
+
)
|
|
88
|
+
response = @http.request(request)
|
|
89
|
+
# If we cannot find the set-cookie header then the session was not set
|
|
90
|
+
raise JupiterAuthenticationError if response.response['set-cookie'].nil?
|
|
91
|
+
|
|
92
|
+
@cookies = response.response['set-cookie']
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def download_and_log(remote, local)
|
|
96
|
+
log_downloading(remote, local)
|
|
97
|
+
|
|
98
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
|
99
|
+
request = Net::HTTP::Get.new(@uri.request_uri + remote)
|
|
100
|
+
# add previously stored cookies
|
|
101
|
+
request['Cookie'] = @cookies
|
|
102
|
+
|
|
103
|
+
response = @http.request(request)
|
|
104
|
+
is_success = if response.is_a?(Net::HTTPSuccess)
|
|
105
|
+
File.open(local, 'wb') do |file|
|
|
106
|
+
file.write(response.body)
|
|
107
|
+
end
|
|
108
|
+
# Response was a success and the file was saved to local
|
|
109
|
+
File.exist? local
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
log_saved(is_success, local)
|
|
113
|
+
raise JupiterDownloadError unless is_success
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def get_file_paths(url)
|
|
117
|
+
request = Net::HTTP::Get.new(@uri.request_uri + url)
|
|
118
|
+
# add previously stored cookies
|
|
119
|
+
request['Cookie'] = @cookies
|
|
120
|
+
|
|
121
|
+
response = @http.request(request)
|
|
122
|
+
|
|
123
|
+
JSON.parse(response.body, symbolize_names: true)
|
|
90
124
|
end
|
|
91
125
|
|
|
92
|
-
def
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"#{@noid}: #{output_file} -- creating from #{url} ...")
|
|
96
|
-
PushmiPullyu::AIP::FileListCreator.new(url, output_file, member_file_set_uuids).run
|
|
97
|
-
PushmiPullyu::Logging.log_aip_activity(@aip_directory,
|
|
98
|
-
"#{@noid}: #{output_file} -- created")
|
|
126
|
+
def object_uri
|
|
127
|
+
aip_api_url = PushmiPullyu.options[:jupiter][:aip_api_path]
|
|
128
|
+
@object_uri ||= "#{aip_api_url}/#{@entity[:type]}/#{@entity[:uuid]}"
|
|
99
129
|
end
|
|
100
130
|
|
|
101
131
|
### Logging
|
|
102
132
|
|
|
103
|
-
def
|
|
104
|
-
message = "#{@
|
|
133
|
+
def log_downloading(url, output_file)
|
|
134
|
+
message = "#{@entity_identifier}: #{output_file} -- Downloading from #{url} ..."
|
|
105
135
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
|
106
136
|
end
|
|
107
137
|
|
|
108
138
|
def log_saved(is_success, output_file)
|
|
109
|
-
message = "#{@
|
|
139
|
+
message = "#{@entity_identifier}: #{output_file} -- #{is_success ? 'Saved' : 'Failed'}"
|
|
110
140
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
|
111
141
|
end
|
|
112
142
|
|
|
113
143
|
### Directories
|
|
114
144
|
|
|
115
145
|
def aip_dirs
|
|
116
|
-
@aip_dirs ||=
|
|
146
|
+
@aip_dirs ||= {
|
|
117
147
|
objects: "#{@aip_directory}/data/objects",
|
|
118
148
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
|
119
149
|
files: "#{@aip_directory}/data/objects/files",
|
|
120
150
|
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
|
121
151
|
logs: "#{@aip_directory}/data/logs",
|
|
122
152
|
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
|
123
|
-
|
|
153
|
+
}
|
|
124
154
|
end
|
|
125
155
|
|
|
126
156
|
def file_set_dirs(file_set_uuid)
|
|
127
157
|
@file_set_dirs ||= {}
|
|
128
|
-
@file_set_dirs[file_set_uuid] ||=
|
|
129
|
-
metadata: "#{aip_dirs
|
|
130
|
-
files: "#{aip_dirs
|
|
131
|
-
logs: "#{aip_dirs
|
|
132
|
-
|
|
158
|
+
@file_set_dirs[file_set_uuid] ||= {
|
|
159
|
+
metadata: "#{aip_dirs[:files_metadata]}/#{file_set_uuid}",
|
|
160
|
+
files: "#{aip_dirs[:files]}/#{file_set_uuid}",
|
|
161
|
+
logs: "#{aip_dirs[:file_logs]}/#{file_set_uuid}"
|
|
162
|
+
}
|
|
133
163
|
end
|
|
134
164
|
|
|
135
165
|
def make_directories
|
|
166
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories ...")
|
|
136
167
|
clean_directories
|
|
137
|
-
|
|
138
|
-
aip_dirs.to_h.each_value do |path|
|
|
168
|
+
aip_dirs.each_value do |path|
|
|
139
169
|
FileUtils.mkdir_p(path)
|
|
140
170
|
end
|
|
141
|
-
PushmiPullyu.logger.debug("#{@
|
|
171
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories done")
|
|
142
172
|
end
|
|
143
173
|
|
|
144
174
|
def make_file_set_directories(file_set_uuid)
|
|
145
|
-
PushmiPullyu.logger.debug("#{@
|
|
146
|
-
file_set_dirs(file_set_uuid).
|
|
175
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories ...")
|
|
176
|
+
file_set_dirs(file_set_uuid).each_value do |path|
|
|
147
177
|
FileUtils.mkdir_p(path)
|
|
148
178
|
end
|
|
149
|
-
PushmiPullyu.logger.debug("#{@
|
|
179
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories done")
|
|
150
180
|
end
|
|
151
181
|
|
|
152
182
|
def clean_directories
|
|
153
183
|
return unless File.exist?(@aip_directory)
|
|
154
184
|
|
|
155
|
-
PushmiPullyu.logger.debug("#{@
|
|
185
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Nuking directories ...")
|
|
156
186
|
FileUtils.rm_rf(@aip_directory)
|
|
157
187
|
end
|
|
158
188
|
|
|
159
189
|
### Files
|
|
160
190
|
|
|
161
191
|
def object_aip_paths
|
|
162
|
-
@object_aip_paths ||=
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
local: "#{aip_dirs.files_metadata}/file_order.xml"
|
|
176
|
-
)
|
|
177
|
-
).freeze
|
|
178
|
-
end
|
|
179
|
-
|
|
180
|
-
def file_set_aip_paths(file_set_uuid)
|
|
181
|
-
@file_set_aip_paths ||= {}
|
|
182
|
-
@file_set_aip_paths[file_set_uuid] ||= OpenStruct.new(
|
|
183
|
-
main_object: OpenStruct.new(
|
|
184
|
-
remote: nil, # Base file_set path
|
|
185
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/file_set_metadata.n3",
|
|
186
|
-
should_add_user_email: true,
|
|
187
|
-
optional: false
|
|
188
|
-
)
|
|
189
|
-
).freeze
|
|
190
|
-
end
|
|
191
|
-
|
|
192
|
-
def file_aip_paths(file_set_uuid, original_file_remote_base)
|
|
192
|
+
@object_aip_paths ||= {
|
|
193
|
+
# Base path
|
|
194
|
+
main_object_remote: object_uri,
|
|
195
|
+
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
|
196
|
+
file_sets_remote: "#{object_uri}/filesets",
|
|
197
|
+
file_sets_local: "#{aip_dirs[:files_metadata]}/file_order.xml",
|
|
198
|
+
# This is downloaded for processing but not saved
|
|
199
|
+
file_paths_remote: "#{object_uri}/file_paths"
|
|
200
|
+
}.freeze
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def file_aip_paths(file_set_uuid)
|
|
204
|
+
file_set_paths = file_set_dirs(file_set_uuid)
|
|
193
205
|
@file_aip_paths ||= {}
|
|
194
|
-
@file_aip_paths[file_set_uuid] ||=
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
fixity: OpenStruct.new(
|
|
201
|
-
remote: "#{original_file_remote_base}/fcr:fixity",
|
|
202
|
-
local: "#{file_set_dirs(file_set_uuid)[:logs]}/content_fixity_report.n3",
|
|
203
|
-
optional: false
|
|
204
|
-
)
|
|
205
|
-
).freeze
|
|
206
|
-
end
|
|
207
|
-
|
|
208
|
-
def member_file_set_uuids
|
|
209
|
-
@member_file_set_uuids ||= []
|
|
210
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
|
211
|
-
|
|
212
|
-
member_file_set_predicate = RDF::URI(PREDICATE_URIS[:member_file_sets])
|
|
213
|
-
|
|
214
|
-
graph = RDF::Graph.load(object_aip_paths.main_object.local)
|
|
215
|
-
|
|
216
|
-
graph.query(predicate: member_file_set_predicate) do |results|
|
|
217
|
-
# Get uuid from end of fedora path
|
|
218
|
-
@member_file_set_uuids << results.object.to_s.split('/').last
|
|
219
|
-
end
|
|
220
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
|
221
|
-
|
|
222
|
-
raise NoFileSets
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
def file_set_filename(file_set_uuid)
|
|
226
|
-
filename_predicate = RDF::URI(PREDICATE_URIS[:filename])
|
|
227
|
-
|
|
228
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
|
229
|
-
|
|
230
|
-
graph.query(predicate: filename_predicate) do |results|
|
|
231
|
-
return "#{file_set_dirs(file_set_uuid).files}/#{results.object}"
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
raise NoContentFilename
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
def member_files(file_set_uuid)
|
|
238
|
-
member_file_predicate = RDF::URI(PREDICATE_URIS[:member_files])
|
|
239
|
-
|
|
240
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
|
241
|
-
|
|
242
|
-
member_files = []
|
|
243
|
-
graph.query(predicate: member_file_predicate) do |results|
|
|
244
|
-
# Get uuid from end of fedora path
|
|
245
|
-
member_files << results.object.to_s.split('/').last
|
|
246
|
-
end
|
|
247
|
-
return member_files if member_files.present?
|
|
248
|
-
|
|
249
|
-
raise NoMemberFiles
|
|
250
|
-
end
|
|
251
|
-
|
|
252
|
-
def original_file?(metadata_filename)
|
|
253
|
-
type_predicate = RDF::URI(PREDICATE_URIS[:type])
|
|
254
|
-
original_file_uri = RDF::URI(PREDICATE_URIS[:original_file])
|
|
255
|
-
graph = RDF::Graph.load(metadata_filename)
|
|
256
|
-
graph.query(predicate: type_predicate) do |results|
|
|
257
|
-
return true if results.object == original_file_uri
|
|
258
|
-
end
|
|
259
|
-
false
|
|
206
|
+
@file_aip_paths[file_set_uuid] ||= {
|
|
207
|
+
fixity_remote: "#{object_uri}/filesets/#{file_set_uuid}/fixity",
|
|
208
|
+
fixity_local: "#{file_set_paths[:logs]}/content_fixity_report.n3",
|
|
209
|
+
file_set_remote: "#{object_uri}/filesets/#{file_set_uuid}",
|
|
210
|
+
file_set_local: "#{file_set_paths[:metadata]}/file_set_metadata.n3"
|
|
211
|
+
}.freeze
|
|
260
212
|
end
|
|
261
213
|
|
|
262
214
|
end
|
data/lib/pushmi_pullyu/cli.rb
CHANGED
|
@@ -4,6 +4,7 @@ require 'optparse'
|
|
|
4
4
|
require 'rollbar'
|
|
5
5
|
require 'singleton'
|
|
6
6
|
require 'yaml'
|
|
7
|
+
require 'json'
|
|
7
8
|
|
|
8
9
|
# CLI runner
|
|
9
10
|
class PushmiPullyu::CLI
|
|
@@ -64,8 +65,8 @@ class PushmiPullyu::CLI
|
|
|
64
65
|
# add a filter after Rollbar has built the error payload but before it is delivered to the API,
|
|
65
66
|
# in order to strip sensitive information out of certain error messages
|
|
66
67
|
exception_message_transformer = proc do |payload|
|
|
67
|
-
clean_message = payload[:exception][:message].sub(/http:\/\/.+:.+@(.+)\/
|
|
68
|
-
"http://\1/
|
|
68
|
+
clean_message = payload[:exception][:message].sub(/http:\/\/.+:.+@(.+)\/aip\/v1\/(.*)/,
|
|
69
|
+
"http://\1/aip/v1/\2")
|
|
69
70
|
payload[:exception][:message] = clean_message
|
|
70
71
|
payload[:message] = clean_message
|
|
71
72
|
end
|
|
@@ -181,26 +182,29 @@ class PushmiPullyu::CLI
|
|
|
181
182
|
end
|
|
182
183
|
|
|
183
184
|
def run_preservation_cycle
|
|
184
|
-
|
|
185
|
-
|
|
185
|
+
entity_json = JSON.parse(queue.wait_next_item)
|
|
186
|
+
entity = {
|
|
187
|
+
type: entity_json['type'],
|
|
188
|
+
uuid: entity_json['uuid']
|
|
189
|
+
}
|
|
190
|
+
return unless entity[:type].present? && entity[:uuid].present?
|
|
186
191
|
|
|
187
192
|
# add additional information about the error context to errors that occur while processing this item.
|
|
188
|
-
Rollbar.scoped(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
end
|
|
197
|
-
# rubocop:disable Lint/RescueException
|
|
198
|
-
rescue Exception => e
|
|
199
|
-
Rollbar.error(e)
|
|
200
|
-
logger.error(e)
|
|
201
|
-
# TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
|
|
202
|
-
# move on to the next item.
|
|
193
|
+
Rollbar.scoped(entity_uuid: entity[:uuid]) do
|
|
194
|
+
# Download AIP from Jupiter, bag and tar AIP directory and cleanup after
|
|
195
|
+
# block code
|
|
196
|
+
PushmiPullyu::AIP.create(entity) do |aip_filename, aip_directory|
|
|
197
|
+
# Push tarred AIP to swift API
|
|
198
|
+
deposited_file = swift.deposit_file(aip_filename, options[:swift][:container])
|
|
199
|
+
# Log successful preservation event to the log files
|
|
200
|
+
PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
|
|
203
201
|
end
|
|
202
|
+
# rubocop:disable Lint/RescueException
|
|
203
|
+
rescue Exception => e
|
|
204
|
+
Rollbar.error(e)
|
|
205
|
+
logger.error(e)
|
|
206
|
+
# TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
|
|
207
|
+
# move on to the next item.
|
|
204
208
|
# rubocop:enable Lint/RescueException
|
|
205
209
|
end
|
|
206
210
|
end
|
|
@@ -48,7 +48,7 @@ module PushmiPullyu::Logging
|
|
|
48
48
|
|
|
49
49
|
message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
|
|
50
50
|
"Here are the details of this preservation event:\n"\
|
|
51
|
-
"\
|
|
51
|
+
"\tUUID: '#{deposited_file.name}'\n"\
|
|
52
52
|
"\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
|
|
53
53
|
"\tAIP Checksum: '#{deposited_file.etag}'\n"\
|
|
54
54
|
"\tMetadata: #{deposited_file.metadata}\n"\
|
data/pushmi_pullyu.gemspec
CHANGED
|
@@ -5,10 +5,10 @@ require 'pushmi_pullyu/version'
|
|
|
5
5
|
Gem::Specification.new do |spec|
|
|
6
6
|
spec.name = 'pushmi_pullyu'
|
|
7
7
|
spec.version = PushmiPullyu::VERSION
|
|
8
|
-
spec.authors = ['Shane Murnaghan']
|
|
9
|
-
spec.email = ['murnagha@ualberta.ca']
|
|
8
|
+
spec.authors = ['Shane Murnaghan', 'Omar Rodriguez-Arenas']
|
|
9
|
+
spec.email = ['murnagha@ualberta.ca', 'orodrigu@ualberta.ca']
|
|
10
10
|
|
|
11
|
-
spec.summary = 'Ruby application to manage flow of content from
|
|
11
|
+
spec.summary = 'Ruby application to manage flow of content from Jupiter into Swift for preservation'
|
|
12
12
|
spec.homepage = 'https://github.com/ualbertalib/pushmi_pullyu'
|
|
13
13
|
spec.license = 'MIT'
|
|
14
14
|
|
|
@@ -19,10 +19,9 @@ Gem::Specification.new do |spec|
|
|
|
19
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
20
20
|
spec.require_paths = ['lib']
|
|
21
21
|
|
|
22
|
-
spec.required_ruby_version = '>= 2.
|
|
22
|
+
spec.required_ruby_version = '>= 2.5'
|
|
23
23
|
|
|
24
|
-
spec.add_runtime_dependency '
|
|
25
|
-
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
|
24
|
+
spec.add_runtime_dependency 'activesupport', '>= 5', '< 7'
|
|
26
25
|
spec.add_runtime_dependency 'bagit', '~> 0.4'
|
|
27
26
|
spec.add_runtime_dependency 'connection_pool', '~> 2.2'
|
|
28
27
|
spec.add_runtime_dependency 'daemons', '~> 1.2', '>= 1.2.4'
|
|
@@ -33,18 +32,19 @@ Gem::Specification.new do |spec|
|
|
|
33
32
|
spec.add_runtime_dependency 'rdf-n3', '>= 1.99', '< 4.0'
|
|
34
33
|
spec.add_runtime_dependency 'redis', '>= 3.3', '< 5.0'
|
|
35
34
|
spec.add_runtime_dependency 'rest-client', '>= 1.8', '< 3.0'
|
|
36
|
-
spec.add_runtime_dependency 'rollbar', '
|
|
35
|
+
spec.add_runtime_dependency 'rollbar', '>= 2.18', '< 4.0'
|
|
37
36
|
|
|
38
|
-
spec.add_development_dependency 'bundler', '~>
|
|
37
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
|
39
38
|
spec.add_development_dependency 'coveralls', '~> 0.8'
|
|
40
|
-
spec.add_development_dependency 'danger', '~>
|
|
39
|
+
spec.add_development_dependency 'danger', '~> 8.0'
|
|
41
40
|
spec.add_development_dependency 'pry', '~> 0.10', '>= 0.10.4'
|
|
42
41
|
spec.add_development_dependency 'pry-byebug', '~> 3.6'
|
|
43
|
-
spec.add_development_dependency 'rake', '~>
|
|
42
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
|
44
43
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
45
44
|
spec.add_development_dependency 'rubocop', '~> 0.51'
|
|
46
45
|
spec.add_development_dependency 'rubocop-rspec', '~> 1.10'
|
|
47
46
|
spec.add_development_dependency 'timecop', '~> 0.8'
|
|
48
|
-
spec.add_development_dependency '
|
|
47
|
+
spec.add_development_dependency 'uuid', '~> 2.3.9'
|
|
48
|
+
spec.add_development_dependency 'vcr', '~> 5.0'
|
|
49
49
|
spec.add_development_dependency 'webmock', '~> 3.3'
|
|
50
50
|
end
|
metadata
CHANGED
|
@@ -1,49 +1,36 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pushmi_pullyu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shane Murnaghan
|
|
8
|
+
- Omar Rodriguez-Arenas
|
|
8
9
|
autorequire:
|
|
9
10
|
bindir: exe
|
|
10
11
|
cert_chain: []
|
|
11
|
-
date:
|
|
12
|
+
date: 2021-01-29 00:00:00.000000000 Z
|
|
12
13
|
dependencies:
|
|
13
14
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
15
|
+
name: activesupport
|
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
|
16
17
|
requirements:
|
|
17
18
|
- - ">="
|
|
18
19
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 5
|
|
20
|
+
version: '5'
|
|
20
21
|
- - "<"
|
|
21
22
|
- !ruby/object:Gem::Version
|
|
22
|
-
version:
|
|
23
|
+
version: '7'
|
|
23
24
|
type: :runtime
|
|
24
25
|
prerelease: false
|
|
25
26
|
version_requirements: !ruby/object:Gem::Requirement
|
|
26
27
|
requirements:
|
|
27
28
|
- - ">="
|
|
28
29
|
- !ruby/object:Gem::Version
|
|
29
|
-
version: 5
|
|
30
|
+
version: '5'
|
|
30
31
|
- - "<"
|
|
31
32
|
- !ruby/object:Gem::Version
|
|
32
|
-
version:
|
|
33
|
-
- !ruby/object:Gem::Dependency
|
|
34
|
-
name: activesupport
|
|
35
|
-
requirement: !ruby/object:Gem::Requirement
|
|
36
|
-
requirements:
|
|
37
|
-
- - "~>"
|
|
38
|
-
- !ruby/object:Gem::Version
|
|
39
|
-
version: '5.0'
|
|
40
|
-
type: :runtime
|
|
41
|
-
prerelease: false
|
|
42
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
43
|
-
requirements:
|
|
44
|
-
- - "~>"
|
|
45
|
-
- !ruby/object:Gem::Version
|
|
46
|
-
version: '5.0'
|
|
33
|
+
version: '7'
|
|
47
34
|
- !ruby/object:Gem::Dependency
|
|
48
35
|
name: bagit
|
|
49
36
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -230,30 +217,36 @@ dependencies:
|
|
|
230
217
|
name: rollbar
|
|
231
218
|
requirement: !ruby/object:Gem::Requirement
|
|
232
219
|
requirements:
|
|
233
|
-
- - "
|
|
220
|
+
- - ">="
|
|
234
221
|
- !ruby/object:Gem::Version
|
|
235
222
|
version: '2.18'
|
|
223
|
+
- - "<"
|
|
224
|
+
- !ruby/object:Gem::Version
|
|
225
|
+
version: '4.0'
|
|
236
226
|
type: :runtime
|
|
237
227
|
prerelease: false
|
|
238
228
|
version_requirements: !ruby/object:Gem::Requirement
|
|
239
229
|
requirements:
|
|
240
|
-
- - "
|
|
230
|
+
- - ">="
|
|
241
231
|
- !ruby/object:Gem::Version
|
|
242
232
|
version: '2.18'
|
|
233
|
+
- - "<"
|
|
234
|
+
- !ruby/object:Gem::Version
|
|
235
|
+
version: '4.0'
|
|
243
236
|
- !ruby/object:Gem::Dependency
|
|
244
237
|
name: bundler
|
|
245
238
|
requirement: !ruby/object:Gem::Requirement
|
|
246
239
|
requirements:
|
|
247
240
|
- - "~>"
|
|
248
241
|
- !ruby/object:Gem::Version
|
|
249
|
-
version: '
|
|
242
|
+
version: '2.0'
|
|
250
243
|
type: :development
|
|
251
244
|
prerelease: false
|
|
252
245
|
version_requirements: !ruby/object:Gem::Requirement
|
|
253
246
|
requirements:
|
|
254
247
|
- - "~>"
|
|
255
248
|
- !ruby/object:Gem::Version
|
|
256
|
-
version: '
|
|
249
|
+
version: '2.0'
|
|
257
250
|
- !ruby/object:Gem::Dependency
|
|
258
251
|
name: coveralls
|
|
259
252
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -274,14 +267,14 @@ dependencies:
|
|
|
274
267
|
requirements:
|
|
275
268
|
- - "~>"
|
|
276
269
|
- !ruby/object:Gem::Version
|
|
277
|
-
version: '
|
|
270
|
+
version: '8.0'
|
|
278
271
|
type: :development
|
|
279
272
|
prerelease: false
|
|
280
273
|
version_requirements: !ruby/object:Gem::Requirement
|
|
281
274
|
requirements:
|
|
282
275
|
- - "~>"
|
|
283
276
|
- !ruby/object:Gem::Version
|
|
284
|
-
version: '
|
|
277
|
+
version: '8.0'
|
|
285
278
|
- !ruby/object:Gem::Dependency
|
|
286
279
|
name: pry
|
|
287
280
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -322,14 +315,14 @@ dependencies:
|
|
|
322
315
|
requirements:
|
|
323
316
|
- - "~>"
|
|
324
317
|
- !ruby/object:Gem::Version
|
|
325
|
-
version: '
|
|
318
|
+
version: '13.0'
|
|
326
319
|
type: :development
|
|
327
320
|
prerelease: false
|
|
328
321
|
version_requirements: !ruby/object:Gem::Requirement
|
|
329
322
|
requirements:
|
|
330
323
|
- - "~>"
|
|
331
324
|
- !ruby/object:Gem::Version
|
|
332
|
-
version: '
|
|
325
|
+
version: '13.0'
|
|
333
326
|
- !ruby/object:Gem::Dependency
|
|
334
327
|
name: rspec
|
|
335
328
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -386,20 +379,34 @@ dependencies:
|
|
|
386
379
|
- - "~>"
|
|
387
380
|
- !ruby/object:Gem::Version
|
|
388
381
|
version: '0.8'
|
|
382
|
+
- !ruby/object:Gem::Dependency
|
|
383
|
+
name: uuid
|
|
384
|
+
requirement: !ruby/object:Gem::Requirement
|
|
385
|
+
requirements:
|
|
386
|
+
- - "~>"
|
|
387
|
+
- !ruby/object:Gem::Version
|
|
388
|
+
version: 2.3.9
|
|
389
|
+
type: :development
|
|
390
|
+
prerelease: false
|
|
391
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
392
|
+
requirements:
|
|
393
|
+
- - "~>"
|
|
394
|
+
- !ruby/object:Gem::Version
|
|
395
|
+
version: 2.3.9
|
|
389
396
|
- !ruby/object:Gem::Dependency
|
|
390
397
|
name: vcr
|
|
391
398
|
requirement: !ruby/object:Gem::Requirement
|
|
392
399
|
requirements:
|
|
393
400
|
- - "~>"
|
|
394
401
|
- !ruby/object:Gem::Version
|
|
395
|
-
version: '
|
|
402
|
+
version: '5.0'
|
|
396
403
|
type: :development
|
|
397
404
|
prerelease: false
|
|
398
405
|
version_requirements: !ruby/object:Gem::Requirement
|
|
399
406
|
requirements:
|
|
400
407
|
- - "~>"
|
|
401
408
|
- !ruby/object:Gem::Version
|
|
402
|
-
version: '
|
|
409
|
+
version: '5.0'
|
|
403
410
|
- !ruby/object:Gem::Dependency
|
|
404
411
|
name: webmock
|
|
405
412
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -417,6 +424,7 @@ dependencies:
|
|
|
417
424
|
description:
|
|
418
425
|
email:
|
|
419
426
|
- murnagha@ualberta.ca
|
|
427
|
+
- orodrigu@ualberta.ca
|
|
420
428
|
executables:
|
|
421
429
|
- pushmi_pullyu
|
|
422
430
|
extensions: []
|
|
@@ -424,11 +432,15 @@ extra_rdoc_files: []
|
|
|
424
432
|
files:
|
|
425
433
|
- ".coveralls.yml"
|
|
426
434
|
- ".editorconfig"
|
|
435
|
+
- ".github/CODEOWNERS"
|
|
436
|
+
- ".github/PULL_REQUEST_TEMPLATE"
|
|
437
|
+
- ".github/workflows/ruby.yml"
|
|
427
438
|
- ".gitignore"
|
|
428
439
|
- ".hound.yml"
|
|
429
440
|
- ".rspec"
|
|
430
441
|
- ".rubocop.yml"
|
|
431
442
|
- ".travis.yml"
|
|
443
|
+
- CHANGELOG.md
|
|
432
444
|
- Dangerfile
|
|
433
445
|
- Gemfile
|
|
434
446
|
- LICENSE.txt
|
|
@@ -445,10 +457,6 @@ files:
|
|
|
445
457
|
- lib/pushmi_pullyu/aip.rb
|
|
446
458
|
- lib/pushmi_pullyu/aip/creator.rb
|
|
447
459
|
- lib/pushmi_pullyu/aip/downloader.rb
|
|
448
|
-
- lib/pushmi_pullyu/aip/fedora_fetcher.rb
|
|
449
|
-
- lib/pushmi_pullyu/aip/file_list_creator.rb
|
|
450
|
-
- lib/pushmi_pullyu/aip/owner_email_editor.rb
|
|
451
|
-
- lib/pushmi_pullyu/aip/user.rb
|
|
452
460
|
- lib/pushmi_pullyu/cli.rb
|
|
453
461
|
- lib/pushmi_pullyu/logging.rb
|
|
454
462
|
- lib/pushmi_pullyu/preservation_queue.rb
|
|
@@ -470,16 +478,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
470
478
|
requirements:
|
|
471
479
|
- - ">="
|
|
472
480
|
- !ruby/object:Gem::Version
|
|
473
|
-
version: 2.
|
|
481
|
+
version: '2.5'
|
|
474
482
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
475
483
|
requirements:
|
|
476
484
|
- - ">="
|
|
477
485
|
- !ruby/object:Gem::Version
|
|
478
486
|
version: '0'
|
|
479
487
|
requirements: []
|
|
480
|
-
|
|
481
|
-
rubygems_version: 2.6.11
|
|
488
|
+
rubygems_version: 3.0.3
|
|
482
489
|
signing_key:
|
|
483
490
|
specification_version: 4
|
|
484
|
-
summary: Ruby application to manage flow of content from
|
|
491
|
+
summary: Ruby application to manage flow of content from Jupiter into Swift for preservation
|
|
485
492
|
test_files: []
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
require 'net/http'
|
|
2
|
-
|
|
3
|
-
class PushmiPullyu::AIP::FedoraFetcher
|
|
4
|
-
|
|
5
|
-
class FedoraFetchError < StandardError; end
|
|
6
|
-
|
|
7
|
-
RDF_FORMAT = 'text/rdf+n3'.freeze
|
|
8
|
-
|
|
9
|
-
def initialize(noid)
|
|
10
|
-
@noid = noid
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def object_url(url_extra = nil)
|
|
14
|
-
url = "#{PushmiPullyu.options[:fedora][:url]}#{base_path}/#{pairtree}"
|
|
15
|
-
url += url_extra if url_extra
|
|
16
|
-
url
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
# Return true on success, raise an error otherwise
|
|
20
|
-
# (or use 'optional' to return false on 404)
|
|
21
|
-
def download_object(download_path, url_extra: nil,
|
|
22
|
-
optional: false, is_rdf: false,
|
|
23
|
-
should_add_user_email: false)
|
|
24
|
-
|
|
25
|
-
uri = URI(object_url(url_extra))
|
|
26
|
-
|
|
27
|
-
request = Net::HTTP::Get.new(uri)
|
|
28
|
-
request.basic_auth(PushmiPullyu.options[:fedora][:user],
|
|
29
|
-
PushmiPullyu.options[:fedora][:password])
|
|
30
|
-
|
|
31
|
-
request['Accept'] = RDF_FORMAT if is_rdf
|
|
32
|
-
|
|
33
|
-
response = Net::HTTP.start(uri.hostname, uri.port) do |http|
|
|
34
|
-
http.request(request)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
if response.is_a?(Net::HTTPSuccess)
|
|
38
|
-
body = if should_add_user_email
|
|
39
|
-
PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
|
|
40
|
-
else
|
|
41
|
-
response.body
|
|
42
|
-
end
|
|
43
|
-
file = File.open(download_path, 'wb')
|
|
44
|
-
file.write(body)
|
|
45
|
-
file.close
|
|
46
|
-
return true
|
|
47
|
-
elsif response.is_a?(Net::HTTPNotFound)
|
|
48
|
-
raise FedoraFetchError unless optional
|
|
49
|
-
|
|
50
|
-
return false
|
|
51
|
-
else
|
|
52
|
-
raise FedoraFetchError
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
private
|
|
57
|
-
|
|
58
|
-
def pairtree
|
|
59
|
-
"#{@noid[0..1]}/#{@noid[2..3]}/#{@noid[4..5]}/#{@noid[6..7]}/#{@noid}"
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def base_path
|
|
63
|
-
PushmiPullyu.options[:fedora][:base_path]
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
end
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
require 'rdf'
|
|
2
|
-
require 'rdf/n3'
|
|
3
|
-
require 'rest-client'
|
|
4
|
-
|
|
5
|
-
class PushmiPullyu::AIP::FileListCreator
|
|
6
|
-
|
|
7
|
-
IANA = 'http://www.iana.org/assignments/relation/'.freeze
|
|
8
|
-
PREDICATES = {
|
|
9
|
-
proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
|
|
10
|
-
first: RDF::URI(IANA + 'first'),
|
|
11
|
-
last: RDF::URI(IANA + 'last'),
|
|
12
|
-
prev: RDF::URI(IANA + 'prev'),
|
|
13
|
-
next: RDF::URI(IANA + 'next'),
|
|
14
|
-
has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
|
|
15
|
-
}.freeze
|
|
16
|
-
|
|
17
|
-
class NoProxyURIFound < StandardError; end
|
|
18
|
-
class NoFirstProxyFound < StandardError; end
|
|
19
|
-
class FirstProxyHasPrev < StandardError; end
|
|
20
|
-
class ListSourceFileSetMismatch < StandardError; end
|
|
21
|
-
|
|
22
|
-
def initialize(list_source_uri, output_xml_file, file_set_uuids)
|
|
23
|
-
@uri = RDF::URI(list_source_uri)
|
|
24
|
-
@auth_uri = RDF::URI(list_source_uri)
|
|
25
|
-
@auth_uri.user = PushmiPullyu.options[:fedora][:user]
|
|
26
|
-
@auth_uri.password = PushmiPullyu.options[:fedora][:password]
|
|
27
|
-
@output_file = output_xml_file
|
|
28
|
-
|
|
29
|
-
# These are the known fileset uuids, used for validation
|
|
30
|
-
@file_set_uuids = file_set_uuids
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
def run
|
|
34
|
-
extract_list_source_uuids
|
|
35
|
-
raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort
|
|
36
|
-
|
|
37
|
-
write_output_file
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def extract_list_source_uuids
|
|
41
|
-
# Note: raises IOError if can't find
|
|
42
|
-
# raises RDF::ReaderError if can't parse
|
|
43
|
-
@graph = RDF::Graph.load(@auth_uri, validate: true)
|
|
44
|
-
@list_source_uuids = []
|
|
45
|
-
|
|
46
|
-
# Fetch first FileSet in list source
|
|
47
|
-
this_proxy = find_first_proxy
|
|
48
|
-
|
|
49
|
-
while @list_source_uuids.count <= num_proxies
|
|
50
|
-
@list_source_uuids << uuid_from_proxy(this_proxy)
|
|
51
|
-
next_proxy = find_next_proxy(this_proxy)
|
|
52
|
-
|
|
53
|
-
break if next_proxy.nil?
|
|
54
|
-
|
|
55
|
-
raise NextPreviousProxyMismatch if this_proxy != find_prev_proxy(next_proxy)
|
|
56
|
-
|
|
57
|
-
this_proxy = next_proxy
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
raise ProxyCountIncorrect if @list_source_uuids.count != num_proxies
|
|
61
|
-
raise LastProxyFailsValidation if this_proxy != find_last_proxy
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
def num_proxies
|
|
65
|
-
@num_proxies ||= @graph.query(subject: @uri, predicate: PREDICATES[:has_part]).count
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
def uuid_from_proxy(proxy_uri)
|
|
69
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:proxy_for]) do |statement|
|
|
70
|
-
return statement.object.to_s.split('/').last
|
|
71
|
-
end
|
|
72
|
-
raise NoProxyURIFound, proxy_uri.to_s
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def find_first_proxy
|
|
76
|
-
@graph.query(subject: @uri, predicate: PREDICATES[:first]) do |statement|
|
|
77
|
-
first_uri = statement.object
|
|
78
|
-
# Validate that the first proxy doesn't have a previous one
|
|
79
|
-
raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)
|
|
80
|
-
|
|
81
|
-
return first_uri
|
|
82
|
-
end
|
|
83
|
-
raise NoFirstProxyFound, @uri.to_s
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def find_last_proxy
|
|
87
|
-
@graph.query(subject: @uri, predicate: PREDICATES[:last]) do |statement|
|
|
88
|
-
last_uri = statement.object
|
|
89
|
-
# Validate that the last proxy doesn't have a next one
|
|
90
|
-
raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)
|
|
91
|
-
|
|
92
|
-
return last_uri
|
|
93
|
-
end
|
|
94
|
-
raise LastProxyFound, @uri.to_s
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
def find_next_proxy(proxy_uri)
|
|
98
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:next]) do |statement|
|
|
99
|
-
return statement.object
|
|
100
|
-
end
|
|
101
|
-
nil
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def find_prev_proxy(proxy_uri)
|
|
105
|
-
@graph.query(subject: proxy_uri, predicate: PREDICATES[:prev]) do |statement|
|
|
106
|
-
return statement.object
|
|
107
|
-
end
|
|
108
|
-
nil
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def write_output_file
|
|
112
|
-
File.open(@output_file, 'w') do |file|
|
|
113
|
-
file.write("<file_order>\n")
|
|
114
|
-
@list_source_uuids.each do |uuid|
|
|
115
|
-
file.write(" <uuid>#{uuid}</uuid>\n")
|
|
116
|
-
end
|
|
117
|
-
file.write("</file_order>\n")
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
end
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
require 'net/http'
|
|
2
|
-
|
|
3
|
-
class PushmiPullyu::AIP::OwnerEmailEditor
|
|
4
|
-
|
|
5
|
-
OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze
|
|
6
|
-
|
|
7
|
-
class NoOwnerPredicate < StandardError; end
|
|
8
|
-
|
|
9
|
-
def initialize(rdf_string)
|
|
10
|
-
@document = rdf_string
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
def run
|
|
14
|
-
setup_db_connection
|
|
15
|
-
is_modified = false
|
|
16
|
-
prefixes = nil
|
|
17
|
-
# Read once to load prefixes (the @things at the top of an n3 file)
|
|
18
|
-
RDF::N3::Reader.new(input = @document) do |reader|
|
|
19
|
-
reader.each_statement { |_statement| }
|
|
20
|
-
prefixes = reader.prefixes
|
|
21
|
-
end
|
|
22
|
-
new_body = RDF::N3::Writer.buffer(prefixes: prefixes) do |writer|
|
|
23
|
-
RDF::N3::Reader.new(input = @document) do |reader|
|
|
24
|
-
reader.each_statement do |statement|
|
|
25
|
-
if statement.predicate == OWNER_PREDICATE
|
|
26
|
-
user = PushmiPullyu::AIP::User.find(statement.object.to_i)
|
|
27
|
-
writer << [statement.subject, statement.predicate, user.email]
|
|
28
|
-
is_modified = true
|
|
29
|
-
else
|
|
30
|
-
writer << statement
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
return new_body if is_modified
|
|
36
|
-
|
|
37
|
-
raise NoOwnerPredicate
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
private
|
|
41
|
-
|
|
42
|
-
def setup_db_connection
|
|
43
|
-
ActiveRecord::Base.establish_connection(database_configuration)
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def database_configuration
|
|
47
|
-
PushmiPullyu.options[:database][:url]
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
end
|