pushmi_pullyu 1.0.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +2 -0
- data/.github/PULL_REQUEST_TEMPLATE +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.rubocop.yml +20 -1
- data/.travis.yml +2 -2
- data/CHANGELOG.md +16 -0
- data/Dangerfile +7 -3
- data/README.md +4 -5
- data/examples/pushmi_pullyu.yml +7 -10
- data/lib/pushmi_pullyu.rb +10 -17
- data/lib/pushmi_pullyu/aip.rb +9 -6
- data/lib/pushmi_pullyu/aip/downloader.rb +133 -180
- data/lib/pushmi_pullyu/cli.rb +34 -16
- data/lib/pushmi_pullyu/logging.rb +1 -1
- data/lib/pushmi_pullyu/preservation_queue.rb +1 -0
- data/lib/pushmi_pullyu/swift_depositer.rb +1 -1
- data/lib/pushmi_pullyu/version.rb +1 -1
- data/pushmi_pullyu.gemspec +15 -14
- metadata +82 -49
- data/lib/pushmi_pullyu/aip/fedora_fetcher.rb +0 -65
- data/lib/pushmi_pullyu/aip/file_list_creator.rb +0 -118
- data/lib/pushmi_pullyu/aip/owner_email_editor.rb +0 -62
- data/lib/pushmi_pullyu/aip/user.rb +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8596ed87a287d7708b92c6a3538dcff1f976ddd1b2dc0171b30011bcfe62276b
|
4
|
+
data.tar.gz: 9babcb9576246ee00e581056361731b324e27d02c49814af9b26346fbd269120
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d031eef30ea6828c6fac6c094ab040e914fce8094298249f56ebf2b44d8820c22fd3de6ff98abc546e482f278351b6838419d9b09a90cdd9be6b50576896afc
|
7
|
+
data.tar.gz: 858a1d2f2a91a7ee9f9c0257391aa601316afa8fb07c09e883b817ec7b90879f5fdeda9eca34b375aeea5c1ed2c9b06b13c1212a03e30c538d2942a2e4976dca
|
data/.github/CODEOWNERS
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ master ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ master ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
services:
|
13
|
+
redis:
|
14
|
+
image: redis:alpine
|
15
|
+
ports: ["6379:6379"]
|
16
|
+
steps:
|
17
|
+
- uses: actions/checkout@v2
|
18
|
+
- name: Set up Ruby
|
19
|
+
uses: ruby/setup-ruby@v1
|
20
|
+
with:
|
21
|
+
ruby-version: 2.6.6
|
22
|
+
- name: Install dependencies
|
23
|
+
run: bundle install
|
24
|
+
- name: Lint with RuboCop
|
25
|
+
run: bundle exec rubocop --parallel
|
26
|
+
- name: Run Danger
|
27
|
+
env:
|
28
|
+
DANGER_GITHUB_API_TOKEN: ${{ secrets.DANGER_GITHUB_API_TOKEN }}
|
29
|
+
run: bundle exec danger
|
30
|
+
- name: Run tests
|
31
|
+
run: bundle exec rake spec
|
data/.rubocop.yml
CHANGED
@@ -10,8 +10,9 @@ AllCops:
|
|
10
10
|
Exclude:
|
11
11
|
- 'tmp/**/*'
|
12
12
|
- 'vendor/**/*'
|
13
|
+
- 'Dangerfile'
|
13
14
|
ExtraDetails: true
|
14
|
-
TargetRubyVersion: 2.
|
15
|
+
TargetRubyVersion: 2.5
|
15
16
|
|
16
17
|
# readability is Actually Good
|
17
18
|
Layout/EmptyLinesAroundClassBody:
|
@@ -59,6 +60,15 @@ Style/ClassAndModuleChildren:
|
|
59
60
|
Style/Documentation:
|
60
61
|
Enabled: false
|
61
62
|
|
63
|
+
Style/HashEachMethods:
|
64
|
+
Enabled: true
|
65
|
+
|
66
|
+
Style/HashTransformKeys:
|
67
|
+
Enabled: true
|
68
|
+
|
69
|
+
Style/HashTransformValues:
|
70
|
+
Enabled: true
|
71
|
+
|
62
72
|
Naming/FileName:
|
63
73
|
Exclude:
|
64
74
|
- Dangerfile
|
@@ -88,3 +98,12 @@ RSpec/MultipleExpectations:
|
|
88
98
|
|
89
99
|
RSpec/DescribedClass:
|
90
100
|
EnforcedStyle: explicit
|
101
|
+
|
102
|
+
Lint/RaiseException:
|
103
|
+
Enabled: true
|
104
|
+
|
105
|
+
Lint/StructNewOverride:
|
106
|
+
Enabled: true
|
107
|
+
|
108
|
+
RSpec/MultipleMemoizedHelpers:
|
109
|
+
Enabled: false
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to the PushmiPullyu project will be documented in this file.
|
3
|
+
|
4
|
+
PushmiPullyu is a Ruby application, whose primary job is to manage the flow of content from [Jupiter](https://github.com/ualbertalib/jupiter/) into Swift for preservation.
|
5
|
+
|
6
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
7
|
+
and releases of PushmiPullyu adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
8
|
+
|
9
|
+
## [Unreleased]
|
10
|
+
|
11
|
+
## [2.0.0] - 2020-12-14
|
12
|
+
|
13
|
+
### Removed
|
14
|
+
- Data output for original_file information
|
15
|
+
|
16
|
+
## [1.0.6] - 2018-11-29
|
data/Dangerfile
CHANGED
@@ -13,9 +13,13 @@ end
|
|
13
13
|
# just leaving a title
|
14
14
|
warn('Please add a detailed summary in the description.') if github.pr_body.length < 5
|
15
15
|
|
16
|
-
#
|
17
|
-
|
18
|
-
|
16
|
+
# Let people say that this isn't worth a CHANGELOG entry in the PR if they choose
|
17
|
+
declared_trivial = (github.pr_title + github.pr_body).include?('#trivial') || !has_app_changes
|
18
|
+
|
19
|
+
if !git.modified_files.include?('CHANGELOG.md') && !declared_trivial
|
20
|
+
error_message = "Please include a CHANGELOG entry. \nYou can find it at " \
|
21
|
+
'[CHANGELOG.md](https://github.com/ualbertalib/pushmi_pullyu/blob/master/CHANGELOG.md).'
|
22
|
+
fail(error_message, sticky: false)
|
19
23
|
end
|
20
24
|
|
21
25
|
# Warn when there is a big PR
|
data/README.md
CHANGED
@@ -5,22 +5,21 @@
|
|
5
5
|
</p>
|
6
6
|
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/pushmi_pullyu.svg)](https://rubygems.org/gems/pushmi_pullyu)
|
8
|
-
[![
|
9
|
-
[![Build Status](https://travis-ci.org/ualbertalib/pushmi_pullyu.svg?branch=master)](https://travis-ci.org/ualbertalib/pushmi_pullyu)
|
8
|
+
[![Github Build Status](https://github.com/ualbertalib/pushmi_pullyu/workflows/CI/badge.svg)](https://github.com/ualbertalib/pushmi_pullyu/actions)
|
10
9
|
[![Coverage Status](https://coveralls.io/repos/github/ualbertalib/pushmi_pullyu/badge.svg?branch=master)](https://coveralls.io/github/ualbertalib/pushmi_pullyu?branch=master)
|
11
10
|
|
12
11
|
PushmiPullyu is a Ruby application, running behind the firewall that protects our Swift environment.
|
13
12
|
|
14
|
-
Its primary job is to manage the flow of content from
|
13
|
+
Its primary job is to manage the flow of content from Jupiter into Swift for preservation.
|
15
14
|
|
16
15
|
![System Infrastructure Diagram](docs/images/system-infrastructure-diagram.png)
|
17
16
|
|
18
17
|
## Workflow
|
19
18
|
|
20
|
-
1. Any save (create or update) on a Item/Thesis in ERA/Jupiter will trigger an after save callback that will push the item's unique identifier (UUID
|
19
|
+
1. Any save (create or update) on an Item/Thesis in ERA/Jupiter will trigger an after-save callback that will push the item's unique identifier (UUID) into a Queue.
|
21
20
|
2. The queue (Redis) is set up to be a unique set (which only allows one item's UUID to be included in the queue at a single time), and ordered by priority from First In, First Out (FIFO).
|
22
21
|
3. PushmiPullyu will then monitor the queue. After a certain wait period has passed since an element has been on the queue, PushmiPullyu will then retrieve the elements off the queue and begin to process the preservation event.
|
23
|
-
4. All the
|
22
|
+
4. All the Item/Thesis information and data required for preservation are retrieved from Jupiter using multiple REST calls to Jupiter's AIP API.
|
24
23
|
5. An Archival Information Package (AIP) is created from the item's information. It is then bagged and tarred.
|
25
24
|
6. The AIP tar is then uploaded to Swift via a REST call.
|
26
25
|
7. On a successful Swift upload, an entry is added for this preservation event to the preservation event logs.
|
data/examples/pushmi_pullyu.yml
CHANGED
@@ -21,17 +21,8 @@ minimum_age: 0
|
|
21
21
|
redis:
|
22
22
|
url: redis://localhost:6379
|
23
23
|
|
24
|
-
fedora:
|
25
|
-
url: http://localhost:8080/fcrepo/rest
|
26
|
-
user: fedoraAdmin
|
27
|
-
password: fedoraAdmin
|
28
|
-
base_path: /dev
|
29
|
-
|
30
24
|
database:
|
31
|
-
|
32
|
-
url: postgresql://jupiter:mysecretpassword@127.0.0.1
|
33
|
-
database: jupiter_development
|
34
|
-
pool: 5
|
25
|
+
url: postgresql://jupiter:mysecretpassword@127.0.0.1/jupiter_development
|
35
26
|
|
36
27
|
#parameters project_name and project_domain_name are required only for keystone v3 authentication
|
37
28
|
swift:
|
@@ -47,3 +38,9 @@ rollbar:
|
|
47
38
|
token: 'abc123xyz'
|
48
39
|
proxy_host: 'your_proxy_host_url'
|
49
40
|
proxy_port: '80'
|
41
|
+
|
42
|
+
jupiter:
|
43
|
+
user: jupiter@ualberta.ca
|
44
|
+
api_key: 5042c4ad-6d22-486d-bc63-2b9e5b9a630a
|
45
|
+
jupiter_url: http://localhost:3000/
|
46
|
+
aip_api_path: aip/v1
|
data/lib/pushmi_pullyu.rb
CHANGED
@@ -1,22 +1,15 @@
|
|
1
|
-
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
2
|
-
# (This fixes a weird NameError bug when using the nested compact syntax
|
3
|
-
# defining modules/classes like `module PushmiPullyu::Logging`)
|
4
|
-
require 'pushmi_pullyu/version'
|
1
|
+
# require 'pushmi_pullyu/version' must be first as it declares the PushmiPullyu
|
2
|
+
# module. (This fixes a weird NameError bug when using the nested compact syntax
|
3
|
+
# for defining modules/classes like `module PushmiPullyu::Logging`)
|
5
4
|
|
5
|
+
require 'pushmi_pullyu/version'
|
6
6
|
require 'pushmi_pullyu/logging'
|
7
|
-
|
8
7
|
require 'pushmi_pullyu/aip'
|
9
8
|
require 'pushmi_pullyu/aip/creator'
|
10
9
|
require 'pushmi_pullyu/aip/downloader'
|
11
|
-
require 'pushmi_pullyu/aip/fedora_fetcher'
|
12
|
-
require 'pushmi_pullyu/aip/file_list_creator'
|
13
|
-
require 'pushmi_pullyu/aip/owner_email_editor'
|
14
|
-
require 'active_record'
|
15
|
-
require 'pushmi_pullyu/aip/user'
|
16
10
|
require 'pushmi_pullyu/cli'
|
17
11
|
require 'pushmi_pullyu/preservation_queue'
|
18
12
|
require 'pushmi_pullyu/swift_depositer'
|
19
|
-
|
20
13
|
require 'active_support'
|
21
14
|
require 'active_support/core_ext'
|
22
15
|
|
@@ -36,12 +29,6 @@ module PushmiPullyu
|
|
36
29
|
redis: {
|
37
30
|
url: 'redis://localhost:6379'
|
38
31
|
},
|
39
|
-
fedora: {
|
40
|
-
url: 'http://localhost:8080/fcrepo/rest',
|
41
|
-
user: 'fedoraAdmin',
|
42
|
-
password: 'fedoraAdmin',
|
43
|
-
base_path: '/dev'
|
44
|
-
},
|
45
32
|
swift: {
|
46
33
|
tenant: 'tester',
|
47
34
|
username: 'test:tester',
|
@@ -58,6 +45,12 @@ module PushmiPullyu
|
|
58
45
|
pool: ENV['RAILS_MAX_THREADS'] || 5,
|
59
46
|
url: ENV['DATABASE_URL'] || ENV['JUPITER_DATABASE_URL'] || 'postgresql://jupiter:mysecretpassword@127.0.0.1',
|
60
47
|
database: 'jupiter_development'
|
48
|
+
},
|
49
|
+
jupiter: {
|
50
|
+
user: ENV['JUPITER_USER'],
|
51
|
+
api_key: ENV['JUPITER_API_KEY'],
|
52
|
+
jupiter_url: ENV['JUPITER_URL'] || 'http://localhost:3000/',
|
53
|
+
aip_api_path: ENV['JUPITER_AIP_API_PATH'] || 'aip/v1'
|
61
54
|
}
|
62
55
|
}.freeze
|
63
56
|
|
data/lib/pushmi_pullyu/aip.rb
CHANGED
@@ -1,17 +1,20 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require 'uuid'
|
2
3
|
|
3
4
|
module PushmiPullyu::AIP
|
4
|
-
class
|
5
|
+
class EntityInvalid < StandardError; end
|
5
6
|
module_function
|
6
7
|
|
7
|
-
def create(
|
8
|
-
raise
|
8
|
+
def create(entity)
|
9
|
+
raise EntityInvalid if entity.nil? ||
|
10
|
+
UUID.validate(entity[:uuid]) != true ||
|
11
|
+
entity[:type].blank?
|
9
12
|
|
10
|
-
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{
|
13
|
+
aip_directory = "#{PushmiPullyu.options[:workdir]}/#{entity[:uuid]}"
|
11
14
|
aip_filename = "#{aip_directory}.tar"
|
12
15
|
|
13
|
-
PushmiPullyu::AIP::Downloader.new(
|
14
|
-
PushmiPullyu::AIP::Creator.new(
|
16
|
+
PushmiPullyu::AIP::Downloader.new(entity, aip_directory).run
|
17
|
+
PushmiPullyu::AIP::Creator.new(entity[:uuid], aip_directory, aip_filename).run
|
15
18
|
|
16
19
|
yield aip_filename, aip_directory
|
17
20
|
|
@@ -2,260 +2,213 @@ require 'fileutils'
|
|
2
2
|
require 'ostruct'
|
3
3
|
require 'rdf'
|
4
4
|
require 'rdf/n3'
|
5
|
+
require 'net/http'
|
6
|
+
require 'uri'
|
7
|
+
require 'digest'
|
5
8
|
|
6
|
-
# Download all of the metadata/datastreams and associated data
|
7
|
-
# related to an object
|
9
|
+
# Download all of the metadata/datastreams and associated data related to an object
|
8
10
|
class PushmiPullyu::AIP::Downloader
|
9
11
|
|
10
12
|
PREDICATE_URIS = {
|
11
13
|
filename: 'http://purl.org/dc/terms/title',
|
12
14
|
member_files: 'http://pcdm.org/models#hasFile',
|
13
15
|
member_file_sets: 'http://pcdm.org/models#hasMember',
|
14
|
-
original_file: 'http://pcdm.org/use#OriginalFile',
|
15
16
|
type: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
|
16
17
|
}.freeze
|
17
18
|
|
18
|
-
class
|
19
|
-
class
|
20
|
-
class
|
21
|
-
class NoOriginalFile < StandardError; end
|
19
|
+
class JupiterDownloadError < StandardError; end
|
20
|
+
class JupiterCopyError < StandardError; end
|
21
|
+
class JupiterAuthenticationError < StandardError; end
|
22
22
|
|
23
|
-
def initialize(
|
24
|
-
@
|
23
|
+
def initialize(entity, aip_directory)
|
24
|
+
@entity = entity
|
25
|
+
@entity_identifier = "[#{entity[:type]} - #{entity[:uuid]}]".freeze
|
25
26
|
@aip_directory = aip_directory
|
26
27
|
end
|
27
28
|
|
28
29
|
def run
|
29
|
-
|
30
|
+
PushmiPullyu.logger.info("#{@entity_identifier}: Retreiving data from Jupiter ...")
|
30
31
|
|
31
|
-
|
32
|
+
authenticate_http_calls
|
33
|
+
make_directories
|
32
34
|
|
33
35
|
# Main object metadata
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
download_and_log(
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
member_files(file_set_uuid).each do |file_path|
|
52
|
-
path_spec = OpenStruct.new(
|
53
|
-
remote: "/files/#{file_path}/fcr:metadata",
|
54
|
-
# Note: local file gets clobbered on each download until it finds the right one
|
55
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/original_file_metadata.n3",
|
56
|
-
optional: true
|
57
|
-
)
|
58
|
-
download_and_log(path_spec, file_set_downloader)
|
59
|
-
if original_file?(path_spec.local)
|
60
|
-
original_file_remote_base = "/files/#{file_path}"
|
61
|
-
break
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
raise NoOriginalFile unless original_file_remote_base.present?
|
66
|
-
|
67
|
-
[:content, :fixity].each do |item|
|
68
|
-
path_spec = file_aip_paths(file_set_uuid, original_file_remote_base)[item]
|
69
|
-
download_and_log(path_spec, file_set_downloader)
|
70
|
-
end
|
36
|
+
download_and_log(object_aip_paths[:main_object_remote],
|
37
|
+
object_aip_paths[:main_object_local])
|
38
|
+
download_and_log(object_aip_paths[:file_sets_remote],
|
39
|
+
object_aip_paths[:file_sets_local])
|
40
|
+
|
41
|
+
# Get file paths for processing
|
42
|
+
file_paths = get_file_paths(object_aip_paths[:file_paths_remote])
|
43
|
+
|
44
|
+
file_paths[:files].each do |file_path|
|
45
|
+
file_uuid = file_path[:file_uuid]
|
46
|
+
make_file_set_directories(file_uuid)
|
47
|
+
copy_and_log(file_uuid, file_path)
|
48
|
+
file_aip_path = file_aip_paths(file_uuid)
|
49
|
+
download_and_log(file_aip_path[:fixity_remote],
|
50
|
+
file_aip_path[:fixity_local])
|
51
|
+
download_and_log(file_aip_path[:file_set_remote],
|
52
|
+
file_aip_path[:file_set_local])
|
71
53
|
end
|
72
54
|
end
|
73
55
|
|
74
56
|
private
|
75
57
|
|
76
|
-
def
|
77
|
-
|
78
|
-
|
79
|
-
|
58
|
+
def copy_and_log(file_uuid, file_path)
|
59
|
+
remote = file_path[:file_path]
|
60
|
+
remote_checksum = file_path[:file_checksum]
|
61
|
+
files_path = file_set_dirs(file_uuid)[:files]
|
62
|
+
output_file = "#{files_path}/#{file_path[:file_name]}"
|
63
|
+
log_downloading(remote, output_file)
|
64
|
+
FileUtils.copy_file(remote, output_file)
|
80
65
|
|
81
|
-
|
82
|
-
|
66
|
+
is_success = File.exist?(output_file) &&
|
67
|
+
File.size(remote) == File.size(output_file) &&
|
68
|
+
compare_md5(output_file, remote_checksum)
|
83
69
|
|
84
|
-
is_success = fedora_fetcher.download_object(output_file,
|
85
|
-
url_extra: path_spec.remote,
|
86
|
-
optional: path_spec.optional,
|
87
|
-
is_rdf: is_rdf,
|
88
|
-
should_add_user_email: should_add_user_email)
|
89
70
|
log_saved(is_success, output_file)
|
71
|
+
|
72
|
+
raise JupiterCopyError unless is_success
|
73
|
+
end
|
74
|
+
|
75
|
+
def compare_md5(local, remote_checksum)
|
76
|
+
local_md5 = Digest::MD5.file local
|
77
|
+
local_md5.base64digest == remote_checksum
|
78
|
+
end
|
79
|
+
|
80
|
+
def authenticate_http_calls
|
81
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
82
|
+
@http = Net::HTTP.new(@uri.host, @uri.port)
|
83
|
+
request = Net::HTTP::Post.new(@uri.request_uri + 'auth/system')
|
84
|
+
request.set_form_data(
|
85
|
+
email: PushmiPullyu.options[:jupiter][:user],
|
86
|
+
api_key: PushmiPullyu.options[:jupiter][:api_key]
|
87
|
+
)
|
88
|
+
response = @http.request(request)
|
89
|
+
# If we cannot find the set-cookie header then the session was not set
|
90
|
+
raise JupiterAuthenticationError if response.response['set-cookie'].nil?
|
91
|
+
|
92
|
+
@cookies = response.response['set-cookie']
|
93
|
+
end
|
94
|
+
|
95
|
+
def download_and_log(remote, local)
|
96
|
+
log_downloading(remote, local)
|
97
|
+
|
98
|
+
@uri = URI.parse(PushmiPullyu.options[:jupiter][:jupiter_url])
|
99
|
+
request = Net::HTTP::Get.new(@uri.request_uri + remote)
|
100
|
+
# add previously stored cookies
|
101
|
+
request['Cookie'] = @cookies
|
102
|
+
|
103
|
+
response = @http.request(request)
|
104
|
+
is_success = if response.is_a?(Net::HTTPSuccess)
|
105
|
+
File.open(local, 'wb') do |file|
|
106
|
+
file.write(response.body)
|
107
|
+
end
|
108
|
+
# Response was a success and the file was saved to local
|
109
|
+
File.exist? local
|
110
|
+
end
|
111
|
+
|
112
|
+
log_saved(is_success, local)
|
113
|
+
raise JupiterDownloadError unless is_success
|
114
|
+
end
|
115
|
+
|
116
|
+
def get_file_paths(url)
|
117
|
+
request = Net::HTTP::Get.new(@uri.request_uri + url)
|
118
|
+
# add previously stored cookies
|
119
|
+
request['Cookie'] = @cookies
|
120
|
+
|
121
|
+
response = @http.request(request)
|
122
|
+
|
123
|
+
JSON.parse(response.body, symbolize_names: true)
|
90
124
|
end
|
91
125
|
|
92
|
-
def
|
93
|
-
|
94
|
-
|
95
|
-
"#{@noid}: #{output_file} -- creating from #{url} ...")
|
96
|
-
PushmiPullyu::AIP::FileListCreator.new(url, output_file, member_file_set_uuids).run
|
97
|
-
PushmiPullyu::Logging.log_aip_activity(@aip_directory,
|
98
|
-
"#{@noid}: #{output_file} -- created")
|
126
|
+
def object_uri
|
127
|
+
aip_api_url = PushmiPullyu.options[:jupiter][:aip_api_path]
|
128
|
+
@object_uri ||= "#{aip_api_url}/#{@entity[:type]}/#{@entity[:uuid]}"
|
99
129
|
end
|
100
130
|
|
101
131
|
### Logging
|
102
132
|
|
103
|
-
def
|
104
|
-
message = "#{@
|
133
|
+
def log_downloading(url, output_file)
|
134
|
+
message = "#{@entity_identifier}: #{output_file} -- Downloading from #{url} ..."
|
105
135
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
106
136
|
end
|
107
137
|
|
108
138
|
def log_saved(is_success, output_file)
|
109
|
-
message = "#{@
|
139
|
+
message = "#{@entity_identifier}: #{output_file} -- #{is_success ? 'Saved' : 'Failed'}"
|
110
140
|
PushmiPullyu::Logging.log_aip_activity(@aip_directory, message)
|
111
141
|
end
|
112
142
|
|
113
143
|
### Directories
|
114
144
|
|
115
145
|
def aip_dirs
|
116
|
-
@aip_dirs ||=
|
146
|
+
@aip_dirs ||= {
|
117
147
|
objects: "#{@aip_directory}/data/objects",
|
118
148
|
metadata: "#{@aip_directory}/data/objects/metadata",
|
119
149
|
files: "#{@aip_directory}/data/objects/files",
|
120
150
|
files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
|
121
151
|
logs: "#{@aip_directory}/data/logs",
|
122
152
|
file_logs: "#{@aip_directory}/data/logs/files_logs"
|
123
|
-
|
153
|
+
}
|
124
154
|
end
|
125
155
|
|
126
156
|
def file_set_dirs(file_set_uuid)
|
127
157
|
@file_set_dirs ||= {}
|
128
|
-
@file_set_dirs[file_set_uuid] ||=
|
129
|
-
metadata: "#{aip_dirs
|
130
|
-
files: "#{aip_dirs
|
131
|
-
logs: "#{aip_dirs
|
132
|
-
|
158
|
+
@file_set_dirs[file_set_uuid] ||= {
|
159
|
+
metadata: "#{aip_dirs[:files_metadata]}/#{file_set_uuid}",
|
160
|
+
files: "#{aip_dirs[:files]}/#{file_set_uuid}",
|
161
|
+
logs: "#{aip_dirs[:file_logs]}/#{file_set_uuid}"
|
162
|
+
}
|
133
163
|
end
|
134
164
|
|
135
165
|
def make_directories
|
166
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories ...")
|
136
167
|
clean_directories
|
137
|
-
|
138
|
-
aip_dirs.to_h.each_value do |path|
|
168
|
+
aip_dirs.each_value do |path|
|
139
169
|
FileUtils.mkdir_p(path)
|
140
170
|
end
|
141
|
-
PushmiPullyu.logger.debug("#{@
|
171
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating directories done")
|
142
172
|
end
|
143
173
|
|
144
174
|
def make_file_set_directories(file_set_uuid)
|
145
|
-
PushmiPullyu.logger.debug("#{@
|
146
|
-
file_set_dirs(file_set_uuid).
|
175
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories ...")
|
176
|
+
file_set_dirs(file_set_uuid).each_value do |path|
|
147
177
|
FileUtils.mkdir_p(path)
|
148
178
|
end
|
149
|
-
PushmiPullyu.logger.debug("#{@
|
179
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Creating file set #{file_set_uuid} directories done")
|
150
180
|
end
|
151
181
|
|
152
182
|
def clean_directories
|
153
183
|
return unless File.exist?(@aip_directory)
|
154
|
-
|
184
|
+
|
185
|
+
PushmiPullyu.logger.debug("#{@entity_identifier}: Nuking directories ...")
|
155
186
|
FileUtils.rm_rf(@aip_directory)
|
156
187
|
end
|
157
188
|
|
158
189
|
### Files
|
159
190
|
|
160
191
|
def object_aip_paths
|
161
|
-
@object_aip_paths ||=
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
local: "#{aip_dirs.files_metadata}/file_order.xml"
|
175
|
-
)
|
176
|
-
).freeze
|
177
|
-
end
|
178
|
-
|
179
|
-
def file_set_aip_paths(file_set_uuid)
|
180
|
-
@file_set_aip_paths ||= {}
|
181
|
-
@file_set_aip_paths[file_set_uuid] ||= OpenStruct.new(
|
182
|
-
main_object: OpenStruct.new(
|
183
|
-
remote: nil, # Base file_set path
|
184
|
-
local: "#{file_set_dirs(file_set_uuid).metadata}/file_set_metadata.n3",
|
185
|
-
should_add_user_email: true,
|
186
|
-
optional: false
|
187
|
-
)
|
188
|
-
).freeze
|
189
|
-
end
|
190
|
-
|
191
|
-
def file_aip_paths(file_set_uuid, original_file_remote_base)
|
192
|
+
@object_aip_paths ||= {
|
193
|
+
# Base path
|
194
|
+
main_object_remote: object_uri,
|
195
|
+
main_object_local: "#{aip_dirs[:metadata]}/object_metadata.n3",
|
196
|
+
file_sets_remote: "#{object_uri}/filesets",
|
197
|
+
file_sets_local: "#{aip_dirs[:files_metadata]}/file_order.xml",
|
198
|
+
# This is downloaded for processing but not saved
|
199
|
+
file_paths_remote: "#{object_uri}/file_paths"
|
200
|
+
}.freeze
|
201
|
+
end
|
202
|
+
|
203
|
+
def file_aip_paths(file_set_uuid)
|
204
|
+
file_set_paths = file_set_dirs(file_set_uuid)
|
192
205
|
@file_aip_paths ||= {}
|
193
|
-
@file_aip_paths[file_set_uuid] ||=
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
fixity: OpenStruct.new(
|
200
|
-
remote: "#{original_file_remote_base}/fcr:fixity",
|
201
|
-
local: "#{file_set_dirs(file_set_uuid)[:logs]}/content_fixity_report.n3",
|
202
|
-
optional: false
|
203
|
-
)
|
204
|
-
).freeze
|
205
|
-
end
|
206
|
-
|
207
|
-
def member_file_set_uuids
|
208
|
-
@member_file_set_uuids ||= []
|
209
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
210
|
-
|
211
|
-
member_file_set_predicate = RDF::URI(PREDICATE_URIS[:member_file_sets])
|
212
|
-
|
213
|
-
graph = RDF::Graph.load(object_aip_paths.main_object.local)
|
214
|
-
|
215
|
-
graph.query(predicate: member_file_set_predicate) do |results|
|
216
|
-
# Get uuid from end of fedora path
|
217
|
-
@member_file_set_uuids << results.object.to_s.split('/').last
|
218
|
-
end
|
219
|
-
return @member_file_set_uuids unless @member_file_set_uuids.empty?
|
220
|
-
|
221
|
-
raise NoFileSets
|
222
|
-
end
|
223
|
-
|
224
|
-
def file_set_filename(file_set_uuid)
|
225
|
-
filename_predicate = RDF::URI(PREDICATE_URIS[:filename])
|
226
|
-
|
227
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
228
|
-
|
229
|
-
graph.query(predicate: filename_predicate) do |results|
|
230
|
-
return "#{file_set_dirs(file_set_uuid).files}/#{results.object}"
|
231
|
-
end
|
232
|
-
|
233
|
-
raise NoContentFilename
|
234
|
-
end
|
235
|
-
|
236
|
-
def member_files(file_set_uuid)
|
237
|
-
member_file_predicate = RDF::URI(PREDICATE_URIS[:member_files])
|
238
|
-
|
239
|
-
graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
|
240
|
-
|
241
|
-
member_files = []
|
242
|
-
graph.query(predicate: member_file_predicate) do |results|
|
243
|
-
# Get uuid from end of fedora path
|
244
|
-
member_files << results.object.to_s.split('/').last
|
245
|
-
end
|
246
|
-
return member_files if member_files.present?
|
247
|
-
|
248
|
-
raise NoMemberFiles
|
249
|
-
end
|
250
|
-
|
251
|
-
def original_file?(metadata_filename)
|
252
|
-
type_predicate = RDF::URI(PREDICATE_URIS[:type])
|
253
|
-
original_file_uri = RDF::URI(PREDICATE_URIS[:original_file])
|
254
|
-
graph = RDF::Graph.load(metadata_filename)
|
255
|
-
graph.query(predicate: type_predicate) do |results|
|
256
|
-
return true if results.object == original_file_uri
|
257
|
-
end
|
258
|
-
false
|
206
|
+
@file_aip_paths[file_set_uuid] ||= {
|
207
|
+
fixity_remote: "#{object_uri}/filesets/#{file_set_uuid}/fixity",
|
208
|
+
fixity_local: "#{file_set_paths[:logs]}/content_fixity_report.n3",
|
209
|
+
file_set_remote: "#{object_uri}/filesets/#{file_set_uuid}",
|
210
|
+
file_set_local: "#{file_set_paths[:metadata]}/file_set_metadata.n3"
|
211
|
+
}.freeze
|
259
212
|
end
|
260
213
|
|
261
214
|
end
|