pushmi_pullyu 0.2.7 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 257884fa2eaea476a4765f03063c07b19927cd61
4
- data.tar.gz: 858b6ae6809fca4f0b291e9a7e868686de8d5b36
2
+ SHA256:
3
+ metadata.gz: 3c97d2575b16539392c9b268d62bf037ae289fc4989dfa976ca13796f01dec01
4
+ data.tar.gz: 2b277188e5873035ebc1c9a2bebdcf7e4837016091d2bc7b4236be90a5f74cef
5
5
  SHA512:
6
- metadata.gz: 17108f728721e24db410f4957fa10401252e458dc98663774a928655cb6ad1e71b10ecc6dcfef62484cbc164ea32e6317310e6775920fa278f8dbf5e8609f1a0
7
- data.tar.gz: 1601222c2a1769f1a5174afea65e01952fe3cea9ad1f162e102a16e2586c5e77801c8b6e742b8f3106efd3735db7179162916810092f1255d7169e6f4efe2091
6
+ metadata.gz: 36131cfed011b68a8f006d2c25968d25717919d4eb9b8016d58e65887fbe933ea61243c649bfc66b385353c527ab81ec0c78b14d226d45379f51bfd237ef5eaa
7
+ data.tar.gz: b39f6b537f3e4617efb7c778908788b038bdd7aea9f2198c34ab5276ec5ca33ddd3a90694ee548868a3b7739aecfdb50d08448477bde8c3a6beee8092d9fcf47
data/.rubocop.yml CHANGED
@@ -59,7 +59,7 @@ Style/ClassAndModuleChildren:
59
59
  Style/Documentation:
60
60
  Enabled: false
61
61
 
62
- Style/FileName:
62
+ Naming/FileName:
63
63
  Exclude:
64
64
  - Dangerfile
65
65
  - Rakefile
data/README.md CHANGED
@@ -17,11 +17,11 @@ Its primary job is to manage the flow of content from Fedora into Swift for pres
17
17
 
18
18
  ## Workflow
19
19
 
20
- 1. Any save (create or update) on a GenericFile in ERA will trigger an after save callback that will push the GenericFile unique identifier (NOID) into a Queue.
21
- 2. The queue (Redis) is setup to be a unique set (which only allows one GenericFile NOID to be included in the queue at a single time), and ordered by priority from First In, First out (FIFO).
20
+ 1. Any save (create or update) on an Item/Thesis in ERA/Jupiter will trigger an after-save callback that will push the item's unique identifier (UUID or NOID) into a Queue.
21
+ 2. The queue (Redis) is set up to be a unique set (which allows a given item's UUID to be included in the queue only once at any time), and ordered by priority from First In, First Out (FIFO).
22
22
  3. PushmiPullyu will then monitor the queue. After a certain wait period has passed since an element has been on the queue, PushmiPullyu will then retrieve the elements off the queue and begin to process the preservation event.
23
- 4. All the GenericFile information and data required for preservation are retrieved from Fedora and Solr using multiple REST calls.
24
- 5. An Archival Information Package (AIP) is created from the GenericFile's information. It is then bagged and tarred.
23
+ 4. All the GenericFile information and data required for preservation are retrieved from Fedora using multiple REST calls. Owner emails are fetched from the user database (via ActiveRecord) and written into the fetched documents, where applicable.
24
+ 5. An Archival Information Package (AIP) is created from the item's information. It is then bagged and tarred.
25
25
  6. The AIP tar is then uploaded to Swift via a REST call.
26
26
  7. On a successful Swift upload, an entry for this preservation event is added to the preservation event logs.
27
27
 
@@ -8,6 +8,7 @@
8
8
  # PushmiPullyu will run this file through ERB when reading it so you can
9
9
  # even put in dynamic logic, like consuming ENV Variables.
10
10
 
11
+ aip_version: 'lightaip-2.0'
11
12
  debug: false
12
13
  logdir: log
13
14
  monitor: false
@@ -20,15 +21,18 @@ minimum_age: 0
20
21
  redis:
21
22
  url: redis://localhost:6379
22
23
 
23
- solr:
24
- url: http://localhost:8983/solr/development
25
-
26
24
  fedora:
27
- url: http://localhost:8983/fedora/rest
25
+ url: http://localhost:8080/fcrepo/rest
28
26
  user: fedoraAdmin
29
27
  password: fedoraAdmin
30
28
  base_path: /dev
31
29
 
30
+ database:
31
+ encoding: utf8
32
+ url: postgresql://jupiter:mysecretpassword@127.0.0.1
33
+ database: jupiter_development
34
+ pool: 5
35
+
32
36
  #parameters project_name and project_domain_name are required only for keystone v3 authentication
33
37
  swift:
34
38
  tenant: tester
@@ -22,11 +22,15 @@ class PushmiPullyu::AIP::Creator
22
22
  private
23
23
 
24
24
  def bag_aip
25
- bag = BagIt::Bag.new(@aip_directory)
25
+ bag = BagIt::Bag.new(@aip_directory, bag_metadata)
26
26
  bag.manifest!
27
27
  raise BagInvalid unless bag.valid?
28
28
  end
29
29
 
30
+ def bag_metadata
31
+ { 'AIP-Version' => PushmiPullyu.options[:aip_version] }
32
+ end
33
+
30
34
  def tar_bag
31
35
  # We want to change the directory to the work directory path so we get the tar file to be exactly
32
36
  # the contents of the noid directory and not the entire work directory structure. For example the noid.tar
@@ -7,7 +7,18 @@ require 'rdf/n3'
7
7
  # related to an object
8
8
  class PushmiPullyu::AIP::Downloader
9
9
 
10
+ PREDICATE_URIS = {
11
+ filename: 'http://purl.org/dc/terms/title',
12
+ member_files: 'http://pcdm.org/models#hasFile',
13
+ member_file_sets: 'http://pcdm.org/models#hasMember',
14
+ original_file: 'http://pcdm.org/use#OriginalFile',
15
+ type: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
16
+ }.freeze
17
+
18
+ class NoFileSets < StandardError; end
19
+ class NoMemberFiles < StandardError; end
10
20
  class NoContentFilename < StandardError; end
21
+ class NoOriginalFile < StandardError; end
11
22
 
12
23
  def initialize(noid, aip_directory)
13
24
  @noid = noid
@@ -19,21 +30,45 @@ class PushmiPullyu::AIP::Downloader
19
30
 
20
31
  PushmiPullyu.logger.info("#{@noid}: Retreiving data from Fedora ...")
21
32
 
22
- [:main_object, :fixity, :content_datastream_metadata, :versions, :thumbnail,
23
- :characterization, :fedora3foxml, :fedora3foxml_metadata].each do |item|
24
- path_spec = aip_paths[item]
25
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(@noid))
26
- end
33
+ # Main object metadata
34
+ object_downloader = PushmiPullyu::AIP::FedoraFetcher.new(@noid)
35
+ download_and_log(object_aip_paths[:main_object], object_downloader)
27
36
 
28
- # Need content filename from metadata
29
- path_spec = OpenStruct.new(
30
- remote: '/content',
31
- local: content_filename, # lookup filename derived from metadata
32
- optional: false
33
- )
34
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(@noid))
37
+ # Construct the file ordering file
38
+ list_source_uri = object_downloader.object_url + object_aip_paths.list_source.remote
39
+ create_and_log_file_order_list(list_source_uri)
40
+
41
+ member_file_set_uuids.each do |file_set_uuid|
42
+ make_file_set_directories(file_set_uuid)
43
+
44
+ # FileSet metadata
45
+ file_set_downloader = PushmiPullyu::AIP::FedoraFetcher.new(file_set_uuid)
46
+ path_spec = file_set_aip_paths(file_set_uuid)[:main_object]
47
+ download_and_log(path_spec, file_set_downloader)
48
+
49
+ # Find the original file by looping through the files in the file_set
50
+ original_file_remote_base = nil
51
+ member_files(file_set_uuid).each do |file_path|
52
+ path_spec = OpenStruct.new(
53
+ remote: "/files/#{file_path}/fcr:metadata",
54
+ # Note: local file gets clobbered on each download until it finds the right one
55
+ local: "#{file_set_dirs(file_set_uuid).metadata}/original_file_metadata.n3",
56
+ optional: true
57
+ )
58
+ download_and_log(path_spec, file_set_downloader)
59
+ if original_file?(path_spec.local)
60
+ original_file_remote_base = "/files/#{file_path}"
61
+ break
62
+ end
63
+ end
35
64
 
36
- download_permissions
65
+ raise NoOriginalFile unless original_file_remote_base.present?
66
+
67
+ [:content, :fixity].each do |item|
68
+ path_spec = file_aip_paths(file_set_uuid, original_file_remote_base)[item]
69
+ download_and_log(path_spec, file_set_downloader)
70
+ end
71
+ end
37
72
  end
38
73
 
39
74
  private
@@ -43,35 +78,24 @@ class PushmiPullyu::AIP::Downloader
43
78
 
44
79
  log_fetching(fedora_fetcher.object_url(path_spec.remote), output_file)
45
80
 
46
- is_rdf = (output_file !~ /\.n3$/)
81
+ is_rdf = (output_file =~ /\.n3$/)
82
+ should_add_user_email = path_spec.to_h.fetch(:should_add_user_email, false)
47
83
 
48
84
  is_success = fedora_fetcher.download_object(output_file,
49
85
  url_extra: path_spec.remote,
50
86
  optional: path_spec.optional,
51
- is_rdf: is_rdf)
87
+ is_rdf: is_rdf,
88
+ should_add_user_email: should_add_user_email)
52
89
  log_saved(is_success, output_file)
53
90
  end
54
91
 
55
- def download_permissions
56
- PushmiPullyu.logger.info("#{@noid}: looking up permissions from Solr ...")
57
- results = PushmiPullyu::AIP::SolrFetcher.new(@noid).fetch_permission_object_ids
58
- if results.empty?
59
- PushmiPullyu.logger.info("#{@noid}: permissions not found")
60
- else
61
- results.each do |permission_id|
62
- PushmiPullyu.logger.info("#{@noid}: permission object #{permission_id} found")
63
- download_permission(permission_id)
64
- end
65
- end
66
- end
67
-
68
- def download_permission(permission_id)
69
- path_spec = OpenStruct.new(
70
- remote: nil,
71
- local: "#{aip_dirs.metadata}/permission_#{permission_id}.n3",
72
- optional: false
73
- )
74
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(permission_id))
92
+ def create_and_log_file_order_list(url)
93
+ output_file = object_aip_paths.file_ordering.local
94
+ PushmiPullyu::Logging.log_aip_activity(@aip_directory,
95
+ "#{@noid}: #{output_file} -- creating from #{url} ...")
96
+ PushmiPullyu::AIP::FileListCreator.new(url, output_file, member_file_set_uuids).run
97
+ PushmiPullyu::Logging.log_aip_activity(@aip_directory,
98
+ "#{@noid}: #{output_file} -- created")
75
99
  end
76
100
 
77
101
  ### Logging
@@ -92,8 +116,19 @@ class PushmiPullyu::AIP::Downloader
92
116
  @aip_dirs ||= OpenStruct.new(
93
117
  objects: "#{@aip_directory}/data/objects",
94
118
  metadata: "#{@aip_directory}/data/objects/metadata",
119
+ files: "#{@aip_directory}/data/objects/files",
120
+ files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
95
121
  logs: "#{@aip_directory}/data/logs",
96
- thumbnails: "#{@aip_directory}/data/thumbnails"
122
+ file_logs: "#{@aip_directory}/data/logs/files_logs"
123
+ )
124
+ end
125
+
126
+ def file_set_dirs(file_set_uuid)
127
+ @file_set_dirs ||= {}
128
+ @file_set_dirs[file_set_uuid] ||= OpenStruct.new(
129
+ metadata: "#{aip_dirs.files_metadata}/#{file_set_uuid}",
130
+ files: "#{aip_dirs.files}/#{file_set_uuid}",
131
+ logs: "#{aip_dirs.file_logs}/#{file_set_uuid}"
97
132
  )
98
133
  end
99
134
 
@@ -106,6 +141,14 @@ class PushmiPullyu::AIP::Downloader
106
141
  PushmiPullyu.logger.debug("#{@noid}: Creating directories done")
107
142
  end
108
143
 
144
+ def make_file_set_directories(file_set_uuid)
145
+ PushmiPullyu.logger.debug("#{@noid}: Creating file set #{file_set_uuid} directories ...")
146
+ file_set_dirs(file_set_uuid).to_h.each_value do |path|
147
+ FileUtils.mkdir_p(path)
148
+ end
149
+ PushmiPullyu.logger.debug("#{@noid}: Creating file set #{file_set_uuid} directories done")
150
+ end
151
+
109
152
  def clean_directories
110
153
  return unless File.exist?(@aip_directory)
111
154
  PushmiPullyu.logger.debug("#{@noid}: Nuking directories ...")
@@ -114,64 +157,105 @@ class PushmiPullyu::AIP::Downloader
114
157
 
115
158
  ### Files
116
159
 
117
- def aip_paths
118
- @aip_paths ||= OpenStruct.new(
160
+ def object_aip_paths
161
+ @object_aip_paths ||= OpenStruct.new(
119
162
  main_object: OpenStruct.new(
120
163
  remote: nil, # Base path
121
164
  local: "#{aip_dirs.metadata}/object_metadata.n3",
165
+ should_add_user_email: true,
122
166
  optional: false
123
167
  ),
124
- fixity: OpenStruct.new(
125
- remote: '/content/fcr:fixity',
126
- local: "#{aip_dirs.logs}/content_fixity_report.n3",
127
- optional: false
168
+ list_source: OpenStruct.new(
169
+ # This is downloaded, but not saved
170
+ remote: '/list_source'
128
171
  ),
129
- content_datastream_metadata: OpenStruct.new(
130
- remote: '/content/fcr:metadata',
131
- local: "#{aip_dirs.metadata}/content_fcr_metadata.n3",
132
- optional: false
133
- ),
134
- versions: OpenStruct.new(
135
- remote: '/content/fcr:versions',
136
- local: "#{aip_dirs.metadata}/content_versions.n3",
172
+ # This is constructed, not downloaded
173
+ file_ordering: OpenStruct.new(
174
+ local: "#{aip_dirs.files_metadata}/file_order.xml"
175
+ )
176
+ ).freeze
177
+ end
178
+
179
+ def file_set_aip_paths(file_set_uuid)
180
+ @file_set_aip_paths ||= {}
181
+ @file_set_aip_paths[file_set_uuid] ||= OpenStruct.new(
182
+ main_object: OpenStruct.new(
183
+ remote: nil, # Base file_set path
184
+ local: "#{file_set_dirs(file_set_uuid).metadata}/file_set_metadata.n3",
185
+ should_add_user_email: true,
137
186
  optional: false
138
- ),
187
+ )
188
+ ).freeze
189
+ end
139
190
 
140
- # Optional downloads
141
- thumbnail: OpenStruct.new(
142
- remote: '/thumbnail',
143
- local: "#{aip_dirs.thumbnails}/thumbnail",
144
- optional: true
145
- ),
146
- characterization: OpenStruct.new(
147
- remote: '/characterization',
148
- local: "#{aip_dirs.logs}/content_characterization.n3",
149
- optional: true
150
- ),
151
- fedora3foxml: OpenStruct.new(
152
- remote: '/fedora3foxml',
153
- local: "#{aip_dirs.metadata}/fedora3foxml.xml",
154
- optional: true
191
+ def file_aip_paths(file_set_uuid, original_file_remote_base)
192
+ @file_aip_paths ||= {}
193
+ @file_aip_paths[file_set_uuid] ||= OpenStruct.new(
194
+ content: OpenStruct.new(
195
+ remote: original_file_remote_base,
196
+ local: file_set_filename(file_set_uuid),
197
+ optional: false
155
198
  ),
156
- fedora3foxml_metadata: OpenStruct.new(
157
- remote: '/fedora3foxml/fcr:metadata',
158
- local: "#{aip_dirs.metadata}/fedora3foxml.n3",
159
- optional: true
199
+ fixity: OpenStruct.new(
200
+ remote: "#{original_file_remote_base}/fcr:fixity",
201
+ local: "#{file_set_dirs(file_set_uuid)[:logs]}/content_fixity_report.n3",
202
+ optional: false
160
203
  )
161
204
  ).freeze
162
205
  end
163
206
 
164
- # Extract filename from main object metadata
165
- def content_filename
166
- filename_predicate = RDF::URI('info:fedora/fedora-system:def/model#downloadFilename')
207
+ def member_file_set_uuids
208
+ @member_file_set_uuids ||= []
209
+ return @member_file_set_uuids unless @member_file_set_uuids.empty?
210
+
211
+ member_file_set_predicate = RDF::URI(PREDICATE_URIS[:member_file_sets])
212
+
213
+ graph = RDF::Graph.load(object_aip_paths.main_object.local)
214
+
215
+ graph.query(predicate: member_file_set_predicate) do |results|
216
+ # Get uuid from end of fedora path
217
+ @member_file_set_uuids << results.object.to_s.split('/').last
218
+ end
219
+ return @member_file_set_uuids unless @member_file_set_uuids.empty?
220
+
221
+ raise NoFileSets
222
+ end
223
+
224
+ def file_set_filename(file_set_uuid)
225
+ filename_predicate = RDF::URI(PREDICATE_URIS[:filename])
167
226
 
168
- graph = RDF::Graph.load(aip_paths.main_object.local)
227
+ graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
169
228
 
170
229
  graph.query(predicate: filename_predicate) do |results|
171
- return "#{aip_dirs.objects}/#{results.object}"
230
+ return "#{file_set_dirs(file_set_uuid).files}/#{results.object}"
172
231
  end
173
232
 
174
233
  raise NoContentFilename
175
234
  end
176
235
 
236
+ def member_files(file_set_uuid)
237
+ member_file_predicate = RDF::URI(PREDICATE_URIS[:member_files])
238
+
239
+ graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
240
+
241
+ member_files = []
242
+ graph.query(predicate: member_file_predicate) do |results|
243
+ # Get uuid from end of fedora path
244
+ member_files << results.object.to_s.split('/').last
245
+ end
246
+ return member_files if member_files.present?
247
+
248
+ raise NoMemberFiles
249
+ end
250
+
251
+ def original_file?(metadata_filename)
252
+ type_predicate = RDF::URI(PREDICATE_URIS[:type])
253
+ original_file_uri = RDF::URI(PREDICATE_URIS[:original_file])
254
+ graph = RDF::Graph.load(metadata_filename)
255
+ graph.query(predicate: type_predicate) do |results|
256
+ return true if results.object == original_file_uri
257
+ end
258
+ false
259
+ end
260
+
177
261
  end
@@ -19,7 +19,8 @@ class PushmiPullyu::AIP::FedoraFetcher
19
19
  # Return true on success, raise an error otherwise
20
20
  # (or use 'optional' to return false on 404)
21
21
  def download_object(download_path, url_extra: nil,
22
- optional: false, is_rdf: false)
22
+ optional: false, is_rdf: false,
23
+ should_add_user_email: false)
23
24
 
24
25
  uri = URI(object_url(url_extra))
25
26
 
@@ -34,8 +35,13 @@ class PushmiPullyu::AIP::FedoraFetcher
34
35
  end
35
36
 
36
37
  if response.is_a?(Net::HTTPSuccess)
38
+ body = if should_add_user_email
39
+ PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
40
+ else
41
+ response.body
42
+ end
37
43
  file = File.open(download_path, 'wb')
38
- file.write(response.body)
44
+ file.write(body)
39
45
  file.close
40
46
  return true
41
47
  elsif response.is_a?(Net::HTTPNotFound)
@@ -0,0 +1,115 @@
1
+ require 'rdf'
2
+ require 'rdf/n3'
3
+
4
+ class PushmiPullyu::AIP::FileListCreator
5
+
6
+ IANA = 'http://www.iana.org/assignments/relation/'.freeze
7
+ PREDICATES = {
8
+ proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
9
+ first: RDF::URI(IANA + 'first'),
10
+ last: RDF::URI(IANA + 'last'),
11
+ prev: RDF::URI(IANA + 'prev'),
12
+ next: RDF::URI(IANA + 'next'),
13
+ has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
14
+ }.freeze
15
+
16
+ class NoProxyURIFound < StandardError; end
17
+ class NoFirstProxyFound < StandardError; end
18
+ class FirstProxyHasPrev < StandardError; end
19
+ class ListSourceFileSetMismatch < StandardError; end
20
+
21
+ def initialize(list_source_uri, output_xml_file, file_set_uuids)
22
+ @uri = RDF::URI(list_source_uri)
23
+ @output_file = output_xml_file
24
+
25
+ # These are the known fileset uuids, used for validation
26
+ @file_set_uuids = file_set_uuids
27
+ end
28
+
29
+ def run
30
+ extract_list_source_uuids
31
+ raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort
32
+
33
+ write_output_file
34
+ end
35
+
36
+ def extract_list_source_uuids
37
+ # Note: raises IOError if can't find
38
+ # raises RDF::ReaderError if can't parse
39
+ @graph = RDF::Graph.load(@uri, validate: true)
40
+
41
+ @list_source_uuids = []
42
+
43
+ # Fetch first FileSet in list source
44
+ this_proxy = find_first_proxy
45
+
46
+ while @list_source_uuids.count <= num_proxies
47
+ @list_source_uuids << uuid_from_proxy(this_proxy)
48
+ next_proxy = find_next_proxy(this_proxy)
49
+
50
+ break if next_proxy.nil?
51
+
52
+ raise NextPreviousProxyMismatch if this_proxy != find_prev_proxy(next_proxy)
53
+ this_proxy = next_proxy
54
+ end
55
+
56
+ raise ProxyCountIncorrect if @list_source_uuids.count != num_proxies
57
+ raise LastProxyFailsValidation if this_proxy != find_last_proxy
58
+ end
59
+
60
+ def num_proxies
61
+ @num_proxies ||= @graph.query(subject: @uri, predicate: PREDICATES[:has_part]).count
62
+ end
63
+
64
+ def uuid_from_proxy(proxy_uri)
65
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:proxy_for]) do |statement|
66
+ return statement.object.to_s.split('/').last
67
+ end
68
+ raise NoProxyURIFound, proxy_uri.to_s
69
+ end
70
+
71
+ def find_first_proxy
72
+ @graph.query(subject: @uri, predicate: PREDICATES[:first]) do |statement|
73
+ first_uri = statement.object
74
+ # Validate that the first proxy doesn't have a previous one
75
+ raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)
76
+ return first_uri
77
+ end
78
+ raise NoFirstProxyFound, @uri.to_s
79
+ end
80
+
81
+ def find_last_proxy
82
+ @graph.query(subject: @uri, predicate: PREDICATES[:last]) do |statement|
83
+ last_uri = statement.object
84
+ # Validate that the last proxy doesn't have a next one
85
+ raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)
86
+ return last_uri
87
+ end
88
+ raise LastProxyFound, @uri.to_s
89
+ end
90
+
91
+ def find_next_proxy(proxy_uri)
92
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:next]) do |statement|
93
+ return statement.object
94
+ end
95
+ nil
96
+ end
97
+
98
+ def find_prev_proxy(proxy_uri)
99
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:prev]) do |statement|
100
+ return statement.object
101
+ end
102
+ nil
103
+ end
104
+
105
+ def write_output_file
106
+ File.open(@output_file, 'w') do |file|
107
+ file.write("<file_order>\n")
108
+ @list_source_uuids.each do |uuid|
109
+ file.write(" <uuid>#{uuid}</uuid>\n")
110
+ end
111
+ file.write("</file_order>\n")
112
+ end
113
+ end
114
+
115
+ end
@@ -0,0 +1,62 @@
1
+ require 'net/http'
2
+
3
+ class PushmiPullyu::AIP::OwnerEmailEditor
4
+
5
+ OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze
6
+
7
+ class NoOwnerPredicate < StandardError; end
8
+
9
+ def initialize(rdf_string)
10
+ @document = rdf_string
11
+ end
12
+
13
+ def run
14
+ ensure_database_connection
15
+
16
+ is_modified = false
17
+ prefixes = nil
18
+ # Read once to load prefixes (the @things at the top of an n3 file)
19
+ RDF::N3::Reader.new(input = @document) do |reader|
20
+ reader.each_statement { |_statement| }
21
+ prefixes = reader.prefixes
22
+ end
23
+ new_body = RDF::N3::Writer.buffer(prefixes: prefixes) do |writer|
24
+ RDF::N3::Reader.new(input = @document) do |reader|
25
+ reader.each_statement do |statement|
26
+ if statement.predicate == OWNER_PREDICATE
27
+ user = PushmiPullyu::AIP::User.find(statement.object.to_i)
28
+ writer << [statement.subject, statement.predicate, user.email]
29
+ is_modified = true
30
+ else
31
+ writer << statement
32
+ end
33
+ end
34
+ end
35
+ end
36
+ return new_body if is_modified
37
+ raise NoOwnerPredicate
38
+ end
39
+
40
+ private
41
+
42
+ def ensure_database_connection
43
+ return if ActiveRecord::Base.connected?
44
+ ActiveRecord::Base.establish_connection(database_configuration)
45
+ end
46
+
47
+ def database_configuration
48
+ # Config either from URL, or with more granular options (the latter taking precedence)
49
+ config = {}
50
+ uri = URI.parse(PushmiPullyu.options[:database][:url])
51
+ config[:adapter] = PushmiPullyu.options[:database][:adaptor] || uri.scheme
52
+ config[:host] = PushmiPullyu.options[:database][:host] || uri.host
53
+ config[:database] = PushmiPullyu.options[:database][:database] || uri.path.split('/')[1].to_s
54
+ config[:username] = PushmiPullyu.options[:database][:username] || uri.user
55
+ config[:password] = PushmiPullyu.options[:database][:password] || uri.password
56
+ params = CGI.parse(uri.query || '')
57
+ config[:encoding] = PushmiPullyu.options[:database][:encoding] || params['encoding'].to_a.first
58
+ config[:pool] = PushmiPullyu.options[:database][:pool] || params['pool'].to_a.first
59
+ config
60
+ end
61
+
62
+ end
@@ -0,0 +1,2 @@
1
+ class PushmiPullyu::AIP::User < ActiveRecord::Base
2
+ end
@@ -2,6 +2,7 @@ require 'fileutils'
2
2
 
3
3
  module PushmiPullyu::AIP
4
4
  class NoidInvalid < StandardError; end
5
+ module_function
5
6
 
6
7
  def create(noid)
7
8
  raise NoidInvalid if noid.blank? || noid.include?('/')
@@ -12,11 +13,9 @@ module PushmiPullyu::AIP
12
13
  PushmiPullyu::AIP::Downloader.new(noid, aip_directory).run
13
14
  PushmiPullyu::AIP::Creator.new(noid, aip_directory, aip_filename).run
14
15
 
15
- yield aip_filename
16
+ yield aip_filename, aip_directory
16
17
 
17
18
  FileUtils.rm_rf(aip_filename) if File.exist?(aip_filename)
18
19
  FileUtils.rm_rf(aip_directory) if File.exist?(aip_directory)
19
20
  end
20
-
21
- module_function :create
22
21
  end
@@ -41,6 +41,7 @@ class PushmiPullyu::CLI
41
41
  Rollbar.error(e)
42
42
  raise e
43
43
  end
44
+ # rubocop:enable Lint/RescueException
44
45
  end
45
46
 
46
47
  def start_server
@@ -73,12 +74,11 @@ class PushmiPullyu::CLI
73
74
  end
74
75
 
75
76
  def parse_config(config_file)
76
- opts = {}
77
77
  if File.exist?(config_file)
78
- opts = YAML.safe_load(ERB.new(IO.read(config_file)).result).deep_symbolize_keys || opts
78
+ YAML.safe_load(ERB.new(IO.read(config_file)).result).deep_symbolize_keys || {}
79
+ else
80
+ {}
79
81
  end
80
-
81
- opts
82
82
  end
83
83
 
84
84
  # Parse the options.
@@ -174,20 +174,20 @@ class PushmiPullyu::CLI
174
174
  Rollbar.scoped(noid: item) do
175
175
  begin
176
176
  # Download AIP from Fedora, bag and tar AIP directory and cleanup after block code
177
- PushmiPullyu::AIP.create(item) do |aip_filename|
177
+ PushmiPullyu::AIP.create(item) do |aip_filename, aip_directory|
178
178
  # Push tarred AIP to swift API
179
179
  deposited_file = swift.deposit_file(aip_filename, options[:swift][:container])
180
180
  # Log successful preservation event to the log files
181
- PushmiPullyu::Logging.log_preservation_event(deposited_file)
181
+ PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
182
182
  end
183
- # rubocop:disable RescueWithoutErrorClass
184
- rescue => e
183
+ # rubocop:disable Lint/RescueException
184
+ rescue Exception => e
185
185
  Rollbar.error(e)
186
186
  logger.error(e)
187
187
  # TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
188
188
  # move on to the next item.
189
189
  end
190
- # rubocop:enaable RescueWithoutErrorClass
190
+ # rubocop:enable Lint/RescueException
191
191
  end
192
192
  end
193
193
 
@@ -13,58 +13,96 @@ module PushmiPullyu::Logging
13
13
 
14
14
  end
15
15
 
16
- def self.initialize_logger(log_target = STDOUT)
17
- @logger = Logger.new(log_target)
18
- @logger.level = Logger::INFO
19
- @logger
16
+ def logger
17
+ PushmiPullyu::Logging.logger
20
18
  end
21
19
 
22
- def self.logger
23
- @logger ||= initialize_logger
24
- end
20
+ class << self
25
21
 
26
- def self.log_aip_activity(aip_directory, message)
27
- log_file = "#{aip_directory}/data/logs/aipcreation.log"
28
- aip_logger = Logger.new(log_file)
29
- aip_logger.level = logger.level
22
+ attr_writer :logger
30
23
 
31
- # Log to both the application log, and the log file that gets archived in the AIP
32
- logger.info(message)
33
- aip_logger.info(message)
24
+ def initialize_logger(log_target = STDOUT)
25
+ @logger = Logger.new(log_target)
26
+ @logger.level = Logger::INFO
27
+ @logger
28
+ end
34
29
 
35
- aip_logger.close
36
- end
30
+ def logger
31
+ @logger ||= initialize_logger
32
+ end
37
33
 
38
- def self.log_preservation_event(deposited_file)
39
- preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
34
+ def log_aip_activity(aip_directory, message)
35
+ log_file = "#{aip_directory}/data/logs/aipcreation.log"
36
+ aip_logger = Logger.new(log_file)
37
+ aip_logger.level = logger.level
40
38
 
41
- message = "#{deposited_file.name} was successfully deposited into Swift Storage! \n"\
42
- "Here are the details of this preservation event: \n"\
43
- "\t NOID: '#{deposited_file.name}' \n"\
44
- "\t Timestamp of Completion: '#{deposited_file.last_modified}' \n"\
45
- "\t AIP Checksum: '#{deposited_file.etag}' \n"\
46
- "\t Metadata: #{deposited_file.metadata} \n"
39
+ # Log to both the application log, and the log file that gets archived in the AIP
40
+ logger.info(message)
41
+ aip_logger.info(message)
47
42
 
48
- # Log to both the application log, and the preservation log file
49
- logger.info(message)
50
- preservation_logger.info(message)
43
+ aip_logger.close
44
+ end
51
45
 
52
- preservation_logger.close
53
- end
46
+ def log_preservation_event(deposited_file, aip_directory)
47
+ preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
54
48
 
55
- def self.logger=(log)
56
- @logger = log
57
- end
49
+ message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
50
+ "Here are the details of this preservation event:\n"\
51
+ "\tNOID: '#{deposited_file.name}'\n"\
52
+ "\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
53
+ "\tAIP Checksum: '#{deposited_file.etag}'\n"\
54
+ "\tMetadata: #{deposited_file.metadata}\n"\
55
+
56
+ file_details = file_log_details(aip_directory)
57
+
58
+ if file_details.present?
59
+ message << "\tFile Details:\n"
60
+ file_details.each do |file_detail|
61
+ message << %(\t\t{"fileset_uuid": "#{file_detail[:fileset_name]}",
62
+ \t\t"details": {
63
+ \t\t\t"file_name": "#{file_detail[:file_name]}",
64
+ \t\t\t"file_type": "#{file_detail[:file_extension]}",
65
+ \t\t\t"file_size": #{file_detail[:file_size]}
66
+ \t\t}}\n)
67
+ end
68
+ end
58
69
 
59
- def self.reopen
60
- if @logger
61
- @logger.reopen
62
- else
63
- @logger = initialize_logger
70
+ # Log to both the application log, and the preservation log file
71
+ logger.info(message)
72
+ preservation_logger.info(message)
73
+
74
+ preservation_logger.close
75
+ end
76
+
77
+ def reopen
78
+ if @logger
79
+ @logger.reopen
80
+ else
81
+ @logger = initialize_logger
82
+ end
83
+ end
84
+
85
+ private
86
+
87
+ def file_log_details(aip_directory)
88
+ file_details = []
89
+ data_files_location = "#{aip_directory}/data/objects/files"
90
+
91
+ if Dir.exist?(data_files_location)
92
+ Dir.glob("#{data_files_location}/*") do |folder|
93
+ Dir.glob("#{folder}/*") do |file|
94
+ file_details << {
95
+ fileset_name: File.dirname(file).split('/')[-1],
96
+ file_name: File.basename(file),
97
+ file_size: File.size(file),
98
+ file_extension: File.extname(file).strip.downcase[1..-1]
99
+ }
100
+ end
101
+ end
102
+ end
103
+
104
+ file_details
64
105
  end
65
- end
66
106
 
67
- def logger
68
- PushmiPullyu::Logging.logger
69
107
  end
70
108
  end
@@ -1,3 +1,3 @@
1
1
  module PushmiPullyu
2
- VERSION = '0.2.7'.freeze
2
+ VERSION = '1.0.1'.freeze
3
3
  end
data/lib/pushmi_pullyu.rb CHANGED
@@ -8,8 +8,11 @@ require 'pushmi_pullyu/logging'
8
8
  require 'pushmi_pullyu/aip'
9
9
  require 'pushmi_pullyu/aip/creator'
10
10
  require 'pushmi_pullyu/aip/downloader'
11
- require 'pushmi_pullyu/aip/solr_fetcher'
12
11
  require 'pushmi_pullyu/aip/fedora_fetcher'
12
+ require 'pushmi_pullyu/aip/file_list_creator'
13
+ require 'pushmi_pullyu/aip/owner_email_editor'
14
+ require 'active_record'
15
+ require 'pushmi_pullyu/aip/user'
13
16
  require 'pushmi_pullyu/cli'
14
17
  require 'pushmi_pullyu/preservation_queue'
15
18
  require 'pushmi_pullyu/swift_depositer'
@@ -20,6 +23,7 @@ require 'active_support/core_ext'
20
23
  # PushmiPullyu main module
21
24
  module PushmiPullyu
22
25
  DEFAULTS = {
26
+ aip_version: 'lightaip-2.0',
23
27
  daemonize: false,
24
28
  debug: false,
25
29
  logdir: 'log',
@@ -32,12 +36,8 @@ module PushmiPullyu
32
36
  redis: {
33
37
  url: 'redis://localhost:6379'
34
38
  },
35
- # TODO: rest of these are examples for solr/fedora/swift... feel free to fill them in correctly
36
- solr: {
37
- url: 'http://localhost:8983/solr/development'
38
- },
39
39
  fedora: {
40
- url: 'http://localhost:8983/fedora/rest',
40
+ url: 'http://localhost:8080/fcrepo/rest',
41
41
  user: 'fedoraAdmin',
42
42
  password: 'fedoraAdmin',
43
43
  base_path: '/dev'
@@ -52,6 +52,12 @@ module PushmiPullyu
52
52
  container: 'ERA'
53
53
  },
54
54
  rollbar: {
55
+ },
56
+ database: {
57
+ encoding: 'utf8',
58
+ pool: ENV['RAILS_MAX_THREADS'] || 5,
59
+ url: ENV['DATABASE_URL'] || ENV['JUPITER_DATABASE_URL'] || 'postgresql://jupiter:mysecretpassword@127.0.0.1',
60
+ database: 'jupiter_development'
55
61
  }
56
62
  }.freeze
57
63
 
@@ -1,4 +1,4 @@
1
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'pushmi_pullyu/version'
4
4
 
@@ -21,15 +21,17 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.required_ruby_version = '>= 2.3.1'
23
23
 
24
+ spec.add_runtime_dependency 'activerecord', '>= 5.1.5', '< 5.3.0'
24
25
  spec.add_runtime_dependency 'activesupport', '~> 5.0'
25
26
  spec.add_runtime_dependency 'bagit', '~> 0.4'
26
27
  spec.add_runtime_dependency 'connection_pool', '~> 2.2'
27
28
  spec.add_runtime_dependency 'daemons', '~> 1.2', '>= 1.2.4'
28
29
  spec.add_runtime_dependency 'minitar', '~> 0.6'
29
30
  spec.add_runtime_dependency 'openstack', '~> 3.3', '>= 3.3.10'
30
- spec.add_runtime_dependency 'rdf', '~> 1.99'
31
- spec.add_runtime_dependency 'rdf-n3', '~> 1.99'
32
- spec.add_runtime_dependency 'redis', '~> 3.3'
31
+ spec.add_runtime_dependency 'pg', '~> 1.0.0'
32
+ spec.add_runtime_dependency 'rdf', '>= 1.99', '< 4.0'
33
+ spec.add_runtime_dependency 'rdf-n3', '>= 1.99', '< 4.0'
34
+ spec.add_runtime_dependency 'redis', '>= 3.3', '< 5.0'
33
35
  spec.add_runtime_dependency 'rollbar', '~> 2.14'
34
36
 
35
37
  spec.add_development_dependency 'bundler', '~> 1.14'
@@ -38,9 +40,9 @@ Gem::Specification.new do |spec|
38
40
  spec.add_development_dependency 'pry', '~> 0.10', '>= 0.10.4'
39
41
  spec.add_development_dependency 'rake', '~> 12.0'
40
42
  spec.add_development_dependency 'rspec', '~> 3.0'
41
- spec.add_development_dependency 'rubocop', '~> 0.45'
43
+ spec.add_development_dependency 'rubocop', '~> 0.51'
42
44
  spec.add_development_dependency 'rubocop-rspec', '~> 1.10'
43
45
  spec.add_development_dependency 'timecop', '~> 0.8'
44
- spec.add_development_dependency 'vcr', '~> 3.0'
45
- spec.add_development_dependency 'webmock', '~> 2.1'
46
+ spec.add_development_dependency 'vcr', '~> 4.0'
47
+ spec.add_development_dependency 'webmock', '~> 3.3'
46
48
  end
metadata CHANGED
@@ -1,15 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pushmi_pullyu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Murnaghan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 5.1.5
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 5.3.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 5.1.5
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 5.3.0
13
33
  - !ruby/object:Gem::Dependency
14
34
  name: activesupport
15
35
  requirement: !ruby/object:Gem::Requirement
@@ -107,47 +127,79 @@ dependencies:
107
127
  - !ruby/object:Gem::Version
108
128
  version: 3.3.10
109
129
  - !ruby/object:Gem::Dependency
110
- name: rdf
130
+ name: pg
111
131
  requirement: !ruby/object:Gem::Requirement
112
132
  requirements:
113
133
  - - "~>"
114
134
  - !ruby/object:Gem::Version
115
- version: '1.99'
135
+ version: 1.0.0
116
136
  type: :runtime
117
137
  prerelease: false
118
138
  version_requirements: !ruby/object:Gem::Requirement
119
139
  requirements:
120
140
  - - "~>"
141
+ - !ruby/object:Gem::Version
142
+ version: 1.0.0
143
+ - !ruby/object:Gem::Dependency
144
+ name: rdf
145
+ requirement: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '1.99'
150
+ - - "<"
151
+ - !ruby/object:Gem::Version
152
+ version: '4.0'
153
+ type: :runtime
154
+ prerelease: false
155
+ version_requirements: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
121
158
  - !ruby/object:Gem::Version
122
159
  version: '1.99'
160
+ - - "<"
161
+ - !ruby/object:Gem::Version
162
+ version: '4.0'
123
163
  - !ruby/object:Gem::Dependency
124
164
  name: rdf-n3
125
165
  requirement: !ruby/object:Gem::Requirement
126
166
  requirements:
127
- - - "~>"
167
+ - - ">="
128
168
  - !ruby/object:Gem::Version
129
169
  version: '1.99'
170
+ - - "<"
171
+ - !ruby/object:Gem::Version
172
+ version: '4.0'
130
173
  type: :runtime
131
174
  prerelease: false
132
175
  version_requirements: !ruby/object:Gem::Requirement
133
176
  requirements:
134
- - - "~>"
177
+ - - ">="
135
178
  - !ruby/object:Gem::Version
136
179
  version: '1.99'
180
+ - - "<"
181
+ - !ruby/object:Gem::Version
182
+ version: '4.0'
137
183
  - !ruby/object:Gem::Dependency
138
184
  name: redis
139
185
  requirement: !ruby/object:Gem::Requirement
140
186
  requirements:
141
- - - "~>"
187
+ - - ">="
142
188
  - !ruby/object:Gem::Version
143
189
  version: '3.3'
190
+ - - "<"
191
+ - !ruby/object:Gem::Version
192
+ version: '5.0'
144
193
  type: :runtime
145
194
  prerelease: false
146
195
  version_requirements: !ruby/object:Gem::Requirement
147
196
  requirements:
148
- - - "~>"
197
+ - - ">="
149
198
  - !ruby/object:Gem::Version
150
199
  version: '3.3'
200
+ - - "<"
201
+ - !ruby/object:Gem::Version
202
+ version: '5.0'
151
203
  - !ruby/object:Gem::Dependency
152
204
  name: rollbar
153
205
  requirement: !ruby/object:Gem::Requirement
@@ -258,14 +310,14 @@ dependencies:
258
310
  requirements:
259
311
  - - "~>"
260
312
  - !ruby/object:Gem::Version
261
- version: '0.45'
313
+ version: '0.51'
262
314
  type: :development
263
315
  prerelease: false
264
316
  version_requirements: !ruby/object:Gem::Requirement
265
317
  requirements:
266
318
  - - "~>"
267
319
  - !ruby/object:Gem::Version
268
- version: '0.45'
320
+ version: '0.51'
269
321
  - !ruby/object:Gem::Dependency
270
322
  name: rubocop-rspec
271
323
  requirement: !ruby/object:Gem::Requirement
@@ -300,28 +352,28 @@ dependencies:
300
352
  requirements:
301
353
  - - "~>"
302
354
  - !ruby/object:Gem::Version
303
- version: '3.0'
355
+ version: '4.0'
304
356
  type: :development
305
357
  prerelease: false
306
358
  version_requirements: !ruby/object:Gem::Requirement
307
359
  requirements:
308
360
  - - "~>"
309
361
  - !ruby/object:Gem::Version
310
- version: '3.0'
362
+ version: '4.0'
311
363
  - !ruby/object:Gem::Dependency
312
364
  name: webmock
313
365
  requirement: !ruby/object:Gem::Requirement
314
366
  requirements:
315
367
  - - "~>"
316
368
  - !ruby/object:Gem::Version
317
- version: '2.1'
369
+ version: '3.3'
318
370
  type: :development
319
371
  prerelease: false
320
372
  version_requirements: !ruby/object:Gem::Requirement
321
373
  requirements:
322
374
  - - "~>"
323
375
  - !ruby/object:Gem::Version
324
- version: '2.1'
376
+ version: '3.3'
325
377
  description:
326
378
  email:
327
379
  - murnagha@ualberta.ca
@@ -354,7 +406,9 @@ files:
354
406
  - lib/pushmi_pullyu/aip/creator.rb
355
407
  - lib/pushmi_pullyu/aip/downloader.rb
356
408
  - lib/pushmi_pullyu/aip/fedora_fetcher.rb
357
- - lib/pushmi_pullyu/aip/solr_fetcher.rb
409
+ - lib/pushmi_pullyu/aip/file_list_creator.rb
410
+ - lib/pushmi_pullyu/aip/owner_email_editor.rb
411
+ - lib/pushmi_pullyu/aip/user.rb
358
412
  - lib/pushmi_pullyu/cli.rb
359
413
  - lib/pushmi_pullyu/logging.rb
360
414
  - lib/pushmi_pullyu/preservation_queue.rb
@@ -384,7 +438,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
384
438
  version: '0'
385
439
  requirements: []
386
440
  rubyforge_project:
387
- rubygems_version: 2.4.5.1
441
+ rubygems_version: 2.7.6
388
442
  signing_key:
389
443
  specification_version: 4
390
444
  summary: Ruby application to manage flow of content from Fedora into Swift for preservation
@@ -1,33 +0,0 @@
1
- require 'json'
2
- require 'net/http'
3
-
4
- class PushmiPullyu::AIP::SolrFetcher
5
-
6
- class SolrFetchError < StandardError; end
7
-
8
- def initialize(noid)
9
- @noid = noid
10
- end
11
-
12
- def fetch_permission_object_ids
13
- hash = JSON.parse(run_query_json)
14
-
15
- return [] if hash['response']['docs'].empty?
16
-
17
- hash['response']['docs'].map { |hit| hit['id'] }
18
- end
19
-
20
- private
21
-
22
- # Return fetched results, else raise an error
23
- def run_query_json
24
- response = Net::HTTP.get_response(
25
- URI("#{PushmiPullyu.options[:solr][:url]}/select?q=accessTo_ssim:#{@noid}&fl=id&wt=json")
26
- )
27
-
28
- return response.body if response.is_a?(Net::HTTPSuccess)
29
-
30
- raise SolrFetchError
31
- end
32
-
33
- end