pushmi_pullyu 0.2.7 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 257884fa2eaea476a4765f03063c07b19927cd61
4
- data.tar.gz: 858b6ae6809fca4f0b291e9a7e868686de8d5b36
2
+ SHA256:
3
+ metadata.gz: 3c97d2575b16539392c9b268d62bf037ae289fc4989dfa976ca13796f01dec01
4
+ data.tar.gz: 2b277188e5873035ebc1c9a2bebdcf7e4837016091d2bc7b4236be90a5f74cef
5
5
  SHA512:
6
- metadata.gz: 17108f728721e24db410f4957fa10401252e458dc98663774a928655cb6ad1e71b10ecc6dcfef62484cbc164ea32e6317310e6775920fa278f8dbf5e8609f1a0
7
- data.tar.gz: 1601222c2a1769f1a5174afea65e01952fe3cea9ad1f162e102a16e2586c5e77801c8b6e742b8f3106efd3735db7179162916810092f1255d7169e6f4efe2091
6
+ metadata.gz: 36131cfed011b68a8f006d2c25968d25717919d4eb9b8016d58e65887fbe933ea61243c649bfc66b385353c527ab81ec0c78b14d226d45379f51bfd237ef5eaa
7
+ data.tar.gz: b39f6b537f3e4617efb7c778908788b038bdd7aea9f2198c34ab5276ec5ca33ddd3a90694ee548868a3b7739aecfdb50d08448477bde8c3a6beee8092d9fcf47
data/.rubocop.yml CHANGED
@@ -59,7 +59,7 @@ Style/ClassAndModuleChildren:
59
59
  Style/Documentation:
60
60
  Enabled: false
61
61
 
62
- Style/FileName:
62
+ Naming/FileName:
63
63
  Exclude:
64
64
  - Dangerfile
65
65
  - Rakefile
data/README.md CHANGED
@@ -17,11 +17,11 @@ Its primary job is to manage the flow of content from Fedora into Swift for pres
17
17
 
18
18
  ## Workflow
19
19
 
20
- 1. Any save (create or update) on a GenericFile in ERA will trigger an after save callback that will push the GenericFile unique identifier (NOID) into a Queue.
21
- 2. The queue (Redis) is setup to be a unique set (which only allows one GenericFile NOID to be included in the queue at a single time), and ordered by priority from First In, First out (FIFO).
20
+ 1. Any save (create or update) on an Item/Thesis in ERA/Jupiter will trigger an after save callback that will push the item's unique identifier (UUID or NOID) into a Queue.
21
+ 2. The queue (Redis) is set up to be a unique set (which only allows one item's UUID to be included in the queue at a single time), and ordered by priority from First In, First Out (FIFO).
22
22
  3. PushmiPullyu will then monitor the queue. After a certain wait period has passed since an element has been on the queue, PushmiPullyu will then retrieve the elements off the queue and begin to process the preservation event.
23
- 4. All the GenericFile information and data required for preservation are retrieved from Fedora and Solr using multiple REST calls.
24
- 5. An Archival Information Package (AIP) is created from the GenericFile's information. It is then bagged and tarred.
23
+ 4. All the GenericFile information and data required for preservation are retrieved from Fedora using multiple REST calls. A database connection to the user database fetches (via ActiveRecord) owner emails and modifies the fetched documents, where applicable.
24
+ 5. An Archival Information Package (AIP) is created from the item's information. It is then bagged and tarred.
25
25
  6. The AIP tar is then uploaded to Swift via a REST call.
26
26
  7. On a successful Swift upload, an entry is added for this preservation event to the preservation event logs.
27
27
 
@@ -8,6 +8,7 @@
8
8
  # PushmiPullyu will run this file through ERB when reading it so you can
9
9
  # even put in dynamic logic, like consuming ENV Variables.
10
10
 
11
+ aip_version: 'lightaip-2.0'
11
12
  debug: false
12
13
  logdir: log
13
14
  monitor: false
@@ -20,15 +21,18 @@ minimum_age: 0
20
21
  redis:
21
22
  url: redis://localhost:6379
22
23
 
23
- solr:
24
- url: http://localhost:8983/solr/development
25
-
26
24
  fedora:
27
- url: http://localhost:8983/fedora/rest
25
+ url: http://localhost:8080/fcrepo/rest
28
26
  user: fedoraAdmin
29
27
  password: fedoraAdmin
30
28
  base_path: /dev
31
29
 
30
+ database:
31
+ encoding: utf8
32
+ url: postgresql://jupiter:mysecretpassword@127.0.0.1
33
+ database: jupiter_development
34
+ pool: 5
35
+
32
36
  #parameters project_name and project_domain_name are required only for keystone v3 authentication
33
37
  swift:
34
38
  tenant: tester
@@ -22,11 +22,15 @@ class PushmiPullyu::AIP::Creator
22
22
  private
23
23
 
24
24
  def bag_aip
25
- bag = BagIt::Bag.new(@aip_directory)
25
+ bag = BagIt::Bag.new(@aip_directory, bag_metadata)
26
26
  bag.manifest!
27
27
  raise BagInvalid unless bag.valid?
28
28
  end
29
29
 
30
+ def bag_metadata
31
+ { 'AIP-Version' => PushmiPullyu.options[:aip_version] }
32
+ end
33
+
30
34
  def tar_bag
31
35
  # We want to change the directory to the work directory path so we get the tar file to be exactly
32
36
  # the contents of the noid directory and not the entire work directory structure. For example the noid.tar
@@ -7,7 +7,18 @@ require 'rdf/n3'
7
7
  # related to an object
8
8
  class PushmiPullyu::AIP::Downloader
9
9
 
10
+ PREDICATE_URIS = {
11
+ filename: 'http://purl.org/dc/terms/title',
12
+ member_files: 'http://pcdm.org/models#hasFile',
13
+ member_file_sets: 'http://pcdm.org/models#hasMember',
14
+ original_file: 'http://pcdm.org/use#OriginalFile',
15
+ type: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
16
+ }.freeze
17
+
18
+ class NoFileSets < StandardError; end
19
+ class NoMemberFiles < StandardError; end
10
20
  class NoContentFilename < StandardError; end
21
+ class NoOriginalFile < StandardError; end
11
22
 
12
23
  def initialize(noid, aip_directory)
13
24
  @noid = noid
@@ -19,21 +30,45 @@ class PushmiPullyu::AIP::Downloader
19
30
 
20
31
  PushmiPullyu.logger.info("#{@noid}: Retreiving data from Fedora ...")
21
32
 
22
- [:main_object, :fixity, :content_datastream_metadata, :versions, :thumbnail,
23
- :characterization, :fedora3foxml, :fedora3foxml_metadata].each do |item|
24
- path_spec = aip_paths[item]
25
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(@noid))
26
- end
33
+ # Main object metadata
34
+ object_downloader = PushmiPullyu::AIP::FedoraFetcher.new(@noid)
35
+ download_and_log(object_aip_paths[:main_object], object_downloader)
27
36
 
28
- # Need content filename from metadata
29
- path_spec = OpenStruct.new(
30
- remote: '/content',
31
- local: content_filename, # lookup filename derived from metadata
32
- optional: false
33
- )
34
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(@noid))
37
+ # Construct the file ordering file
38
+ list_source_uri = object_downloader.object_url + object_aip_paths.list_source.remote
39
+ create_and_log_file_order_list(list_source_uri)
40
+
41
+ member_file_set_uuids.each do |file_set_uuid|
42
+ make_file_set_directories(file_set_uuid)
43
+
44
+ # FileSet metadata
45
+ file_set_downloader = PushmiPullyu::AIP::FedoraFetcher.new(file_set_uuid)
46
+ path_spec = file_set_aip_paths(file_set_uuid)[:main_object]
47
+ download_and_log(path_spec, file_set_downloader)
48
+
49
+ # Find the original file by looping through the files in the file_set
50
+ original_file_remote_base = nil
51
+ member_files(file_set_uuid).each do |file_path|
52
+ path_spec = OpenStruct.new(
53
+ remote: "/files/#{file_path}/fcr:metadata",
54
+ # Note: local file gets clobbered on each download until it finds the right one
55
+ local: "#{file_set_dirs(file_set_uuid).metadata}/original_file_metadata.n3",
56
+ optional: true
57
+ )
58
+ download_and_log(path_spec, file_set_downloader)
59
+ if original_file?(path_spec.local)
60
+ original_file_remote_base = "/files/#{file_path}"
61
+ break
62
+ end
63
+ end
35
64
 
36
- download_permissions
65
+ raise NoOriginalFile unless original_file_remote_base.present?
66
+
67
+ [:content, :fixity].each do |item|
68
+ path_spec = file_aip_paths(file_set_uuid, original_file_remote_base)[item]
69
+ download_and_log(path_spec, file_set_downloader)
70
+ end
71
+ end
37
72
  end
38
73
 
39
74
  private
@@ -43,35 +78,24 @@ class PushmiPullyu::AIP::Downloader
43
78
 
44
79
  log_fetching(fedora_fetcher.object_url(path_spec.remote), output_file)
45
80
 
46
- is_rdf = (output_file !~ /\.n3$/)
81
+ is_rdf = (output_file =~ /\.n3$/)
82
+ should_add_user_email = path_spec.to_h.fetch(:should_add_user_email, false)
47
83
 
48
84
  is_success = fedora_fetcher.download_object(output_file,
49
85
  url_extra: path_spec.remote,
50
86
  optional: path_spec.optional,
51
- is_rdf: is_rdf)
87
+ is_rdf: is_rdf,
88
+ should_add_user_email: should_add_user_email)
52
89
  log_saved(is_success, output_file)
53
90
  end
54
91
 
55
- def download_permissions
56
- PushmiPullyu.logger.info("#{@noid}: looking up permissions from Solr ...")
57
- results = PushmiPullyu::AIP::SolrFetcher.new(@noid).fetch_permission_object_ids
58
- if results.empty?
59
- PushmiPullyu.logger.info("#{@noid}: permissions not found")
60
- else
61
- results.each do |permission_id|
62
- PushmiPullyu.logger.info("#{@noid}: permission object #{permission_id} found")
63
- download_permission(permission_id)
64
- end
65
- end
66
- end
67
-
68
- def download_permission(permission_id)
69
- path_spec = OpenStruct.new(
70
- remote: nil,
71
- local: "#{aip_dirs.metadata}/permission_#{permission_id}.n3",
72
- optional: false
73
- )
74
- download_and_log(path_spec, PushmiPullyu::AIP::FedoraFetcher.new(permission_id))
92
+ def create_and_log_file_order_list(url)
93
+ output_file = object_aip_paths.file_ordering.local
94
+ PushmiPullyu::Logging.log_aip_activity(@aip_directory,
95
+ "#{@noid}: #{output_file} -- creating from #{url} ...")
96
+ PushmiPullyu::AIP::FileListCreator.new(url, output_file, member_file_set_uuids).run
97
+ PushmiPullyu::Logging.log_aip_activity(@aip_directory,
98
+ "#{@noid}: #{output_file} -- created")
75
99
  end
76
100
 
77
101
  ### Logging
@@ -92,8 +116,19 @@ class PushmiPullyu::AIP::Downloader
92
116
  @aip_dirs ||= OpenStruct.new(
93
117
  objects: "#{@aip_directory}/data/objects",
94
118
  metadata: "#{@aip_directory}/data/objects/metadata",
119
+ files: "#{@aip_directory}/data/objects/files",
120
+ files_metadata: "#{@aip_directory}/data/objects/metadata/files_metadata",
95
121
  logs: "#{@aip_directory}/data/logs",
96
- thumbnails: "#{@aip_directory}/data/thumbnails"
122
+ file_logs: "#{@aip_directory}/data/logs/files_logs"
123
+ )
124
+ end
125
+
126
+ def file_set_dirs(file_set_uuid)
127
+ @file_set_dirs ||= {}
128
+ @file_set_dirs[file_set_uuid] ||= OpenStruct.new(
129
+ metadata: "#{aip_dirs.files_metadata}/#{file_set_uuid}",
130
+ files: "#{aip_dirs.files}/#{file_set_uuid}",
131
+ logs: "#{aip_dirs.file_logs}/#{file_set_uuid}"
97
132
  )
98
133
  end
99
134
 
@@ -106,6 +141,14 @@ class PushmiPullyu::AIP::Downloader
106
141
  PushmiPullyu.logger.debug("#{@noid}: Creating directories done")
107
142
  end
108
143
 
144
+ def make_file_set_directories(file_set_uuid)
145
+ PushmiPullyu.logger.debug("#{@noid}: Creating file set #{file_set_uuid} directories ...")
146
+ file_set_dirs(file_set_uuid).to_h.each_value do |path|
147
+ FileUtils.mkdir_p(path)
148
+ end
149
+ PushmiPullyu.logger.debug("#{@noid}: Creating file set #{file_set_uuid} directories done")
150
+ end
151
+
109
152
  def clean_directories
110
153
  return unless File.exist?(@aip_directory)
111
154
  PushmiPullyu.logger.debug("#{@noid}: Nuking directories ...")
@@ -114,64 +157,105 @@ class PushmiPullyu::AIP::Downloader
114
157
 
115
158
  ### Files
116
159
 
117
- def aip_paths
118
- @aip_paths ||= OpenStruct.new(
160
+ def object_aip_paths
161
+ @object_aip_paths ||= OpenStruct.new(
119
162
  main_object: OpenStruct.new(
120
163
  remote: nil, # Base path
121
164
  local: "#{aip_dirs.metadata}/object_metadata.n3",
165
+ should_add_user_email: true,
122
166
  optional: false
123
167
  ),
124
- fixity: OpenStruct.new(
125
- remote: '/content/fcr:fixity',
126
- local: "#{aip_dirs.logs}/content_fixity_report.n3",
127
- optional: false
168
+ list_source: OpenStruct.new(
169
+ # This is downloaded, but not saved
170
+ remote: '/list_source'
128
171
  ),
129
- content_datastream_metadata: OpenStruct.new(
130
- remote: '/content/fcr:metadata',
131
- local: "#{aip_dirs.metadata}/content_fcr_metadata.n3",
132
- optional: false
133
- ),
134
- versions: OpenStruct.new(
135
- remote: '/content/fcr:versions',
136
- local: "#{aip_dirs.metadata}/content_versions.n3",
172
+ # This is constructed, not downloaded
173
+ file_ordering: OpenStruct.new(
174
+ local: "#{aip_dirs.files_metadata}/file_order.xml"
175
+ )
176
+ ).freeze
177
+ end
178
+
179
+ def file_set_aip_paths(file_set_uuid)
180
+ @file_set_aip_paths ||= {}
181
+ @file_set_aip_paths[file_set_uuid] ||= OpenStruct.new(
182
+ main_object: OpenStruct.new(
183
+ remote: nil, # Base file_set path
184
+ local: "#{file_set_dirs(file_set_uuid).metadata}/file_set_metadata.n3",
185
+ should_add_user_email: true,
137
186
  optional: false
138
- ),
187
+ )
188
+ ).freeze
189
+ end
139
190
 
140
- # Optional downloads
141
- thumbnail: OpenStruct.new(
142
- remote: '/thumbnail',
143
- local: "#{aip_dirs.thumbnails}/thumbnail",
144
- optional: true
145
- ),
146
- characterization: OpenStruct.new(
147
- remote: '/characterization',
148
- local: "#{aip_dirs.logs}/content_characterization.n3",
149
- optional: true
150
- ),
151
- fedora3foxml: OpenStruct.new(
152
- remote: '/fedora3foxml',
153
- local: "#{aip_dirs.metadata}/fedora3foxml.xml",
154
- optional: true
191
+ def file_aip_paths(file_set_uuid, original_file_remote_base)
192
+ @file_aip_paths ||= {}
193
+ @file_aip_paths[file_set_uuid] ||= OpenStruct.new(
194
+ content: OpenStruct.new(
195
+ remote: original_file_remote_base,
196
+ local: file_set_filename(file_set_uuid),
197
+ optional: false
155
198
  ),
156
- fedora3foxml_metadata: OpenStruct.new(
157
- remote: '/fedora3foxml/fcr:metadata',
158
- local: "#{aip_dirs.metadata}/fedora3foxml.n3",
159
- optional: true
199
+ fixity: OpenStruct.new(
200
+ remote: "#{original_file_remote_base}/fcr:fixity",
201
+ local: "#{file_set_dirs(file_set_uuid)[:logs]}/content_fixity_report.n3",
202
+ optional: false
160
203
  )
161
204
  ).freeze
162
205
  end
163
206
 
164
- # Extract filename from main object metadata
165
- def content_filename
166
- filename_predicate = RDF::URI('info:fedora/fedora-system:def/model#downloadFilename')
207
+ def member_file_set_uuids
208
+ @member_file_set_uuids ||= []
209
+ return @member_file_set_uuids unless @member_file_set_uuids.empty?
210
+
211
+ member_file_set_predicate = RDF::URI(PREDICATE_URIS[:member_file_sets])
212
+
213
+ graph = RDF::Graph.load(object_aip_paths.main_object.local)
214
+
215
+ graph.query(predicate: member_file_set_predicate) do |results|
216
+ # Get uuid from end of fedora path
217
+ @member_file_set_uuids << results.object.to_s.split('/').last
218
+ end
219
+ return @member_file_set_uuids unless @member_file_set_uuids.empty?
220
+
221
+ raise NoFileSets
222
+ end
223
+
224
+ def file_set_filename(file_set_uuid)
225
+ filename_predicate = RDF::URI(PREDICATE_URIS[:filename])
167
226
 
168
- graph = RDF::Graph.load(aip_paths.main_object.local)
227
+ graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
169
228
 
170
229
  graph.query(predicate: filename_predicate) do |results|
171
- return "#{aip_dirs.objects}/#{results.object}"
230
+ return "#{file_set_dirs(file_set_uuid).files}/#{results.object}"
172
231
  end
173
232
 
174
233
  raise NoContentFilename
175
234
  end
176
235
 
236
+ def member_files(file_set_uuid)
237
+ member_file_predicate = RDF::URI(PREDICATE_URIS[:member_files])
238
+
239
+ graph = RDF::Graph.load(file_set_aip_paths(file_set_uuid).main_object.local)
240
+
241
+ member_files = []
242
+ graph.query(predicate: member_file_predicate) do |results|
243
+ # Get uuid from end of fedora path
244
+ member_files << results.object.to_s.split('/').last
245
+ end
246
+ return member_files if member_files.present?
247
+
248
+ raise NoMemberFiles
249
+ end
250
+
251
+ def original_file?(metadata_filename)
252
+ type_predicate = RDF::URI(PREDICATE_URIS[:type])
253
+ original_file_uri = RDF::URI(PREDICATE_URIS[:original_file])
254
+ graph = RDF::Graph.load(metadata_filename)
255
+ graph.query(predicate: type_predicate) do |results|
256
+ return true if results.object == original_file_uri
257
+ end
258
+ false
259
+ end
260
+
177
261
  end
@@ -19,7 +19,8 @@ class PushmiPullyu::AIP::FedoraFetcher
19
19
  # Return true on success, raise an error otherwise
20
20
  # (or use 'optional' to return false on 404)
21
21
  def download_object(download_path, url_extra: nil,
22
- optional: false, is_rdf: false)
22
+ optional: false, is_rdf: false,
23
+ should_add_user_email: false)
23
24
 
24
25
  uri = URI(object_url(url_extra))
25
26
 
@@ -34,8 +35,13 @@ class PushmiPullyu::AIP::FedoraFetcher
34
35
  end
35
36
 
36
37
  if response.is_a?(Net::HTTPSuccess)
38
+ body = if should_add_user_email
39
+ PushmiPullyu::AIP::OwnerEmailEditor.new(response.body).run
40
+ else
41
+ response.body
42
+ end
37
43
  file = File.open(download_path, 'wb')
38
- file.write(response.body)
44
+ file.write(body)
39
45
  file.close
40
46
  return true
41
47
  elsif response.is_a?(Net::HTTPNotFound)
@@ -0,0 +1,115 @@
1
+ require 'rdf'
2
+ require 'rdf/n3'
3
+
4
+ class PushmiPullyu::AIP::FileListCreator
5
+
6
+ IANA = 'http://www.iana.org/assignments/relation/'.freeze
7
+ PREDICATES = {
8
+ proxy_for: RDF::URI('http://www.openarchives.org/ore/terms/proxyFor'),
9
+ first: RDF::URI(IANA + 'first'),
10
+ last: RDF::URI(IANA + 'last'),
11
+ prev: RDF::URI(IANA + 'prev'),
12
+ next: RDF::URI(IANA + 'next'),
13
+ has_part: RDF::URI('http://purl.org/dc/terms/hasPart')
14
+ }.freeze
15
+
16
+ class NoProxyURIFound < StandardError; end
17
+ class NoFirstProxyFound < StandardError; end
18
+ class FirstProxyHasPrev < StandardError; end
19
+ class ListSourceFileSetMismatch < StandardError; end
20
+
21
+ def initialize(list_source_uri, output_xml_file, file_set_uuids)
22
+ @uri = RDF::URI(list_source_uri)
23
+ @output_file = output_xml_file
24
+
25
+ # These are the known fileset uuids, used for validation
26
+ @file_set_uuids = file_set_uuids
27
+ end
28
+
29
+ def run
30
+ extract_list_source_uuids
31
+ raise ListSourceFileSetMismatch, @uri.to_s if @list_source_uuids.sort != @file_set_uuids.sort
32
+
33
+ write_output_file
34
+ end
35
+
36
+ def extract_list_source_uuids
37
+ # Note: raises IOError if can't find
38
+ # raises RDF::ReaderError if can't parse
39
+ @graph = RDF::Graph.load(@uri, validate: true)
40
+
41
+ @list_source_uuids = []
42
+
43
+ # Fetch first FileSet in list source
44
+ this_proxy = find_first_proxy
45
+
46
+ while @list_source_uuids.count <= num_proxies
47
+ @list_source_uuids << uuid_from_proxy(this_proxy)
48
+ next_proxy = find_next_proxy(this_proxy)
49
+
50
+ break if next_proxy.nil?
51
+
52
+ raise NextPreviousProxyMismatch if this_proxy != find_prev_proxy(next_proxy)
53
+ this_proxy = next_proxy
54
+ end
55
+
56
+ raise ProxyCountIncorrect if @list_source_uuids.count != num_proxies
57
+ raise LastProxyFailsValidation if this_proxy != find_last_proxy
58
+ end
59
+
60
+ def num_proxies
61
+ @num_proxies ||= @graph.query(subject: @uri, predicate: PREDICATES[:has_part]).count
62
+ end
63
+
64
+ def uuid_from_proxy(proxy_uri)
65
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:proxy_for]) do |statement|
66
+ return statement.object.to_s.split('/').last
67
+ end
68
+ raise NoProxyURIFound, proxy_uri.to_s
69
+ end
70
+
71
+ def find_first_proxy
72
+ @graph.query(subject: @uri, predicate: PREDICATES[:first]) do |statement|
73
+ first_uri = statement.object
74
+ # Validate that the first proxy doesn't have a previous one
75
+ raise FirstProxyHasPrev, @uri.to_s if find_prev_proxy(first_uri)
76
+ return first_uri
77
+ end
78
+ raise NoFirstProxyFound, @uri.to_s
79
+ end
80
+
81
+ def find_last_proxy
82
+ @graph.query(subject: @uri, predicate: PREDICATES[:last]) do |statement|
83
+ last_uri = statement.object
84
+ # Validate that the last proxy doesn't have a next one
85
+ raise LastProxyHasNext, @uri.to_s if find_next_proxy(last_uri)
86
+ return last_uri
87
+ end
88
+ raise LastProxyFound, @uri.to_s
89
+ end
90
+
91
+ def find_next_proxy(proxy_uri)
92
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:next]) do |statement|
93
+ return statement.object
94
+ end
95
+ nil
96
+ end
97
+
98
+ def find_prev_proxy(proxy_uri)
99
+ @graph.query(subject: proxy_uri, predicate: PREDICATES[:prev]) do |statement|
100
+ return statement.object
101
+ end
102
+ nil
103
+ end
104
+
105
+ def write_output_file
106
+ File.open(@output_file, 'w') do |file|
107
+ file.write("<file_order>\n")
108
+ @list_source_uuids.each do |uuid|
109
+ file.write(" <uuid>#{uuid}</uuid>\n")
110
+ end
111
+ file.write("</file_order>\n")
112
+ end
113
+ end
114
+
115
+ end
@@ -0,0 +1,62 @@
1
+ require 'net/http'
2
+
3
+ class PushmiPullyu::AIP::OwnerEmailEditor
4
+
5
+ OWNER_PREDICATE = RDF::URI('http://purl.org/ontology/bibo/owner').freeze
6
+
7
+ class NoOwnerPredicate < StandardError; end
8
+
9
+ def initialize(rdf_string)
10
+ @document = rdf_string
11
+ end
12
+
13
+ def run
14
+ ensure_database_connection
15
+
16
+ is_modified = false
17
+ prefixes = nil
18
+ # Read once to load prefixes (the @things at the top of an n3 file)
19
+ RDF::N3::Reader.new(input = @document) do |reader|
20
+ reader.each_statement { |_statement| }
21
+ prefixes = reader.prefixes
22
+ end
23
+ new_body = RDF::N3::Writer.buffer(prefixes: prefixes) do |writer|
24
+ RDF::N3::Reader.new(input = @document) do |reader|
25
+ reader.each_statement do |statement|
26
+ if statement.predicate == OWNER_PREDICATE
27
+ user = PushmiPullyu::AIP::User.find(statement.object.to_i)
28
+ writer << [statement.subject, statement.predicate, user.email]
29
+ is_modified = true
30
+ else
31
+ writer << statement
32
+ end
33
+ end
34
+ end
35
+ end
36
+ return new_body if is_modified
37
+ raise NoOwnerPredicate
38
+ end
39
+
40
+ private
41
+
42
+ def ensure_database_connection
43
+ return if ActiveRecord::Base.connected?
44
+ ActiveRecord::Base.establish_connection(database_configuration)
45
+ end
46
+
47
+ def database_configuration
48
+ # Config either from URL, or with more granular options (the latter taking precedence)
49
+ config = {}
50
+ uri = URI.parse(PushmiPullyu.options[:database][:url])
51
+ config[:adapter] = PushmiPullyu.options[:database][:adaptor] || uri.scheme
52
+ config[:host] = PushmiPullyu.options[:database][:host] || uri.host
53
+ config[:database] = PushmiPullyu.options[:database][:database] || uri.path.split('/')[1].to_s
54
+ config[:username] = PushmiPullyu.options[:database][:username] || uri.user
55
+ config[:password] = PushmiPullyu.options[:database][:password] || uri.password
56
+ params = CGI.parse(uri.query || '')
57
+ config[:encoding] = PushmiPullyu.options[:database][:encoding] || params['encoding'].to_a.first
58
+ config[:pool] = PushmiPullyu.options[:database][:pool] || params['pool'].to_a.first
59
+ config
60
+ end
61
+
62
+ end
@@ -0,0 +1,2 @@
1
+ class PushmiPullyu::AIP::User < ActiveRecord::Base
2
+ end
@@ -2,6 +2,7 @@ require 'fileutils'
2
2
 
3
3
  module PushmiPullyu::AIP
4
4
  class NoidInvalid < StandardError; end
5
+ module_function
5
6
 
6
7
  def create(noid)
7
8
  raise NoidInvalid if noid.blank? || noid.include?('/')
@@ -12,11 +13,9 @@ module PushmiPullyu::AIP
12
13
  PushmiPullyu::AIP::Downloader.new(noid, aip_directory).run
13
14
  PushmiPullyu::AIP::Creator.new(noid, aip_directory, aip_filename).run
14
15
 
15
- yield aip_filename
16
+ yield aip_filename, aip_directory
16
17
 
17
18
  FileUtils.rm_rf(aip_filename) if File.exist?(aip_filename)
18
19
  FileUtils.rm_rf(aip_directory) if File.exist?(aip_directory)
19
20
  end
20
-
21
- module_function :create
22
21
  end
@@ -41,6 +41,7 @@ class PushmiPullyu::CLI
41
41
  Rollbar.error(e)
42
42
  raise e
43
43
  end
44
+ # rubocop:enable Lint/RescueException
44
45
  end
45
46
 
46
47
  def start_server
@@ -73,12 +74,11 @@ class PushmiPullyu::CLI
73
74
  end
74
75
 
75
76
  def parse_config(config_file)
76
- opts = {}
77
77
  if File.exist?(config_file)
78
- opts = YAML.safe_load(ERB.new(IO.read(config_file)).result).deep_symbolize_keys || opts
78
+ YAML.safe_load(ERB.new(IO.read(config_file)).result).deep_symbolize_keys || {}
79
+ else
80
+ {}
79
81
  end
80
-
81
- opts
82
82
  end
83
83
 
84
84
  # Parse the options.
@@ -174,20 +174,20 @@ class PushmiPullyu::CLI
174
174
  Rollbar.scoped(noid: item) do
175
175
  begin
176
176
  # Download AIP from Fedora, bag and tar AIP directory and cleanup after block code
177
- PushmiPullyu::AIP.create(item) do |aip_filename|
177
+ PushmiPullyu::AIP.create(item) do |aip_filename, aip_directory|
178
178
  # Push tarred AIP to swift API
179
179
  deposited_file = swift.deposit_file(aip_filename, options[:swift][:container])
180
180
  # Log successful preservation event to the log files
181
- PushmiPullyu::Logging.log_preservation_event(deposited_file)
181
+ PushmiPullyu::Logging.log_preservation_event(deposited_file, aip_directory)
182
182
  end
183
- # rubocop:disable RescueWithoutErrorClass
184
- rescue => e
183
+ # rubocop:disable Lint/RescueException
184
+ rescue Exception => e
185
185
  Rollbar.error(e)
186
186
  logger.error(e)
187
187
  # TODO: we could re-raise here and let the daemon die on any preservation error, or just log the issue and
188
188
  # move on to the next item.
189
189
  end
190
- # rubocop:enaable RescueWithoutErrorClass
190
+ # rubocop:enable Lint/RescueException
191
191
  end
192
192
  end
193
193
 
@@ -13,58 +13,96 @@ module PushmiPullyu::Logging
13
13
 
14
14
  end
15
15
 
16
- def self.initialize_logger(log_target = STDOUT)
17
- @logger = Logger.new(log_target)
18
- @logger.level = Logger::INFO
19
- @logger
16
+ def logger
17
+ PushmiPullyu::Logging.logger
20
18
  end
21
19
 
22
- def self.logger
23
- @logger ||= initialize_logger
24
- end
20
+ class << self
25
21
 
26
- def self.log_aip_activity(aip_directory, message)
27
- log_file = "#{aip_directory}/data/logs/aipcreation.log"
28
- aip_logger = Logger.new(log_file)
29
- aip_logger.level = logger.level
22
+ attr_writer :logger
30
23
 
31
- # Log to both the application log, and the log file that gets archived in the AIP
32
- logger.info(message)
33
- aip_logger.info(message)
24
+ def initialize_logger(log_target = STDOUT)
25
+ @logger = Logger.new(log_target)
26
+ @logger.level = Logger::INFO
27
+ @logger
28
+ end
34
29
 
35
- aip_logger.close
36
- end
30
+ def logger
31
+ @logger ||= initialize_logger
32
+ end
37
33
 
38
- def self.log_preservation_event(deposited_file)
39
- preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
34
+ def log_aip_activity(aip_directory, message)
35
+ log_file = "#{aip_directory}/data/logs/aipcreation.log"
36
+ aip_logger = Logger.new(log_file)
37
+ aip_logger.level = logger.level
40
38
 
41
- message = "#{deposited_file.name} was successfully deposited into Swift Storage! \n"\
42
- "Here are the details of this preservation event: \n"\
43
- "\t NOID: '#{deposited_file.name}' \n"\
44
- "\t Timestamp of Completion: '#{deposited_file.last_modified}' \n"\
45
- "\t AIP Checksum: '#{deposited_file.etag}' \n"\
46
- "\t Metadata: #{deposited_file.metadata} \n"
39
+ # Log to both the application log, and the log file that gets archived in the AIP
40
+ logger.info(message)
41
+ aip_logger.info(message)
47
42
 
48
- # Log to both the application log, and the preservation log file
49
- logger.info(message)
50
- preservation_logger.info(message)
43
+ aip_logger.close
44
+ end
51
45
 
52
- preservation_logger.close
53
- end
46
+ def log_preservation_event(deposited_file, aip_directory)
47
+ preservation_logger = Logger.new("#{PushmiPullyu.options[:logdir]}/preservation_events.log")
54
48
 
55
- def self.logger=(log)
56
- @logger = log
57
- end
49
+ message = "#{deposited_file.name} was successfully deposited into Swift Storage!\n"\
50
+ "Here are the details of this preservation event:\n"\
51
+ "\tNOID: '#{deposited_file.name}'\n"\
52
+ "\tTimestamp of Completion: '#{deposited_file.last_modified}'\n"\
53
+ "\tAIP Checksum: '#{deposited_file.etag}'\n"\
54
+ "\tMetadata: #{deposited_file.metadata}\n"\
55
+
56
+ file_details = file_log_details(aip_directory)
57
+
58
+ if file_details.present?
59
+ message << "\tFile Details:\n"
60
+ file_details.each do |file_detail|
61
+ message << %(\t\t{"fileset_uuid": "#{file_detail[:fileset_name]}",
62
+ \t\t"details": {
63
+ \t\t\t"file_name": "#{file_detail[:file_name]}",
64
+ \t\t\t"file_type": "#{file_detail[:file_extension]}",
65
+ \t\t\t"file_size": #{file_detail[:file_size]}
66
+ \t\t}}\n)
67
+ end
68
+ end
58
69
 
59
- def self.reopen
60
- if @logger
61
- @logger.reopen
62
- else
63
- @logger = initialize_logger
70
+ # Log to both the application log, and the preservation log file
71
+ logger.info(message)
72
+ preservation_logger.info(message)
73
+
74
+ preservation_logger.close
75
+ end
76
+
77
+ def reopen
78
+ if @logger
79
+ @logger.reopen
80
+ else
81
+ @logger = initialize_logger
82
+ end
83
+ end
84
+
85
+ private
86
+
87
+ def file_log_details(aip_directory)
88
+ file_details = []
89
+ data_files_location = "#{aip_directory}/data/objects/files"
90
+
91
+ if Dir.exist?(data_files_location)
92
+ Dir.glob("#{data_files_location}/*") do |folder|
93
+ Dir.glob("#{folder}/*") do |file|
94
+ file_details << {
95
+ fileset_name: File.dirname(file).split('/')[-1],
96
+ file_name: File.basename(file),
97
+ file_size: File.size(file),
98
+ file_extension: File.extname(file).strip.downcase[1..-1]
99
+ }
100
+ end
101
+ end
102
+ end
103
+
104
+ file_details
64
105
  end
65
- end
66
106
 
67
- def logger
68
- PushmiPullyu::Logging.logger
69
107
  end
70
108
  end
@@ -1,3 +1,3 @@
1
1
  module PushmiPullyu
2
- VERSION = '0.2.7'.freeze
2
+ VERSION = '1.0.1'.freeze
3
3
  end
data/lib/pushmi_pullyu.rb CHANGED
@@ -8,8 +8,11 @@ require 'pushmi_pullyu/logging'
8
8
  require 'pushmi_pullyu/aip'
9
9
  require 'pushmi_pullyu/aip/creator'
10
10
  require 'pushmi_pullyu/aip/downloader'
11
- require 'pushmi_pullyu/aip/solr_fetcher'
12
11
  require 'pushmi_pullyu/aip/fedora_fetcher'
12
+ require 'pushmi_pullyu/aip/file_list_creator'
13
+ require 'pushmi_pullyu/aip/owner_email_editor'
14
+ require 'active_record'
15
+ require 'pushmi_pullyu/aip/user'
13
16
  require 'pushmi_pullyu/cli'
14
17
  require 'pushmi_pullyu/preservation_queue'
15
18
  require 'pushmi_pullyu/swift_depositer'
@@ -20,6 +23,7 @@ require 'active_support/core_ext'
20
23
  # PushmiPullyu main module
21
24
  module PushmiPullyu
22
25
  DEFAULTS = {
26
+ aip_version: 'lightaip-2.0',
23
27
  daemonize: false,
24
28
  debug: false,
25
29
  logdir: 'log',
@@ -32,12 +36,8 @@ module PushmiPullyu
32
36
  redis: {
33
37
  url: 'redis://localhost:6379'
34
38
  },
35
- # TODO: rest of these are examples for solr/fedora/swift... feel free to fill them in correctly
36
- solr: {
37
- url: 'http://localhost:8983/solr/development'
38
- },
39
39
  fedora: {
40
- url: 'http://localhost:8983/fedora/rest',
40
+ url: 'http://localhost:8080/fcrepo/rest',
41
41
  user: 'fedoraAdmin',
42
42
  password: 'fedoraAdmin',
43
43
  base_path: '/dev'
@@ -52,6 +52,12 @@ module PushmiPullyu
52
52
  container: 'ERA'
53
53
  },
54
54
  rollbar: {
55
+ },
56
+ database: {
57
+ encoding: 'utf8',
58
+ pool: ENV['RAILS_MAX_THREADS'] || 5,
59
+ url: ENV['DATABASE_URL'] || ENV['JUPITER_DATABASE_URL'] || 'postgresql://jupiter:mysecretpassword@127.0.0.1',
60
+ database: 'jupiter_development'
55
61
  }
56
62
  }.freeze
57
63
 
@@ -1,4 +1,4 @@
1
- lib = File.expand_path('../lib', __FILE__)
1
+ lib = File.expand_path('lib', __dir__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
  require 'pushmi_pullyu/version'
4
4
 
@@ -21,15 +21,17 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.required_ruby_version = '>= 2.3.1'
23
23
 
24
+ spec.add_runtime_dependency 'activerecord', '>= 5.1.5', '< 5.3.0'
24
25
  spec.add_runtime_dependency 'activesupport', '~> 5.0'
25
26
  spec.add_runtime_dependency 'bagit', '~> 0.4'
26
27
  spec.add_runtime_dependency 'connection_pool', '~> 2.2'
27
28
  spec.add_runtime_dependency 'daemons', '~> 1.2', '>= 1.2.4'
28
29
  spec.add_runtime_dependency 'minitar', '~> 0.6'
29
30
  spec.add_runtime_dependency 'openstack', '~> 3.3', '>= 3.3.10'
30
- spec.add_runtime_dependency 'rdf', '~> 1.99'
31
- spec.add_runtime_dependency 'rdf-n3', '~> 1.99'
32
- spec.add_runtime_dependency 'redis', '~> 3.3'
31
+ spec.add_runtime_dependency 'pg', '~> 1.0.0'
32
+ spec.add_runtime_dependency 'rdf', '>= 1.99', '< 4.0'
33
+ spec.add_runtime_dependency 'rdf-n3', '>= 1.99', '< 4.0'
34
+ spec.add_runtime_dependency 'redis', '>= 3.3', '< 5.0'
33
35
  spec.add_runtime_dependency 'rollbar', '~> 2.14'
34
36
 
35
37
  spec.add_development_dependency 'bundler', '~> 1.14'
@@ -38,9 +40,9 @@ Gem::Specification.new do |spec|
38
40
  spec.add_development_dependency 'pry', '~> 0.10', '>= 0.10.4'
39
41
  spec.add_development_dependency 'rake', '~> 12.0'
40
42
  spec.add_development_dependency 'rspec', '~> 3.0'
41
- spec.add_development_dependency 'rubocop', '~> 0.45'
43
+ spec.add_development_dependency 'rubocop', '~> 0.51'
42
44
  spec.add_development_dependency 'rubocop-rspec', '~> 1.10'
43
45
  spec.add_development_dependency 'timecop', '~> 0.8'
44
- spec.add_development_dependency 'vcr', '~> 3.0'
45
- spec.add_development_dependency 'webmock', '~> 2.1'
46
+ spec.add_development_dependency 'vcr', '~> 4.0'
47
+ spec.add_development_dependency 'webmock', '~> 3.3'
46
48
  end
metadata CHANGED
@@ -1,15 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pushmi_pullyu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shane Murnaghan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 5.1.5
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 5.3.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 5.1.5
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 5.3.0
13
33
  - !ruby/object:Gem::Dependency
14
34
  name: activesupport
15
35
  requirement: !ruby/object:Gem::Requirement
@@ -107,47 +127,79 @@ dependencies:
107
127
  - !ruby/object:Gem::Version
108
128
  version: 3.3.10
109
129
  - !ruby/object:Gem::Dependency
110
- name: rdf
130
+ name: pg
111
131
  requirement: !ruby/object:Gem::Requirement
112
132
  requirements:
113
133
  - - "~>"
114
134
  - !ruby/object:Gem::Version
115
- version: '1.99'
135
+ version: 1.0.0
116
136
  type: :runtime
117
137
  prerelease: false
118
138
  version_requirements: !ruby/object:Gem::Requirement
119
139
  requirements:
120
140
  - - "~>"
141
+ - !ruby/object:Gem::Version
142
+ version: 1.0.0
143
+ - !ruby/object:Gem::Dependency
144
+ name: rdf
145
+ requirement: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '1.99'
150
+ - - "<"
151
+ - !ruby/object:Gem::Version
152
+ version: '4.0'
153
+ type: :runtime
154
+ prerelease: false
155
+ version_requirements: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
121
158
  - !ruby/object:Gem::Version
122
159
  version: '1.99'
160
+ - - "<"
161
+ - !ruby/object:Gem::Version
162
+ version: '4.0'
123
163
  - !ruby/object:Gem::Dependency
124
164
  name: rdf-n3
125
165
  requirement: !ruby/object:Gem::Requirement
126
166
  requirements:
127
- - - "~>"
167
+ - - ">="
128
168
  - !ruby/object:Gem::Version
129
169
  version: '1.99'
170
+ - - "<"
171
+ - !ruby/object:Gem::Version
172
+ version: '4.0'
130
173
  type: :runtime
131
174
  prerelease: false
132
175
  version_requirements: !ruby/object:Gem::Requirement
133
176
  requirements:
134
- - - "~>"
177
+ - - ">="
135
178
  - !ruby/object:Gem::Version
136
179
  version: '1.99'
180
+ - - "<"
181
+ - !ruby/object:Gem::Version
182
+ version: '4.0'
137
183
  - !ruby/object:Gem::Dependency
138
184
  name: redis
139
185
  requirement: !ruby/object:Gem::Requirement
140
186
  requirements:
141
- - - "~>"
187
+ - - ">="
142
188
  - !ruby/object:Gem::Version
143
189
  version: '3.3'
190
+ - - "<"
191
+ - !ruby/object:Gem::Version
192
+ version: '5.0'
144
193
  type: :runtime
145
194
  prerelease: false
146
195
  version_requirements: !ruby/object:Gem::Requirement
147
196
  requirements:
148
- - - "~>"
197
+ - - ">="
149
198
  - !ruby/object:Gem::Version
150
199
  version: '3.3'
200
+ - - "<"
201
+ - !ruby/object:Gem::Version
202
+ version: '5.0'
151
203
  - !ruby/object:Gem::Dependency
152
204
  name: rollbar
153
205
  requirement: !ruby/object:Gem::Requirement
@@ -258,14 +310,14 @@ dependencies:
258
310
  requirements:
259
311
  - - "~>"
260
312
  - !ruby/object:Gem::Version
261
- version: '0.45'
313
+ version: '0.51'
262
314
  type: :development
263
315
  prerelease: false
264
316
  version_requirements: !ruby/object:Gem::Requirement
265
317
  requirements:
266
318
  - - "~>"
267
319
  - !ruby/object:Gem::Version
268
- version: '0.45'
320
+ version: '0.51'
269
321
  - !ruby/object:Gem::Dependency
270
322
  name: rubocop-rspec
271
323
  requirement: !ruby/object:Gem::Requirement
@@ -300,28 +352,28 @@ dependencies:
300
352
  requirements:
301
353
  - - "~>"
302
354
  - !ruby/object:Gem::Version
303
- version: '3.0'
355
+ version: '4.0'
304
356
  type: :development
305
357
  prerelease: false
306
358
  version_requirements: !ruby/object:Gem::Requirement
307
359
  requirements:
308
360
  - - "~>"
309
361
  - !ruby/object:Gem::Version
310
- version: '3.0'
362
+ version: '4.0'
311
363
  - !ruby/object:Gem::Dependency
312
364
  name: webmock
313
365
  requirement: !ruby/object:Gem::Requirement
314
366
  requirements:
315
367
  - - "~>"
316
368
  - !ruby/object:Gem::Version
317
- version: '2.1'
369
+ version: '3.3'
318
370
  type: :development
319
371
  prerelease: false
320
372
  version_requirements: !ruby/object:Gem::Requirement
321
373
  requirements:
322
374
  - - "~>"
323
375
  - !ruby/object:Gem::Version
324
- version: '2.1'
376
+ version: '3.3'
325
377
  description:
326
378
  email:
327
379
  - murnagha@ualberta.ca
@@ -354,7 +406,9 @@ files:
354
406
  - lib/pushmi_pullyu/aip/creator.rb
355
407
  - lib/pushmi_pullyu/aip/downloader.rb
356
408
  - lib/pushmi_pullyu/aip/fedora_fetcher.rb
357
- - lib/pushmi_pullyu/aip/solr_fetcher.rb
409
+ - lib/pushmi_pullyu/aip/file_list_creator.rb
410
+ - lib/pushmi_pullyu/aip/owner_email_editor.rb
411
+ - lib/pushmi_pullyu/aip/user.rb
358
412
  - lib/pushmi_pullyu/cli.rb
359
413
  - lib/pushmi_pullyu/logging.rb
360
414
  - lib/pushmi_pullyu/preservation_queue.rb
@@ -384,7 +438,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
384
438
  version: '0'
385
439
  requirements: []
386
440
  rubyforge_project:
387
- rubygems_version: 2.4.5.1
441
+ rubygems_version: 2.7.6
388
442
  signing_key:
389
443
  specification_version: 4
390
444
  summary: Ruby application to manage flow of content from Fedora into Swift for preservation
@@ -1,33 +0,0 @@
1
- require 'json'
2
- require 'net/http'
3
-
4
- class PushmiPullyu::AIP::SolrFetcher
5
-
6
- class SolrFetchError < StandardError; end
7
-
8
- def initialize(noid)
9
- @noid = noid
10
- end
11
-
12
- def fetch_permission_object_ids
13
- hash = JSON.parse(run_query_json)
14
-
15
- return [] if hash['response']['docs'].empty?
16
-
17
- hash['response']['docs'].map { |hit| hit['id'] }
18
- end
19
-
20
- private
21
-
22
- # Return fetched results, else raise an error
23
- def run_query_json
24
- response = Net::HTTP.get_response(
25
- URI("#{PushmiPullyu.options[:solr][:url]}/select?q=accessTo_ssim:#{@noid}&fl=id&wt=json")
26
- )
27
-
28
- return response.body if response.is_a?(Net::HTTPSuccess)
29
-
30
- raise SolrFetchError
31
- end
32
-
33
- end