bulkrax 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23cd36b2c3dc13769f7d20da3c0d53f2d5c2cabef15d5beeeccee75aed6d679a
4
- data.tar.gz: 868f0ec5c54862943e126af0ecf63d49771bc9a4ac88e566074205d7077e063c
3
+ metadata.gz: 49b7cfb404f1878429bbac77aad6d1167e97377fe28b54e2042cee7f6ca04e67
4
+ data.tar.gz: c3789c193dc1610c99117e5e2c450ed477a7390995916c4b6b419900f5cf539b
5
5
  SHA512:
6
- metadata.gz: a4f9c359a539054dba54000ca30bd1209bb862254c75463c2598807e8fd91e7e562e3bd6203f5f347ffe17da22f58d7915c1090109e88aabccac774c0281b1ee
7
- data.tar.gz: 11c0083e67c4add719f04c01d20fac3216f6e994407dfdc48ae5f6d82696241a2b81ef6156279568b92e4bd56d3598085e83bac3718d04892a4fc96bbefd1028
6
+ metadata.gz: a669e9b566770ad21e3a6242d24779f5c73847b2b97d4f7334f06209ac7de3634e7667550ba35ce6930f9ceaffda8efccb29cf3277b40f29d808ddbd9623a1b8
7
+ data.tar.gz: 794e6de65d4ebb5665ab73fdac3eba2d4157ff1b07f3da6cc63a6323b2127dc40021d69ea193da2a356e276f08dde8ccc51dc1d8c563e5ba3e8ecb592e4b4c88
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Bulkrax
2
2
  Bulkrax is a batteries-included importer for Samvera applications. It currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the box. It is also designed to be extensible, allowing you to easily add new importers into your application or to include them with other gems. Bulkrax provides a full admin interface including creating, editing, scheduling and reviewing imports.
3
3
 
4
-
5
4
  ## Installation
6
5
 
7
6
  ### Install Generator
@@ -9,9 +8,9 @@ Bulkrax is a batteries included importer for Samvera applications. It currently
9
8
  Add this line to your application's Gemfile:
10
9
 
11
10
  ```ruby
12
- gem 'bulkrax', '1.0.0'
11
+ gem 'bulkrax'
13
12
  # or if using from github
14
- gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git'
13
+ gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git', branch: 'main'
15
14
  ```
16
15
 
17
16
  And then execute:
@@ -27,7 +26,7 @@ If using Sidekiq, set up queues for `import` and `export`.
27
26
  Add this line to your application's Gemfile:
28
27
 
29
28
  ```ruby
30
- gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git'
29
+ gem 'bulkrax'
31
30
  ```
32
31
 
33
32
  And then execute:
@@ -46,13 +46,9 @@ module Bulkrax
46
46
  end
47
47
 
48
48
  if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
49
- reschedule(
50
- parent_identifier: parent_identifier,
51
- importer_run_id: importer_run_id
52
- )
49
+ reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
53
50
  return false # stop current job from continuing to run after rescheduling
54
51
  end
55
-
56
52
  @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
57
53
  importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
58
54
  importerexporter_type: "Bulkrax::Importer").first
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteCollectionJob < DeleteJob; end
5
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteFileSetJob < DeleteJob; end
5
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class DeleteJob < ApplicationJob
5
+ queue_as :import
6
+
7
+ # rubocop:disable Rails/SkipsModelValidations
8
+ def perform(entry, importer_run)
9
+ obj = entry.factory.find
10
+ obj&.delete
11
+ ImporterRun.find(importer_run.id).increment!(:deleted_records)
12
+ ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
13
+ entry.save!
14
+ entry.importer.current_run = ImporterRun.find(importer_run.id)
15
+ entry.importer.record_status
16
+ entry.status_info("Deleted", ImporterRun.find(importer_run.id))
17
+ end
18
+ # rubocop:enable Rails/SkipsModelValidations
19
+ end
20
+ end
@@ -1,20 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class DeleteWorkJob < ApplicationJob
5
- queue_as :import
6
-
7
- # rubocop:disable Rails/SkipsModelValidations
8
- def perform(entry, importer_run)
9
- work = entry.factory.find
10
- work&.delete
11
- ImporterRun.find(importer_run.id).increment!(:deleted_records)
12
- ImporterRun.find(importer_run.id).decrement!(:enqueued_records)
13
- entry.save!
14
- entry.importer.current_run = ImporterRun.find(importer_run.id)
15
- entry.importer.record_status
16
- entry.status_info("Deleted", ImporterRun.find(importer_run.id))
17
- end
18
- # rubocop:enable Rails/SkipsModelValidations
19
- end
4
+ class DeleteWorkJob < DeleteJob; end
20
5
  end
@@ -9,16 +9,18 @@ module Bulkrax
9
9
  entry = Entry.find(args[0])
10
10
  begin
11
11
  entry.build
12
- entry.save
12
+ entry.save!
13
13
  ImporterRun.find(args[1]).increment!(:processed_records)
14
14
  ImporterRun.find(args[1]).increment!(:processed_collections)
15
- ImporterRun.find(args[1]).decrement!(:enqueued_records)
15
+ ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
16
16
  rescue => e
17
17
  ImporterRun.find(args[1]).increment!(:failed_records)
18
18
  ImporterRun.find(args[1]).increment!(:failed_collections)
19
- ImporterRun.find(args[1]).decrement!(:enqueued_records)
19
+ ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
20
20
  raise e
21
21
  end
22
+ entry.importer.current_run = ImporterRun.find(args[1])
23
+ entry.importer.record_status
22
24
  end
23
25
  # rubocop:enable Rails/SkipsModelValidations
24
26
  end
@@ -26,17 +26,17 @@ module Bulkrax
26
26
  ImporterRun.find(importer_run_id).increment!(:failed_file_sets)
27
27
  # rubocop:enable Rails/SkipsModelValidations
28
28
  end
29
- ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
29
+ ImporterRun.find(importer_run_id).decrement!(:enqueued_records) unless ImporterRun.find(importer_run_id).enqueued_records <= 0 # rubocop:disable Rails/SkipsModelValidations
30
30
  entry.save!
31
+ entry.importer.current_run = ImporterRun.find(importer_run_id)
32
+ entry.importer.record_status
31
33
 
32
34
  rescue MissingParentError => e
33
35
  # try waiting for the parent record to be created
34
36
  entry.import_attempts += 1
35
37
  entry.save!
36
38
  if entry.import_attempts < 5
37
- ImportFileSetJob
38
- .set(wait: (entry.import_attempts + 1).minutes)
39
- .perform_later(entry_id, importer_run_id)
39
+ ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry_id, importer_run_id)
40
40
  else
41
41
  ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
42
42
  entry.status_info(e)
@@ -93,17 +93,31 @@ module Bulkrax
93
93
  end
94
94
 
95
95
  def build_export_metadata
96
- # make_round_trippable
97
96
  self.parsed_metadata = {}
98
- self.parsed_metadata['id'] = hyrax_record.id
99
- self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
100
- self.parsed_metadata['model'] = hyrax_record.has_model.first
97
+
98
+ build_system_metadata
99
+ build_files_metadata unless hyrax_record.is_a?(Collection)
101
100
  build_relationship_metadata
102
101
  build_mapping_metadata
103
- build_files unless hyrax_record.is_a?(Collection)
102
+
104
103
  self.parsed_metadata
105
104
  end
106
105
 
106
+ # Metadata required by Bulkrax for round-tripping
107
+ def build_system_metadata
108
+ self.parsed_metadata['id'] = hyrax_record.id
109
+ self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
110
+ self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
111
+ end
112
+
113
+ def build_files_metadata
114
+ file_mapping = key_for_export('file')
115
+ file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
116
+ filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
117
+
118
+ handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
119
+ end
120
+
107
121
  def build_relationship_metadata
108
122
  # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
109
123
  relationship_methods = {
@@ -127,12 +141,10 @@ module Bulkrax
127
141
 
128
142
  def build_mapping_metadata
129
143
  mapping.each do |key, value|
130
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
131
- next if key == "model"
132
- # relationships handled by #build_relationship_metadata
133
- next if [related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
134
- next if key == 'file' # handled by #build_files
144
+ # these keys are handled by other methods
145
+ next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
135
146
  next if value['excluded']
147
+ next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
136
148
 
137
149
  object_key = key if value.key?('object')
138
150
  next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
@@ -205,14 +217,6 @@ module Bulkrax
205
217
  end
206
218
  end
207
219
 
208
- def build_files
209
- file_mapping = mapping['file']&.[]('from')&.first || 'file'
210
- file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
211
-
212
- filenames = file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact
213
- handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
214
- end
215
-
216
220
  def handle_join_on_export(key, values, join)
217
221
  if join
218
222
  parsed_metadata[key] = values.join(' | ') # TODO: make split char dynamic
@@ -224,16 +228,6 @@ module Bulkrax
224
228
  end
225
229
  end
226
230
 
227
- # In order for the existing exported hyrax_record, to be updated by a re-import
228
- # we need a unique value in system_identifier
229
- # add the existing hyrax_record id to system_identifier
230
- def make_round_trippable
231
- values = hyrax_record.send(work_identifier.to_s).to_a
232
- values << hyrax_record.id
233
- hyrax_record.send("#{work_identifier}=", values)
234
- hyrax_record.save
235
- end
236
-
237
231
  def record
238
232
  @record ||= raw_metadata
239
233
  end
@@ -263,7 +257,8 @@ module Bulkrax
263
257
  end
264
258
 
265
259
  def collections_created?
266
- collection_identifiers.length == self.collection_ids.length
260
+ # TODO: look into if this method is still needed after new relationships code
261
+ true
267
262
  end
268
263
 
269
264
  def find_collection_ids
@@ -2,31 +2,6 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvFileSetEntry < CsvEntry
5
- def factory_class
6
- ::FileSet
7
- end
8
-
9
- def add_path_to_file
10
- parsed_metadata['file'].each_with_index do |filename, i|
11
- path_to_file = ::File.join(parser.path_to_files, filename)
12
-
13
- parsed_metadata['file'][i] = path_to_file
14
- end
15
- raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
-
17
- parsed_metadata['file']
18
- end
19
-
20
- def validate_presence_of_filename!
21
- return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
22
-
23
- raise StandardError, 'File set must have a filename'
24
- end
25
-
26
- def validate_presence_of_parent!
27
- return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
28
-
29
- raise StandardError, 'File set must be related to at least one work'
30
- end
5
+ include FileSetEntryBehavior
31
6
  end
32
7
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class RdfFileSetEntry < RdfEntry
5
+ include FileSetEntryBehavior
6
+ end
7
+ end
@@ -12,7 +12,7 @@ module Bulkrax
12
12
  # check for our entry in our current importer first
13
13
  importer_id = ImporterRun.find(importer_run_id).importer_id
14
14
  default_scope = { identifier: identifier, importerexporter_type: 'Bulkrax::Importer' }
15
- record = Entry.find_by(default_scope, importerexporter_id: importer_id) || Entry.find_by(default_scope)
15
+ record = Entry.find_by(default_scope.merge({ importerexporter_id: importer_id })) || Entry.find_by(default_scope)
16
16
 
17
17
  # TODO(alishaevn): discuss whether we are only looking for Collection models here
18
18
  # use ActiveFedora::Base.find(identifier) instead?
@@ -42,19 +42,22 @@ module Bulkrax
42
42
  end
43
43
  end
44
44
 
45
- # Prepend the file_set id to ensure a unique filename
45
+ # Prepend the file_set id to ensure a unique filename and also one that is not longer than 255 characters
46
46
  def filename(file_set)
47
47
  return if file_set.original_file.blank?
48
48
  fn = file_set.original_file.file_name.first
49
49
  mime = Mime::Type.lookup(file_set.original_file.mime_type)
50
50
  ext_mime = MIME::Types.of(file_set.original_file.file_name).first
51
51
  if fn.include?(file_set.id) || importerexporter.metadata_only?
52
- return fn if mime.to_s == ext_mime.to_s
53
- return "#{fn}.#{mime.to_sym}"
52
+ filename = "#{fn}.#{mime.to_sym}"
53
+ filename = fn if mime.to_s == ext_mime.to_s
54
54
  else
55
- return "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
56
- return "#{file_set.id}_#{fn}.#{mime.to_sym}"
55
+ filename = "#{file_set.id}_#{fn}.#{mime.to_sym}"
56
+ filename = "#{file_set.id}_#{fn}" if mime.to_s == ext_mime.to_s
57
57
  end
58
+ # Remove the extension, truncate the basename, and reattach the extension
59
+ ext = File.extname(filename)
60
+ "#{File.basename(filename, ext)[0...(220 - ext.length)]}#{ext}"
58
61
  end
59
62
  end
60
63
  end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module FileSetEntryBehavior
5
+ def factory_class
6
+ ::FileSet
7
+ end
8
+
9
+ def add_path_to_file
10
+ parsed_metadata['file'].each_with_index do |filename, i|
11
+ path_to_file = ::File.join(parser.path_to_files, filename)
12
+
13
+ parsed_metadata['file'][i] = path_to_file
14
+ end
15
+ raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
+
17
+ parsed_metadata['file']
18
+ end
19
+
20
+ def validate_presence_of_filename!
21
+ return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
22
+
23
+ raise StandardError, 'File set must have a filename'
24
+ end
25
+
26
+ def validate_presence_of_parent!
27
+ return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
28
+
29
+ raise StandardError, 'File set must be related to at least one work'
30
+ end
31
+ end
32
+ end
@@ -23,6 +23,11 @@ module Bulkrax
23
23
  Entry
24
24
  end
25
25
 
26
+ def file_set_entry_class
27
+ csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
28
+ csv_format ? CsvFileSetEntry : RdfFileSetEntry
29
+ end
30
+
26
31
  # Take a random sample of 10 metadata_paths and work out the import fields from that
27
32
  def import_fields
28
33
  raise StandardError, 'No metadata files were found' if metadata_paths.blank?
@@ -11,12 +11,15 @@ module Bulkrax
11
11
  end
12
12
 
13
13
  def records(_opts = {})
14
+ return @records if @records.present?
15
+
14
16
  file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
15
17
  # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
16
18
  csv_data = entry_class.read_data(file_for_import)
17
19
  importer.parser_fields['total'] = csv_data.count
18
20
  importer.save
19
- @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
21
+
22
+ @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
20
23
  end
21
24
 
22
25
  def build_records
@@ -145,7 +148,6 @@ module Bulkrax
145
148
  'Bulkrax::Importer',
146
149
  current_record.to_h)
147
150
  if current_record[:delete].present?
148
- # TODO: create a "Delete" job for file_sets and collections
149
151
  "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
150
152
  else
151
153
  "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
@@ -217,7 +219,7 @@ module Bulkrax
217
219
  instance_variable_set(instance_var, ActiveFedora::SolrService.post(
218
220
  extra_filters.to_s,
219
221
  fq: [
220
- "#{::Solrizer.solr_name(work_identifier)}:(#{complete_entry_identifiers.join(' OR ')})",
222
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
221
223
  "has_model_ssim:(#{models_to_search.join(' OR ')})"
222
224
  ],
223
225
  fl: 'id',
@@ -178,7 +178,7 @@
178
178
  <% elsif e.status == "Pending" %>
179
179
  <td><span class="glyphicon glyphicon-option-horizontal" style="color: blue;"></span> <%= e.status %></td>
180
180
  <% else %>
181
- <td><span class="glyphicon glyphicon-remove" style="color: red;"></span> <%= e.status %></td>
181
+ <td><span class="glyphicon glyphicon-remove" style="color: <%= e.status == 'Deleted' ? 'green' : 'red' %>;"></span> <%= e.status %></td>
182
182
  <% end %>
183
183
  <% if e.last_error.present? %>
184
184
  <td><%= link_to e.last_error.dig("error_class"), bulkrax.importer_entry_path(@importer.id, e.id) %></td>
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.1.0'
4
+ VERSION = '3.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-19 00:00:00.000000000 Z
11
+ date: 2022-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -260,6 +260,9 @@ files:
260
260
  - app/helpers/bulkrax/validation_helper.rb
261
261
  - app/jobs/bulkrax/application_job.rb
262
262
  - app/jobs/bulkrax/create_relationships_job.rb
263
+ - app/jobs/bulkrax/delete_collection_job.rb
264
+ - app/jobs/bulkrax/delete_file_set_job.rb
265
+ - app/jobs/bulkrax/delete_job.rb
263
266
  - app/jobs/bulkrax/delete_work_job.rb
264
267
  - app/jobs/bulkrax/download_cloud_file_job.rb
265
268
  - app/jobs/bulkrax/export_work_job.rb
@@ -291,6 +294,7 @@ files:
291
294
  - app/models/bulkrax/pending_relationship.rb
292
295
  - app/models/bulkrax/rdf_collection_entry.rb
293
296
  - app/models/bulkrax/rdf_entry.rb
297
+ - app/models/bulkrax/rdf_file_set_entry.rb
294
298
  - app/models/bulkrax/status.rb
295
299
  - app/models/bulkrax/xml_entry.rb
296
300
  - app/models/concerns/bulkrax/download_behavior.rb
@@ -298,6 +302,7 @@ files:
298
302
  - app/models/concerns/bulkrax/errored_entries.rb
299
303
  - app/models/concerns/bulkrax/export_behavior.rb
300
304
  - app/models/concerns/bulkrax/file_factory.rb
305
+ - app/models/concerns/bulkrax/file_set_entry_behavior.rb
301
306
  - app/models/concerns/bulkrax/has_local_processing.rb
302
307
  - app/models/concerns/bulkrax/has_matchers.rb
303
308
  - app/models/concerns/bulkrax/import_behavior.rb