bulkrax 1.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +22 -17
  5. data/app/factories/bulkrax/object_factory.rb +44 -61
  6. data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
  11. data/app/jobs/bulkrax/importer_job.rb +16 -1
  12. data/app/matchers/bulkrax/application_matcher.rb +9 -6
  13. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  14. data/app/models/bulkrax/csv_entry.rb +139 -45
  15. data/app/models/bulkrax/entry.rb +19 -8
  16. data/app/models/bulkrax/exporter.rb +12 -5
  17. data/app/models/bulkrax/importer.rb +22 -5
  18. data/app/models/bulkrax/oai_entry.rb +5 -1
  19. data/app/models/bulkrax/rdf_entry.rb +16 -7
  20. data/app/models/bulkrax/xml_entry.rb +4 -0
  21. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  22. data/app/models/concerns/bulkrax/file_factory.rb +2 -1
  23. data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
  24. data/app/models/concerns/bulkrax/import_behavior.rb +39 -5
  25. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
  26. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  27. data/app/parsers/bulkrax/application_parser.rb +59 -84
  28. data/app/parsers/bulkrax/bagit_parser.rb +12 -3
  29. data/app/parsers/bulkrax/csv_parser.rb +137 -63
  30. data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
  31. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  32. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  33. data/app/views/bulkrax/exporters/show.html.erb +13 -1
  34. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
  35. data/app/views/bulkrax/importers/edit.html.erb +2 -0
  36. data/app/views/bulkrax/importers/index.html.erb +15 -17
  37. data/app/views/bulkrax/importers/show.html.erb +6 -2
  38. data/config/locales/bulkrax.en.yml +1 -0
  39. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  40. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  41. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  42. data/lib/bulkrax/engine.rb +1 -1
  43. data/lib/bulkrax/version.rb +1 -1
  44. data/lib/bulkrax.rb +9 -17
  45. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  46. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  47. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  48. metadata +12 -6
  49. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -11,6 +11,8 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
+ parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
15
+ child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
14
16
  end
15
17
  rescue RSolr::Error::Http, CollectionsCreatedError => e
16
18
  raise e
@@ -22,7 +24,23 @@ module Bulkrax
22
24
  return @item
23
25
  end
24
26
 
25
- def find_or_create_collection_ids
27
+ def parent_jobs
28
+ self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
29
+ next if parent_identifier.blank?
30
+
31
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
32
+ end
33
+ end
34
+
35
+ def child_jobs
36
+ self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
37
+ next if child_identifier.blank?
38
+
39
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
40
+ end
41
+ end
42
+
43
+ def find_collection_ids
26
44
  self.collection_ids
27
45
  end
28
46
 
@@ -57,15 +75,27 @@ module Bulkrax
57
75
  end
58
76
 
59
77
  def add_collections
60
- return if find_or_create_collection_ids.blank?
61
- self.parsed_metadata['collections'] = []
62
- self.parsed_metadata['collections'] += find_or_create_collection_ids.map { |c| { id: c } }
78
+ return if find_collection_ids.blank?
79
+
80
+ ActiveSupport::Deprecation.warn(
81
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
82
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
83
+ )
84
+ self.parsed_metadata['member_of_collections_attributes'] = {}
85
+ find_collection_ids.each_with_index do |c, i|
86
+ self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
87
+ end
63
88
  end
64
89
 
65
90
  def factory
91
+ ActiveSupport::Deprecation.warn(
92
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
93
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
94
+ )
66
95
  @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
67
96
  source_identifier_value: identifier,
68
97
  work_identifier: parser.work_identifier,
98
+ collection_field_mapping: parser.collection_field_mapping,
69
99
  replace_files: replace_files,
70
100
  user: user,
71
101
  klass: factory_class,
@@ -80,7 +110,11 @@ module Bulkrax
80
110
  else
81
111
  Bulkrax.default_work_type
82
112
  end
83
- fc.constantize
113
+
114
+ # return the name of the collection or work
115
+ fc.tr!(' ', '_')
116
+ fc.downcase! if fc.match?(/[-_]/)
117
+ fc.camelcase.constantize
84
118
  rescue NameError
85
119
  nil
86
120
  rescue
@@ -25,10 +25,29 @@ module Bulkrax
25
25
  if collection
26
26
  current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
27
27
  else
28
+ # TODO: differentiate between work and collection counts for exporters
28
29
  current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
29
30
  end
30
31
  current_run.enqueued_records = index + 1
31
32
  current_run.save!
32
33
  end
34
+
35
+ def keys_without_numbers(keys)
36
+ keys.map { |key| key_without_numbers(key) }
37
+ end
38
+
39
+ def key_without_numbers(key)
40
+ key.gsub(/_\d+/, '').sub(/^\d+_/, '')
41
+ end
42
+
43
+ # Is this a file?
44
+ def file?
45
+ parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
46
+ end
47
+
48
+ # Is this a zip file?
49
+ def zip?
50
+ parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
51
+ end
33
52
  end
34
53
  end
@@ -33,13 +33,13 @@ module Bulkrax
33
33
  current_status&.created_at
34
34
  end
35
35
 
36
- def status_info(e = nil)
36
+ def status_info(e = nil, current_run = nil)
37
37
  if e.nil?
38
- self.statuses.create!(status_message: 'Complete', runnable: last_run)
38
+ self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
39
39
  elsif e.is_a?(String)
40
- self.statuses.create!(status_message: e, runnable: last_run)
40
+ self.statuses.create!(status_message: e, runnable: current_run || last_run)
41
41
  else
42
- self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
42
+ self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
43
43
  end
44
44
  end
45
45
 
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class ApplicationParser
5
- attr_accessor :importerexporter
4
+ class ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ attr_accessor :importerexporter, :headers
6
6
  alias importer importerexporter
7
7
  alias exporter importerexporter
8
- delegate :only_updates, :limit, :current_run, :errors,
9
- :seen, :increment_counters, :parser_fields, :user,
10
- :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
11
- :status, :status_info, :status_at,
12
- to: :importerexporter
8
+ delegate :only_updates, :limit, :current_run, :errors, :mapping,
9
+ :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
10
+ :key_without_numbers, :status, :status_info, :status_at,
11
+ :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
12
+ to: :importerexporter
13
13
 
14
14
  def self.parser_fields
15
15
  {}
@@ -25,6 +25,7 @@ module Bulkrax
25
25
 
26
26
  def initialize(importerexporter)
27
27
  @importerexporter = importerexporter
28
+ @headers = []
28
29
  end
29
30
 
30
31
  # @api
@@ -43,20 +44,54 @@ module Bulkrax
43
44
  end
44
45
 
45
46
  def source_identifier
46
- @source_identifier ||= identifier_hash.values.first&.[]("from")&.first&.to_sym || :source_identifier
47
+ @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
47
48
  end
48
49
 
49
50
  def work_identifier
50
- @work_identifier ||= identifier_hash.keys.first&.to_sym || :source
51
+ @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
51
52
  end
52
53
 
53
- def identifier_hash
54
- @identifier_hash ||= importerexporter.mapping.select do |_, h|
55
- h.key?("source_identifier")
56
- end
57
- raise StandardError, "more than one source_identifier declared: #{@identifier_hash.keys.join(', ')}" if @identifier_hash.length > 1
54
+ def related_parents_raw_mapping
55
+ @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
+ end
57
+
58
+ def related_parents_parsed_mapping
59
+ @related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
60
+ end
61
+
62
+ def related_children_raw_mapping
63
+ @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
64
+ end
65
+
66
+ def related_children_parsed_mapping
67
+ @related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
68
+ end
69
+
70
+ def get_field_mapping_hash_for(key)
71
+ return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
72
+
73
+ instance_variable_set(
74
+ "@#{key}_hash",
75
+ importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
76
+ )
77
+ raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
58
78
 
59
- @identifier_hash
79
+ instance_variable_get("@#{key}_hash")
80
+ end
81
+
82
+ def collection_field_mapping
83
+ ActiveSupport::Deprecation.warn(
84
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
85
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
86
+ )
87
+ Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
88
+ end
89
+
90
+ def model_field_mappings
91
+ model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
92
+ model_mappings |= ['model']
93
+
94
+ model_mappings
60
95
  end
61
96
 
62
97
  def perform_method
@@ -91,76 +126,19 @@ module Bulkrax
91
126
  path
92
127
  end
93
128
 
129
+ # Base path for imported and exported files
130
+ def base_path(type = 'import')
131
+ ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
132
+ end
133
+
94
134
  # Path where we'll store the import metadata and files
95
135
  # this is used for uploaded and cloud files
96
136
  def path_for_import
97
- @path_for_import = File.join(Bulkrax.import_path, importerexporter.path_string)
137
+ @path_for_import = File.join(base_path, importerexporter.path_string)
98
138
  FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
99
139
  @path_for_import
100
140
  end
101
141
 
102
- # Optional, only used by certain parsers
103
- # Other parsers should override with a custom or empty method
104
- # Will be skipped unless the #record is a Hash
105
- def create_parent_child_relationships
106
- parents.each do |key, value|
107
- parent = entry_class.where(
108
- identifier: key,
109
- importerexporter_id: importerexporter.id,
110
- importerexporter_type: 'Bulkrax::Importer'
111
- ).first
112
-
113
- # not finding the entries here indicates that the given identifiers are incorrect
114
- # in that case we should log that
115
- children = value.map do |child|
116
- entry_class.where(
117
- identifier: child,
118
- importerexporter_id: importerexporter.id,
119
- importerexporter_type: 'Bulkrax::Importer'
120
- ).first
121
- end.compact.uniq
122
-
123
- if parent.present? && (children.length != value.length)
124
- # Increment the failures for the number we couldn't find
125
- # Because all of our entries have been created by now, if we can't find them, the data is wrong
126
- Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
127
- break if children.empty?
128
- Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
129
- end
130
- parent_id = parent.id
131
- child_entry_ids = children.map(&:id)
132
- ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
133
- end
134
- rescue StandardError => e
135
- status_info(e)
136
- end
137
-
138
- def parents
139
- @parents ||= setup_parents
140
- end
141
-
142
- def setup_parents
143
- pts = []
144
- records.each do |record|
145
- r = if record.respond_to?(:to_h)
146
- record.to_h
147
- else
148
- record
149
- end
150
- next unless r.is_a?(Hash)
151
- children = if r[:children].is_a?(String)
152
- r[:children].split(/\s*[:;|]\s*/)
153
- else
154
- r[:children]
155
- end
156
- next if children.blank?
157
- pts << {
158
- r[source_identifier] => children
159
- }
160
- end
161
- pts.blank? ? pts : pts.inject(:merge)
162
- end
163
-
164
142
  def setup_export_file
165
143
  raise StandardError, 'must be defined' if exporter?
166
144
  end
@@ -288,12 +266,9 @@ module Bulkrax
288
266
  private
289
267
 
290
268
  def real_import_file_path
291
- if file? && zip?
292
- unzip(parser_fields['import_file_path'])
293
- return importer_unzip_path
294
- else
295
- parser_fields['import_file_path']
296
- end
269
+ return importer_unzip_path if file? && zip?
270
+
271
+ parser_fields['import_file_path']
297
272
  end
298
273
  end
299
274
  end
@@ -40,7 +40,7 @@ module Bulkrax
40
40
  raise StandardError, 'No metadata files were found' if path.blank?
41
41
  data = entry_class.read_data(path)
42
42
  data = entry_class.data_for_entry(data, source_identifier)
43
- data[:file] = bag.bag_files.join('|')
43
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
44
44
  data
45
45
  end
46
46
  end
@@ -58,7 +58,7 @@ module Bulkrax
58
58
  collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
59
59
  }
60
60
  new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
61
- ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
61
+ ImportCollectionJob.perform_now(new_entry.id, current_run.id)
62
62
  increment_counters(index, true)
63
63
  end
64
64
  end
@@ -83,13 +83,22 @@ module Bulkrax
83
83
  end
84
84
 
85
85
  def collections
86
- records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
86
+ ActiveSupport::Deprecation.warn(
87
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
88
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
89
+ )
90
+ records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
87
91
  end
88
92
 
89
93
  def collections_total
90
94
  collections.size
91
95
  end
92
96
 
97
+ # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
98
+ def works_total
99
+ total
100
+ end
101
+
93
102
  def total
94
103
  metadata_paths.count
95
104
  end