bulkrax 1.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +22 -17
  5. data/app/factories/bulkrax/object_factory.rb +44 -61
  6. data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
  11. data/app/jobs/bulkrax/importer_job.rb +16 -1
  12. data/app/matchers/bulkrax/application_matcher.rb +9 -6
  13. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  14. data/app/models/bulkrax/csv_entry.rb +139 -45
  15. data/app/models/bulkrax/entry.rb +19 -8
  16. data/app/models/bulkrax/exporter.rb +12 -5
  17. data/app/models/bulkrax/importer.rb +22 -5
  18. data/app/models/bulkrax/oai_entry.rb +5 -1
  19. data/app/models/bulkrax/rdf_entry.rb +16 -7
  20. data/app/models/bulkrax/xml_entry.rb +4 -0
  21. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  22. data/app/models/concerns/bulkrax/file_factory.rb +2 -1
  23. data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
  24. data/app/models/concerns/bulkrax/import_behavior.rb +39 -5
  25. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
  26. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  27. data/app/parsers/bulkrax/application_parser.rb +59 -84
  28. data/app/parsers/bulkrax/bagit_parser.rb +12 -3
  29. data/app/parsers/bulkrax/csv_parser.rb +137 -63
  30. data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
  31. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  32. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  33. data/app/views/bulkrax/exporters/show.html.erb +13 -1
  34. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
  35. data/app/views/bulkrax/importers/edit.html.erb +2 -0
  36. data/app/views/bulkrax/importers/index.html.erb +15 -17
  37. data/app/views/bulkrax/importers/show.html.erb +6 -2
  38. data/config/locales/bulkrax.en.yml +1 -0
  39. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  40. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  41. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  42. data/lib/bulkrax/engine.rb +1 -1
  43. data/lib/bulkrax/version.rb +1 -1
  44. data/lib/bulkrax.rb +9 -17
  45. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  46. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  47. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  48. metadata +12 -6
  49. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -11,6 +11,8 @@ module Bulkrax
11
11
  unless self.importerexporter.validate_only
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
+ parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
15
+ child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
14
16
  end
15
17
  rescue RSolr::Error::Http, CollectionsCreatedError => e
16
18
  raise e
@@ -22,7 +24,23 @@ module Bulkrax
22
24
  return @item
23
25
  end
24
26
 
25
- def find_or_create_collection_ids
27
+ def parent_jobs
28
+ self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
29
+ next if parent_identifier.blank?
30
+
31
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
32
+ end
33
+ end
34
+
35
+ def child_jobs
36
+ self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
37
+ next if child_identifier.blank?
38
+
39
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
40
+ end
41
+ end
42
+
43
+ def find_collection_ids
26
44
  self.collection_ids
27
45
  end
28
46
 
@@ -57,15 +75,27 @@ module Bulkrax
57
75
  end
58
76
 
59
77
  def add_collections
60
- return if find_or_create_collection_ids.blank?
61
- self.parsed_metadata['collections'] = []
62
- self.parsed_metadata['collections'] += find_or_create_collection_ids.map { |c| { id: c } }
78
+ return if find_collection_ids.blank?
79
+
80
+ ActiveSupport::Deprecation.warn(
81
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
82
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
83
+ )
84
+ self.parsed_metadata['member_of_collections_attributes'] = {}
85
+ find_collection_ids.each_with_index do |c, i|
86
+ self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
87
+ end
63
88
  end
64
89
 
65
90
  def factory
91
+ ActiveSupport::Deprecation.warn(
92
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
93
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
94
+ )
66
95
  @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
67
96
  source_identifier_value: identifier,
68
97
  work_identifier: parser.work_identifier,
98
+ collection_field_mapping: parser.collection_field_mapping,
69
99
  replace_files: replace_files,
70
100
  user: user,
71
101
  klass: factory_class,
@@ -80,7 +110,11 @@ module Bulkrax
80
110
  else
81
111
  Bulkrax.default_work_type
82
112
  end
83
- fc.constantize
113
+
114
+ # return the name of the collection or work
115
+ fc.tr!(' ', '_')
116
+ fc.downcase! if fc.match?(/[-_]/)
117
+ fc.camelcase.constantize
84
118
  rescue NameError
85
119
  nil
86
120
  rescue
@@ -25,10 +25,29 @@ module Bulkrax
25
25
  if collection
26
26
  current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
27
27
  else
28
+ # TODO: differentiate between work and collection counts for exporters
28
29
  current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
29
30
  end
30
31
  current_run.enqueued_records = index + 1
31
32
  current_run.save!
32
33
  end
34
+
35
+ def keys_without_numbers(keys)
36
+ keys.map { |key| key_without_numbers(key) }
37
+ end
38
+
39
+ def key_without_numbers(key)
40
+ key.gsub(/_\d+/, '').sub(/^\d+_/, '')
41
+ end
42
+
43
+ # Is this a file?
44
+ def file?
45
+ parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
46
+ end
47
+
48
+ # Is this a zip file?
49
+ def zip?
50
+ parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
51
+ end
33
52
  end
34
53
  end
@@ -33,13 +33,13 @@ module Bulkrax
33
33
  current_status&.created_at
34
34
  end
35
35
 
36
- def status_info(e = nil)
36
+ def status_info(e = nil, current_run = nil)
37
37
  if e.nil?
38
- self.statuses.create!(status_message: 'Complete', runnable: last_run)
38
+ self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
39
39
  elsif e.is_a?(String)
40
- self.statuses.create!(status_message: e, runnable: last_run)
40
+ self.statuses.create!(status_message: e, runnable: current_run || last_run)
41
41
  else
42
- self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
42
+ self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
43
43
  end
44
44
  end
45
45
 
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class ApplicationParser
5
- attr_accessor :importerexporter
4
+ class ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ attr_accessor :importerexporter, :headers
6
6
  alias importer importerexporter
7
7
  alias exporter importerexporter
8
- delegate :only_updates, :limit, :current_run, :errors,
9
- :seen, :increment_counters, :parser_fields, :user,
10
- :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
11
- :status, :status_info, :status_at,
12
- to: :importerexporter
8
+ delegate :only_updates, :limit, :current_run, :errors, :mapping,
9
+ :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
10
+ :key_without_numbers, :status, :status_info, :status_at,
11
+ :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
12
+ to: :importerexporter
13
13
 
14
14
  def self.parser_fields
15
15
  {}
@@ -25,6 +25,7 @@ module Bulkrax
25
25
 
26
26
  def initialize(importerexporter)
27
27
  @importerexporter = importerexporter
28
+ @headers = []
28
29
  end
29
30
 
30
31
  # @api
@@ -43,20 +44,54 @@ module Bulkrax
43
44
  end
44
45
 
45
46
  def source_identifier
46
- @source_identifier ||= identifier_hash.values.first&.[]("from")&.first&.to_sym || :source_identifier
47
+ @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
47
48
  end
48
49
 
49
50
  def work_identifier
50
- @work_identifier ||= identifier_hash.keys.first&.to_sym || :source
51
+ @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
51
52
  end
52
53
 
53
- def identifier_hash
54
- @identifier_hash ||= importerexporter.mapping.select do |_, h|
55
- h.key?("source_identifier")
56
- end
57
- raise StandardError, "more than one source_identifier declared: #{@identifier_hash.keys.join(', ')}" if @identifier_hash.length > 1
54
+ def related_parents_raw_mapping
55
+ @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
+ end
57
+
58
+ def related_parents_parsed_mapping
59
+ @related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
60
+ end
61
+
62
+ def related_children_raw_mapping
63
+ @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
64
+ end
65
+
66
+ def related_children_parsed_mapping
67
+ @related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
68
+ end
69
+
70
+ def get_field_mapping_hash_for(key)
71
+ return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
72
+
73
+ instance_variable_set(
74
+ "@#{key}_hash",
75
+ importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
76
+ )
77
+ raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
58
78
 
59
- @identifier_hash
79
+ instance_variable_get("@#{key}_hash")
80
+ end
81
+
82
+ def collection_field_mapping
83
+ ActiveSupport::Deprecation.warn(
84
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
85
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
86
+ )
87
+ Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
88
+ end
89
+
90
+ def model_field_mappings
91
+ model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
92
+ model_mappings |= ['model']
93
+
94
+ model_mappings
60
95
  end
61
96
 
62
97
  def perform_method
@@ -91,76 +126,19 @@ module Bulkrax
91
126
  path
92
127
  end
93
128
 
129
+ # Base path for imported and exported files
130
+ def base_path(type = 'import')
131
+ ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
132
+ end
133
+
94
134
  # Path where we'll store the import metadata and files
95
135
  # this is used for uploaded and cloud files
96
136
  def path_for_import
97
- @path_for_import = File.join(Bulkrax.import_path, importerexporter.path_string)
137
+ @path_for_import = File.join(base_path, importerexporter.path_string)
98
138
  FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
99
139
  @path_for_import
100
140
  end
101
141
 
102
- # Optional, only used by certain parsers
103
- # Other parsers should override with a custom or empty method
104
- # Will be skipped unless the #record is a Hash
105
- def create_parent_child_relationships
106
- parents.each do |key, value|
107
- parent = entry_class.where(
108
- identifier: key,
109
- importerexporter_id: importerexporter.id,
110
- importerexporter_type: 'Bulkrax::Importer'
111
- ).first
112
-
113
- # not finding the entries here indicates that the given identifiers are incorrect
114
- # in that case we should log that
115
- children = value.map do |child|
116
- entry_class.where(
117
- identifier: child,
118
- importerexporter_id: importerexporter.id,
119
- importerexporter_type: 'Bulkrax::Importer'
120
- ).first
121
- end.compact.uniq
122
-
123
- if parent.present? && (children.length != value.length)
124
- # Increment the failures for the number we couldn't find
125
- # Because all of our entries have been created by now, if we can't find them, the data is wrong
126
- Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
127
- break if children.empty?
128
- Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
129
- end
130
- parent_id = parent.id
131
- child_entry_ids = children.map(&:id)
132
- ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
133
- end
134
- rescue StandardError => e
135
- status_info(e)
136
- end
137
-
138
- def parents
139
- @parents ||= setup_parents
140
- end
141
-
142
- def setup_parents
143
- pts = []
144
- records.each do |record|
145
- r = if record.respond_to?(:to_h)
146
- record.to_h
147
- else
148
- record
149
- end
150
- next unless r.is_a?(Hash)
151
- children = if r[:children].is_a?(String)
152
- r[:children].split(/\s*[:;|]\s*/)
153
- else
154
- r[:children]
155
- end
156
- next if children.blank?
157
- pts << {
158
- r[source_identifier] => children
159
- }
160
- end
161
- pts.blank? ? pts : pts.inject(:merge)
162
- end
163
-
164
142
  def setup_export_file
165
143
  raise StandardError, 'must be defined' if exporter?
166
144
  end
@@ -288,12 +266,9 @@ module Bulkrax
288
266
  private
289
267
 
290
268
  def real_import_file_path
291
- if file? && zip?
292
- unzip(parser_fields['import_file_path'])
293
- return importer_unzip_path
294
- else
295
- parser_fields['import_file_path']
296
- end
269
+ return importer_unzip_path if file? && zip?
270
+
271
+ parser_fields['import_file_path']
297
272
  end
298
273
  end
299
274
  end
@@ -40,7 +40,7 @@ module Bulkrax
40
40
  raise StandardError, 'No metadata files were found' if path.blank?
41
41
  data = entry_class.read_data(path)
42
42
  data = entry_class.data_for_entry(data, source_identifier)
43
- data[:file] = bag.bag_files.join('|')
43
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
44
44
  data
45
45
  end
46
46
  end
@@ -58,7 +58,7 @@ module Bulkrax
58
58
  collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
59
59
  }
60
60
  new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
61
- ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
61
+ ImportCollectionJob.perform_now(new_entry.id, current_run.id)
62
62
  increment_counters(index, true)
63
63
  end
64
64
  end
@@ -83,13 +83,22 @@ module Bulkrax
83
83
  end
84
84
 
85
85
  def collections
86
- records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
86
+ ActiveSupport::Deprecation.warn(
87
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
88
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
89
+ )
90
+ records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
87
91
  end
88
92
 
89
93
  def collections_total
90
94
  collections.size
91
95
  end
92
96
 
97
+ # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
98
+ def works_total
99
+ total
100
+ end
101
+
93
102
  def total
94
103
  metadata_paths.count
95
104
  end