bulkrax 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/app/controllers/bulkrax/exporters_controller.rb +12 -4
  4. data/app/controllers/bulkrax/importers_controller.rb +22 -17
  5. data/app/factories/bulkrax/object_factory.rb +44 -61
  6. data/app/jobs/bulkrax/create_relationships_job.rb +187 -0
  7. data/app/jobs/bulkrax/delete_work_job.rb +6 -2
  8. data/app/jobs/bulkrax/export_work_job.rb +3 -1
  9. data/app/jobs/bulkrax/exporter_job.rb +1 -0
  10. data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +2 -2
  11. data/app/jobs/bulkrax/importer_job.rb +16 -1
  12. data/app/matchers/bulkrax/application_matcher.rb +9 -6
  13. data/app/models/bulkrax/csv_collection_entry.rb +8 -6
  14. data/app/models/bulkrax/csv_entry.rb +139 -45
  15. data/app/models/bulkrax/entry.rb +19 -8
  16. data/app/models/bulkrax/exporter.rb +12 -5
  17. data/app/models/bulkrax/importer.rb +22 -5
  18. data/app/models/bulkrax/oai_entry.rb +5 -1
  19. data/app/models/bulkrax/rdf_entry.rb +16 -7
  20. data/app/models/bulkrax/xml_entry.rb +4 -0
  21. data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
  22. data/app/models/concerns/bulkrax/file_factory.rb +2 -1
  23. data/app/models/concerns/bulkrax/has_matchers.rb +59 -16
  24. data/app/models/concerns/bulkrax/import_behavior.rb +35 -5
  25. data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +19 -0
  26. data/app/models/concerns/bulkrax/status_info.rb +4 -4
  27. data/app/parsers/bulkrax/application_parser.rb +59 -84
  28. data/app/parsers/bulkrax/bagit_parser.rb +12 -3
  29. data/app/parsers/bulkrax/csv_parser.rb +117 -62
  30. data/app/parsers/bulkrax/oai_dc_parser.rb +5 -2
  31. data/app/parsers/bulkrax/xml_parser.rb +5 -0
  32. data/app/views/bulkrax/exporters/_form.html.erb +1 -1
  33. data/app/views/bulkrax/exporters/show.html.erb +13 -1
  34. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +45 -14
  35. data/app/views/bulkrax/importers/edit.html.erb +2 -0
  36. data/app/views/bulkrax/importers/index.html.erb +15 -17
  37. data/app/views/bulkrax/importers/show.html.erb +6 -2
  38. data/config/locales/bulkrax.en.yml +1 -0
  39. data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
  40. data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
  41. data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
  42. data/lib/bulkrax/engine.rb +1 -1
  43. data/lib/bulkrax/version.rb +1 -1
  44. data/lib/bulkrax.rb +9 -17
  45. data/lib/generators/bulkrax/templates/bin/importer +17 -11
  46. data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
  47. data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
  48. metadata +13 -7
  49. data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
@@ -12,6 +12,8 @@ module Bulkrax
12
12
  raise CollectionsCreatedError unless collections_created?
13
13
  @item = factory.run!
14
14
  end
15
+ parent_jobs if self.parsed_metadata[related_parents_parsed_mapping].present?
16
+ child_jobs if self.parsed_metadata[related_children_parsed_mapping].present?
15
17
  rescue RSolr::Error::Http, CollectionsCreatedError => e
16
18
  raise e
17
19
  rescue StandardError => e
@@ -22,7 +24,19 @@ module Bulkrax
22
24
  return @item
23
25
  end
24
26
 
25
- def find_or_create_collection_ids
27
+ def parent_jobs
28
+ self.parsed_metadata[related_parents_parsed_mapping].each do |parent_identifier|
29
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, parent_identifier: parent_identifier, importer_run: self.last_run)
30
+ end
31
+ end
32
+
33
+ def child_jobs
34
+ self.parsed_metadata[related_children_parsed_mapping].each do |child_identifier|
35
+ CreateRelationshipsJob.perform_later(entry_identifier: self.identifier, child_identifier: child_identifier, importer_run: self.last_run)
36
+ end
37
+ end
38
+
39
+ def find_collection_ids
26
40
  self.collection_ids
27
41
  end
28
42
 
@@ -57,15 +71,27 @@ module Bulkrax
57
71
  end
58
72
 
59
73
  def add_collections
60
- return if find_or_create_collection_ids.blank?
61
- self.parsed_metadata['collections'] = []
62
- self.parsed_metadata['collections'] += find_or_create_collection_ids.map { |c| { id: c } }
74
+ return if find_collection_ids.blank?
75
+
76
+ ActiveSupport::Deprecation.warn(
77
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
78
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
79
+ )
80
+ self.parsed_metadata['member_of_collections_attributes'] = {}
81
+ find_collection_ids.each_with_index do |c, i|
82
+ self.parsed_metadata['member_of_collections_attributes'][i.to_s] = { id: c }
83
+ end
63
84
  end
64
85
 
65
86
  def factory
87
+ ActiveSupport::Deprecation.warn(
88
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
89
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
90
+ )
66
91
  @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
67
92
  source_identifier_value: identifier,
68
93
  work_identifier: parser.work_identifier,
94
+ collection_field_mapping: parser.collection_field_mapping,
69
95
  replace_files: replace_files,
70
96
  user: user,
71
97
  klass: factory_class,
@@ -80,7 +106,11 @@ module Bulkrax
80
106
  else
81
107
  Bulkrax.default_work_type
82
108
  end
83
- fc.constantize
109
+
110
+ # return the name of the collection or work
111
+ fc.tr!(' ', '_')
112
+ fc.downcase! if fc.match?(/[-_]/)
113
+ fc.camelcase.constantize
84
114
  rescue NameError
85
115
  nil
86
116
  rescue
@@ -25,10 +25,29 @@ module Bulkrax
25
25
  if collection
26
26
  current_run.total_collection_entries = index + 1 unless parser.collections_total.positive?
27
27
  else
28
+ # TODO: differentiate between work and collection counts for exporters
28
29
  current_run.total_work_entries = index + 1 unless limit.to_i.positive? || parser.total.positive?
29
30
  end
30
31
  current_run.enqueued_records = index + 1
31
32
  current_run.save!
32
33
  end
34
+
35
+ def keys_without_numbers(keys)
36
+ keys.map { |key| key_without_numbers(key) }
37
+ end
38
+
39
+ def key_without_numbers(key)
40
+ key.gsub(/_\d+/, '').sub(/^\d+_/, '')
41
+ end
42
+
43
+ # Is this a file?
44
+ def file?
45
+ parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
46
+ end
47
+
48
+ # Is this a zip file?
49
+ def zip?
50
+ parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip')
51
+ end
33
52
  end
34
53
  end
@@ -33,13 +33,13 @@ module Bulkrax
33
33
  current_status&.created_at
34
34
  end
35
35
 
36
- def status_info(e = nil)
36
+ def status_info(e = nil, current_run = nil)
37
37
  if e.nil?
38
- self.statuses.create!(status_message: 'Complete', runnable: last_run)
38
+ self.statuses.create!(status_message: 'Complete', runnable: current_run || last_run)
39
39
  elsif e.is_a?(String)
40
- self.statuses.create!(status_message: e, runnable: last_run)
40
+ self.statuses.create!(status_message: e, runnable: current_run || last_run)
41
41
  else
42
- self.statuses.create!(status_message: 'Failed', runnable: last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
42
+ self.statuses.create!(status_message: 'Failed', runnable: current_run || last_run, error_class: e.class.to_s, error_message: e.message, error_backtrace: e.backtrace)
43
43
  end
44
44
  end
45
45
 
@@ -1,15 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- class ApplicationParser
5
- attr_accessor :importerexporter
4
+ class ApplicationParser # rubocop:disable Metrics/ClassLength
5
+ attr_accessor :importerexporter, :headers
6
6
  alias importer importerexporter
7
7
  alias exporter importerexporter
8
- delegate :only_updates, :limit, :current_run, :errors,
9
- :seen, :increment_counters, :parser_fields, :user,
10
- :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
11
- :status, :status_info, :status_at,
12
- to: :importerexporter
8
+ delegate :only_updates, :limit, :current_run, :errors, :mapping,
9
+ :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers,
10
+ :key_without_numbers, :status, :status_info, :status_at,
11
+ :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
12
+ to: :importerexporter
13
13
 
14
14
  def self.parser_fields
15
15
  {}
@@ -25,6 +25,7 @@ module Bulkrax
25
25
 
26
26
  def initialize(importerexporter)
27
27
  @importerexporter = importerexporter
28
+ @headers = []
28
29
  end
29
30
 
30
31
  # @api
@@ -43,20 +44,54 @@ module Bulkrax
43
44
  end
44
45
 
45
46
  def source_identifier
46
- @source_identifier ||= identifier_hash.values.first&.[]("from")&.first&.to_sym || :source_identifier
47
+ @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
47
48
  end
48
49
 
49
50
  def work_identifier
50
- @work_identifier ||= identifier_hash.keys.first&.to_sym || :source
51
+ @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
51
52
  end
52
53
 
53
- def identifier_hash
54
- @identifier_hash ||= importerexporter.mapping.select do |_, h|
55
- h.key?("source_identifier")
56
- end
57
- raise StandardError, "more than one source_identifier declared: #{@identifier_hash.keys.join(', ')}" if @identifier_hash.length > 1
54
+ def related_parents_raw_mapping
55
+ @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
56
+ end
57
+
58
+ def related_parents_parsed_mapping
59
+ @related_parents_parsed_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first
60
+ end
61
+
62
+ def related_children_raw_mapping
63
+ @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
64
+ end
65
+
66
+ def related_children_parsed_mapping
67
+ @related_children_parsed_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first
68
+ end
69
+
70
+ def get_field_mapping_hash_for(key)
71
+ return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
72
+
73
+ instance_variable_set(
74
+ "@#{key}_hash",
75
+ importerexporter.mapping.with_indifferent_access.select { |_, h| h.key?(key) }
76
+ )
77
+ raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1
58
78
 
59
- @identifier_hash
79
+ instance_variable_get("@#{key}_hash")
80
+ end
81
+
82
+ def collection_field_mapping
83
+ ActiveSupport::Deprecation.warn(
84
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
85
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
86
+ )
87
+ Bulkrax.collection_field_mapping[self.entry_class.to_s]&.to_sym || :collection
88
+ end
89
+
90
+ def model_field_mappings
91
+ model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
92
+ model_mappings |= ['model']
93
+
94
+ model_mappings
60
95
  end
61
96
 
62
97
  def perform_method
@@ -91,76 +126,19 @@ module Bulkrax
91
126
  path
92
127
  end
93
128
 
129
+ # Base path for imported and exported files
130
+ def base_path(type = 'import')
131
+ ENV['HYKU_MULTITENANT'] ? File.join(Bulkrax.send("#{type}_path"), Site.instance.account.name) : Bulkrax.send("#{type}_path")
132
+ end
133
+
94
134
  # Path where we'll store the import metadata and files
95
135
  # this is used for uploaded and cloud files
96
136
  def path_for_import
97
- @path_for_import = File.join(Bulkrax.import_path, importerexporter.path_string)
137
+ @path_for_import = File.join(base_path, importerexporter.path_string)
98
138
  FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
99
139
  @path_for_import
100
140
  end
101
141
 
102
- # Optional, only used by certain parsers
103
- # Other parsers should override with a custom or empty method
104
- # Will be skipped unless the #record is a Hash
105
- def create_parent_child_relationships
106
- parents.each do |key, value|
107
- parent = entry_class.where(
108
- identifier: key,
109
- importerexporter_id: importerexporter.id,
110
- importerexporter_type: 'Bulkrax::Importer'
111
- ).first
112
-
113
- # not finding the entries here indicates that the given identifiers are incorrect
114
- # in that case we should log that
115
- children = value.map do |child|
116
- entry_class.where(
117
- identifier: child,
118
- importerexporter_id: importerexporter.id,
119
- importerexporter_type: 'Bulkrax::Importer'
120
- ).first
121
- end.compact.uniq
122
-
123
- if parent.present? && (children.length != value.length)
124
- # Increment the failures for the number we couldn't find
125
- # Because all of our entries have been created by now, if we can't find them, the data is wrong
126
- Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
127
- break if children.empty?
128
- Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
129
- end
130
- parent_id = parent.id
131
- child_entry_ids = children.map(&:id)
132
- ChildRelationshipsJob.perform_later(parent_id, child_entry_ids, current_run.id)
133
- end
134
- rescue StandardError => e
135
- status_info(e)
136
- end
137
-
138
- def parents
139
- @parents ||= setup_parents
140
- end
141
-
142
- def setup_parents
143
- pts = []
144
- records.each do |record|
145
- r = if record.respond_to?(:to_h)
146
- record.to_h
147
- else
148
- record
149
- end
150
- next unless r.is_a?(Hash)
151
- children = if r[:children].is_a?(String)
152
- r[:children].split(/\s*[:;|]\s*/)
153
- else
154
- r[:children]
155
- end
156
- next if children.blank?
157
- pts << {
158
- r[source_identifier] => children
159
- }
160
- end
161
- pts.blank? ? pts : pts.inject(:merge)
162
- end
163
-
164
142
  def setup_export_file
165
143
  raise StandardError, 'must be defined' if exporter?
166
144
  end
@@ -288,12 +266,9 @@ module Bulkrax
288
266
  private
289
267
 
290
268
  def real_import_file_path
291
- if file? && zip?
292
- unzip(parser_fields['import_file_path'])
293
- return importer_unzip_path
294
- else
295
- parser_fields['import_file_path']
296
- end
269
+ return importer_unzip_path if file? && zip?
270
+
271
+ parser_fields['import_file_path']
297
272
  end
298
273
  end
299
274
  end
@@ -40,7 +40,7 @@ module Bulkrax
40
40
  raise StandardError, 'No metadata files were found' if path.blank?
41
41
  data = entry_class.read_data(path)
42
42
  data = entry_class.data_for_entry(data, source_identifier)
43
- data[:file] = bag.bag_files.join('|')
43
+ data[:file] = bag.bag_files.join('|') unless importerexporter.metadata_only?
44
44
  data
45
45
  end
46
46
  end
@@ -58,7 +58,7 @@ module Bulkrax
58
58
  collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
59
59
  }
60
60
  new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
61
- ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
61
+ ImportCollectionJob.perform_now(new_entry.id, current_run.id)
62
62
  increment_counters(index, true)
63
63
  end
64
64
  end
@@ -83,13 +83,22 @@ module Bulkrax
83
83
  end
84
84
 
85
85
  def collections
86
- records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
86
+ ActiveSupport::Deprecation.warn(
87
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
88
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
89
+ )
90
+ records.map { |r| r[collection_field_mapping].split(/\s*[;|]\s*/) if r[collection_field_mapping].present? }.flatten.compact.uniq
87
91
  end
88
92
 
89
93
  def collections_total
90
94
  collections.size
91
95
  end
92
96
 
97
+ # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
98
+ def works_total
99
+ total
100
+ end
101
+
93
102
  def total
94
103
  metadata_paths.count
95
104
  end
@@ -2,31 +2,47 @@
2
2
 
3
3
  require 'csv'
4
4
  module Bulkrax
5
- class CsvParser < ApplicationParser
5
+ class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
6
6
  include ErroredEntries
7
7
  def self.export_supported?
8
8
  true
9
9
  end
10
10
 
11
- def initialize(importerexporter)
12
- @importerexporter = importerexporter
11
+ def records(_opts = {})
12
+ file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
13
+ # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
14
+ csv_data = entry_class.read_data(file_for_import)
15
+ importer.parser_fields['total'] = csv_data.count
16
+ importer.save
17
+ @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) }
13
18
  end
14
19
 
15
20
  def collections
16
- # does the CSV contain a collection column?
17
- return [] unless import_fields.include?(:collection)
21
+ ActiveSupport::Deprecation.warn(
22
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
23
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
24
+ )
18
25
  # retrieve a list of unique collections
19
- records.map { |r| r[:collection].split(/\s*[;|]\s*/) if r[:collection].present? }.flatten.compact.uniq
26
+ records.map do |r|
27
+ collections = []
28
+ r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title } } if r[collection_field_mapping].present?
29
+ model_field_mappings.each do |model_mapping|
30
+ collections << r if r[model_mapping.to_sym]&.downcase == 'collection'
31
+ end
32
+ collections
33
+ end.flatten.compact.uniq
20
34
  end
21
35
 
22
36
  def collections_total
23
37
  collections.size
24
38
  end
25
39
 
26
- def records(_opts = {})
27
- file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
28
- # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
29
- @records ||= entry_class.read_data(file_for_import).map { |record_data| entry_class.data_for_entry(record_data, nil) }
40
+ def works
41
+ records - collections
42
+ end
43
+
44
+ def works_total
45
+ works.size
30
46
  end
31
47
 
32
48
  # We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
@@ -44,8 +60,9 @@ module Bulkrax
44
60
  end
45
61
 
46
62
  def valid_import?
47
- error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_fields).join(', ')}"
48
- raise StandardError, error_alert unless required_elements?(import_fields)
63
+ import_strings = keys_without_numbers(import_fields.map(&:to_s))
64
+ error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(import_strings).join(', ')}"
65
+ raise StandardError, error_alert unless required_elements?(import_strings)
49
66
 
50
67
  file_paths.is_a?(Array)
51
68
  rescue StandardError => e
@@ -56,26 +73,26 @@ module Bulkrax
56
73
  def create_collections
57
74
  collections.each_with_index do |collection, index|
58
75
  next if collection.blank?
59
- metadata = {
60
- title: [collection],
61
- work_identifier => [collection],
62
- visibility: 'open',
63
- collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
64
- }
65
- new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
66
- ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
76
+ break if records.find_index(collection).present? && limit_reached?(limit, records.find_index(collection))
77
+
78
+ new_entry = find_or_create_entry(collection_entry_class, unique_collection_identifier(collection), 'Bulkrax::Importer', collection.to_h)
79
+ # TODO: add support for :delete option
80
+ ImportCollectionJob.perform_now(new_entry.id, current_run.id)
67
81
  increment_counters(index, true)
68
82
  end
83
+ importer.record_status
84
+ rescue StandardError => e
85
+ status_info(e)
69
86
  end
70
87
 
71
88
  def create_works
72
- records.each_with_index do |record, index|
73
- next unless record_has_source_identifier(record, index)
74
- break if limit_reached?(limit, index)
89
+ works.each_with_index do |work, index|
90
+ next unless record_has_source_identifier(work, records.find_index(work))
91
+ break if limit_reached?(limit, records.find_index(work))
75
92
 
76
- seen[record[source_identifier]] = true
77
- new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record.to_h.compact)
78
- if record[:delete].present?
93
+ seen[work[source_identifier]] = true
94
+ new_entry = find_or_create_entry(entry_class, work[source_identifier], 'Bulkrax::Importer', work.to_h)
95
+ if work[:delete].present?
79
96
  DeleteWorkJob.send(perform_method, new_entry, current_run)
80
97
  else
81
98
  ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
@@ -99,10 +116,6 @@ module Bulkrax
99
116
  path
100
117
  end
101
118
 
102
- def create_parent_child_relationships
103
- super
104
- end
105
-
106
119
  def extra_filters
107
120
  output = ""
108
121
  if importerexporter.start_date.present?
@@ -117,6 +130,8 @@ module Bulkrax
117
130
 
118
131
  def current_work_ids
119
132
  case importerexporter.export_from
133
+ when 'all'
134
+ ActiveFedora::SolrService.query("has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')}) #{extra_filters}", rows: 2_147_483_647).map(&:id)
120
135
  when 'collection'
121
136
  ActiveFedora::SolrService.query("member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters}", rows: 2_000_000_000).map(&:id)
122
137
  when 'worktype'
@@ -126,9 +141,16 @@ module Bulkrax
126
141
  complete_statuses = Bulkrax::Status.latest_by_statusable
127
142
  .includes(:statusable)
128
143
  .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
129
- complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier }
130
144
 
131
- ActiveFedora::SolrService.query("#{work_identifier}_tesim:(#{complete_entry_identifiers.join(' OR ')})#{extra_filters}", rows: 2_000_000_000).map(&:id)
145
+ complete_entry_identifiers = complete_statuses.map { |s| s.statusable&.identifier&.gsub(':', '\:') }
146
+ extra_filters = extra_filters.presence || '*:*'
147
+
148
+ ActiveFedora::SolrService.get(
149
+ extra_filters.to_s,
150
+ fq: "#{work_identifier}_sim:(#{complete_entry_identifiers.join(' OR ')})",
151
+ fl: 'id',
152
+ rows: 2_000_000_000
153
+ )['response']['docs'].map { |obj| obj['id'] }
132
154
  end
133
155
  end
134
156
 
@@ -136,12 +158,18 @@ module Bulkrax
136
158
  current_work_ids.each_with_index do |wid, index|
137
159
  break if limit_reached?(limit, index)
138
160
  new_entry = find_or_create_entry(entry_class, wid, 'Bulkrax::Exporter')
139
- Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id)
161
+ begin
162
+ entry = Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id)
163
+ rescue => e
164
+ Rails.logger.info("#{e.message} was detected during export")
165
+ end
166
+ self.headers |= entry.parsed_metadata.keys if entry
140
167
  end
141
168
  end
142
169
  alias create_from_collection create_new_entries
143
170
  alias create_from_importer create_new_entries
144
171
  alias create_from_worktype create_new_entries
172
+ alias create_from_all create_new_entries
145
173
 
146
174
  def entry_class
147
175
  CsvEntry
@@ -154,19 +182,11 @@ module Bulkrax
154
182
  # See https://stackoverflow.com/questions/2650517/count-the-number-of-lines-in-a-file-without-reading-entire-file-into-memory
155
183
  # Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
156
184
  def total
157
- if importer?
158
- return @total if @total&.positive?
159
- # windows enocded
160
- @total = `grep -c ^M #{real_import_file_path}`.to_i - 1
161
- # unix encoded
162
- @total = `grep -vc ^$ #{real_import_file_path}`.to_i - 1 if @total < 1
163
- elsif exporter?
164
- @total = importerexporter.entries.count
165
- else
166
- @total = 0
167
- end
168
- return @total
169
- rescue StandardErrorr
185
+ @total = importer.parser_fields['total'] || 0 if importer?
186
+ @total = importerexporter.entries.count if exporter?
187
+
188
+ return @total || 0
189
+ rescue StandardError
170
190
  @total = 0
171
191
  end
172
192
 
@@ -201,31 +221,58 @@ module Bulkrax
201
221
  end
202
222
  end
203
223
 
204
- def key_allowed(key)
205
- !Bulkrax.reserved_properties.include?(key) &&
206
- new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
224
+ def export_key_allowed(key)
225
+ new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
207
226
  key != source_identifier.to_s
208
227
  end
209
228
 
210
229
  # All possible column names
211
230
  def export_headers
212
- headers = ['id']
213
- headers << source_identifier.to_s
214
- headers << 'model'
215
- importerexporter.mapping.each_key { |key| headers << key if key_allowed(key) }
216
- headers << 'file'
231
+ headers = sort_headers(self.headers)
232
+
233
+ # we don't want access_control_id exported and we want file at the end
234
+ headers.delete('access_control_id') if headers.include?('access_control_id')
235
+
236
+ # add the headers below at the beginning or end to maintain the preexisting export behavior
237
+ headers.prepend('model')
238
+ headers.prepend(source_identifier.to_s)
239
+ headers.prepend('id')
240
+
217
241
  headers.uniq
218
242
  end
219
243
 
244
+ def object_names
245
+ return @object_names if @object_names
246
+
247
+ @object_names = mapping.values.map { |value| value['object'] }
248
+ @object_names.uniq!.delete(nil)
249
+
250
+ @object_names
251
+ end
252
+
253
+ def sort_headers(headers)
254
+ # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
255
+ # while keeping objects grouped together
256
+ headers.sort_by do |item|
257
+ number = item.match(/\d+/)&.[](0) || 0.to_s
258
+ sort_number = number.rjust(4, "0")
259
+ object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
260
+ remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
261
+ "#{object_prefix}_#{sort_number}_#{remainder}"
262
+ end
263
+ end
264
+
220
265
  # in the parser as it is specific to the format
221
266
  def setup_export_file
222
- File.join(importerexporter.exporter_export_path, 'export.csv')
267
+ File.join(importerexporter.exporter_export_path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}.csv")
223
268
  end
224
269
 
225
270
  # Retrieve file paths for [:file] mapping in records
226
271
  # and check all listed files exist.
227
272
  def file_paths
228
273
  raise StandardError, 'No records were found' if records.blank?
274
+ return [] if importerexporter.metadata_only?
275
+
229
276
  @file_paths ||= records.map do |r|
230
277
  file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
231
278
  next if r[file_mapping].blank?
@@ -244,23 +291,31 @@ module Bulkrax
244
291
  # Retrieve the path where we expect to find the files
245
292
  def path_to_files
246
293
  @path_to_files ||= File.join(
247
- File.file?(import_file_path) ? File.dirname(import_file_path) : import_file_path,
294
+ zip? ? importer_unzip_path : File.dirname(import_file_path),
248
295
  'files'
249
296
  )
250
297
  end
251
298
 
252
299
  private
253
300
 
301
+ def unique_collection_identifier(collection_hash)
302
+ entry_uid = collection_hash[source_identifier]
303
+ entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
304
+ Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
305
+ else
306
+ collection_hash[:title].split(/\s*[;|]\s*/).first
307
+ end
308
+
309
+ entry_uid
310
+ end
311
+
254
312
  # Override to return the first CSV in the path, if a zip file is supplied
255
313
  # We expect a single CSV at the top level of the zip in the CSVParser
256
314
  # but we are willing to go look for it if need be
257
315
  def real_import_file_path
258
- if file? && zip?
259
- unzip(parser_fields['import_file_path'])
260
- return Dir["#{importer_unzip_path}/**/*.csv"].first
261
- else
262
- parser_fields['import_file_path']
263
- end
316
+ return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?
317
+
318
+ parser_fields['import_file_path']
264
319
  end
265
320
  end
266
321
  end
@@ -75,7 +75,7 @@ module Bulkrax
75
75
 
76
76
  new_entry = collection_entry_class.where(importerexporter: importerexporter, identifier: unique_collection_identifier, raw_metadata: metadata).first_or_create!
77
77
  # perform now to ensure this gets created before work imports start
78
- ImportWorkCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
78
+ ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
79
79
  increment_counters(index, true)
80
80
  end
81
81
  end
@@ -119,7 +119,10 @@ module Bulkrax
119
119
  end
120
120
  end
121
121
 
122
- def create_parent_child_relationships; end
122
+ # TODO: change to differentiate between collection and work records when adding ability to import collection metadata
123
+ def works_total
124
+ total
125
+ end
123
126
 
124
127
  def total
125
128
  @total ||= records(quick: true).doc.find(".//resumptionToken").to_a.first.attributes["completeListSize"].to_i