bulkrax 1.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +12 -4
- data/app/controllers/bulkrax/importers_controller.rb +23 -17
- data/app/factories/bulkrax/object_factory.rb +84 -63
- data/app/jobs/bulkrax/create_relationships_job.rb +156 -0
- data/app/jobs/bulkrax/delete_work_job.rb +6 -2
- data/app/jobs/bulkrax/export_work_job.rb +3 -1
- data/app/jobs/bulkrax/exporter_job.rb +1 -0
- data/app/jobs/bulkrax/{import_work_collection_job.rb → import_collection_job.rb} +4 -2
- data/app/jobs/bulkrax/import_file_set_job.rb +69 -0
- data/app/jobs/bulkrax/import_work_job.rb +2 -0
- data/app/jobs/bulkrax/importer_job.rb +18 -1
- data/app/matchers/bulkrax/application_matcher.rb +5 -5
- data/app/models/bulkrax/csv_collection_entry.rb +8 -6
- data/app/models/bulkrax/csv_entry.rb +132 -65
- data/app/models/bulkrax/csv_file_set_entry.rb +26 -0
- data/app/models/bulkrax/entry.rb +19 -8
- data/app/models/bulkrax/exporter.rb +12 -5
- data/app/models/bulkrax/importer.rb +24 -5
- data/app/models/bulkrax/oai_entry.rb +5 -1
- data/app/models/bulkrax/rdf_entry.rb +16 -7
- data/app/models/bulkrax/xml_entry.rb +4 -0
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +39 -0
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +44 -13
- data/app/models/concerns/bulkrax/import_behavior.rb +40 -5
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +23 -2
- data/app/models/concerns/bulkrax/status_info.rb +4 -4
- data/app/parsers/bulkrax/application_parser.rb +67 -84
- data/app/parsers/bulkrax/bagit_parser.rb +13 -4
- data/app/parsers/bulkrax/csv_parser.rb +170 -64
- data/app/parsers/bulkrax/oai_dc_parser.rb +6 -3
- data/app/parsers/bulkrax/xml_parser.rb +5 -0
- data/app/views/bulkrax/exporters/_form.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +2 -1
- data/app/views/bulkrax/importers/index.html.erb +17 -17
- data/app/views/bulkrax/importers/show.html.erb +52 -6
- data/config/locales/bulkrax.en.yml +1 -0
- data/db/migrate/20190731114016_change_importer_and_exporter_to_polymorphic.rb +5 -1
- data/db/migrate/20211004170708_change_bulkrax_statuses_error_message_column_type_to_text.rb +5 -0
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +6 -0
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +7 -0
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +5 -0
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +6 -0
- data/lib/bulkrax/engine.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +9 -17
- data/lib/generators/bulkrax/templates/bin/importer +17 -11
- data/lib/generators/bulkrax/templates/config/bulkrax_api.yml +3 -1
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +7 -12
- metadata +22 -10
- data/app/jobs/bulkrax/child_relationships_job.rb +0 -128
| @@ -6,17 +6,34 @@ module Bulkrax | |
| 6 6 |  | 
| 7 7 | 
             
                def perform(importer_id, only_updates_since_last_import = false)
         | 
| 8 8 | 
             
                  importer = Importer.find(importer_id)
         | 
| 9 | 
            +
             | 
| 9 10 | 
             
                  importer.current_run
         | 
| 11 | 
            +
                  unzip_imported_file(importer.parser)
         | 
| 10 12 | 
             
                  import(importer, only_updates_since_last_import)
         | 
| 13 | 
            +
                  update_current_run_counters(importer)
         | 
| 11 14 | 
             
                  schedule(importer) if importer.schedulable?
         | 
| 12 15 | 
             
                end
         | 
| 13 16 |  | 
| 14 17 | 
             
                def import(importer, only_updates_since_last_import)
         | 
| 15 18 | 
             
                  importer.only_updates = only_updates_since_last_import || false
         | 
| 16 19 | 
             
                  return unless importer.valid_import?
         | 
| 20 | 
            +
             | 
| 17 21 | 
             
                  importer.import_collections
         | 
| 18 22 | 
             
                  importer.import_works
         | 
| 19 | 
            -
                  importer. | 
| 23 | 
            +
                  importer.import_file_sets
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                def unzip_imported_file(parser)
         | 
| 27 | 
            +
                  return unless parser.file? && parser.zip?
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  parser.unzip(parser.parser_fields['import_file_path'])
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                def update_current_run_counters(importer)
         | 
| 33 | 
            +
                  importer.current_run.total_work_entries = importer.limit || importer.parser.works_total
         | 
| 34 | 
            +
                  importer.current_run.total_collection_entries = importer.parser.collections_total
         | 
| 35 | 
            +
                  importer.current_run.total_file_set_entries = importer.parser.file_sets_total
         | 
| 36 | 
            +
                  importer.current_run.save!
         | 
| 20 37 | 
             
                end
         | 
| 21 38 |  | 
| 22 39 | 
             
                def schedule(importer)
         | 
| @@ -20,9 +20,9 @@ module Bulkrax | |
| 20 20 | 
             
                    return unless content.send(self.if[0], Regexp.new(self.if[1]))
         | 
| 21 21 | 
             
                  end
         | 
| 22 22 |  | 
| 23 | 
            -
                  @result  | 
| 24 | 
            -
                  @result.strip | 
| 25 | 
            -
                  process_split
         | 
| 23 | 
            +
                  # @result will evaluate to an empty string for nil content values
         | 
| 24 | 
            +
                  @result = content.to_s.gsub(/\s/, ' ').strip # remove any line feeds and tabs
         | 
| 25 | 
            +
                  process_split if @result.present?
         | 
| 26 26 | 
             
                  @result = @result[0] if @result.is_a?(Array) && @result.size == 1
         | 
| 27 27 | 
             
                  process_parse
         | 
| 28 28 | 
             
                  return @result
         | 
| @@ -66,14 +66,14 @@ module Bulkrax | |
| 66 66 | 
             
                end
         | 
| 67 67 |  | 
| 68 68 | 
             
                def parse_subject(src)
         | 
| 69 | 
            -
                  string = src. | 
| 69 | 
            +
                  string = src.strip.downcase
         | 
| 70 70 | 
             
                  return if string.blank?
         | 
| 71 71 |  | 
| 72 72 | 
             
                  string.slice(0, 1).capitalize + string.slice(1..-1)
         | 
| 73 73 | 
             
                end
         | 
| 74 74 |  | 
| 75 75 | 
             
                def parse_types(src)
         | 
| 76 | 
            -
                  src. | 
| 76 | 
            +
                  src.strip.titleize
         | 
| 77 77 | 
             
                end
         | 
| 78 78 |  | 
| 79 79 | 
             
                # Allow for mapping a model field to the work type or collection
         | 
| @@ -6,14 +6,16 @@ module Bulkrax | |
| 6 6 | 
             
                  Collection
         | 
| 7 7 | 
             
                end
         | 
| 8 8 |  | 
| 9 | 
            -
                 | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
                   | 
| 9 | 
            +
                # Use identifier set by CsvParser#unique_collection_identifier, which falls back
         | 
| 10 | 
            +
                # on the Collection's first title if record[source_identifier] is not present
         | 
| 11 | 
            +
                def add_identifier
         | 
| 12 | 
            +
                  self.parsed_metadata[work_identifier] = [self.identifier].flatten
         | 
| 13 13 | 
             
                end
         | 
| 14 14 |  | 
| 15 | 
            -
                def  | 
| 16 | 
            -
                   | 
| 15 | 
            +
                def add_collection_type_gid
         | 
| 16 | 
            +
                  return if self.parsed_metadata['collection_type_gid'].present?
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                  self.parsed_metadata['collection_type_gid'] = ::Hyrax::CollectionType.find_or_create_default_collection_type.gid
         | 
| 17 19 | 
             
                end
         | 
| 18 20 | 
             
              end
         | 
| 19 21 | 
             
            end
         | 
| @@ -14,59 +14,71 @@ module Bulkrax | |
| 14 14 | 
             
                def self.read_data(path)
         | 
| 15 15 | 
             
                  raise StandardError, 'CSV path empty' if path.blank?
         | 
| 16 16 | 
             
                  CSV.read(path,
         | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 17 | 
            +
                    headers: true,
         | 
| 18 | 
            +
                    header_converters: :symbol,
         | 
| 19 | 
            +
                    encoding: 'utf-8')
         | 
| 20 20 | 
             
                end
         | 
| 21 21 |  | 
| 22 22 | 
             
                def self.data_for_entry(data, _source_id)
         | 
| 23 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 24 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 25 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 26 | 
            +
                  )
         | 
| 23 27 | 
             
                  # If a multi-line CSV data is passed, grab the first row
         | 
| 24 28 | 
             
                  data = data.first if data.is_a?(CSV::Table)
         | 
| 25 29 | 
             
                  # model has to be separated so that it doesn't get mistranslated by to_h
         | 
| 26 30 | 
             
                  raw_data = data.to_h
         | 
| 27 | 
            -
                  raw_data[:model] = data[:model]
         | 
| 31 | 
            +
                  raw_data[:model] = data[:model] if data[:model].present?
         | 
| 28 32 | 
             
                  # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
         | 
| 29 33 | 
             
                  raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
         | 
| 30 | 
            -
                  # If the children field mapping is not 'children', add 'children' - the parser needs it
         | 
| 31 | 
            -
                  raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
         | 
| 32 34 | 
             
                  return raw_data
         | 
| 33 35 | 
             
                end
         | 
| 34 36 |  | 
| 35 | 
            -
                def  | 
| 36 | 
            -
                   | 
| 37 | 
            -
             | 
| 37 | 
            +
                def build_metadata
         | 
| 38 | 
            +
                  raise StandardError, 'Record not found' if record.nil?
         | 
| 39 | 
            +
                  raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
         | 
| 38 40 |  | 
| 39 | 
            -
             | 
| 40 | 
            -
                   | 
| 41 | 
            -
             | 
| 41 | 
            +
                  self.parsed_metadata = {}
         | 
| 42 | 
            +
                  add_identifier
         | 
| 43 | 
            +
                  add_visibility
         | 
| 44 | 
            +
                  add_ingested_metadata
         | 
| 45 | 
            +
                  add_metadata_for_model
         | 
| 46 | 
            +
                  add_rights_statement
         | 
| 47 | 
            +
                  add_collections
         | 
| 48 | 
            +
                  add_local
         | 
| 42 49 |  | 
| 43 | 
            -
             | 
| 44 | 
            -
                  keys.map { |key| key_without_numbers(key) }
         | 
| 50 | 
            +
                  self.parsed_metadata
         | 
| 45 51 | 
             
                end
         | 
| 46 52 |  | 
| 47 | 
            -
                def  | 
| 48 | 
            -
                   | 
| 53 | 
            +
                def add_identifier
         | 
| 54 | 
            +
                  self.parsed_metadata[work_identifier] = [record[source_identifier]]
         | 
| 49 55 | 
             
                end
         | 
| 50 56 |  | 
| 51 | 
            -
                def  | 
| 52 | 
            -
                   | 
| 53 | 
            -
             | 
| 57 | 
            +
                def add_metadata_for_model
         | 
| 58 | 
            +
                  if factory_class == Collection
         | 
| 59 | 
            +
                    add_collection_type_gid
         | 
| 60 | 
            +
                  elsif factory_class == FileSet
         | 
| 61 | 
            +
                    add_path_to_file
         | 
| 62 | 
            +
                    validate_presence_of_parent!
         | 
| 63 | 
            +
                  else
         | 
| 64 | 
            +
                    add_file unless importerexporter.metadata_only?
         | 
| 65 | 
            +
                    add_admin_set_id
         | 
| 66 | 
            +
                  end
         | 
| 67 | 
            +
                end
         | 
| 54 68 |  | 
| 55 | 
            -
             | 
| 56 | 
            -
                   | 
| 69 | 
            +
                def add_ingested_metadata
         | 
| 70 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 71 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 72 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 73 | 
            +
                  )
         | 
| 74 | 
            +
                  # we do not want to sort the values in the record before adding the metadata.
         | 
| 75 | 
            +
                  # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
         | 
| 57 76 | 
             
                  record.each do |key, value|
         | 
| 58 | 
            -
                    next if  | 
| 77 | 
            +
                    next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
         | 
| 59 78 |  | 
| 60 79 | 
             
                    index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
         | 
| 61 80 | 
             
                    add_metadata(key_without_numbers(key), value, index)
         | 
| 62 81 | 
             
                  end
         | 
| 63 | 
            -
                  add_file
         | 
| 64 | 
            -
                  add_visibility
         | 
| 65 | 
            -
                  add_rights_statement
         | 
| 66 | 
            -
                  add_admin_set_id
         | 
| 67 | 
            -
                  add_collections
         | 
| 68 | 
            -
                  add_local
         | 
| 69 | 
            -
                  self.parsed_metadata
         | 
| 70 82 | 
             
                end
         | 
| 71 83 |  | 
| 72 84 | 
             
                def add_file
         | 
| @@ -76,7 +88,11 @@ module Bulkrax | |
| 76 88 | 
             
                  elsif record['file'].is_a?(Array)
         | 
| 77 89 | 
             
                    self.parsed_metadata['file'] = record['file']
         | 
| 78 90 | 
             
                  end
         | 
| 79 | 
            -
                  self.parsed_metadata['file'] = self.parsed_metadata['file'].map  | 
| 91 | 
            +
                  self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
         | 
| 92 | 
            +
                    next if f.blank?
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                    path_to_file(f.tr(' ', '_'))
         | 
| 95 | 
            +
                  end.compact
         | 
| 80 96 | 
             
                end
         | 
| 81 97 |  | 
| 82 98 | 
             
                def build_export_metadata
         | 
| @@ -86,10 +102,20 @@ module Bulkrax | |
| 86 102 | 
             
                  self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
         | 
| 87 103 | 
             
                  self.parsed_metadata['model'] = hyrax_record.has_model.first
         | 
| 88 104 | 
             
                  build_mapping_metadata
         | 
| 89 | 
            -
             | 
| 90 | 
            -
                   | 
| 91 | 
            -
             | 
| 105 | 
            +
             | 
| 106 | 
            +
                  # TODO: fix the "send" parameter in the conditional below
         | 
| 107 | 
            +
                  # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
         | 
| 108 | 
            +
                  if mapping['collection']&.[]('join')
         | 
| 109 | 
            +
                    self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
         | 
| 110 | 
            +
                    #   self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
         | 
| 111 | 
            +
                  else
         | 
| 112 | 
            +
                    hyrax_record.member_of_collections.each_with_index do |collection, i|
         | 
| 113 | 
            +
                      self.parsed_metadata["collection_#{i + 1}"] = collection.id
         | 
| 114 | 
            +
                      #     self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
         | 
| 115 | 
            +
                    end
         | 
| 92 116 | 
             
                  end
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                  build_files unless hyrax_record.is_a?(Collection)
         | 
| 93 119 | 
             
                  self.parsed_metadata
         | 
| 94 120 | 
             
                end
         | 
| 95 121 |  | 
| @@ -97,24 +123,51 @@ module Bulkrax | |
| 97 123 | 
             
                  mapping.each do |key, value|
         | 
| 98 124 | 
             
                    next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
         | 
| 99 125 | 
             
                    next if key == "model"
         | 
| 126 | 
            +
                    next if value['excluded']
         | 
| 100 127 |  | 
| 101 128 | 
             
                    object_key = key if value.key?('object')
         | 
| 102 129 | 
             
                    next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
         | 
| 103 130 |  | 
| 104 | 
            -
                    data = object_key.present? ? hyrax_record.send(value['object']) : hyrax_record.send(key.to_s)
         | 
| 105 131 | 
             
                    if object_key.present?
         | 
| 106 | 
            -
                       | 
| 107 | 
            -
             | 
| 132 | 
            +
                      build_object(value)
         | 
| 133 | 
            +
                    else
         | 
| 134 | 
            +
                      build_value(key, value)
         | 
| 135 | 
            +
                    end
         | 
| 136 | 
            +
                  end
         | 
| 137 | 
            +
                end
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                def build_object(value)
         | 
| 140 | 
            +
                  data = hyrax_record.send(value['object'])
         | 
| 141 | 
            +
                  return if data.empty?
         | 
| 142 | 
            +
             | 
| 143 | 
            +
                  data = data.to_a if data.is_a?(ActiveTriples::Relation)
         | 
| 144 | 
            +
                  object_metadata(Array.wrap(data))
         | 
| 145 | 
            +
                end
         | 
| 108 146 |  | 
| 109 | 
            -
             | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 147 | 
            +
                def build_value(key, value)
         | 
| 148 | 
            +
                  data = hyrax_record.send(key.to_s)
         | 
| 149 | 
            +
                  if data.is_a?(ActiveTriples::Relation)
         | 
| 150 | 
            +
                    if value['join']
         | 
| 151 | 
            +
                      self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
         | 
| 112 152 | 
             
                    else
         | 
| 113 | 
            -
                       | 
| 153 | 
            +
                      data.each_with_index do |d, i|
         | 
| 154 | 
            +
                        self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
         | 
| 155 | 
            +
                      end
         | 
| 114 156 | 
             
                    end
         | 
| 157 | 
            +
                  else
         | 
| 158 | 
            +
                    self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
         | 
| 115 159 | 
             
                  end
         | 
| 116 160 | 
             
                end
         | 
| 117 161 |  | 
| 162 | 
            +
                # On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
         | 
| 163 | 
            +
                # metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
         | 
| 164 | 
            +
                def key_for_export(key)
         | 
| 165 | 
            +
                  clean_key = key_without_numbers(key)
         | 
| 166 | 
            +
                  unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
         | 
| 167 | 
            +
                  # Bring the number back if there is one
         | 
| 168 | 
            +
                  "#{unnumbered_key}#{key.sub(clean_key, '')}"
         | 
| 169 | 
            +
                end
         | 
| 170 | 
            +
             | 
| 118 171 | 
             
                def prepare_export_data(datum)
         | 
| 119 172 | 
             
                  if datum.is_a?(ActiveTriples::Resource)
         | 
| 120 173 | 
             
                    datum.to_uri.to_s
         | 
| @@ -123,30 +176,34 @@ module Bulkrax | |
| 123 176 | 
             
                  end
         | 
| 124 177 | 
             
                end
         | 
| 125 178 |  | 
| 126 | 
            -
                def object_metadata(data | 
| 127 | 
            -
                  data =  | 
| 179 | 
            +
                def object_metadata(data)
         | 
| 180 | 
            +
                  data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
         | 
| 128 181 |  | 
| 129 182 | 
             
                  data.each_with_index do |obj, index|
         | 
| 130 | 
            -
                    next  | 
| 131 | 
            -
             | 
| 132 | 
            -
                     | 
| 183 | 
            +
                    next if obj.nil?
         | 
| 184 | 
            +
                    # allow the object_key to be valid whether it's a string or symbol
         | 
| 185 | 
            +
                    obj = obj.with_indifferent_access
         | 
| 133 186 |  | 
| 134 | 
            -
                    obj | 
| 135 | 
            -
                       | 
| 187 | 
            +
                    obj.each_key do |key|
         | 
| 188 | 
            +
                      if obj[key].is_a?(Array)
         | 
| 189 | 
            +
                        obj[key].each_with_index do |_nested_item, nested_index|
         | 
| 190 | 
            +
                          self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
         | 
| 191 | 
            +
                        end
         | 
| 192 | 
            +
                      else
         | 
| 193 | 
            +
                        self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
         | 
| 194 | 
            +
                      end
         | 
| 136 195 | 
             
                    end
         | 
| 137 196 | 
             
                  end
         | 
| 138 197 | 
             
                end
         | 
| 139 198 |  | 
| 140 | 
            -
                def  | 
| 141 | 
            -
                   | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
                   | 
| 148 | 
            -
             | 
| 149 | 
            -
                  return gsub_data.map { |d| JSON.parse(d) }
         | 
| 199 | 
            +
                def build_files
         | 
| 200 | 
            +
                  if mapping['file']&.[]('join')
         | 
| 201 | 
            +
                    self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
         | 
| 202 | 
            +
                  else
         | 
| 203 | 
            +
                    hyrax_record.file_sets.each_with_index do |fs, i|
         | 
| 204 | 
            +
                      self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
         | 
| 205 | 
            +
                    end
         | 
| 206 | 
            +
                  end
         | 
| 150 207 | 
             
                end
         | 
| 151 208 |  | 
| 152 209 | 
             
                # In order for the existing exported hyrax_record, to be updated by a re-import
         | 
| @@ -167,18 +224,28 @@ module Bulkrax | |
| 167 224 | 
             
                  Bulkrax::CsvMatcher
         | 
| 168 225 | 
             
                end
         | 
| 169 226 |  | 
| 227 | 
            +
                def possible_collection_ids
         | 
| 228 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 229 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 230 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 231 | 
            +
                  )
         | 
| 232 | 
            +
                  @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
         | 
| 233 | 
            +
                    memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
         | 
| 234 | 
            +
                    memo
         | 
| 235 | 
            +
                  end || []
         | 
| 236 | 
            +
                end
         | 
| 237 | 
            +
             | 
| 170 238 | 
             
                def collections_created?
         | 
| 171 | 
            -
                   | 
| 172 | 
            -
                  record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
         | 
| 239 | 
            +
                  possible_collection_ids.length == self.collection_ids.length
         | 
| 173 240 | 
             
                end
         | 
| 174 241 |  | 
| 175 | 
            -
                def  | 
| 242 | 
            +
                def find_collection_ids
         | 
| 176 243 | 
             
                  return self.collection_ids if collections_created?
         | 
| 177 | 
            -
                   | 
| 178 | 
            -
             | 
| 179 | 
            -
             | 
| 180 | 
            -
                       | 
| 181 | 
            -
                      self.collection_ids << c.id unless  | 
| 244 | 
            +
                  if possible_collection_ids.present?
         | 
| 245 | 
            +
                    possible_collection_ids.each do |collection_id|
         | 
| 246 | 
            +
                      c = find_collection(collection_id)
         | 
| 247 | 
            +
                      skip = c.blank? || self.collection_ids.include?(c.id)
         | 
| 248 | 
            +
                      self.collection_ids << c.id unless skip
         | 
| 182 249 | 
             
                    end
         | 
| 183 250 | 
             
                  end
         | 
| 184 251 | 
             
                  self.collection_ids
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Bulkrax
         | 
| 4 | 
            +
              class CsvFileSetEntry < CsvEntry
         | 
| 5 | 
            +
                def factory_class
         | 
| 6 | 
            +
                  ::FileSet
         | 
| 7 | 
            +
                end
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                def add_path_to_file
         | 
| 10 | 
            +
                  parsed_metadata['file'].each_with_index do |filename, i|
         | 
| 11 | 
            +
                    path_to_file = ::File.join(parser.path_to_files, filename)
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                    parsed_metadata['file'][i] = path_to_file
         | 
| 14 | 
            +
                  end
         | 
| 15 | 
            +
                  raise ::StandardError, 'one or more file paths are invalid' unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                  parsed_metadata['file']
         | 
| 18 | 
            +
                end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                def validate_presence_of_parent!
         | 
| 21 | 
            +
                  return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                  raise StandardError, 'File set must be related to at least one work'
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
              end
         | 
| 26 | 
            +
            end
         | 
    
        data/app/models/bulkrax/entry.rb
    CHANGED
    
    | @@ -23,12 +23,22 @@ module Bulkrax | |
| 23 23 |  | 
| 24 24 | 
             
                attr_accessor :all_attrs
         | 
| 25 25 |  | 
| 26 | 
            -
                delegate :parser, | 
| 26 | 
            +
                delegate :parser,
         | 
| 27 | 
            +
                  :mapping,
         | 
| 28 | 
            +
                  :replace_files,
         | 
| 29 | 
            +
                  :update_files,
         | 
| 30 | 
            +
                  :keys_without_numbers,
         | 
| 31 | 
            +
                  :key_without_numbers,
         | 
| 32 | 
            +
                  to: :importerexporter
         | 
| 27 33 |  | 
| 28 34 | 
             
                delegate :client,
         | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 35 | 
            +
                  :collection_name,
         | 
| 36 | 
            +
                  :user,
         | 
| 37 | 
            +
                  :related_parents_raw_mapping,
         | 
| 38 | 
            +
                  :related_parents_parsed_mapping,
         | 
| 39 | 
            +
                  :related_children_raw_mapping,
         | 
| 40 | 
            +
                  :related_children_parsed_mapping,
         | 
| 41 | 
            +
                  to: :parser
         | 
| 32 42 |  | 
| 33 43 | 
             
                # Retrieve fields from the file
         | 
| 34 44 | 
             
                # @param data - the source data
         | 
| @@ -61,13 +71,13 @@ module Bulkrax | |
| 61 71 | 
             
                end
         | 
| 62 72 |  | 
| 63 73 | 
             
                def self.collection_field
         | 
| 74 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 75 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 76 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 77 | 
            +
                  )
         | 
| 64 78 | 
             
                  Bulkrax.collection_field_mapping[self.to_s]
         | 
| 65 79 | 
             
                end
         | 
| 66 80 |  | 
| 67 | 
            -
                def self.children_field
         | 
| 68 | 
            -
                  Bulkrax.parent_child_field_mapping[self.to_s]
         | 
| 69 | 
            -
                end
         | 
| 70 | 
            -
             | 
| 71 81 | 
             
                def build
         | 
| 72 82 | 
             
                  return if type.nil?
         | 
| 73 83 | 
             
                  self.save if self.new_record? # must be saved for statuses
         | 
| @@ -96,6 +106,7 @@ module Bulkrax | |
| 96 106 | 
             
                end
         | 
| 97 107 |  | 
| 98 108 | 
             
                def find_collection(collection_identifier)
         | 
| 109 | 
            +
                  return unless Collection.properties.keys.include?(work_identifier)
         | 
| 99 110 | 
             
                  Collection.where(
         | 
| 100 111 | 
             
                    work_identifier => collection_identifier
         | 
| 101 112 | 
             
                  ).detect { |m| m.send(work_identifier).include?(collection_identifier) }
         | 
| @@ -14,7 +14,7 @@ module Bulkrax | |
| 14 14 | 
             
                validates :name, presence: true
         | 
| 15 15 | 
             
                validates :parser_klass, presence: true
         | 
| 16 16 |  | 
| 17 | 
            -
                delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, to: :parser
         | 
| 17 | 
            +
                delegate :write, :create_from_collection, :create_from_importer, :create_from_worktype, :create_from_all, to: :parser
         | 
| 18 18 |  | 
| 19 19 | 
             
                def export
         | 
| 20 20 | 
             
                  current_run && setup_export_path
         | 
| @@ -25,6 +25,8 @@ module Bulkrax | |
| 25 25 | 
             
                    create_from_importer
         | 
| 26 26 | 
             
                  when 'worktype'
         | 
| 27 27 | 
             
                    create_from_worktype
         | 
| 28 | 
            +
                  when 'all'
         | 
| 29 | 
            +
                    create_from_all
         | 
| 28 30 | 
             
                  end
         | 
| 29 31 | 
             
                rescue StandardError => e
         | 
| 30 32 | 
             
                  status_info(e)
         | 
| @@ -77,7 +79,8 @@ module Bulkrax | |
| 77 79 | 
             
                  [
         | 
| 78 80 | 
             
                    [I18n.t('bulkrax.exporter.labels.importer'), 'importer'],
         | 
| 79 81 | 
             
                    [I18n.t('bulkrax.exporter.labels.collection'), 'collection'],
         | 
| 80 | 
            -
                    [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype']
         | 
| 82 | 
            +
                    [I18n.t('bulkrax.exporter.labels.worktype'), 'worktype'],
         | 
| 83 | 
            +
                    [I18n.t('bulkrax.exporter.labels.all'), 'all']
         | 
| 81 84 | 
             
                  ]
         | 
| 82 85 | 
             
                end
         | 
| 83 86 |  | 
| @@ -105,18 +108,22 @@ module Bulkrax | |
| 105 108 | 
             
                end
         | 
| 106 109 |  | 
| 107 110 | 
             
                def exporter_export_path
         | 
| 108 | 
            -
                  @exporter_export_path ||= File.join( | 
| 111 | 
            +
                  @exporter_export_path ||= File.join(parser.base_path('export'), self.id.to_s, self.exporter_runs.last.id.to_s)
         | 
| 109 112 | 
             
                end
         | 
| 110 113 |  | 
| 111 114 | 
             
                def exporter_export_zip_path
         | 
| 112 | 
            -
                  @exporter_export_zip_path ||= File.join( | 
| 115 | 
            +
                  @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_#{self.exporter_runs.last.id}.zip")
         | 
| 113 116 | 
             
                rescue
         | 
| 114 | 
            -
                  @exporter_export_zip_path ||= File.join( | 
| 117 | 
            +
                  @exporter_export_zip_path ||= File.join(parser.base_path('export'), "export_#{self.id}_0.zip")
         | 
| 115 118 | 
             
                end
         | 
| 116 119 |  | 
| 117 120 | 
             
                def export_properties
         | 
| 118 121 | 
             
                  properties = Hyrax.config.registered_curation_concern_types.map { |work| work.constantize.properties.keys }.flatten.uniq.sort
         | 
| 119 122 | 
             
                  properties.reject { |prop| Bulkrax.reserved_properties.include?(prop) }
         | 
| 120 123 | 
             
                end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                def metadata_only?
         | 
| 126 | 
            +
                  export_type == 'metadata'
         | 
| 127 | 
            +
                end
         | 
| 121 128 | 
             
              end
         | 
| 122 129 | 
             
            end
         | 
| @@ -18,8 +18,7 @@ module Bulkrax | |
| 18 18 | 
             
                validates :admin_set_id, presence: true
         | 
| 19 19 | 
             
                validates :parser_klass, presence: true
         | 
| 20 20 |  | 
| 21 | 
            -
                delegate :valid_import?, : | 
| 22 | 
            -
                         :write_errored_entries_file, :visibility, to: :parser
         | 
| 21 | 
            +
                delegate :valid_import?, :write_errored_entries_file, :visibility, to: :parser
         | 
| 23 22 |  | 
| 24 23 | 
             
                attr_accessor :only_updates, :file_style, :file
         | 
| 25 24 | 
             
                attr_writer :current_run
         | 
| @@ -97,7 +96,16 @@ module Bulkrax | |
| 97 96 | 
             
                end
         | 
| 98 97 |  | 
| 99 98 | 
             
                def current_run
         | 
| 100 | 
            -
                  @current_run ||=  | 
| 99 | 
            +
                  @current_run ||= if file? && zip?
         | 
| 100 | 
            +
                                     self.importer_runs.create!
         | 
| 101 | 
            +
                                   else
         | 
| 102 | 
            +
                                     entry_counts = {
         | 
| 103 | 
            +
                                       total_work_entries: self.limit || parser.works_total,
         | 
| 104 | 
            +
                                       total_collection_entries: parser.collections_total,
         | 
| 105 | 
            +
                                       total_file_set_entries: parser.file_sets_total
         | 
| 106 | 
            +
                                     }
         | 
| 107 | 
            +
                                     self.importer_runs.create!(entry_counts)
         | 
| 108 | 
            +
                                   end
         | 
| 101 109 | 
             
                end
         | 
| 102 110 |  | 
| 103 111 | 
             
                def last_run
         | 
| @@ -131,6 +139,13 @@ module Bulkrax | |
| 131 139 | 
             
                  status_info(e)
         | 
| 132 140 | 
             
                end
         | 
| 133 141 |  | 
| 142 | 
            +
                def import_file_sets
         | 
| 143 | 
            +
                  self.save if self.new_record? # Object needs to be saved for statuses
         | 
| 144 | 
            +
                  parser.create_file_sets
         | 
| 145 | 
            +
                rescue StandardError => e
         | 
| 146 | 
            +
                  status_info(e)
         | 
| 147 | 
            +
                end
         | 
| 148 | 
            +
             | 
| 134 149 | 
             
                # Prepend the base_url to ensure unique set identifiers
         | 
| 135 150 | 
             
                # @todo - move to parser, as this is OAI specific
         | 
| 136 151 | 
             
                def unique_collection_identifier(id)
         | 
| @@ -149,11 +164,11 @@ module Bulkrax | |
| 149 164 |  | 
| 150 165 | 
             
                # If the import data is zipped, unzip it to this path
         | 
| 151 166 | 
             
                def importer_unzip_path
         | 
| 152 | 
            -
                  @importer_unzip_path ||= File.join( | 
| 167 | 
            +
                  @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
         | 
| 153 168 | 
             
                end
         | 
| 154 169 |  | 
| 155 170 | 
             
                def errored_entries_csv_path
         | 
| 156 | 
            -
                  @errored_entries_csv_path ||= File.join( | 
| 171 | 
            +
                  @errored_entries_csv_path ||= File.join(parser.base_path, "import_#{path_string}_errored_entries.csv")
         | 
| 157 172 | 
             
                end
         | 
| 158 173 |  | 
| 159 174 | 
             
                def path_string
         | 
| @@ -161,5 +176,9 @@ module Bulkrax | |
| 161 176 | 
             
                rescue
         | 
| 162 177 | 
             
                  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
         | 
| 163 178 | 
             
                end
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                def metadata_only?
         | 
| 181 | 
            +
                  parser.parser_fields['metadata_only'] == true
         | 
| 182 | 
            +
                end
         | 
| 164 183 | 
             
              end
         | 
| 165 184 | 
             
            end
         | 
| @@ -26,6 +26,10 @@ module Bulkrax | |
| 26 26 | 
             
                end
         | 
| 27 27 |  | 
| 28 28 | 
             
                def build_metadata
         | 
| 29 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 30 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 31 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 32 | 
            +
                  )
         | 
| 29 33 | 
             
                  self.parsed_metadata = {}
         | 
| 30 34 | 
             
                  self.parsed_metadata[work_identifier] = [record.header.identifier]
         | 
| 31 35 |  | 
| @@ -56,7 +60,7 @@ module Bulkrax | |
| 56 60 | 
             
                # Retrieve list of collections for the entry; add to collection_ids
         | 
| 57 61 | 
             
                # If OAI-PMH doesn't return setSpec in the headers for GetRecord, use parser.collection_name
         | 
| 58 62 | 
             
                #   in this case, if 'All' is selected, records will not be added to a collection.
         | 
| 59 | 
            -
                def  | 
| 63 | 
            +
                def find_collection_ids
         | 
| 60 64 | 
             
                  return self.collection_ids if collections_created?
         | 
| 61 65 | 
             
                  if sets.blank? || parser.collection_name != 'all'
         | 
| 62 66 | 
             
                    # c = Collection.where(Bulkrax.system_identifier_field => importerexporter.unique_collection_identifier(parser.collection_name)).first
         | 
| @@ -14,6 +14,10 @@ module Bulkrax | |
| 14 14 | 
             
                end
         | 
| 15 15 |  | 
| 16 16 | 
             
                def self.data_for_entry(data, source_id)
         | 
| 17 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 18 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 19 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 20 | 
            +
                  )
         | 
| 17 21 | 
             
                  reader = data
         | 
| 18 22 | 
             
                  format = reader.class.format.to_sym
         | 
| 19 23 | 
             
                  collections = []
         | 
| @@ -22,7 +26,7 @@ module Bulkrax | |
| 22 26 | 
             
                  data = RDF::Writer.for(format).buffer do |writer|
         | 
| 23 27 | 
             
                    reader.each_statement do |statement|
         | 
| 24 28 | 
             
                      collections << statement.object.to_s if collection_field.present? && collection_field == statement.predicate.to_s
         | 
| 25 | 
            -
                      children << statement.object.to_s if  | 
| 29 | 
            +
                      children << statement.object.to_s if related_children_parsed_mapping.present? && related_children_parsed_mapping == statement.predicate.to_s
         | 
| 26 30 | 
             
                      delete = statement.object.to_s if /deleted/.match?(statement.predicate.to_s)
         | 
| 27 31 | 
             
                      writer << statement
         | 
| 28 32 | 
             
                    end
         | 
| @@ -37,12 +41,13 @@ module Bulkrax | |
| 37 41 | 
             
                  }
         | 
| 38 42 | 
             
                end
         | 
| 39 43 |  | 
| 40 | 
            -
                def self. | 
| 41 | 
            -
                   | 
| 42 | 
            -
             | 
| 44 | 
            +
                def self.related_children_parsed_mapping
         | 
| 45 | 
            +
                  return @related_children_parsed_mapping if @related_children_parsed_mapping.present?
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                  rdf_related_children_field_mapping = Bulkrax.field_mappings['Bulkrax::RdfParser']&.select { |_, h| h.key?('related_children_field_mapping') }
         | 
| 48 | 
            +
                  return if rdf_related_children_field_mapping.blank?
         | 
| 43 49 |  | 
| 44 | 
            -
             | 
| 45 | 
            -
                  Bulkrax.parent_child_field_mapping[self.to_s]
         | 
| 50 | 
            +
                  @related_children_parsed_mapping = rdf_related_children_field_mapping&.keys&.first
         | 
| 46 51 | 
             
                end
         | 
| 47 52 |  | 
| 48 53 | 
             
                def record
         | 
| @@ -50,6 +55,10 @@ module Bulkrax | |
| 50 55 | 
             
                end
         | 
| 51 56 |  | 
| 52 57 | 
             
                def build_metadata
         | 
| 58 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 59 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 60 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 61 | 
            +
                  )
         | 
| 53 62 | 
             
                  raise StandardError, 'Record not found' if record.nil?
         | 
| 54 63 | 
             
                  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
         | 
| 55 64 |  | 
| @@ -76,7 +85,7 @@ module Bulkrax | |
| 76 85 | 
             
                  self.raw_metadata['collection'].length == self.collection_ids.length
         | 
| 77 86 | 
             
                end
         | 
| 78 87 |  | 
| 79 | 
            -
                def  | 
| 88 | 
            +
                def find_collection_ids
         | 
| 80 89 | 
             
                  return self.collection_ids if collections_created?
         | 
| 81 90 | 
             
                  if self.raw_metadata['collection'].present?
         | 
| 82 91 | 
             
                    self.raw_metadata['collection'].each do |collection|
         | 
| @@ -39,6 +39,10 @@ module Bulkrax | |
| 39 39 | 
             
                end
         | 
| 40 40 |  | 
| 41 41 | 
             
                def build_metadata
         | 
| 42 | 
            +
                  ActiveSupport::Deprecation.warn(
         | 
| 43 | 
            +
                    'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
         | 
| 44 | 
            +
                    ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
         | 
| 45 | 
            +
                  )
         | 
| 42 46 | 
             
                  raise StandardError, 'Record not found' if record.nil?
         | 
| 43 47 | 
             
                  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
         | 
| 44 48 | 
             
                  self.parsed_metadata = {}
         |