bulkrax 3.0.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3976f63546f72c7a1fcc47ba595aa3e6486f2f6bcf7fb39a65f9cceb61513dc2
4
- data.tar.gz: 267eb1e87602fdc7a510eff6ac61a459c11a1bbc3691b0b07e9aebd9d84de743
3
+ metadata.gz: 41dde3161532c80ff433be4697fa6c42f0f453dfea3547181c44914a68e3b466
4
+ data.tar.gz: 67473e62b537f71aa77aa664c361ba1e86c60e77451e4813a44597a9f465010b
5
5
  SHA512:
6
- metadata.gz: 6512d5c1169d1024b9b5e16847772cb8a662b98638127204e198e09463a54bd3c93f36eac12460e20c43156148ff546a85fbea3d01dd9a0a55df179c56fc8b5d
7
- data.tar.gz: f80a6017b2247235a10b7d23fbbd5199464ac018cd7a9d5609ad8a6c97898e7b20b718c8cb2d9821231de9654f68caf6f39359eb82270c4832d15385ad1f7d12
6
+ metadata.gz: 3859148384111048a2a2c096cee9bde5a9c78cb8e5a73b5ddb1d31c3ddefe7a221d1ebff97e3b1f96f09e746ac60b3ec1c14e615ef162fe1a5e16f883a3a7ecf
7
+ data.tar.gz: bbc83265d59d5026546e6e92ccab3b93d6d4bf35ae08145ab6b27112ed587449572d763fdf3d876ec391dfce43d8b5fa96154e8507053ff2fac7b81bc2409580
data/README.md CHANGED
@@ -1,7 +1,6 @@
1
1
  # Bulkrax
2
2
  Bulkrax is a batteries-included importer for Samvera applications. It currently includes support for OAI-PMH (DC and Qualified DC) and CSV out of the box. It is also designed to be extensible, allowing you to easily add new importers into your application or to include them with other gems. Bulkrax provides a full admin interface including creating, editing, scheduling and reviewing imports.
3
3
 
4
-
5
4
  ## Installation
6
5
 
7
6
  ### Install Generator
@@ -9,9 +8,9 @@ Bulkrax is a batteries included importer for Samvera applications. It currently
9
8
  Add this line to your application's Gemfile:
10
9
 
11
10
  ```ruby
12
- gem 'bulkrax', '1.0.0'
11
+ gem 'bulkrax'
13
12
  # or if using from github
14
- gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git'
13
+ gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git', branch: 'main'
15
14
  ```
16
15
 
17
16
  And then execute:
@@ -27,7 +26,7 @@ If using Sidekiq, set up queues for `import` and `export`.
27
26
  Add this line to your application's Gemfile:
28
27
 
29
28
  ```ruby
30
- gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git'
29
+ gem 'bulkrax'
31
30
  ```
32
31
 
33
32
  And then execute:
@@ -46,18 +46,16 @@ module Bulkrax
46
46
  end
47
47
 
48
48
  if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.blank?
49
- reschedule(
50
- parent_identifier: parent_identifier,
51
- importer_run_id: importer_run_id
52
- )
49
+ reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id })
53
50
  return false # stop current job from continuing to run after rescheduling
54
51
  end
55
-
52
+ importer_id = ImporterRun.find(importer_run_id).importer_id
56
53
  @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
57
- importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
54
+ importerexporter_id: importer_id,
58
55
  importerexporter_type: "Bulkrax::Importer").first
59
56
  create_relationships
60
57
  pending_relationships.each(&:destroy)
58
+ Bulkrax::Importer.find(importer_id).record_status
61
59
  rescue ::StandardError => e
62
60
  parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e)
63
61
  Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations
@@ -82,18 +80,18 @@ module Bulkrax
82
80
  # Work-Collection membership is added to the child as member_of_collection_ids
83
81
  # This is adding the reverse relationship, from the child to the parent
84
82
  def collection_parent_work_child
85
- child_records[:works].each do |child_record|
86
- ::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent_record, child: child_record)
87
- # TODO: add counters for :processed_parents and :failed_parents
88
- Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
89
- end
83
+ child_work_ids = child_records[:works].map(&:id)
84
+ parent_record.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX
85
+
86
+ parent_record.add_member_objects(child_work_ids)
87
+ ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations
90
88
  end
91
89
 
92
90
  # Collection-Collection membership is added as member_ids
93
91
  def collection_parent_collection_child
94
92
  child_records[:collections].each do |child_record|
95
93
  ::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent_record, child: child_record)
96
- Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
94
+ ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
97
95
  end
98
96
  end
99
97
 
@@ -103,14 +101,12 @@ module Bulkrax
103
101
  child_records[:works].each_with_index do |child_record, i|
104
102
  records_hash[i] = { id: child_record.id }
105
103
  end
106
- attrs = {
107
- work_members_attributes: records_hash
108
- }
109
- parent_record.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX if parent_record.respond_to?(:reindex_extent)
104
+ attrs = { work_members_attributes: records_hash }
105
+ parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
110
106
  env = Hyrax::Actors::Environment.new(parent_record, Ability.new(user), attrs)
107
+
111
108
  Hyrax::CurationConcern.actor.update(env)
112
- # TODO: add counters for :processed_parents and :failed_parents
113
- Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
109
+ ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_records[:works].count) # rubocop:disable Rails/SkipsModelValidations
114
110
  end
115
111
 
116
112
  def reschedule(parent_identifier:, importer_run_id:)
@@ -27,6 +27,7 @@ module Bulkrax
27
27
  end
28
28
  exporter_run = ExporterRun.find(args[1])
29
29
  return entry if exporter_run.enqueued_records.positive?
30
+
30
31
  if exporter_run.failed_records.positive?
31
32
  exporter_run.exporter.status_info('Complete (with failures)')
32
33
  else
@@ -9,16 +9,18 @@ module Bulkrax
9
9
  entry = Entry.find(args[0])
10
10
  begin
11
11
  entry.build
12
- entry.save
12
+ entry.save!
13
13
  ImporterRun.find(args[1]).increment!(:processed_records)
14
14
  ImporterRun.find(args[1]).increment!(:processed_collections)
15
- ImporterRun.find(args[1]).decrement!(:enqueued_records)
15
+ ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
16
16
  rescue => e
17
17
  ImporterRun.find(args[1]).increment!(:failed_records)
18
18
  ImporterRun.find(args[1]).increment!(:failed_collections)
19
- ImporterRun.find(args[1]).decrement!(:enqueued_records)
19
+ ImporterRun.find(args[1]).decrement!(:enqueued_records) unless ImporterRun.find(args[1]).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
20
20
  raise e
21
21
  end
22
+ entry.importer.current_run = ImporterRun.find(args[1])
23
+ entry.importer.record_status
22
24
  end
23
25
  # rubocop:enable Rails/SkipsModelValidations
24
26
  end
@@ -26,17 +26,17 @@ module Bulkrax
26
26
  ImporterRun.find(importer_run_id).increment!(:failed_file_sets)
27
27
  # rubocop:enable Rails/SkipsModelValidations
28
28
  end
29
- ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
29
+ ImporterRun.find(importer_run_id).decrement!(:enqueued_records) unless ImporterRun.find(importer_run_id).enqueued_records <= 0 # rubocop:disable Rails/SkipsModelValidations
30
30
  entry.save!
31
+ entry.importer.current_run = ImporterRun.find(importer_run_id)
32
+ entry.importer.record_status
31
33
 
32
34
  rescue MissingParentError => e
33
35
  # try waiting for the parent record to be created
34
36
  entry.import_attempts += 1
35
37
  entry.save!
36
38
  if entry.import_attempts < 5
37
- ImportFileSetJob
38
- .set(wait: (entry.import_attempts + 1).minutes)
39
- .perform_later(entry_id, importer_run_id)
39
+ ImportFileSetJob.set(wait: (entry.import_attempts + 1).minutes).perform_later(entry_id, importer_run_id)
40
40
  else
41
41
  ImporterRun.find(importer_run_id).decrement!(:enqueued_records) # rubocop:disable Rails/SkipsModelValidations
42
42
  entry.status_info(e)
@@ -46,6 +46,7 @@ module Bulkrax
46
46
  add_visibility
47
47
  add_metadata_for_model
48
48
  add_rights_statement
49
+ sanitize_controlled_uri_values!
49
50
  add_local
50
51
 
51
52
  self.parsed_metadata
@@ -2,31 +2,6 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvFileSetEntry < CsvEntry
5
- def factory_class
6
- ::FileSet
7
- end
8
-
9
- def add_path_to_file
10
- parsed_metadata['file'].each_with_index do |filename, i|
11
- path_to_file = ::File.join(parser.path_to_files, filename)
12
-
13
- parsed_metadata['file'][i] = path_to_file
14
- end
15
- raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
-
17
- parsed_metadata['file']
18
- end
19
-
20
- def validate_presence_of_filename!
21
- return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
22
-
23
- raise StandardError, 'File set must have a filename'
24
- end
25
-
26
- def validate_presence_of_parent!
27
- return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
28
-
29
- raise StandardError, 'File set must be related to at least one work'
30
- end
5
+ include FileSetEntryBehavior
31
6
  end
32
7
  end
@@ -96,7 +96,8 @@ module Bulkrax
96
96
  end
97
97
 
98
98
  def current_run
99
- @current_run ||= self.exporter_runs.create!(total_work_entries: self.limit || parser.total)
99
+ total = self.limit || parser.total
100
+ @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
100
101
  end
101
102
 
102
103
  def last_run
@@ -142,7 +142,7 @@ module Bulkrax
142
142
 
143
143
  def import_objects(types_array = nil)
144
144
  self.only_updates ||= false
145
- types = types_array || %w[work collection file_set relationship]
145
+ types = types_array || %w[collection work file_set relationship]
146
146
  if parser.class == Bulkrax::CsvParser
147
147
  parser.create_objects(types)
148
148
  else
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ class RdfFileSetEntry < RdfEntry
5
+ include FileSetEntryBehavior
6
+ end
7
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ module FileSetEntryBehavior
5
+ def factory_class
6
+ ::FileSet
7
+ end
8
+
9
+ def add_path_to_file
10
+ parsed_metadata['file'].each_with_index do |filename, i|
11
+ path_to_file = ::File.join(parser.path_to_files, filename)
12
+
13
+ parsed_metadata['file'][i] = path_to_file
14
+ end
15
+ raise ::StandardError, "one or more file paths are invalid: #{parsed_metadata['file'].join(', ')}" unless parsed_metadata['file'].map { |file_path| ::File.file?(file_path) }.all?
16
+
17
+ parsed_metadata['file']
18
+ end
19
+
20
+ def validate_presence_of_filename!
21
+ return if parsed_metadata&.[]('file')&.map(&:present?)&.any?
22
+
23
+ raise StandardError, 'File set must have a filename'
24
+ end
25
+
26
+ def validate_presence_of_parent!
27
+ return if parsed_metadata[related_parents_parsed_mapping]&.map(&:present?)&.any?
28
+
29
+ raise StandardError, 'File set must be related to at least one work'
30
+ end
31
+ end
32
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  # Import Behavior for Entry classes
5
- module ImportBehavior
5
+ module ImportBehavior # rubocop:disable Metrics/ModuleLength
6
6
  extend ActiveSupport::Concern
7
7
 
8
8
  def build_for_importer
@@ -21,6 +21,8 @@ module Bulkrax
21
21
  status_info(e)
22
22
  else
23
23
  status_info
24
+ ensure
25
+ self.save!
24
26
  end
25
27
  return @item
26
28
  end
@@ -103,6 +105,65 @@ module Bulkrax
103
105
  end
104
106
  end
105
107
 
108
+ # Attempt to correct common data entry mistakes in Questioning Authority URI values for the configured
109
+ # controlled fields. Controlled URI values are only valid if they are an exact match.
110
+ # Example:
111
+ # Valid value: http://rightsstatements.org/vocab/InC/1.0/
112
+ # Provided value: https://rightsstatements.org/vocab/InC/1.0
113
+ # Sanitized value: http://rightsstatements.org/vocab/InC/1.0/ ("s" from "https" removed, trailing "/" added)
114
+ #
115
+ # @return [Boolean] true if all controlled URI values are sanitized successfully
116
+ def sanitize_controlled_uri_values!
117
+ Bulkrax.qa_controlled_properties.each do |field|
118
+ next if parsed_metadata[field].blank?
119
+
120
+ parsed_metadata[field].each_with_index do |value, i|
121
+ next if value.blank?
122
+
123
+ if (validated_uri_value = validate_value(value, field))
124
+ parsed_metadata[field][i] = validated_uri_value
125
+ else
126
+ debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
127
+ Rails.logger.debug(debug_msg)
128
+ error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
129
+ raise ::StandardError, error_msg
130
+ end
131
+ end
132
+ end
133
+
134
+ true
135
+ end
136
+
137
+ # @param value [String] value to validate
138
+ # @param field [String] name of the controlled property
139
+ # @return [String, nil] validated URI value or nil
140
+ def validate_value(value, field)
141
+ if value.match?(::URI::DEFAULT_PARSER.make_regexp)
142
+ value = value.strip.chomp
143
+ # add trailing forward slash unless one is already present
144
+ value << '/' unless value.match?(%r{/$})
145
+ end
146
+
147
+ valid = if active_id_for_authority?(value, field)
148
+ true
149
+ else
150
+ value.include?('https') ? value.sub!('https', 'http') : value.sub!('http', 'https')
151
+ active_id_for_authority?(value, field)
152
+ end
153
+
154
+ valid ? value : nil
155
+ end
156
+
157
+ # @param value [String] value to check
158
+ # @param field [String] name of the controlled property
159
+ # @return [Boolean] provided value is a present, active authority ID for the provided field
160
+ def active_id_for_authority?(value, field)
161
+ field_service = ('Hyrax::' + "#{field}_service".camelcase).constantize
162
+ active_authority_ids = field_service.new.active_elements.map { |ae| ae['id'] }
163
+
164
+ active_authority_ids.include?(value)
165
+ end
166
+
106
167
  def factory
107
168
  @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata,
108
169
  source_identifier_value: identifier,
@@ -23,6 +23,11 @@ module Bulkrax
23
23
  Entry
24
24
  end
25
25
 
26
+ def file_set_entry_class
27
+ csv_format = Bulkrax::Importer.last.parser_fields['metadata_format'] == "Bulkrax::CsvEntry"
28
+ csv_format ? CsvFileSetEntry : RdfFileSetEntry
29
+ end
30
+
26
31
  # Take a random sample of 10 metadata_paths and work out the import fields from that
27
32
  def import_fields
28
33
  raise StandardError, 'No metadata files were found' if metadata_paths.blank?
@@ -217,7 +217,7 @@ module Bulkrax
217
217
  instance_variable_set(instance_var, ActiveFedora::SolrService.post(
218
218
  extra_filters.to_s,
219
219
  fq: [
220
- "#{::Solrizer.solr_name(work_identifier)}:(#{complete_entry_identifiers.join(' OR ')})",
220
+ %(#{::Solrizer.solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
221
221
  "has_model_ssim:(#{models_to_search.join(' OR ')})"
222
222
  ],
223
223
  fl: 'id',
@@ -270,7 +270,7 @@ module Bulkrax
270
270
  # Changed to grep as wc -l counts blank lines, and ignores the final unescaped line (which may or may not contain data)
271
271
  def total
272
272
  @total = importer.parser_fields['total'] || 0 if importer?
273
- @total = importerexporter.entries.count if exporter?
273
+ @total = limit || current_record_ids.count if exporter?
274
274
 
275
275
  return @total || 0
276
276
  rescue StandardError
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Bulkrax
4
- VERSION = '3.0.0'
4
+ VERSION = '3.1.1'
5
5
  end
data/lib/bulkrax.rb CHANGED
@@ -12,13 +12,13 @@ module Bulkrax
12
12
  :related_children_field_mapping,
13
13
  :related_parents_field_mapping,
14
14
  :reserved_properties,
15
+ :qa_controlled_properties,
15
16
  :field_mappings,
16
17
  :import_path,
17
18
  :export_path,
18
19
  :removed_image_path,
19
20
  :server_name,
20
- :api_definition,
21
- :removed_image_path
21
+ :api_definition
22
22
 
23
23
  self.parsers = [
24
24
  { name: "OAI - Dublin Core", class_name: "Bulkrax::OaiDcParser", partial: "oai_fields" },
@@ -119,6 +119,11 @@ module Bulkrax
119
119
  original_url
120
120
  relative_path
121
121
  ]
122
+
123
+ # List of Questioning Authority properties that are controlled via YAML files in
124
+ # the config/authorities/ directory. For example, the :rights_statement property
125
+ # is controlled by the active terms in config/authorities/rights_statements.yml
126
+ self.qa_controlled_properties = %w[rights_statement license]
122
127
  end
123
128
 
124
129
  def self.api_definition
@@ -131,8 +136,6 @@ module Bulkrax
131
136
  )
132
137
  end
133
138
 
134
- self.removed_image_path = 'app/assets/images/bulkrax/removed.png'
135
-
136
139
  # this function maps the vars from your app into your engine
137
140
  def self.setup
138
141
  yield self
@@ -61,6 +61,12 @@ Bulkrax.setup do |config|
61
61
 
62
62
  # Properties that should not be used in imports/exports. They are reserved for use by Hyrax.
63
63
  # config.reserved_properties += ['my_field']
64
+
65
+ # List of Questioning Authority properties that are controlled via YAML files in
66
+ # the config/authorities/ directory. For example, the :rights_statement property
67
+ # is controlled by the active terms in config/authorities/rights_statements.yml
68
+ # Defaults: 'rights_statement' and 'license'
69
+ # config.qa_controlled_properties += ['my_field']
64
70
  end
65
71
 
66
72
  # Sidebar for hyrax 3+ support
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulkrax
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-18 00:00:00.000000000 Z
11
+ date: 2022-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -291,6 +291,7 @@ files:
291
291
  - app/models/bulkrax/pending_relationship.rb
292
292
  - app/models/bulkrax/rdf_collection_entry.rb
293
293
  - app/models/bulkrax/rdf_entry.rb
294
+ - app/models/bulkrax/rdf_file_set_entry.rb
294
295
  - app/models/bulkrax/status.rb
295
296
  - app/models/bulkrax/xml_entry.rb
296
297
  - app/models/concerns/bulkrax/download_behavior.rb
@@ -298,6 +299,7 @@ files:
298
299
  - app/models/concerns/bulkrax/errored_entries.rb
299
300
  - app/models/concerns/bulkrax/export_behavior.rb
300
301
  - app/models/concerns/bulkrax/file_factory.rb
302
+ - app/models/concerns/bulkrax/file_set_entry_behavior.rb
301
303
  - app/models/concerns/bulkrax/has_local_processing.rb
302
304
  - app/models/concerns/bulkrax/has_matchers.rb
303
305
  - app/models/concerns/bulkrax/import_behavior.rb
@@ -382,7 +384,7 @@ homepage: https://github.com/samvera-labs/bulkrax
382
384
  licenses:
383
385
  - Apache-2.0
384
386
  metadata: {}
385
- post_install_message:
387
+ post_install_message:
386
388
  rdoc_options: []
387
389
  require_paths:
388
390
  - lib
@@ -397,8 +399,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
397
399
  - !ruby/object:Gem::Version
398
400
  version: '0'
399
401
  requirements: []
400
- rubygems_version: 3.1.2
401
- signing_key:
402
+ rubygems_version: 3.1.4
403
+ signing_key:
402
404
  specification_version: 4
403
405
  summary: Import and export tool for Hyrax and Hyku
404
406
  test_files: []