bulkrax 4.3.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/exporters.js +12 -0
- data/app/controllers/bulkrax/entries_controller.rb +5 -0
- data/app/controllers/bulkrax/exporters_controller.rb +5 -0
- data/app/controllers/bulkrax/importers_controller.rb +9 -1
- data/app/factories/bulkrax/object_factory.rb +87 -11
- data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
- data/app/jobs/bulkrax/import_file_set_job.rb +1 -0
- data/app/jobs/bulkrax/import_work_job.rb +23 -13
- data/app/matchers/bulkrax/application_matcher.rb +5 -3
- data/app/models/bulkrax/csv_entry.rb +20 -8
- data/app/models/bulkrax/entry.rb +2 -1
- data/app/models/bulkrax/importer.rb +20 -15
- data/app/models/bulkrax/oai_entry.rb +42 -9
- data/app/models/bulkrax/rdf_entry.rb +1 -1
- data/app/models/bulkrax/xml_entry.rb +54 -12
- data/app/models/concerns/bulkrax/file_factory.rb +9 -3
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +8 -1
- data/app/models/concerns/bulkrax/import_behavior.rb +27 -19
- data/app/parsers/bulkrax/application_parser.rb +90 -13
- data/app/parsers/bulkrax/csv_parser.rb +13 -6
- data/app/parsers/bulkrax/oai_dc_parser.rb +2 -2
- data/app/parsers/bulkrax/xml_parser.rb +6 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +107 -0
- data/app/views/bulkrax/exporters/_form.html.erb +3 -3
- data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +3 -3
- data/app/views/hyrax/dashboard/sidebar/_bulkrax_sidebar_additions.html.erb +9 -5
- data/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb +18 -7
- data/db/migrate/20211203195233_rename_children_counters_to_relationships.rb +1 -1
- data/db/migrate/20211220195027_add_file_set_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220118001339_add_import_attempts_to_entries.rb +1 -1
- data/db/migrate/20220119213325_add_work_counters_to_importer_runs.rb +1 -1
- data/db/migrate/20220301001839_create_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220303212810_add_order_to_bulkrax_pending_relationships.rb +1 -1
- data/db/migrate/20220412233954_add_include_thumbnails_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220413180915_add_generated_metadata_to_bulkrax_exporters.rb +1 -1
- data/db/migrate/20220609001128_rename_bulkrax_importer_run_to_importer_run.rb +1 -1
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +85 -11
- data/lib/generators/bulkrax/install_generator.rb +20 -0
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +13 -1
- data/lib/tasks/reset.rake +65 -0
- metadata +6 -4
@@ -4,7 +4,7 @@ require 'nokogiri'
|
|
4
4
|
module Bulkrax
|
5
5
|
# Generic XML Entry
|
6
6
|
class XmlEntry < Entry
|
7
|
-
serialize :raw_metadata,
|
7
|
+
serialize :raw_metadata, Bulkrax::NormalizedJson
|
8
8
|
|
9
9
|
def self.fields_from_data(data); end
|
10
10
|
|
@@ -43,15 +43,14 @@ module Bulkrax
|
|
43
43
|
raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
|
44
44
|
self.parsed_metadata = {}
|
45
45
|
self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
46
|
+
|
47
|
+
# We need to establish the #factory_class before we proceed with the metadata. See
|
48
|
+
# https://github.com/samvera-labs/bulkrax/issues/702 for further details.
|
49
|
+
#
|
50
|
+
# tl;dr - if we don't have the right factory_class we might skip properties that are
|
51
|
+
# specifically assigned to the factory class
|
52
|
+
establish_factory_class
|
53
|
+
add_metadata_from_record
|
55
54
|
add_visibility
|
56
55
|
add_rights_statement
|
57
56
|
add_admin_set_id
|
@@ -63,11 +62,54 @@ module Bulkrax
|
|
63
62
|
self.parsed_metadata
|
64
63
|
end
|
65
64
|
|
66
|
-
|
67
|
-
|
65
|
+
def establish_factory_class
|
66
|
+
model_field_names = parser.model_field_mappings
|
67
|
+
|
68
|
+
each_candidate_metadata_node_name_and_content(elements: parser.model_field_mappings) do |name, content|
|
69
|
+
next unless model_field_names.include?(name)
|
70
|
+
add_metadata(name, content)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add_metadata_from_record
|
75
|
+
each_candidate_metadata_node_name_and_content do |name, content|
|
76
|
+
add_metadata(name, content)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names)
|
81
|
+
elements.each do |name|
|
82
|
+
# NOTE: the XML element name's case matters
|
83
|
+
nodes = record.xpath("//*[name()='#{name}']")
|
84
|
+
next if nodes.empty?
|
85
|
+
|
86
|
+
nodes.each do |node|
|
87
|
+
node.children.each do |content|
|
88
|
+
next if content.to_s.blank?
|
89
|
+
|
90
|
+
yield(name, content.to_s)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Returns the explicitly declared "from" key's value of each parser's element's value. (Yes, I
|
97
|
+
# would like a proper class for the thing I just tried to describe.)
|
98
|
+
#
|
99
|
+
# @return [Array<String>]
|
100
|
+
#
|
101
|
+
# @todo Additionally, we may want to revisit the XML parser fundamental logic; namely we only
|
102
|
+
# parse nodes that are explicitly declared within the `from`. This is a bit different
|
103
|
+
# than other parsers, in that they will make assumptions about each encountered column (in
|
104
|
+
# the case of CSV) or node (in the case of OAI). tl;dr - Here there be dragons.
|
105
|
+
def field_mapping_from_values_for_xml_element_names
|
68
106
|
Bulkrax.field_mappings[self.importerexporter.parser_klass].map do |_k, v|
|
69
107
|
v[:from]
|
70
108
|
end.flatten.compact.uniq
|
71
109
|
end
|
110
|
+
|
111
|
+
# Included for potential downstream adopters
|
112
|
+
alias xml_elements field_mapping_from_values_for_xml_element_names
|
113
|
+
deprecation_deprecate xml_elements: "Use '#{self}#field_mapping_from_values_for_xml_element_names' instead"
|
72
114
|
end
|
73
115
|
end
|
@@ -45,9 +45,15 @@ module Bulkrax
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def new_remote_files
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
@new_remote_files ||= if object.is_a? FileSet
|
49
|
+
parsed_remote_files.select do |file|
|
50
|
+
# is the url valid?
|
51
|
+
is_valid = file[:url]&.match(URI::ABS_URI)
|
52
|
+
# does the file already exist
|
53
|
+
is_existing = object.import_url && object.import_url == file[:url]
|
54
|
+
is_valid && !is_existing
|
55
|
+
end
|
56
|
+
elsif object.present? && object.file_sets.present?
|
51
57
|
parsed_remote_files.select do |file|
|
52
58
|
# is the url valid?
|
53
59
|
is_valid = file[:url]&.match(URI::ABS_URI)
|
@@ -6,7 +6,14 @@ module Bulkrax
|
|
6
6
|
::FileSet
|
7
7
|
end
|
8
8
|
|
9
|
+
def file_reference
|
10
|
+
return 'file' if parsed_metadata&.[]('file')&.map(&:present?)&.any?
|
11
|
+
return 'remote_files' if parsed_metadata&.[]('remote_files')&.map(&:present?)&.any?
|
12
|
+
end
|
13
|
+
|
9
14
|
def add_path_to_file
|
15
|
+
return unless file_reference == 'file'
|
16
|
+
|
10
17
|
parsed_metadata['file'].each_with_index do |filename, i|
|
11
18
|
next if filename.blank?
|
12
19
|
|
@@ -22,7 +29,7 @@ module Bulkrax
|
|
22
29
|
end
|
23
30
|
|
24
31
|
def validate_presence_of_filename!
|
25
|
-
return if parsed_metadata&.[](
|
32
|
+
return if parsed_metadata&.[](file_reference)&.map(&:present?)&.any?
|
26
33
|
|
27
34
|
raise StandardError, 'File set must have a filename'
|
28
35
|
end
|
@@ -117,23 +117,30 @@ module Bulkrax
|
|
117
117
|
Bulkrax.qa_controlled_properties.each do |field|
|
118
118
|
next if parsed_metadata[field].blank?
|
119
119
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
parsed_metadata[field][i] = validated_uri_value
|
125
|
-
else
|
126
|
-
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
127
|
-
Rails.logger.debug(debug_msg)
|
128
|
-
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
129
|
-
raise ::StandardError, error_msg
|
120
|
+
if multiple?(field)
|
121
|
+
parsed_metadata[field].each_with_index do |value, i|
|
122
|
+
next if value.blank?
|
123
|
+
parsed_metadata[field][i] = sanitize_controlled_uri_value(field, value)
|
130
124
|
end
|
125
|
+
else
|
126
|
+
parsed_metadata[field] = sanitize_controlled_uri_value(field, parsed_metadata[field])
|
131
127
|
end
|
132
128
|
end
|
133
129
|
|
134
130
|
true
|
135
131
|
end
|
136
132
|
|
133
|
+
def sanitize_controlled_uri_value(field, value)
|
134
|
+
if (validated_uri_value = validate_value(value, field))
|
135
|
+
validated_uri_value
|
136
|
+
else
|
137
|
+
debug_msg = %(Unable to locate active authority ID "#{value}" in config/authorities/#{field.pluralize}.yml)
|
138
|
+
Rails.logger.debug(debug_msg)
|
139
|
+
error_msg = %("#{value}" is not a valid and/or active authority ID for the :#{field} field)
|
140
|
+
raise ::StandardError, error_msg
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
137
144
|
# @param value [String] value to validate
|
138
145
|
# @param field [String] name of the controlled property
|
139
146
|
# @return [String, nil] validated URI value or nil
|
@@ -165,15 +172,16 @@ module Bulkrax
|
|
165
172
|
end
|
166
173
|
|
167
174
|
def factory
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
175
|
+
of = Bulkrax.object_factory || Bulkrax::ObjectFactory
|
176
|
+
@factory ||= of.new(attributes: self.parsed_metadata,
|
177
|
+
source_identifier_value: identifier,
|
178
|
+
work_identifier: parser.work_identifier,
|
179
|
+
related_parents_parsed_mapping: parser.related_parents_parsed_mapping,
|
180
|
+
replace_files: replace_files,
|
181
|
+
user: user,
|
182
|
+
klass: factory_class,
|
183
|
+
importer_run_id: importerexporter.last_run.id,
|
184
|
+
update_files: update_files)
|
177
185
|
end
|
178
186
|
|
179
187
|
def factory_class
|
@@ -2,6 +2,9 @@
|
|
2
2
|
require 'zip'
|
3
3
|
|
4
4
|
module Bulkrax
|
5
|
+
# An abstract class that establishes the API for Bulkrax's import and export parsing.
|
6
|
+
#
|
7
|
+
# @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc).
|
5
8
|
class ApplicationParser # rubocop:disable Metrics/ClassLength
|
6
9
|
attr_accessor :importerexporter, :headers
|
7
10
|
alias importer importerexporter
|
@@ -12,14 +15,21 @@ module Bulkrax
|
|
12
15
|
:exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only,
|
13
16
|
to: :importerexporter
|
14
17
|
|
18
|
+
# @todo Convert to `class_attribute :parser_fields, default: {}`
|
15
19
|
def self.parser_fields
|
16
20
|
{}
|
17
21
|
end
|
18
22
|
|
23
|
+
# @return [TrueClass,FalseClass] this parser does or does not support exports.
|
24
|
+
#
|
25
|
+
# @todo Convert to `class_attribute :export_supported, default: false, instance_predicate: true` and `class << self; alias export_supported? export_supported; end`
|
19
26
|
def self.export_supported?
|
20
27
|
false
|
21
28
|
end
|
22
29
|
|
30
|
+
# @return [TrueClass,FalseClass] this parser does or does not support imports.
|
31
|
+
#
|
32
|
+
# @todo Convert to `class_attribute :import_supported, default: false, instance_predicate: true` and `class << self; alias import_supported? import_supported; end`
|
23
33
|
def self.import_supported?
|
24
34
|
true
|
25
35
|
end
|
@@ -29,49 +39,76 @@ module Bulkrax
|
|
29
39
|
@headers = []
|
30
40
|
end
|
31
41
|
|
32
|
-
# @api
|
42
|
+
# @api public
|
43
|
+
# @abstract Subclass and override {#entry_class} to implement behavior for the parser.
|
33
44
|
def entry_class
|
34
|
-
raise
|
45
|
+
raise NotImplementedError, 'must be defined'
|
35
46
|
end
|
36
47
|
|
37
|
-
# @api
|
48
|
+
# @api public
|
49
|
+
# @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser.
|
38
50
|
def collection_entry_class
|
39
|
-
raise
|
51
|
+
raise NotImplementedError, 'must be defined'
|
40
52
|
end
|
41
53
|
|
42
|
-
# @api
|
54
|
+
# @api public
|
55
|
+
# @abstract Subclass and override {#file_set_entry_class} to implement behavior for the parser.
|
56
|
+
def file_set_entry_class
|
57
|
+
raise NotImplementedError, 'must be defined'
|
58
|
+
end
|
59
|
+
|
60
|
+
# @api public
|
61
|
+
# @abstract Subclass and override {#records} to implement behavior for the parser.
|
43
62
|
def records(_opts = {})
|
44
|
-
raise
|
63
|
+
raise NotImplementedError, 'must be defined'
|
45
64
|
end
|
46
65
|
|
66
|
+
# @return [Symbol] the name of the identifying property in the source system from which we're
|
67
|
+
# importing (i.e. *not* the application that mounts *this* Bulkrax engine).
|
68
|
+
#
|
69
|
+
# @see #work_identifier
|
70
|
+
# @see https://github.com/samvera-labs/bulkrax/wiki/CSV-Importer#source-identifier Bulkrax Wiki regarding source identifier
|
47
71
|
def source_identifier
|
48
72
|
@source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier
|
49
73
|
end
|
50
74
|
|
75
|
+
# @return [Symbol] the name of the identifying property for the system which we're importing
|
76
|
+
# into (i.e. the application that mounts *this* Bulkrax engine)
|
77
|
+
# @see #source_identifier
|
51
78
|
def work_identifier
|
52
79
|
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
|
53
80
|
end
|
54
81
|
|
82
|
+
# @return [String]
|
55
83
|
def generated_metadata_mapping
|
56
84
|
@generated_metadata_mapping ||= 'generated'
|
57
85
|
end
|
58
86
|
|
87
|
+
# @return [String, NilClass]
|
88
|
+
# @see #related_parents_parsed_mapping
|
59
89
|
def related_parents_raw_mapping
|
60
90
|
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
|
61
91
|
end
|
62
92
|
|
93
|
+
# @return [String]
|
94
|
+
# @see #related_parents_raw_mapping
|
63
95
|
def related_parents_parsed_mapping
|
64
96
|
@related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents')
|
65
97
|
end
|
66
98
|
|
99
|
+
# @return [String, NilClass]
|
100
|
+
# @see #related_children_parsed_mapping
|
67
101
|
def related_children_raw_mapping
|
68
102
|
@related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first
|
69
103
|
end
|
70
104
|
|
105
|
+
# @return [String]
|
106
|
+
# @see #related_children_raw_mapping
|
71
107
|
def related_children_parsed_mapping
|
72
108
|
@related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children')
|
73
109
|
end
|
74
110
|
|
111
|
+
# @api private
|
75
112
|
def get_field_mapping_hash_for(key)
|
76
113
|
return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present?
|
77
114
|
|
@@ -85,6 +122,7 @@ module Bulkrax
|
|
85
122
|
instance_variable_get("@#{key}_hash")
|
86
123
|
end
|
87
124
|
|
125
|
+
# @return [Array<String>]
|
88
126
|
def model_field_mappings
|
89
127
|
model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || []
|
90
128
|
model_mappings |= ['model']
|
@@ -92,6 +130,7 @@ module Bulkrax
|
|
92
130
|
model_mappings
|
93
131
|
end
|
94
132
|
|
133
|
+
# @return [String]
|
95
134
|
def perform_method
|
96
135
|
if self.validate_only
|
97
136
|
'perform_now'
|
@@ -100,29 +139,55 @@ module Bulkrax
|
|
100
139
|
end
|
101
140
|
end
|
102
141
|
|
142
|
+
# The visibility of the record. Acceptable values are: "open", "embargo", "lease", "authenticated", "restricted". The default is "open".
|
143
|
+
#
|
144
|
+
# @return [String]
|
145
|
+
# @see https://github.com/samvera/hydra-head/blob/main/hydra-access-controls/app/models/concerns/hydra/access_controls/access_right.rb Hydra::AccessControls::AccessRight for details on the range of values.
|
146
|
+
# @see https://github.com/samvera/hyrax/blob/bd2bcffc33e183904be2c175367648815f25bc2b/app/services/hyrax/visibility_intention.rb Hyrax::VisibilityIntention for how we process the visibility.
|
103
147
|
def visibility
|
104
148
|
@visibility ||= self.parser_fields['visibility'] || 'open'
|
105
149
|
end
|
106
150
|
|
151
|
+
# @api public
|
152
|
+
#
|
153
|
+
# @param types [Array<Symbol>] the types of objects that we'll create.
|
154
|
+
#
|
155
|
+
# @see Bulkrax::Importer::DEFAULT_OBJECT_TYPES
|
156
|
+
# @see #create_collections
|
157
|
+
# @see #create_works
|
158
|
+
# @see #create_file_sets
|
159
|
+
# @see #create_relationships
|
160
|
+
def create_objects(types = [])
|
161
|
+
types.each do |object_type|
|
162
|
+
send("create_#{object_type.pluralize}")
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# @abstract Subclass and override {#create_collections} to implement behavior for the parser.
|
107
167
|
def create_collections
|
108
|
-
raise
|
168
|
+
raise NotImplementedError, 'must be defined' if importer?
|
109
169
|
end
|
110
170
|
|
171
|
+
# @abstract Subclass and override {#create_works} to implement behavior for the parser.
|
111
172
|
def create_works
|
112
|
-
raise
|
173
|
+
raise NotImplementedError, 'must be defined' if importer?
|
113
174
|
end
|
114
175
|
|
176
|
+
# @abstract Subclass and override {#create_file_sets} to implement behavior for the parser.
|
115
177
|
def create_file_sets
|
116
|
-
raise
|
178
|
+
raise NotImplementedError, 'must be defined' if importer?
|
117
179
|
end
|
118
180
|
|
181
|
+
# @abstract Subclass and override {#create_relationships} to implement behavior for the parser.
|
119
182
|
def create_relationships
|
120
|
-
raise
|
183
|
+
raise NotImplementedError, 'must be defined' if importer?
|
121
184
|
end
|
122
185
|
|
123
186
|
# Optional, define if using browse everything for file upload
|
124
187
|
def retrieve_cloud_files(files); end
|
125
188
|
|
189
|
+
# @param file [#path, #original_filename] the file object with the relevant data for the
|
190
|
+
# import.
|
126
191
|
def write_import_file(file)
|
127
192
|
path = File.join(path_for_import, file.original_filename)
|
128
193
|
FileUtils.mv(
|
@@ -133,6 +198,8 @@ module Bulkrax
|
|
133
198
|
end
|
134
199
|
|
135
200
|
# Base path for imported and exported files
|
201
|
+
# @param type [String]
|
202
|
+
# @return [String] the base path for files that this parser will "parse"
|
136
203
|
def base_path(type = 'import')
|
137
204
|
# account for multiple versions of hyku
|
138
205
|
is_multitenant = ENV['HYKU_MULTITENANT'] == 'true' || ENV['SETTINGS__MULTITENANCY__ENABLED'] == 'true'
|
@@ -141,41 +208,48 @@ module Bulkrax
|
|
141
208
|
|
142
209
|
# Path where we'll store the import metadata and files
|
143
210
|
# this is used for uploaded and cloud files
|
211
|
+
# @return [String]
|
144
212
|
def path_for_import
|
145
213
|
@path_for_import = File.join(base_path, importerexporter.path_string)
|
146
214
|
FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import)
|
147
215
|
@path_for_import
|
148
216
|
end
|
149
217
|
|
218
|
+
# @abstract Subclass and override {#setup_export_file} to implement behavior for the parser.
|
150
219
|
def setup_export_file
|
151
|
-
raise
|
220
|
+
raise NotImplementedError, 'must be defined' if exporter?
|
152
221
|
end
|
153
222
|
|
223
|
+
# @abstract Subclass and override {#write_files} to implement behavior for the parser.
|
154
224
|
def write_files
|
155
|
-
raise
|
225
|
+
raise NotImplementedError, 'must be defined' if exporter?
|
156
226
|
end
|
157
227
|
|
228
|
+
# @return [TrueClass,FalseClass]
|
158
229
|
def importer?
|
159
230
|
importerexporter.is_a?(Bulkrax::Importer)
|
160
231
|
end
|
161
232
|
|
233
|
+
# @return [TrueClass,FalseClass]
|
162
234
|
def exporter?
|
163
235
|
importerexporter.is_a?(Bulkrax::Exporter)
|
164
236
|
end
|
165
237
|
|
166
238
|
# @param limit [Integer] limit set on the importerexporter
|
167
239
|
# @param index [Integer] index of current iteration
|
168
|
-
# @return [
|
240
|
+
# @return [TrueClass,FalseClass]
|
169
241
|
def limit_reached?(limit, index)
|
170
242
|
return false if limit.nil? || limit.zero? # no limit
|
171
243
|
index >= limit
|
172
244
|
end
|
173
245
|
|
174
246
|
# Override to add specific validations
|
247
|
+
# @return [TrueClass,FalseClass]
|
175
248
|
def valid_import?
|
176
249
|
true
|
177
250
|
end
|
178
251
|
|
252
|
+
# @return [TrueClass,FalseClass]
|
179
253
|
def record_has_source_identifier(record, index)
|
180
254
|
if record[source_identifier].blank?
|
181
255
|
if Bulkrax.fill_in_blank_source_identifiers.present?
|
@@ -199,6 +273,7 @@ module Bulkrax
|
|
199
273
|
end
|
200
274
|
# rubocop:enable Rails/SkipsModelValidations
|
201
275
|
|
276
|
+
# @return [Array<String>]
|
202
277
|
def required_elements
|
203
278
|
if Bulkrax.fill_in_blank_source_identifiers
|
204
279
|
['title']
|
@@ -287,12 +362,14 @@ module Bulkrax
|
|
287
362
|
end
|
288
363
|
|
289
364
|
# Path for the import
|
365
|
+
# @return [String]
|
290
366
|
def import_file_path
|
291
367
|
@import_file_path ||= real_import_file_path
|
292
368
|
end
|
293
369
|
|
294
370
|
private
|
295
371
|
|
372
|
+
# @return [String]
|
296
373
|
def real_import_file_path
|
297
374
|
return importer_unzip_path if file? && zip?
|
298
375
|
parser_fields['import_file_path']
|
@@ -180,7 +180,7 @@ module Bulkrax
|
|
180
180
|
end
|
181
181
|
|
182
182
|
def current_work_ids
|
183
|
-
ActiveSupport::
|
183
|
+
ActiveSupport::Deprecation.warn('Bulkrax::CsvParser#current_work_ids will be replaced with #current_record_ids in version 3.0')
|
184
184
|
current_record_ids
|
185
185
|
end
|
186
186
|
|
@@ -196,10 +196,13 @@ module Bulkrax
|
|
196
196
|
@collection_ids = ActiveFedora::SolrService.query("has_model_ssim:Collection #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
197
197
|
@file_set_ids = ActiveFedora::SolrService.query("has_model_ssim:FileSet #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
198
198
|
when 'collection'
|
199
|
-
@work_ids = ActiveFedora::SolrService.query(
|
199
|
+
@work_ids = ActiveFedora::SolrService.query(
|
200
|
+
"member_of_collection_ids_ssim:#{importerexporter.export_source + extra_filters} AND has_model_ssim:(#{Hyrax.config.curation_concerns.join(' OR ')})", method: :post, rows: 2_000_000_000
|
201
|
+
).map(&:id)
|
200
202
|
# get the parent collection and child collections
|
201
203
|
@collection_ids = ActiveFedora::SolrService.query("id:#{importerexporter.export_source} #{extra_filters}", method: :post, rows: 2_147_483_647).map(&:id)
|
202
|
-
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
204
|
+
@collection_ids += ActiveFedora::SolrService.query("has_model_ssim:Collection AND member_of_collection_ids_ssim:#{importerexporter.export_source}", method: :post,
|
205
|
+
rows: 2_147_483_647).map(&:id)
|
203
206
|
find_child_file_sets(@work_ids)
|
204
207
|
when 'worktype'
|
205
208
|
@work_ids = ActiveFedora::SolrService.query("has_model_ssim:#{importerexporter.export_source + extra_filters}", method: :post, rows: 2_000_000_000).map(&:id)
|
@@ -234,7 +237,7 @@ module Bulkrax
|
|
234
237
|
instance_variable_set(instance_var, ActiveFedora::SolrService.post(
|
235
238
|
extra_filters.to_s,
|
236
239
|
fq: [
|
237
|
-
%(#{
|
240
|
+
%(#{solr_name(work_identifier)}:("#{complete_entry_identifiers.join('" OR "')}")),
|
238
241
|
"has_model_ssim:(#{models_to_search.join(' OR ')})"
|
239
242
|
],
|
240
243
|
fl: 'id',
|
@@ -243,6 +246,10 @@ module Bulkrax
|
|
243
246
|
end
|
244
247
|
end
|
245
248
|
|
249
|
+
def solr_name(base_name)
|
250
|
+
Module.const_defined?(:Solrizer) ? ::Solrizer.solr_name(base_name) : ::ActiveFedora.index_field_mapper.solr_name(base_name)
|
251
|
+
end
|
252
|
+
|
246
253
|
def create_new_entries
|
247
254
|
current_record_ids.each_with_index do |id, index|
|
248
255
|
break if limit_reached?(limit, index)
|
@@ -440,7 +447,7 @@ module Bulkrax
|
|
440
447
|
file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
|
441
448
|
next if r[file_mapping].blank?
|
442
449
|
|
443
|
-
r[file_mapping].split(
|
450
|
+
r[file_mapping].split(Bulkrax.multi_value_element_split_on).map do |f|
|
444
451
|
file = File.join(path_to_files, f.tr(' ', '_'))
|
445
452
|
if File.exist?(file) # rubocop:disable Style/GuardClause
|
446
453
|
file
|
@@ -468,7 +475,7 @@ module Bulkrax
|
|
468
475
|
entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
|
469
476
|
Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
|
470
477
|
else
|
471
|
-
collection_hash[:title].split(
|
478
|
+
collection_hash[:title].split(Bulkrax.multi_value_element_split_on).first
|
472
479
|
end
|
473
480
|
|
474
481
|
entry_uid
|
@@ -13,8 +13,7 @@ module Bulkrax
|
|
13
13
|
def client
|
14
14
|
@client ||= OAI::Client.new(importerexporter.parser_fields['base_url'],
|
15
15
|
headers: headers,
|
16
|
-
parser: 'libxml'
|
17
|
-
metadata_prefix: importerexporter.parser_fields['metadata_prefix'])
|
16
|
+
parser: 'libxml')
|
18
17
|
rescue StandardError
|
19
18
|
raise OAIError
|
20
19
|
end
|
@@ -32,6 +31,7 @@ module Bulkrax
|
|
32
31
|
end
|
33
32
|
|
34
33
|
def records(opts = {})
|
34
|
+
opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
|
35
35
|
opts[:set] = collection_name unless collection_name == 'all'
|
36
36
|
|
37
37
|
opts[:from] = importerexporter&.last_imported_at&.strftime("%Y-%m-%d") if importerexporter.last_imported_at && only_updates
|
@@ -12,6 +12,12 @@ module Bulkrax
|
|
12
12
|
# @todo not yet supported
|
13
13
|
def create_collections; end
|
14
14
|
|
15
|
+
# @todo not yet supported
|
16
|
+
def file_set_entry_class; end
|
17
|
+
|
18
|
+
# @todo not yet supported
|
19
|
+
def create_file_sets; end
|
20
|
+
|
15
21
|
# TODO: change to differentiate between collection and work records when adding ability to import collection metadata
|
16
22
|
def works_total
|
17
23
|
total
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Bulkrax
|
3
|
+
# This class is rather destructive; it will break relationships between the works, file sets, and
|
4
|
+
# collections that were imported via an importer. You probably don't want to run this on your
|
5
|
+
# data, except in the case where you have been testing a Bulkrax::Importer, the parsers and
|
6
|
+
# mappings. Then, you might have relationships that you want to remove.
|
7
|
+
#
|
8
|
+
# tl;dr - Caution this will break things!
|
9
|
+
class RemoveRelationshipsForImporter
|
10
|
+
# @api public
|
11
|
+
#
|
12
|
+
# Remove the relationships of the works and collections for all of the Bulkrax::Entry records
|
13
|
+
# associated with the given Bulkrax::Importer.
|
14
|
+
#
|
15
|
+
# @param importer [Bulkrax::Importer]
|
16
|
+
# @param with_progress_bar [Boolean]
|
17
|
+
def self.break_relationships_for!(importer:, with_progress_bar: false)
|
18
|
+
entries = importer.entries.select(&:succeeded?)
|
19
|
+
progress_bar = build_progress_bar_for(with_progress_bar: with_progress_bar, entries: entries)
|
20
|
+
new(progress_bar: progress_bar, entries: entries).break_relationships!
|
21
|
+
end
|
22
|
+
|
23
|
+
# @api private
|
24
|
+
#
|
25
|
+
# A null object that conforms to this class's use of a progress bar.
|
26
|
+
module NullProgressBar
|
27
|
+
def self.increment; end
|
28
|
+
end
|
29
|
+
|
30
|
+
# @api private
|
31
|
+
#
|
32
|
+
# @return [#increment]
|
33
|
+
def self.build_progress_bar_for(with_progress_bar:, entries:)
|
34
|
+
return NullProgressBar unless with_progress_bar
|
35
|
+
|
36
|
+
begin
|
37
|
+
require 'ruby-progressbar'
|
38
|
+
ProgessBar.create(total: entries.count)
|
39
|
+
rescue LoadError
|
40
|
+
Rails.logger.info("Using NullProgressBar because ProgressBar is not available due to a LoadError.")
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# @param entries [#each]
|
45
|
+
# @param progress_bar [#increment]
|
46
|
+
def initialize(entries:, progress_bar:)
|
47
|
+
@progress_bar = progress_bar
|
48
|
+
@entries = entries
|
49
|
+
end
|
50
|
+
|
51
|
+
attr_reader :entries, :progress_bar
|
52
|
+
|
53
|
+
def break_relationships!
|
54
|
+
entries.each do |entry|
|
55
|
+
progress_bar.increment
|
56
|
+
|
57
|
+
obj = entry.factory.find
|
58
|
+
next if obj.is_a?(FileSet) # FileSets must be attached to a Work
|
59
|
+
|
60
|
+
if obj.is_a?(Collection)
|
61
|
+
remove_relationships_from_collection(obj)
|
62
|
+
else
|
63
|
+
remove_relationships_from_work(obj)
|
64
|
+
end
|
65
|
+
|
66
|
+
obj.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
67
|
+
obj.save!
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def remove_relationships_from_collection(collection)
|
72
|
+
# Remove child work relationships
|
73
|
+
collection.member_works.each do |work|
|
74
|
+
change = work.member_of_collections.delete(collection)
|
75
|
+
work.save! if change.present?
|
76
|
+
end
|
77
|
+
|
78
|
+
# Remove parent collection relationships
|
79
|
+
collection.member_of_collections.each do |parent_col|
|
80
|
+
Hyrax::Collections::NestedCollectionPersistenceService
|
81
|
+
.remove_nested_relationship_for(parent: parent_col, child: collection)
|
82
|
+
end
|
83
|
+
|
84
|
+
# Remove child collection relationships
|
85
|
+
collection.member_collections.each do |child_col|
|
86
|
+
Hyrax::Collections::NestedCollectionPersistenceService
|
87
|
+
.remove_nested_relationship_for(parent: collection, child: child_col)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def remove_relationships_from_work(work)
|
92
|
+
# Remove parent collection relationships
|
93
|
+
work.member_of_collections = []
|
94
|
+
|
95
|
+
# Remove parent work relationships
|
96
|
+
work.member_of_works.each do |parent_work|
|
97
|
+
parent_work.members.delete(work)
|
98
|
+
parent_work.save!
|
99
|
+
end
|
100
|
+
|
101
|
+
# Remove child work relationships
|
102
|
+
work.member_works.each do |child_work|
|
103
|
+
work.member_works.delete(child_work)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|