bulkrax 7.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/bulkrax/datatables.js +1 -1
- data/app/controllers/bulkrax/exporters_controller.rb +1 -1
- data/app/controllers/bulkrax/importers_controller.rb +2 -1
- data/app/controllers/concerns/bulkrax/datatables_behavior.rb +15 -15
- data/app/factories/bulkrax/object_factory.rb +135 -163
- data/app/factories/bulkrax/object_factory_interface.rb +491 -0
- data/app/factories/bulkrax/valkyrie_object_factory.rb +402 -0
- data/app/helpers/bulkrax/importers_helper.rb +1 -1
- data/app/helpers/bulkrax/validation_helper.rb +4 -4
- data/app/jobs/bulkrax/create_relationships_job.rb +27 -16
- data/app/jobs/bulkrax/delete_job.rb +3 -2
- data/app/jobs/bulkrax/download_cloud_file_job.rb +16 -3
- data/app/jobs/bulkrax/import_file_set_job.rb +5 -2
- data/app/jobs/bulkrax/importer_job.rb +18 -2
- data/app/matchers/bulkrax/application_matcher.rb +0 -2
- data/app/models/bulkrax/csv_collection_entry.rb +1 -1
- data/app/models/bulkrax/csv_entry.rb +7 -6
- data/app/models/bulkrax/entry.rb +7 -11
- data/app/models/bulkrax/exporter.rb +2 -2
- data/app/models/bulkrax/importer.rb +1 -3
- data/app/models/bulkrax/oai_entry.rb +0 -3
- data/app/models/bulkrax/oai_set_entry.rb +1 -1
- data/app/models/bulkrax/rdf_collection_entry.rb +1 -1
- data/app/models/bulkrax/rdf_entry.rb +70 -69
- data/app/models/bulkrax/xml_entry.rb +0 -1
- data/app/models/concerns/bulkrax/dynamic_record_lookup.rb +2 -19
- data/app/models/concerns/bulkrax/export_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/file_factory.rb +174 -118
- data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +2 -2
- data/app/models/concerns/bulkrax/has_matchers.rb +28 -25
- data/app/models/concerns/bulkrax/import_behavior.rb +10 -17
- data/app/models/concerns/bulkrax/importer_exporter_behavior.rb +3 -2
- data/app/parsers/bulkrax/application_parser.rb +31 -7
- data/app/parsers/bulkrax/bagit_parser.rb +175 -174
- data/app/parsers/bulkrax/csv_parser.rb +15 -5
- data/app/parsers/bulkrax/oai_dc_parser.rb +18 -0
- data/app/parsers/bulkrax/parser_export_record_set.rb +18 -22
- data/app/parsers/bulkrax/xml_parser.rb +0 -2
- data/app/services/bulkrax/factory_class_finder.rb +2 -0
- data/app/services/bulkrax/remove_relationships_for_importer.rb +3 -1
- data/app/services/hyrax/custom_queries/find_by_source_identifier.rb +50 -0
- data/app/services/wings/custom_queries/find_by_source_identifier.rb +32 -0
- data/app/views/bulkrax/entries/_parsed_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/_raw_metadata.html.erb +2 -2
- data/app/views/bulkrax/entries/show.html.erb +9 -8
- data/app/views/bulkrax/exporters/edit.html.erb +1 -1
- data/app/views/bulkrax/exporters/new.html.erb +1 -1
- data/app/views/bulkrax/exporters/show.html.erb +4 -2
- data/app/views/bulkrax/importers/_browse_everything.html.erb +2 -2
- data/app/views/bulkrax/importers/_csv_fields.html.erb +1 -1
- data/app/views/bulkrax/importers/edit.html.erb +1 -1
- data/app/views/bulkrax/importers/new.html.erb +1 -1
- data/app/views/bulkrax/importers/show.html.erb +1 -1
- data/app/views/bulkrax/importers/upload_corrected_entries.html.erb +2 -2
- data/app/views/bulkrax/shared/_bulkrax_errors.html.erb +1 -1
- data/app/views/bulkrax/shared/_bulkrax_field_mapping.html.erb +1 -1
- data/config/locales/bulkrax.en.yml +7 -0
- data/db/migrate/20230608153601_add_indices_to_bulkrax.rb +20 -9
- data/db/migrate/20240307053156_add_index_to_metadata_bulkrax_identifier.rb +18 -0
- data/lib/bulkrax/engine.rb +23 -6
- data/lib/bulkrax/version.rb +1 -1
- data/lib/bulkrax.rb +54 -52
- data/lib/generators/bulkrax/templates/config/initializers/bulkrax.rb +2 -0
- data/lib/tasks/bulkrax_tasks.rake +1 -0
- data/lib/tasks/reset.rake +4 -4
- metadata +24 -8
- data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +0 -27
- data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +0 -8
- data/lib/bulkrax/persistence_layer.rb +0 -38
@@ -1,153 +1,209 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Bulkrax
|
4
|
+
##
|
5
|
+
# NOTE: Historically (e.g. Bulkrax v7.0.0 and earlier) we mixed in all of the
|
6
|
+
# {Bulkrax::FileFactory} methods into {Bulkrax::ObjectFactory}. However, with
|
7
|
+
# the introduction of {Bulkrax::ValkyrieObjectFactory} we needed to account
|
8
|
+
# for branching logic.
|
9
|
+
#
|
10
|
+
# This refactor where we expose the bare minimum interface of file interaction
|
11
|
+
# should help with encapsulation.
|
12
|
+
#
|
13
|
+
# The refactor pattern was to find FileFactory methods used by the
|
14
|
+
# ObjectFactory and delegate those to the new {FileFactory::InnerWorkings}
|
15
|
+
# class. Likewise within the InnerWorkings we wanted to delegate to the given
|
16
|
+
# object_factory the methods that the InnerWorkings need.
|
17
|
+
#
|
18
|
+
# Further, by preserving the FileFactory as a mixed in module, downstream
|
19
|
+
# implementers will hopefully experience less of an impact regarding this
|
20
|
+
# change.
|
4
21
|
module FileFactory
|
5
22
|
extend ActiveSupport::Concern
|
6
23
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def upload_ids
|
14
|
-
return [] if klass == Collection
|
15
|
-
attributes[:file] = file_paths
|
16
|
-
import_files
|
17
|
-
end
|
24
|
+
included do
|
25
|
+
class_attribute :file_set_factory_inner_workings_class, default: Bulkrax::FileFactory::InnerWorkings
|
26
|
+
|
27
|
+
def file_set_factory_inner_workings
|
28
|
+
@file_set_factory_inner_workings ||= file_set_factory_inner_workings_class.new(object_factory: self)
|
29
|
+
end
|
18
30
|
|
19
|
-
|
20
|
-
@update_files = update_files
|
21
|
-
hash = {}
|
22
|
-
return hash if klass == Collection
|
23
|
-
hash[:uploaded_files] = upload_ids if attributes[:file].present?
|
24
|
-
hash[:remote_files] = new_remote_files if new_remote_files.present?
|
25
|
-
hash
|
31
|
+
delegate :file_attributes, :destroy_existing_files, to: :file_set_factory_inner_workings
|
26
32
|
end
|
27
33
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
34
|
+
class InnerWorkings
|
35
|
+
def initialize(object_factory:)
|
36
|
+
@object_factory = object_factory
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_reader :object_factory
|
40
|
+
|
41
|
+
delegate :object, :klass, :attributes, :user, to: :object_factory
|
42
|
+
|
43
|
+
# Find existing files or upload new files. This assumes a Work will have unique file titles;
|
44
|
+
# and that those file titles will not have changed
|
45
|
+
# could filter by URIs instead (slower).
|
46
|
+
# When an uploaded_file already exists we do not want to pass its id in `file_attributes`
|
47
|
+
# otherwise it gets reuploaded by `work_actor`.
|
48
|
+
# support multiple files; ensure attributes[:file] is an Array
|
49
|
+
def upload_ids
|
50
|
+
return [] if klass == Bulkrax.collection_model_class
|
51
|
+
attributes[:file] = file_paths
|
52
|
+
import_files
|
53
|
+
end
|
54
|
+
|
55
|
+
def file_attributes(update_files = false)
|
56
|
+
# NOTE: Unclear why we're changing an instance variable based on what was
|
57
|
+
# passed, which itself is derived from the instance variable we're about
|
58
|
+
# to change. It's very easy to mutate the initialized @update_files if
|
59
|
+
# you don't pass the parameter.
|
60
|
+
object_factory.update_files = update_files
|
61
|
+
hash = {}
|
62
|
+
return hash if klass == Bulkrax.collection_model_class
|
63
|
+
hash[:uploaded_files] = upload_ids if attributes[:file].present?
|
64
|
+
hash[:remote_files] = new_remote_files if new_remote_files.present?
|
65
|
+
hash
|
66
|
+
end
|
67
|
+
|
68
|
+
# It's possible to get just an array of strings here, so we need to make sure they are all hashes
|
69
|
+
def parsed_remote_files
|
70
|
+
return @parsed_remote_files if @parsed_remote_files.present?
|
71
|
+
@parsed_remote_files = attributes[:remote_files] || []
|
72
|
+
@parsed_remote_files = @parsed_remote_files.map do |file_value|
|
73
|
+
if file_value.is_a?(Hash)
|
74
|
+
file_value
|
75
|
+
elsif file_value.is_a?(String)
|
76
|
+
name = Bulkrax::Importer.safe_uri_filename(file_value)
|
77
|
+
{ url: file_value, file_name: name }
|
78
|
+
else
|
79
|
+
Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
|
80
|
+
nil
|
81
|
+
end
|
41
82
|
end
|
83
|
+
@parsed_remote_files.delete(nil)
|
84
|
+
@parsed_remote_files
|
42
85
|
end
|
43
|
-
@parsed_remote_files.delete(nil)
|
44
|
-
@parsed_remote_files
|
45
|
-
end
|
46
86
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
|
62
|
-
is_valid && !is_existing
|
63
|
-
end
|
64
|
-
else
|
65
|
-
parsed_remote_files.select do |file|
|
66
|
-
file[:url]&.match(URI::ABS_URI)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
87
|
+
def new_remote_files
|
88
|
+
return @new_remote_files if @new_remote_files
|
89
|
+
|
90
|
+
# TODO: This code could first loop through all remote files and select
|
91
|
+
# only the valid ones; then load the file_sets and do comparisons.
|
92
|
+
file_sets = object_factory.class.file_sets_for(resource: object)
|
93
|
+
@new_remote_files = parsed_remote_files.select do |file|
|
94
|
+
# is the url valid?
|
95
|
+
is_valid = file[:url]&.match(URI::ABS_URI)
|
96
|
+
# does the file already exist
|
97
|
+
is_existing = file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
|
98
|
+
is_valid && !is_existing
|
99
|
+
end
|
100
|
+
end
|
70
101
|
|
71
|
-
|
72
|
-
|
73
|
-
|
102
|
+
def file_paths
|
103
|
+
@file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
|
104
|
+
end
|
74
105
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
106
|
+
# Retrieve the original filenames for the files to be imported
|
107
|
+
def work_files_filenames
|
108
|
+
object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
|
109
|
+
end
|
79
110
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
111
|
+
# Retrieve the filenames for the files to be imported
|
112
|
+
def import_files_filenames
|
113
|
+
file_paths.map { |f| f.split('/').last }
|
114
|
+
end
|
84
115
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
116
|
+
# Called if #replace_files is true
|
117
|
+
# Destroy all file_sets for this object
|
118
|
+
# Reload the object to ensure the remaining methods have the most up to date object
|
119
|
+
def destroy_existing_files
|
120
|
+
return unless object.present? && object.file_sets.present?
|
121
|
+
object.file_sets.each do |fs|
|
122
|
+
Hyrax::Actors::FileSetActor.new(fs, @user).destroy
|
123
|
+
end
|
124
|
+
@object = object.reload
|
125
|
+
log_deleted_fs(object)
|
92
126
|
end
|
93
|
-
@object = object.reload
|
94
|
-
log_deleted_fs(object)
|
95
|
-
end
|
96
127
|
|
97
|
-
|
98
|
-
|
99
|
-
|
128
|
+
def set_removed_filesets
|
129
|
+
local_file_sets.each do |fileset|
|
130
|
+
# TODO: We need to consider the Valkyrie pathway
|
131
|
+
next if fileset.is_a?(Valkyrie::Resource)
|
132
|
+
|
133
|
+
remove_file_set(file_set: fileset)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
def remove_file_set(file_set:)
|
138
|
+
# TODO: We need to consider the Valkyrie pathway
|
139
|
+
file = file_set.files.first
|
140
|
+
file.create_version
|
100
141
|
opts = {}
|
101
|
-
opts[:path] =
|
142
|
+
opts[:path] = file.id.split('/', 2).last
|
102
143
|
opts[:original_name] = 'removed.png'
|
103
144
|
opts[:mime_type] = 'image/png'
|
104
145
|
|
105
|
-
|
106
|
-
|
107
|
-
::CreateDerivativesJob.set(wait: 1.minute).perform_later(
|
146
|
+
file_set.add_file(File.open(Bulkrax.removed_image_path), opts)
|
147
|
+
file_set.save
|
148
|
+
::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
|
108
149
|
end
|
109
|
-
end
|
110
150
|
|
111
|
-
|
112
|
-
|
113
|
-
|
151
|
+
def local_file_sets
|
152
|
+
# NOTE: we'll be mutating this list of file_sets via the import_files
|
153
|
+
# method
|
154
|
+
@local_file_sets ||= ordered_file_sets
|
155
|
+
end
|
114
156
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
157
|
+
def ordered_file_sets
|
158
|
+
Bulkrax.object_factory.ordered_file_sets_for(object)
|
159
|
+
end
|
119
160
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
161
|
+
##
|
162
|
+
# @return [Array<Integer>] An array of Hyrax::UploadFile#id representing the
|
163
|
+
# files that we should be uploading.
|
164
|
+
def import_files
|
165
|
+
paths = file_paths.map { |path| import_file(path) }.compact
|
166
|
+
set_removed_filesets if local_file_sets.present?
|
167
|
+
paths
|
168
|
+
end
|
125
169
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
170
|
+
def import_file(path)
|
171
|
+
u = Hyrax::UploadedFile.new
|
172
|
+
u.user_id = user.id
|
173
|
+
u.file = CarrierWave::SanitizedFile.new(path)
|
174
|
+
update_filesets(u)
|
175
|
+
end
|
176
|
+
|
177
|
+
def update_filesets(current_file)
|
178
|
+
if @update_files && local_file_sets.present?
|
179
|
+
# NOTE: We're mutating local_file_sets as we process the updated file.
|
180
|
+
fileset = local_file_sets.shift
|
181
|
+
update_file_set(file_set: fileset, uploaded: current_file)
|
182
|
+
else
|
183
|
+
current_file.save
|
184
|
+
current_file.id
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
##
|
189
|
+
# @return [NilClass] indicating that we've successfully begun work on the file_set.
|
190
|
+
def update_file_set(file_set:, uploaded:)
|
191
|
+
# TODO: We need to consider the Valkyrie pathway
|
192
|
+
file = file_set.files.first
|
193
|
+
uploaded_file = uploaded.file
|
132
194
|
|
133
|
-
|
134
|
-
if @update_files && local_file_sets.present?
|
135
|
-
fileset = local_file_sets.shift
|
136
|
-
return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
|
195
|
+
return nil if file.checksum.value == Digest::SHA1.file(uploaded_file.path).to_s
|
137
196
|
|
138
|
-
|
197
|
+
file.create_version
|
139
198
|
opts = {}
|
140
|
-
opts[:path] =
|
141
|
-
opts[:original_name] =
|
142
|
-
opts[:mime_type] =
|
199
|
+
opts[:path] = file.id.split('/', 2).last
|
200
|
+
opts[:original_name] = uploaded_file.file.original_filename
|
201
|
+
opts[:mime_type] = uploaded_file.content_type
|
143
202
|
|
144
|
-
|
145
|
-
|
146
|
-
::CreateDerivativesJob.set(wait: 1.minute).perform_later(
|
203
|
+
file_set.add_file(File.open(uploaded_file.to_s), opts)
|
204
|
+
file_set.save
|
205
|
+
::CreateDerivativesJob.set(wait: 1.minute).perform_later(file_set, file.id)
|
147
206
|
nil
|
148
|
-
else
|
149
|
-
current_file.save
|
150
|
-
current_file.id
|
151
207
|
end
|
152
208
|
end
|
153
209
|
end
|
@@ -5,7 +5,7 @@ module Bulkrax
|
|
5
5
|
extend ActiveSupport::Concern
|
6
6
|
|
7
7
|
included do
|
8
|
-
self.default_work_type =
|
8
|
+
self.default_work_type = Bulkrax.file_model_class.to_s
|
9
9
|
end
|
10
10
|
|
11
11
|
def file_reference
|
@@ -47,7 +47,7 @@ module Bulkrax
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def child_jobs
|
50
|
-
raise ::StandardError,
|
50
|
+
raise ::StandardError, "A #{Bulkrax.file_model_class} cannot be a parent of a #{Bulkrax.collection_model_class}, Work, or other #{Bulkrax.file_model_class}"
|
51
51
|
end
|
52
52
|
end
|
53
53
|
end
|
@@ -56,6 +56,10 @@ module Bulkrax
|
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
59
|
+
def get_object_name(field)
|
60
|
+
mapping&.[](field)&.[]('object')
|
61
|
+
end
|
62
|
+
|
59
63
|
def set_parsed_data(name, value)
|
60
64
|
return parsed_metadata[name] = value unless multiple?(name)
|
61
65
|
|
@@ -125,41 +129,40 @@ module Bulkrax
|
|
125
129
|
|
126
130
|
return false if excluded?(field)
|
127
131
|
return true if supported_bulkrax_fields.include?(field)
|
128
|
-
|
132
|
+
|
133
|
+
Bulkrax.object_factory.field_supported?(field: field, model: factory_class)
|
129
134
|
end
|
130
135
|
|
131
136
|
def supported_bulkrax_fields
|
132
|
-
@supported_bulkrax_fields ||=
|
133
|
-
|
134
|
-
id
|
135
|
-
file
|
136
|
-
remote_files
|
137
|
-
model
|
138
|
-
visibility
|
139
|
-
delete
|
140
|
-
#{related_parents_parsed_mapping}
|
141
|
-
#{related_children_parsed_mapping}
|
142
|
-
]
|
137
|
+
@supported_bulkrax_fields ||= fields_that_are_always_singular +
|
138
|
+
fields_that_are_always_multiple
|
143
139
|
end
|
144
140
|
|
141
|
+
##
|
142
|
+
# Determine a multiple properties field
|
145
143
|
def multiple?(field)
|
146
|
-
|
147
|
-
|
148
|
-
file
|
149
|
-
remote_files
|
150
|
-
rights_statement
|
151
|
-
#{related_parents_parsed_mapping}
|
152
|
-
#{related_children_parsed_mapping}
|
153
|
-
]
|
144
|
+
return true if fields_that_are_always_singular.include?(field.to_s)
|
145
|
+
return false if fields_that_are_always_multiple.include?(field.to_s)
|
154
146
|
|
155
|
-
|
156
|
-
|
147
|
+
Bulkrax.object_factory.field_multi_value?(field: field, model: factory_class)
|
148
|
+
end
|
157
149
|
|
158
|
-
|
150
|
+
def fields_that_are_always_multiple
|
151
|
+
%w[id delete model visibility]
|
159
152
|
end
|
160
153
|
|
161
|
-
def
|
162
|
-
|
154
|
+
def fields_that_are_always_singular
|
155
|
+
@fields_that_are_always_singular ||= %W[
|
156
|
+
file
|
157
|
+
remote_files
|
158
|
+
rights_statement
|
159
|
+
#{related_parents_parsed_mapping}
|
160
|
+
#{related_children_parsed_mapping}
|
161
|
+
]
|
162
|
+
end
|
163
|
+
|
164
|
+
def schema_form_definitions
|
165
|
+
@schema_form_definitions ||= ::SchemaLoader.new.form_definitions_for(factory_class.name.underscore.to_sym)
|
163
166
|
end
|
164
167
|
|
165
168
|
# Hyrax field to use for the given import field
|
@@ -11,7 +11,7 @@ module Bulkrax
|
|
11
11
|
unless self.importerexporter.validate_only
|
12
12
|
raise CollectionsCreatedError unless collections_created?
|
13
13
|
@item = factory.run!
|
14
|
-
add_user_to_permission_templates!
|
14
|
+
add_user_to_permission_templates!
|
15
15
|
parent_jobs if self.parsed_metadata[related_parents_parsed_mapping]&.join.present?
|
16
16
|
child_jobs if self.parsed_metadata[related_children_parsed_mapping]&.join.present?
|
17
17
|
end
|
@@ -28,22 +28,15 @@ module Bulkrax
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def add_user_to_permission_templates!
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
)
|
39
|
-
|
40
|
-
permission_template_id: permission_template.id,
|
41
|
-
agent_id: 'admin',
|
42
|
-
agent_type: 'group',
|
43
|
-
access: 'manage'
|
44
|
-
)
|
45
|
-
|
46
|
-
@item.reset_access_controls!
|
31
|
+
# NOTE: This is a cheat for checking whether the class is a CollectionEntry. Consider
|
32
|
+
# that we have default_work_type.
|
33
|
+
#
|
34
|
+
# TODO: This guard clause is not necessary as we can handle it in the
|
35
|
+
# underlying factory. However, to do that requires adjusting about 7
|
36
|
+
# failing specs. So for now this refactor appears acceptable
|
37
|
+
return unless defined?(::Hyrax)
|
38
|
+
return unless self.class.to_s.include?("Collection")
|
39
|
+
factory.add_user_to_collection_permissions(collection: @item, user: user)
|
47
40
|
end
|
48
41
|
|
49
42
|
def parent_jobs
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'marcel'
|
3
2
|
|
4
3
|
module Bulkrax
|
5
4
|
module ImporterExporterBehavior
|
@@ -54,9 +53,11 @@ module Bulkrax
|
|
54
53
|
filename = parser_fields&.[]('import_file_path')
|
55
54
|
return false unless filename
|
56
55
|
return false unless File.file?(filename)
|
56
|
+
|
57
57
|
returning_value = false
|
58
58
|
File.open(filename) do |file|
|
59
|
-
|
59
|
+
mime_type = ::Marcel::MimeType.for(file)
|
60
|
+
returning_value = mime_type.include?('application/zip') || mime_type.include?('application/gzip')
|
60
61
|
end
|
61
62
|
returning_value
|
62
63
|
end
|
@@ -1,6 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'zip'
|
3
|
-
require 'marcel'
|
4
2
|
|
5
3
|
module Bulkrax
|
6
4
|
# An abstract class that establishes the API for Bulkrax's import and export parsing.
|
@@ -232,7 +230,7 @@ module Bulkrax
|
|
232
230
|
type_col = Bulkrax::Entry.arel_table['type']
|
233
231
|
status_col = Bulkrax::Entry.arel_table['status_message']
|
234
232
|
|
235
|
-
query = (type == 'work' ? type_col.
|
233
|
+
query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
|
236
234
|
query.and(status_col.in(statuses))
|
237
235
|
end
|
238
236
|
|
@@ -242,16 +240,30 @@ module Bulkrax
|
|
242
240
|
return 0
|
243
241
|
end
|
244
242
|
|
243
|
+
def record_raw_metadata(record)
|
244
|
+
record.to_h
|
245
|
+
end
|
246
|
+
|
247
|
+
def record_deleted?(record)
|
248
|
+
return false unless record.key?(:delete)
|
249
|
+
ActiveModel::Type::Boolean.new.cast(record[:delete])
|
250
|
+
end
|
251
|
+
|
252
|
+
def record_remove_and_rerun?(record)
|
253
|
+
return false unless record.key?(:remove_and_rerun)
|
254
|
+
ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
|
255
|
+
end
|
256
|
+
|
245
257
|
def create_entry_and_job(current_record, type, identifier = nil)
|
246
258
|
identifier ||= current_record[source_identifier]
|
247
259
|
new_entry = find_or_create_entry(send("#{type}_entry_class"),
|
248
260
|
identifier,
|
249
261
|
'Bulkrax::Importer',
|
250
|
-
current_record
|
262
|
+
record_raw_metadata(current_record))
|
251
263
|
new_entry.status_info('Pending', importer.current_run)
|
252
|
-
if current_record
|
264
|
+
if record_deleted?(current_record)
|
253
265
|
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
|
254
|
-
elsif current_record
|
266
|
+
elsif record_remove_and_rerun?(current_record) || remove_and_rerun
|
255
267
|
delay = calculate_type_delay(type)
|
256
268
|
"Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
|
257
269
|
else
|
@@ -260,7 +272,7 @@ module Bulkrax
|
|
260
272
|
end
|
261
273
|
|
262
274
|
# Optional, define if using browse everything for file upload
|
263
|
-
def retrieve_cloud_files(
|
275
|
+
def retrieve_cloud_files(_files, _importer); end
|
264
276
|
|
265
277
|
# @param file [#path, #original_filename] the file object with the relevant data for the
|
266
278
|
# import.
|
@@ -382,6 +394,9 @@ module Bulkrax
|
|
382
394
|
identifier: identifier
|
383
395
|
)
|
384
396
|
entry.raw_metadata = raw_metadata
|
397
|
+
# Setting parsed_metadata specifically for the id so we can find the object via the
|
398
|
+
# id in a delete. This is likely to get clobbered in a regular import, which is fine.
|
399
|
+
entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
|
385
400
|
entry.save!
|
386
401
|
entry
|
387
402
|
end
|
@@ -413,6 +428,8 @@ module Bulkrax
|
|
413
428
|
end
|
414
429
|
|
415
430
|
def unzip(file_to_unzip)
|
431
|
+
return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
|
432
|
+
|
416
433
|
Zip::File.open(file_to_unzip) do |zip_file|
|
417
434
|
zip_file.each do |entry|
|
418
435
|
entry_path = File.join(importer_unzip_path, entry.name)
|
@@ -422,6 +439,13 @@ module Bulkrax
|
|
422
439
|
end
|
423
440
|
end
|
424
441
|
|
442
|
+
def untar(file_to_untar)
|
443
|
+
Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
|
444
|
+
command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
|
445
|
+
result = system(command)
|
446
|
+
raise "Failed to extract #{file_to_untar}" unless result
|
447
|
+
end
|
448
|
+
|
425
449
|
def zip
|
426
450
|
FileUtils.mkdir_p(exporter_export_zip_path)
|
427
451
|
|