cdm_migrator 3.3.2 → 3.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/cdm_migrator/csv_controller.rb +11 -57
- data/app/jobs/cdm_migrator/batch_create_files_with_ordered_members_job.rb +49 -22
- data/app/jobs/cdm_migrator/create_work_job.rb +36 -11
- data/app/services/csv_export_service.rb +84 -0
- data/lib/cdm_migrator/version.rb +1 -1
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +4 -0
- metadata +24 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9bd9baa9f5c277e22421a1726d4652d2231a4067aabcf2e4f048ac9aa7264fc2
|
4
|
+
data.tar.gz: f698973f8726c227326fd7e0c393af4201c7dd9352f43d669cff9659f310da85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6c264c4d7a122f598aef1d746766771b408defb9a15e014490f7a2acd43772197c5e66d15a99120c13089f013fe702f9dc5aeaebbb9cbc9f70b2110b54cac55d
|
7
|
+
data.tar.gz: 85241bcb0111ead83629678cfde68fb729d49997890dbc0432857e6f7976f74f3f4296b0e6aecac309b71a84b398ea02fa24f1d346d652912a60eded09c39c0c
|
@@ -29,7 +29,7 @@ module CdmMigrator
|
|
29
29
|
|
30
30
|
def upload
|
31
31
|
@admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
|
32
|
-
@collections =
|
32
|
+
@collections = Hyrax.config.collection_class.all.map { |col| [col.title.first, col.id] }
|
33
33
|
end
|
34
34
|
|
35
35
|
def create
|
@@ -88,7 +88,7 @@ module CdmMigrator
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def edit
|
91
|
-
@collections =
|
91
|
+
@collections = Hyrax.config.collection_class.all.map { |c| [c.title.first, c.id] }
|
92
92
|
end
|
93
93
|
|
94
94
|
def update
|
@@ -102,7 +102,7 @@ module CdmMigrator
|
|
102
102
|
elsif type.include? "Work"
|
103
103
|
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
104
104
|
elsif type.include? "File"
|
105
|
-
metadata = create_data(row.except('id', 'type'),
|
105
|
+
metadata = create_data(row.except('id', 'type'), file_form, obj, mvs)
|
106
106
|
end
|
107
107
|
unless metadata.nil?
|
108
108
|
obj.attributes = metadata
|
@@ -114,28 +114,19 @@ module CdmMigrator
|
|
114
114
|
end
|
115
115
|
|
116
116
|
def export
|
117
|
-
|
117
|
+
# Get a collection's member works from Solr
|
118
|
+
solr = RSolr.connect url: Blacklight.connection_config[:url]
|
118
119
|
response = solr.get 'select', params: {
|
119
120
|
q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
|
121
|
+
fq: ["has_model_ssim:FileSet OR has_model_ssim:*Work"],
|
120
122
|
rows: 3400,
|
121
123
|
fl: "id"
|
122
124
|
}
|
123
125
|
unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
|
124
126
|
work_ids = response['response']['docs'].map { |doc| doc['id'] }
|
125
127
|
end
|
126
|
-
#works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
|
127
|
-
@csv_headers = ['type'] + work_fields
|
128
|
-
@csv_array = [@csv_headers.join(',')]
|
129
|
-
work_ids.each do |work_id|
|
130
|
-
doc = ::SolrDocument.find work_id
|
131
|
-
add_line doc
|
132
|
-
doc._source[:file_set_ids_ssim].each do |file_id|
|
133
|
-
file_doc = ::SolrDocument.find file_id
|
134
|
-
add_line file_doc
|
135
|
-
end
|
136
|
-
end
|
137
128
|
|
138
|
-
send_data
|
129
|
+
send_data CsvExportService.new(available_works).csv_for(work_ids),
|
139
130
|
:type => 'text/csv; charset=iso-8859-5; header=present',
|
140
131
|
:disposition => "attachment; filename=export.csv"
|
141
132
|
end
|
@@ -146,43 +137,6 @@ module CdmMigrator
|
|
146
137
|
authorize! :create, available_works.first
|
147
138
|
end
|
148
139
|
|
149
|
-
def add_line doc
|
150
|
-
line_hash = {}
|
151
|
-
line_hash['type'] = doc._source[:has_model_ssim].first
|
152
|
-
work_fields.each do |field|
|
153
|
-
line_hash[field] = create_cell doc, field
|
154
|
-
end
|
155
|
-
@csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
|
156
|
-
|
157
|
-
end
|
158
|
-
|
159
|
-
def work_fields
|
160
|
-
@fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
|
161
|
-
end
|
162
|
-
|
163
|
-
def excluded_fields
|
164
|
-
%w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
|
165
|
-
relative_path import_url part_of resource_type access_control_id
|
166
|
-
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
167
|
-
lease_id]
|
168
|
-
end
|
169
|
-
|
170
|
-
def create_cell w, field
|
171
|
-
if field.include? 'date' or field == 'chronological_coverage'
|
172
|
-
if w._source[field+'_tesim'].is_a?(Array)
|
173
|
-
w._source[field+'_tesim'].join('|')
|
174
|
-
else
|
175
|
-
w._source[field+'_tesim']
|
176
|
-
end
|
177
|
-
elsif w.respond_to?(field.to_sym)
|
178
|
-
if w.send(field).is_a?(Array)
|
179
|
-
w.send(field).join('|')
|
180
|
-
else
|
181
|
-
w.send(field)
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
140
|
def available_works
|
187
141
|
@available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
|
188
142
|
end
|
@@ -277,7 +231,7 @@ module CdmMigrator
|
|
277
231
|
if file_path.present? && File.file?(file_path) && @max_file_size
|
278
232
|
if File.size(file_path.gsub("file://", "")) > @max_file_size
|
279
233
|
@error_list[row_number] = { "file size" => "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts.
|
280
|
-
|
234
|
+
Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
|
281
235
|
end
|
282
236
|
end
|
283
237
|
end
|
@@ -346,9 +300,9 @@ module CdmMigrator
|
|
346
300
|
hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
|
347
301
|
end
|
348
302
|
else # Or val should be string
|
349
|
-
|
350
|
-
|
351
|
-
|
303
|
+
invalid_chars = ["\\"]
|
304
|
+
# Make exceptions for backslashes that are part of whitespace characters
|
305
|
+
# by deleting them before checking for stray backslashes
|
352
306
|
if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
|
353
307
|
hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
|
354
308
|
end
|
@@ -14,32 +14,59 @@ module CdmMigrator
|
|
14
14
|
# advantage of the same strategy as Hyrax::Actors::FileSetOrderedMembersActor
|
15
15
|
# but you don't need the OrderedMembersActor constant initialized.
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
def perform work, ingest_work, user
|
18
|
+
# Reload the work to get the most recent and accurate member associations
|
19
|
+
work.reload
|
20
|
+
if work.ordered_members.to_a.empty? && work.file_sets.empty?
|
21
|
+
attach_files(work, ingest_work.files, user)
|
22
|
+
else
|
23
|
+
delete_excess_file_sets(work)
|
24
|
+
ordered_count = work.reload.ordered_members.to_a.count
|
25
|
+
unless ordered_count == ingest_work.files.count
|
26
|
+
# Attach any files that might be missing
|
27
|
+
files = ingest_work[ordered_count..]
|
28
|
+
attach_files(work, files, user)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
first_file_set = work.ordered_members.to_a.first
|
32
|
+
work.representative = first_file_set
|
33
|
+
work.thumbnail = first_file_set
|
34
|
+
work.save!
|
35
|
+
work.file_sets.each { |fs| CdmIngestFilesJob.perform_later(fs, fs.import_url, user, ingest_work) }
|
23
36
|
end
|
24
37
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
private
|
39
|
+
|
40
|
+
def attach_files(work, ingest_work_files, user)
|
41
|
+
ingest_work_files.each do |file|
|
42
|
+
url = file[:url]
|
43
|
+
ordered_members = work.ordered_members
|
44
|
+
# last_file = ingest_work.files.last==file
|
45
|
+
::FileSet.new(import_url: url, label: file[:title]) do |fs|
|
46
|
+
fs.attributes = file[:metadata]
|
47
|
+
fs.save!
|
48
|
+
ordered_members << fs
|
35
49
|
end
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
50
|
+
end
|
51
|
+
work.save!
|
52
|
+
work.reload.ordered_members.to_a.each do |file_set|
|
53
|
+
Hyrax.config.callback.run(:after_create_fileset, file_set, user, warn: false)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# Sometimes when this job fails, file sets are attached to the work
|
58
|
+
# without attaching them as ordered members. This creates "ghost files"
|
59
|
+
# that don't show up in the interface but are still linked to the work as members
|
60
|
+
def delete_excess_file_sets(work)
|
61
|
+
ordered_members = work.ordered_members.to_a
|
62
|
+
ghost_members = work.file_sets.select { |fs| ordered_members.exclude? fs }
|
63
|
+
if ghost_members.any?
|
64
|
+
# Unlink the file sets from the parent work first because it makes deleting them faster
|
65
|
+
work.members = ordered_members
|
40
66
|
work.save!
|
41
|
-
|
67
|
+
ghost_members.each(&:destroy!)
|
68
|
+
end
|
42
69
|
end
|
43
|
-
|
70
|
+
|
44
71
|
end
|
45
72
|
end
|
@@ -4,20 +4,15 @@ module CdmMigrator
|
|
4
4
|
|
5
5
|
def perform(ingest_work, user, admin_set_id, collection_id)
|
6
6
|
admin_set = ::AdminSet.find(admin_set_id) rescue nil
|
7
|
-
collection =
|
7
|
+
collection = Hyrax.config.collection_class.find(collection_id) rescue nil
|
8
8
|
work = Object.const_get(ingest_work.work_type).new
|
9
|
-
#status_after, embargo_date, lease_date = nil, nil, nil
|
10
9
|
work.apply_depositor_metadata(user)
|
11
10
|
work.attributes = ingest_work.data
|
12
|
-
if ingest_work.data.has_key? 'downloadable'
|
13
|
-
# Convert string to boolean
|
14
|
-
work.downloadable = ActiveModel::Type::Boolean.new.cast(ingest_work.data['downloadable'])
|
15
|
-
elsif work.attributes.include? 'downloadable' # Set work to downloadable by default
|
16
|
-
work.downloadable = true
|
17
|
-
end
|
18
11
|
work.member_of_collections = [collection] if collection
|
19
12
|
work.admin_set = admin_set if admin_set
|
20
13
|
work.date_uploaded = DateTime.now
|
14
|
+
add_configured_permissions(work)
|
15
|
+
work.try(:to_controlled_vocab)
|
21
16
|
begin
|
22
17
|
work.save!
|
23
18
|
# Weird error where descriptions with whitespace chars \n or \r don't save the 1st time
|
@@ -29,9 +24,39 @@ module CdmMigrator
|
|
29
24
|
work.description = old_descr
|
30
25
|
work.save!
|
31
26
|
end
|
32
|
-
#
|
33
|
-
#
|
34
|
-
BatchCreateFilesJob.perform_later(work, ingest_work, user)
|
27
|
+
# Creating file (sets) with Hyrax::Actors::OrderedMembersActor is now the default.
|
28
|
+
# To use the original Hyrax::Actors::FileSetActor, replace the line below with
|
29
|
+
# BatchCreateFilesJob.perform_later(work, ingest_work, user)
|
30
|
+
BatchCreateFilesWithOrderedMembersJob.perform_later(work, ingest_work, user)
|
35
31
|
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def add_configured_permissions(work)
|
36
|
+
work_type = work.class
|
37
|
+
return if configured_permissions.nil? # Nothing configured at all
|
38
|
+
|
39
|
+
permissions_config = configured_permissions.dig(work_type.to_s)
|
40
|
+
|
41
|
+
return if permissions_config.nil? # Nothing configured for this work type
|
42
|
+
|
43
|
+
permissions = permissions_config.map do |permission_level, group_name|
|
44
|
+
# Check if the permission level is configured in Hyrax
|
45
|
+
raise "#{permission_level} permission is not configured in this repo. Is it set in Hyrax.config.permission_levels?" unless Hyrax.config.permission_levels.index(permission_level).presence
|
46
|
+
# Construct each permission as a hash
|
47
|
+
group_name.map do |group|
|
48
|
+
{ name: group, type: "group", access: permission_level }
|
49
|
+
end.flatten
|
50
|
+
end.flatten
|
51
|
+
|
52
|
+
# Finally, set the work's permissions_attributes
|
53
|
+
work.permissions_attributes = permissions
|
54
|
+
end
|
55
|
+
|
56
|
+
# @return [Hash] - A hash like { "GenericWork"=> { "edit" => ["admin"], "download" => ["public"] } }
|
57
|
+
def configured_permissions
|
58
|
+
CdmMigrator::Engine.config.dig('default_work_permissions')
|
59
|
+
end
|
60
|
+
|
36
61
|
end
|
37
62
|
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module CdmMigrator
|
2
|
+
class CsvExportService
|
3
|
+
|
4
|
+
# A service for exporting work and file set metadata to a csv. You can customize headers/fields by
|
5
|
+
# overriding the included_fields method below. You can also specify different work types by changing/overriding
|
6
|
+
# the available_work_types method.
|
7
|
+
|
8
|
+
# @param [Array <Class>] - the available work types, passed in from the controller, such as GenericWork
|
9
|
+
def initialize(work_types)
|
10
|
+
@work_types = work_types
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
|
14
|
+
# @param [String] - where to save the csv file to (filepath)
|
15
|
+
def write_to_csv(work_ids, filepath)
|
16
|
+
File.open(filepath, 'w') { |file| file.write(rows_for(work_ids).map(&:to_csv).join) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def csv_for(work_ids)
|
20
|
+
rows_for(work_ids).map(&:to_csv).join
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
|
24
|
+
# @return [Array <Array>] - An array of arrays where each nested array contains the metadata
|
25
|
+
# for a work or file set and corresponds to a csv row.
|
26
|
+
def rows_for(work_ids)
|
27
|
+
csv_array = [csv_headers]
|
28
|
+
work_ids.each_with_object(csv_array).each do |work_id|
|
29
|
+
doc = ::SolrDocument.find work_id
|
30
|
+
csv_array << row_for(doc)
|
31
|
+
doc._source[:file_set_ids_ssim].each do |file_id|
|
32
|
+
file_doc = ::SolrDocument.find file_id
|
33
|
+
csv_array << row_for(file_doc)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# @param [SolrDocument] - Any model that has the properties listed in #included_fields (e.g. GenericWork, FileSet)
|
39
|
+
# @return [Array <String>] - the csv row for the given document
|
40
|
+
def row_for(document)
|
41
|
+
line_hash = {}
|
42
|
+
line_hash['type'] = document._source[:has_model_ssim].first
|
43
|
+
included_fields.each do |field|
|
44
|
+
line_hash[field] = create_cell document, field
|
45
|
+
end
|
46
|
+
line_hash.values_at(*csv_headers).map { |cell| cell.blank? ? '' : cell }
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# @return [Array <String>]
|
52
|
+
def included_fields
|
53
|
+
@work_types.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
|
54
|
+
end
|
55
|
+
|
56
|
+
def excluded_fields
|
57
|
+
%w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
|
58
|
+
relative_path import_url part_of resource_type access_control_id
|
59
|
+
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
60
|
+
lease_id]
|
61
|
+
end
|
62
|
+
|
63
|
+
# @param [SolrDocument] - the document to create a cell for
|
64
|
+
# @param [String or Symbol] - the name of the field
|
65
|
+
# NOTE: any fields you want to include must also be added to the SolrDocument model as methods
|
66
|
+
# because of the check for respond_to?
|
67
|
+
def create_cell document, field
|
68
|
+
properties = document.hydra_model.properties
|
69
|
+
if document.respond_to?(field.to_sym)
|
70
|
+
if properties.keys.include?(field) && properties[field].multiple? && field.to_sym != :doi
|
71
|
+
document.send(field).join('|')
|
72
|
+
else
|
73
|
+
document.send(field)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# @return [Array <String>] - the headers for the csv
|
79
|
+
def csv_headers
|
80
|
+
['type'] + included_fields
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
data/lib/cdm_migrator/version.rb
CHANGED
data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml
CHANGED
@@ -51,3 +51,7 @@ default_fields:
|
|
51
51
|
- identifier
|
52
52
|
- based_near
|
53
53
|
- related_url
|
54
|
+
default_work_permissions:
|
55
|
+
# GenericWork: # the work type to set default permissions for
|
56
|
+
# edit: # the permission level (must also be set in config/initializers/hyrax.rb: Hyrax.config.permission_levels = ...). Default available values are edit, read
|
57
|
+
# - admin # the group name (currently only groups are supported, not user names)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdm_migrator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- sephirothkod
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -24,6 +24,26 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hyrax
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.2'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '4.0'
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '3.2'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '4.0'
|
27
47
|
- !ruby/object:Gem::Dependency
|
28
48
|
name: engine_cart
|
29
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,6 +118,7 @@ files:
|
|
98
118
|
- app/models/cdm_migrator/application_record.rb
|
99
119
|
- app/models/cdm_migrator/batch_ingest.rb
|
100
120
|
- app/models/cdm_migrator/ingest_work.rb
|
121
|
+
- app/services/csv_export_service.rb
|
101
122
|
- app/views/cdm_migrator/cdm/collection.html.erb
|
102
123
|
- app/views/cdm_migrator/cdm/mappings.html.erb
|
103
124
|
- app/views/cdm_migrator/csv/_batches_list.html.erb
|
@@ -141,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
141
162
|
- !ruby/object:Gem::Version
|
142
163
|
version: '0'
|
143
164
|
requirements: []
|
144
|
-
rubygems_version: 3.
|
165
|
+
rubygems_version: 3.5.3
|
145
166
|
signing_key:
|
146
167
|
specification_version: 4
|
147
168
|
summary: ContentDM to Hyrax migrator.
|