cdm_migrator 3.3.2 → 3.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/cdm_migrator/csv_controller.rb +11 -57
- data/app/jobs/cdm_migrator/create_work_job.rb +36 -11
- data/app/services/csv_export_service.rb +84 -0
- data/lib/cdm_migrator/version.rb +1 -1
- data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml +4 -0
- metadata +24 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b875ba84c55ed42472bfb7bc10a476e49adf23733aa0a8053dc577e66c57203
|
4
|
+
data.tar.gz: 543cedc0d2926d5f4aa27021cd92fb6de38e8e5fb0f09ace7b76a3d27633be36
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7880e669d69504d69850d5e7d3453a0cc62bcb763bf1f7237c8d3328b07f8c7742f77781a9a5417e2f04172e7a9a398846f5cff5a9b503f19bf6791cefdb062
|
7
|
+
data.tar.gz: a0cea1c0b7cb2f3f9e8b53adc820fb5a3043e56a3c3ba68aee888798a543e3438a718a4ea08bbfef7a4230d3e37b7858c7a9075cbb903e9a5307b78855ae53f4
|
data/app/controllers/cdm_migrator/csv_controller.rb
CHANGED
@@ -29,7 +29,7 @@ module CdmMigrator
|
|
29
29
|
|
30
30
|
def upload
|
31
31
|
@admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
|
32
|
-
@collections =
|
32
|
+
@collections = Hyrax.config.collection_class.all.map { |col| [col.title.first, col.id] }
|
33
33
|
end
|
34
34
|
|
35
35
|
def create
|
@@ -88,7 +88,7 @@ module CdmMigrator
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def edit
|
91
|
-
@collections =
|
91
|
+
@collections = Hyrax.config.collection_class.all.map { |c| [c.title.first, c.id] }
|
92
92
|
end
|
93
93
|
|
94
94
|
def update
|
@@ -102,7 +102,7 @@ module CdmMigrator
|
|
102
102
|
elsif type.include? "Work"
|
103
103
|
metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
|
104
104
|
elsif type.include? "File"
|
105
|
-
metadata = create_data(row.except('id', 'type'),
|
105
|
+
metadata = create_data(row.except('id', 'type'), file_form, obj, mvs)
|
106
106
|
end
|
107
107
|
unless metadata.nil?
|
108
108
|
obj.attributes = metadata
|
@@ -114,28 +114,19 @@ module CdmMigrator
|
|
114
114
|
end
|
115
115
|
|
116
116
|
def export
|
117
|
-
|
117
|
+
# Get a collection's member works from Solr
|
118
|
+
solr = RSolr.connect url: Blacklight.connection_config[:url]
|
118
119
|
response = solr.get 'select', params: {
|
119
120
|
q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
|
121
|
+
fq: ["has_model_ssim:FileSet OR has_model_ssim:*Work"],
|
120
122
|
rows: 3400,
|
121
123
|
fl: "id"
|
122
124
|
}
|
123
125
|
unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
|
124
126
|
work_ids = response['response']['docs'].map { |doc| doc['id'] }
|
125
127
|
end
|
126
|
-
#works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
|
127
|
-
@csv_headers = ['type'] + work_fields
|
128
|
-
@csv_array = [@csv_headers.join(',')]
|
129
|
-
work_ids.each do |work_id|
|
130
|
-
doc = ::SolrDocument.find work_id
|
131
|
-
add_line doc
|
132
|
-
doc._source[:file_set_ids_ssim].each do |file_id|
|
133
|
-
file_doc = ::SolrDocument.find file_id
|
134
|
-
add_line file_doc
|
135
|
-
end
|
136
|
-
end
|
137
128
|
|
138
|
-
send_data
|
129
|
+
send_data CsvExportService.new(available_works).csv_for(work_ids),
|
139
130
|
:type => 'text/csv; charset=iso-8859-5; header=present',
|
140
131
|
:disposition => "attachment; filename=export.csv"
|
141
132
|
end
|
@@ -146,43 +137,6 @@ module CdmMigrator
|
|
146
137
|
authorize! :create, available_works.first
|
147
138
|
end
|
148
139
|
|
149
|
-
def add_line doc
|
150
|
-
line_hash = {}
|
151
|
-
line_hash['type'] = doc._source[:has_model_ssim].first
|
152
|
-
work_fields.each do |field|
|
153
|
-
line_hash[field] = create_cell doc, field
|
154
|
-
end
|
155
|
-
@csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
|
156
|
-
|
157
|
-
end
|
158
|
-
|
159
|
-
def work_fields
|
160
|
-
@fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
|
161
|
-
end
|
162
|
-
|
163
|
-
def excluded_fields
|
164
|
-
%w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
|
165
|
-
relative_path import_url part_of resource_type access_control_id
|
166
|
-
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
167
|
-
lease_id]
|
168
|
-
end
|
169
|
-
|
170
|
-
def create_cell w, field
|
171
|
-
if field.include? 'date' or field == 'chronological_coverage'
|
172
|
-
if w._source[field+'_tesim'].is_a?(Array)
|
173
|
-
w._source[field+'_tesim'].join('|')
|
174
|
-
else
|
175
|
-
w._source[field+'_tesim']
|
176
|
-
end
|
177
|
-
elsif w.respond_to?(field.to_sym)
|
178
|
-
if w.send(field).is_a?(Array)
|
179
|
-
w.send(field).join('|')
|
180
|
-
else
|
181
|
-
w.send(field)
|
182
|
-
end
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
140
|
def available_works
|
187
141
|
@available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
|
188
142
|
end
|
@@ -277,7 +231,7 @@ module CdmMigrator
|
|
277
231
|
if file_path.present? && File.file?(file_path) && @max_file_size
|
278
232
|
if File.size(file_path.gsub("file://", "")) > @max_file_size
|
279
233
|
@error_list[row_number] = { "file size" => "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts.
|
280
|
-
|
234
|
+
Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
|
281
235
|
end
|
282
236
|
end
|
283
237
|
end
|
@@ -346,9 +300,9 @@ module CdmMigrator
|
|
346
300
|
hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
|
347
301
|
end
|
348
302
|
else # Or val should be string
|
349
|
-
|
350
|
-
|
351
|
-
|
303
|
+
invalid_chars = ["\\"]
|
304
|
+
# Make exceptions for backslashes that are part of whitespace characters
|
305
|
+
# by deleting them before checking for stray \s
|
352
306
|
if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
|
353
307
|
hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
|
354
308
|
end
|
data/app/jobs/cdm_migrator/create_work_job.rb
CHANGED
@@ -4,20 +4,15 @@ module CdmMigrator
|
|
4
4
|
|
5
5
|
def perform(ingest_work, user, admin_set_id, collection_id)
|
6
6
|
admin_set = ::AdminSet.find(admin_set_id) rescue nil
|
7
|
-
collection =
|
7
|
+
collection = Hyrax.config.collection_class.find(collection_id) rescue nil
|
8
8
|
work = Object.const_get(ingest_work.work_type).new
|
9
|
-
#status_after, embargo_date, lease_date = nil, nil, nil
|
10
9
|
work.apply_depositor_metadata(user)
|
11
10
|
work.attributes = ingest_work.data
|
12
|
-
if ingest_work.data.has_key? 'downloadable'
|
13
|
-
# Convert string to boolean
|
14
|
-
work.downloadable = ActiveModel::Type::Boolean.new.cast(ingest_work.data['downloadable'])
|
15
|
-
elsif work.attributes.include? 'downloadable' # Set work to downloadable by default
|
16
|
-
work.downloadable = true
|
17
|
-
end
|
18
11
|
work.member_of_collections = [collection] if collection
|
19
12
|
work.admin_set = admin_set if admin_set
|
20
13
|
work.date_uploaded = DateTime.now
|
14
|
+
add_configured_permissions(work)
|
15
|
+
work.try(:to_controlled_vocab)
|
21
16
|
begin
|
22
17
|
work.save!
|
23
18
|
# Weird error where descriptions with whitespace chars \n or \r don't save the 1st time
|
@@ -29,9 +24,39 @@ module CdmMigrator
|
|
29
24
|
work.description = old_descr
|
30
25
|
work.save!
|
31
26
|
end
|
32
|
-
#
|
33
|
-
#
|
34
|
-
BatchCreateFilesJob.perform_later(work, ingest_work, user)
|
27
|
+
# Creating file (sets) with Hyrax::Actors::OrderedMembersActor is now the default.
|
28
|
+
# To use the original Hyrax::Actors::FileSetActor, replace the line below with
|
29
|
+
# BatchCreateFilesJob.perform_later(work, ingest_work, user)
|
30
|
+
BatchCreateFilesWithOrderedMembersJob.perform_later(work, ingest_work, user)
|
35
31
|
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def add_configured_permissions(work)
|
36
|
+
work_type = work.class
|
37
|
+
return if configured_permissions.nil? # Nothing configured at all
|
38
|
+
|
39
|
+
permissions_config = configured_permissions.dig(work_type.to_s)
|
40
|
+
|
41
|
+
return if permissions_config.nil? # Nothing configured for this work type
|
42
|
+
|
43
|
+
permissions = permissions_config.map do |permission_level, group_name|
|
44
|
+
# Check if the permission level is configured in Hyrax
|
45
|
+
raise "#{permission_level} permission is not configured in this repo. Is it set in Hyrax.config.permission_levels?" unless Hyrax.config.permission_levels.index(permission_level).presence
|
46
|
+
# Construct each permission as a hash
|
47
|
+
group_name.map do |group|
|
48
|
+
{ name: group, type: "group", access: permission_level }
|
49
|
+
end.flatten
|
50
|
+
end.flatten
|
51
|
+
|
52
|
+
# Finally, set the work's permissions_attributes
|
53
|
+
work.permissions_attributes = permissions
|
54
|
+
end
|
55
|
+
|
56
|
+
# @return[Hash] - A hash like { "GenericWork"=> { "edit" => ["admin"], "download" => ["public"] } }
|
57
|
+
def configured_permissions
|
58
|
+
CdmMigrator::Engine.config.dig('default_work_permissions')
|
59
|
+
end
|
60
|
+
|
36
61
|
end
|
37
62
|
end
|
data/app/services/csv_export_service.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
module CdmMigrator
|
2
|
+
class CsvExportService
|
3
|
+
|
4
|
+
# A service for exporting work and file set metadata to a csv. You can customize headers/fields by
|
5
|
+
# overriding the included_fields method below. You can also specify different work types by changing/overriding
|
6
|
+
# the available_work_types method.
|
7
|
+
|
8
|
+
# @param [Array <Class>] - the available work types, passed in from the controller, such as GenericWork
|
9
|
+
def initialize(work_types)
|
10
|
+
@work_types = work_types
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
|
14
|
+
# @param [String] - where to save the csv file to (filepath)
|
15
|
+
def write_to_csv(work_ids, filepath)
|
16
|
+
File.open(filepath, 'w') { |file| file.write(rows_for(work_ids).map(&:to_csv).join) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def csv_for(work_ids)
|
20
|
+
rows_for(work_ids).map(&:to_csv).join
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
|
24
|
+
# @return [Array <Array>] - An array of arrays where each nested array contains the metadata
|
25
|
+
# for a work or file set and corresponds to a csv row.
|
26
|
+
def rows_for(work_ids)
|
27
|
+
csv_array = [csv_headers]
|
28
|
+
work_ids.each_with_object(csv_array).each do |work_id|
|
29
|
+
doc = ::SolrDocument.find work_id
|
30
|
+
csv_array << row_for(doc)
|
31
|
+
doc._source[:file_set_ids_ssim].each do |file_id|
|
32
|
+
file_doc = ::SolrDocument.find file_id
|
33
|
+
csv_array << row_for(file_doc)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# @param [SolrDocument] - Any model that has the properties listed in #included_fields (e.g. GenericWork, FileSet)
|
39
|
+
# @return [Array <String>] - the csv row for the given document
|
40
|
+
def row_for(document)
|
41
|
+
line_hash = {}
|
42
|
+
line_hash['type'] = document._source[:has_model_ssim].first
|
43
|
+
included_fields.each do |field|
|
44
|
+
line_hash[field] = create_cell document, field
|
45
|
+
end
|
46
|
+
line_hash.values_at(*csv_headers).map { |cell| cell.blank? ? '' : cell }
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# @return [Array <String>]
|
52
|
+
def included_fields
|
53
|
+
@work_types.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
|
54
|
+
end
|
55
|
+
|
56
|
+
def excluded_fields
|
57
|
+
%w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
|
58
|
+
relative_path import_url part_of resource_type access_control_id
|
59
|
+
representative_id thumbnail_id rendering_ids admin_set_id embargo_id
|
60
|
+
lease_id]
|
61
|
+
end
|
62
|
+
|
63
|
+
# @param [SolrDocument] - the document to create a cell for
|
64
|
+
# @param [String or Symbol] - the name of the field
|
65
|
+
# NOTE: any fields you want to include must also be added to the SolrDocument model as methods
|
66
|
+
# because of the check for respond_to?
|
67
|
+
def create_cell document, field
|
68
|
+
properties = document.hydra_model.properties
|
69
|
+
if document.respond_to?(field.to_sym)
|
70
|
+
if properties.keys.include?(field) && properties[field].multiple? && field.to_sym != :doi
|
71
|
+
document.send(field).join('|')
|
72
|
+
else
|
73
|
+
document.send(field)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# @return [Array <String>] - the headers for the csv
|
79
|
+
def csv_headers
|
80
|
+
['type'] + included_fields
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
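data/lib/cdm_migrator/version.rb
CHANGED
[diff body for this file was not captured (+1 -1)]
data/lib/generators/cdm_migrator/install/templates/config/cdm_migrator.yml
CHANGED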
@@ -51,3 +51,7 @@ default_fields:
|
|
51
51
|
- identifier
|
52
52
|
- based_near
|
53
53
|
- related_url
|
54
|
+
default_work_permissions:
|
55
|
+
# GenericWork: # the work type to set default permissions for
|
56
|
+
# edit: # the permission level (must also be set in config/initializers/hyrax.rb: Hyrax.config.permission_levels = ...). Default available values are edit, read
|
57
|
+
# - admin # the group name (currently only groups are supported, not user names)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdm_migrator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- sephirothkod
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -24,6 +24,26 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: hyrax
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.2'
|
34
|
+
- - "<"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '4.0'
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '3.2'
|
44
|
+
- - "<"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '4.0'
|
27
47
|
- !ruby/object:Gem::Dependency
|
28
48
|
name: engine_cart
|
29
49
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,6 +118,7 @@ files:
|
|
98
118
|
- app/models/cdm_migrator/application_record.rb
|
99
119
|
- app/models/cdm_migrator/batch_ingest.rb
|
100
120
|
- app/models/cdm_migrator/ingest_work.rb
|
121
|
+
- app/services/csv_export_service.rb
|
101
122
|
- app/views/cdm_migrator/cdm/collection.html.erb
|
102
123
|
- app/views/cdm_migrator/cdm/mappings.html.erb
|
103
124
|
- app/views/cdm_migrator/csv/_batches_list.html.erb
|
@@ -141,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
141
162
|
- !ruby/object:Gem::Version
|
142
163
|
version: '0'
|
143
164
|
requirements: []
|
144
|
-
rubygems_version: 3.
|
165
|
+
rubygems_version: 3.5.3
|
145
166
|
signing_key:
|
146
167
|
specification_version: 4
|
147
168
|
summary: ContentDM to Hyrax migrator.
|