cdm_migrator 3.3.2 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 88e952c96bf9c9088bfd1b1b6b5ccd0ff0ea28089440767f55e7232b20aeec69
4
- data.tar.gz: 53ca89586e9f42f5318301846ac34ed7cca77ff5ed4b27e86d956746726eb1b5
3
+ metadata.gz: 9bd9baa9f5c277e22421a1726d4652d2231a4067aabcf2e4f048ac9aa7264fc2
4
+ data.tar.gz: f698973f8726c227326fd7e0c393af4201c7dd9352f43d669cff9659f310da85
5
5
  SHA512:
6
- metadata.gz: 1e08e51b6c1a5dcd2d21c8968158e8c1ab6a12d68c46fcf16854c5b3019c622bd382726b85d570e9457f80c8093548972c982d3e9d4696ad02eff7448c8c590f
7
- data.tar.gz: 9d6e141f1abac1ebca916d51d7727e692ba0d18f7f4f6e9a91e206b1c293fd5d5fc64ad1ea154990913e9796ed627a5085bfebbceeaf0420690035fa16af3efa
6
+ metadata.gz: 6c264c4d7a122f598aef1d746766771b408defb9a15e014490f7a2acd43772197c5e66d15a99120c13089f013fe702f9dc5aeaebbb9cbc9f70b2110b54cac55d
7
+ data.tar.gz: 85241bcb0111ead83629678cfde68fb729d49997890dbc0432857e6f7976f74f3f4296b0e6aecac309b71a84b398ea02fa24f1d346d652912a60eded09c39c0c
@@ -29,7 +29,7 @@ module CdmMigrator
29
29
 
30
30
  def upload
31
31
  @admin_sets = AdminSet.all.map { |as| [as.title.first, as.id] }
32
- @collections = Collection.all.map { |col| [col.title.first, col.id] }
32
+ @collections = Hyrax.config.collection_class.all.map { |col| [col.title.first, col.id] }
33
33
  end
34
34
 
35
35
  def create
@@ -88,7 +88,7 @@ module CdmMigrator
88
88
  end
89
89
 
90
90
  def edit
91
- @collections = ::Collection.all.map { |c| [c.title.first, c.id] }
91
+ @collections = Hyrax.config.collection_class.all.map { |c| [c.title.first, c.id] }
92
92
  end
93
93
 
94
94
  def update
@@ -102,7 +102,7 @@ module CdmMigrator
102
102
  elsif type.include? "Work"
103
103
  metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
104
104
  elsif type.include? "File"
105
- metadata = create_data(row.except('id', 'type'), work_form(type), obj, mvs)
105
+ metadata = create_data(row.except('id', 'type'), file_form, obj, mvs)
106
106
  end
107
107
  unless metadata.nil?
108
108
  obj.attributes = metadata
@@ -114,28 +114,19 @@ module CdmMigrator
114
114
  end
115
115
 
116
116
  def export
117
- solr = RSolr.connect url: Account.find_by(tenant: Apartment::Tenant.current).solr_endpoint.url
117
+ # Get a collection's member works from Solr
118
+ solr = RSolr.connect url: Blacklight.connection_config[:url]
118
119
  response = solr.get 'select', params: {
119
120
  q: "member_of_collection_ids_ssim:#{params[:collection_id]}",
121
+ fq: ["has_model_ssim:FileSet OR has_model_ssim:*Work"],
120
122
  rows: 3400,
121
123
  fl: "id"
122
124
  }
123
125
  unless response['response']['docs'].empty? || response['response']['docs'][0].empty?
124
126
  work_ids = response['response']['docs'].map { |doc| doc['id'] }
125
127
  end
126
- #works = ::ActiveFedora::Base.where member_of_collection_ids_ssim: params[:collection_id]
127
- @csv_headers = ['type'] + work_fields
128
- @csv_array = [@csv_headers.join(',')]
129
- work_ids.each do |work_id|
130
- doc = ::SolrDocument.find work_id
131
- add_line doc
132
- doc._source[:file_set_ids_ssim].each do |file_id|
133
- file_doc = ::SolrDocument.find file_id
134
- add_line file_doc
135
- end
136
- end
137
128
 
138
- send_data @csv_array.join("\n"),
129
+ send_data CsvExportService.new(available_works).csv_for(work_ids),
139
130
  :type => 'text/csv; charset=iso-8859-5; header=present',
140
131
  :disposition => "attachment; filename=export.csv"
141
132
  end
@@ -146,43 +137,6 @@ module CdmMigrator
146
137
  authorize! :create, available_works.first
147
138
  end
148
139
 
149
- def add_line doc
150
- line_hash = {}
151
- line_hash['type'] = doc._source[:has_model_ssim].first
152
- work_fields.each do |field|
153
- line_hash[field] = create_cell doc, field
154
- end
155
- @csv_array << line_hash.values_at(*@csv_headers).map { |cell| cell = '' if cell.nil?; "\"#{cell.gsub("\"", "\"\"")}\"" }.join(',')
156
-
157
- end
158
-
159
- def work_fields
160
- @fields ||= available_works.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
161
- end
162
-
163
- def excluded_fields
164
- %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
165
- relative_path import_url part_of resource_type access_control_id
166
- representative_id thumbnail_id rendering_ids admin_set_id embargo_id
167
- lease_id]
168
- end
169
-
170
- def create_cell w, field
171
- if field.include? 'date' or field == 'chronological_coverage'
172
- if w._source[field+'_tesim'].is_a?(Array)
173
- w._source[field+'_tesim'].join('|')
174
- else
175
- w._source[field+'_tesim']
176
- end
177
- elsif w.respond_to?(field.to_sym)
178
- if w.send(field).is_a?(Array)
179
- w.send(field).join('|')
180
- else
181
- w.send(field)
182
- end
183
- end
184
- end
185
-
186
140
  def available_works
187
141
  @available_works ||= Hyrax::QuickClassificationQuery.new(current_user).authorized_models
188
142
  end
@@ -277,7 +231,7 @@ module CdmMigrator
277
231
  if file_path.present? && File.file?(file_path) && @max_file_size
278
232
  if File.size(file_path.gsub("file://", "")) > @max_file_size
279
233
  @error_list[row_number] = { "file size" => "The file at #{file_path} is too large to be uploaded. Please compress the file or split it into parts.
280
- Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
234
+ Each part should be under #{helpers.number_to_human_size(@max_file_size)}." }
281
235
  end
282
236
  end
283
237
  end
@@ -346,9 +300,9 @@ module CdmMigrator
346
300
  hash[field.to_s] = "May contain the wrong multi-value separator or a typo in the URI."
347
301
  end
348
302
  else # Or val should be string
349
- invalid_chars = ["\\"]
350
- # Make exceptions for backslashes that are part of whitespace characters
351
- # by deleting them before checking for stray \s
303
+ invalid_chars = ["\\"]
304
+ # Make exceptions for backslashes that are part of whitespace characters
305
+ # by deleting them before checking for stray \s
352
306
  if val.delete("\t\r\n\s\n").match Regexp.union(invalid_chars)
353
307
  hash[field.to_s] = "May contain an invalid character such as #{invalid_chars.to_sentence(last_word_connector: ", or ")}."
354
308
  end
@@ -14,32 +14,59 @@ module CdmMigrator
14
14
  # advantage of the same strategy as Hyrax::Actors::FileSetOrderedMembersActor
15
15
  # but you don't need the OrderedMembersActor constant initialized.
16
16
 
17
- # This rescue is a safeguard against creating lots of orphan file sets if there
18
- # are recurring errors (see https://tinyurl.com/nh4c5e9j). Instead, CdmMigrator
19
- # will fall back to creating file sets one-by-one if this job fails once.
20
- rescue_from(StandardError) do |exception|
21
- Rails.logger.error "BatchCreateFilesWithOrderedMembersJob error: #{exception.to_s}"
22
- RestartUploadFromMiddleJob.perform_later(arguments[0], arguments[1], arguments[2])
17
+ def perform work, ingest_work, user
18
+ # Reload the work to get the most recent and accurate member associations
19
+ work.reload
20
+ if work.ordered_members.to_a.empty? && work.file_sets.empty?
21
+ attach_files(work, ingest_work.files, user)
22
+ else
23
+ delete_excess_file_sets(work)
24
+ ordered_count = work.reload.ordered_members.to_a.count
25
+ unless ordered_count == ingest_work.files.count
26
+ # Attach any files that might be missing
27
+ files = ingest_work[ordered_count..]
28
+ attach_files(work, files, user)
29
+ end
30
+ end
31
+ first_file_set = work.ordered_members.to_a.first
32
+ work.representative = first_file_set
33
+ work.thumbnail = first_file_set
34
+ work.save!
35
+ work.file_sets.each { |fs| CdmIngestFilesJob.perform_later(fs, fs.import_url, user, ingest_work) }
23
36
  end
24
37
 
25
- def perform work, ingest_work, user
26
- ordered_members = []
27
- ingest_work.files.each do |file|
28
- url = file[:url]
29
- last_file = ingest_work.files.last==file
30
- ::FileSet.new(import_url: url, label: file[:title]) do |fs|
31
- fs.attributes = file[:metadata]
32
- fs.save!
33
- ordered_members << fs
34
- end
38
+ private
39
+
40
+ def attach_files(work, ingest_work_files, user)
41
+ ingest_work_files.each do |file|
42
+ url = file[:url]
43
+ ordered_members = work.ordered_members
44
+ # last_file = ingest_work.files.last==file
45
+ ::FileSet.new(import_url: url, label: file[:title]) do |fs|
46
+ fs.attributes = file[:metadata]
47
+ fs.save!
48
+ ordered_members << fs
35
49
  end
36
- actor = Hyrax::Actors::OrderedMembersActor.new(ordered_members, user)
37
- actor.attach_ordered_members_to_work(work)
38
- work.representative = work.ordered_members.to_a.first
39
- work.thumbnail_id = work.ordered_member_ids.first
50
+ end
51
+ work.save!
52
+ work.reload.ordered_members.to_a.each do |file_set|
53
+ Hyrax.config.callback.run(:after_create_fileset, file_set, user, warn: false)
54
+ end
55
+ end
56
+
57
+ # Sometimes when this job fails, file sets are attached to the work
58
+ # without attaching them as ordered members. This creates "ghost files"
59
+ # that don't show up in the interface but are still linked to the work as members
60
+ def delete_excess_file_sets(work)
61
+ ordered_members = work.ordered_members.to_a
62
+ ghost_members = work.file_sets.select { |fs| ordered_members.exclude? fs }
63
+ if ghost_members.any?
64
+ # Unlink the file sets from the parent work first because it makes deleting them faster
65
+ work.members = ordered_members
40
66
  work.save!
41
- work.file_sets.each { |fs| CdmIngestFilesJob.perform_later(fs, fs.import_url, user, ingest_work) }
67
+ ghost_members.each(&:destroy!)
68
+ end
42
69
  end
43
-
70
+
44
71
  end
45
72
  end
@@ -4,20 +4,15 @@ module CdmMigrator
4
4
 
5
5
  def perform(ingest_work, user, admin_set_id, collection_id)
6
6
  admin_set = ::AdminSet.find(admin_set_id) rescue nil
7
- collection = ::Collection.find(collection_id) rescue nil
7
+ collection = Hyrax.config.collection_class.find(collection_id) rescue nil
8
8
  work = Object.const_get(ingest_work.work_type).new
9
- #status_after, embargo_date, lease_date = nil, nil, nil
10
9
  work.apply_depositor_metadata(user)
11
10
  work.attributes = ingest_work.data
12
- if ingest_work.data.has_key? 'downloadable'
13
- # Convert string to boolean
14
- work.downloadable = ActiveModel::Type::Boolean.new.cast(ingest_work.data['downloadable'])
15
- elsif work.attributes.include? 'downloadable' # Set work to downloadable by default
16
- work.downloadable = true
17
- end
18
11
  work.member_of_collections = [collection] if collection
19
12
  work.admin_set = admin_set if admin_set
20
13
  work.date_uploaded = DateTime.now
14
+ add_configured_permissions(work)
15
+ work.try(:to_controlled_vocab)
21
16
  begin
22
17
  work.save!
23
18
  # Weird error where descriptions with whitespace chars \n or \r don't save the 1st time
@@ -29,9 +24,39 @@ module CdmMigrator
29
24
  work.description = old_descr
30
25
  work.save!
31
26
  end
32
- # To use the BatchCreateFilesWithOrderedMembersJob instead, replace the following line
33
- # with BatchCreateFilesWithOrderedMembersJob.perform_later(work, ingest_work, user)
34
- BatchCreateFilesJob.perform_later(work, ingest_work, user)
27
+ # Creating file (sets) with Hyrax::Actors::OrderedMembersActor is now the default.
28
+ # To use the original Hyrax::Actors::FileSetActor, replace the line below with
29
+ # BatchCreateFilesJob.perform_later(work, ingest_work, user)
30
+ BatchCreateFilesWithOrderedMembersJob.perform_later(work, ingest_work, user)
35
31
  end
32
+
33
+ private
34
+
35
+ def add_configured_permissions(work)
36
+ work_type = work.class
37
+ return if configured_permissions.nil? # Nothing configured at all
38
+
39
+ permissions_config = configured_permissions.dig(work_type.to_s)
40
+
41
+ return if permissions_config.nil? # Nothing configured for this work type
42
+
43
+ permissions = permissions_config.map do |permission_level, group_name|
44
+ # Check if the permission level is configured in Hyrax
45
+ raise "#{permission_level} permission is not configured in this repo. Is it set in Hyrax.config.permission_levels?" unless Hyrax.config.permission_levels.index(permission_level).presence
46
+ # Construct each permission as a hash
47
+ group_name.map do |group|
48
+ { name: group, type: "group", access: permission_level }
49
+ end.flatten
50
+ end.flatten
51
+
52
+ # Finally, set the work's permissions_attributes
53
+ work.permissions_attributes = permissions
54
+ end
55
+
56
+ # @return [Hash] - A hash like { "GenericWork"=> { "edit" => ["admin"], "download" => ["public"] } }
57
+ def configured_permissions
58
+ CdmMigrator::Engine.config.dig('default_work_permissions')
59
+ end
60
+
36
61
  end
37
62
  end
@@ -0,0 +1,84 @@
1
+ module CdmMigrator
2
+ class CsvExportService
3
+
4
+ # A service for exporting work and file set metadata to a csv. You can customize headers/fields by
5
+ # overriding the included_fields method below. You can also export different work types by passing
6
+ # a different list of work type classes to the constructor.
7
+
8
+ # @param [Array <Class>] - the available work types, passed in from the controller, such as GenericWork
9
+ def initialize(work_types)
10
+ @work_types = work_types
11
+ end
12
+
13
+ # @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
14
+ # @param [String] - where to save the csv file to (filepath)
15
+ def write_to_csv(work_ids, filepath)
16
+ File.open(filepath, 'w') { |file| file.write(rows_for(work_ids).map(&:to_csv).join) }
17
+ end
18
+
19
+ def csv_for(work_ids)
20
+ rows_for(work_ids).map(&:to_csv).join
21
+ end
22
+
23
+ # @param [Array <String>] - the work ids (for GenericWork or other work type) to export metadata for
24
+ # @return [Array <Array>] - An array of arrays where each nested array contains the metadata
25
+ # for a work or file set and corresponds to a csv row.
26
+ def rows_for(work_ids)
27
+ csv_array = [csv_headers]
28
+ work_ids.each_with_object(csv_array).each do |work_id|
29
+ doc = ::SolrDocument.find work_id
30
+ csv_array << row_for(doc)
31
+ doc._source[:file_set_ids_ssim].each do |file_id|
32
+ file_doc = ::SolrDocument.find file_id
33
+ csv_array << row_for(file_doc)
34
+ end
35
+ end
36
+ end
37
+
38
+ # @param [SolrDocument] - Any model that has the properties listed in #included_fields (e.g. GenericWork, FileSet)
39
+ # @return [Array <String>] - the csv row for the given document
40
+ def row_for(document)
41
+ line_hash = {}
42
+ line_hash['type'] = document._source[:has_model_ssim].first
43
+ included_fields.each do |field|
44
+ line_hash[field] = create_cell document, field
45
+ end
46
+ line_hash.values_at(*csv_headers).map { |cell| cell.blank? ? '' : cell }
47
+ end
48
+
49
+ private
50
+
51
+ # @return [Array <String>]
52
+ def included_fields
53
+ @work_types.map { |work| work.new.attributes.keys }.flatten.uniq - excluded_fields
54
+ end
55
+
56
+ def excluded_fields
57
+ %w[date_uploaded date_modified head tail state proxy_depositor on_behalf_of arkivo_checksum label
58
+ relative_path import_url part_of resource_type access_control_id
59
+ representative_id thumbnail_id rendering_ids admin_set_id embargo_id
60
+ lease_id]
61
+ end
62
+
63
+ # @param [SolrDocument] - the document to create a cell for
64
+ # @param [String or Symbol] - the name of the field
65
+ # NOTE: any fields you want to include must also be added to the SolrDocument model as methods
66
+ # because of the check for respond_to?
67
+ def create_cell document, field
68
+ properties = document.hydra_model.properties
69
+ if document.respond_to?(field.to_sym)
70
+ if properties.keys.include?(field) && properties[field].multiple? && field.to_sym != :doi
71
+ document.send(field).join('|')
72
+ else
73
+ document.send(field)
74
+ end
75
+ end
76
+ end
77
+
78
+ # @return [Array <String>] - the headers for the csv
79
+ def csv_headers
80
+ ['type'] + included_fields
81
+ end
82
+
83
+ end
84
+ end
@@ -1,3 +1,3 @@
1
1
  module CdmMigrator
2
- VERSION = '3.3.2'
2
+ VERSION = '3.5.2'
3
3
  end
@@ -51,3 +51,7 @@ default_fields:
51
51
  - identifier
52
52
  - based_near
53
53
  - related_url
54
+ default_work_permissions:
55
+ # GenericWork: # the work type to set default permissions for
56
+ # edit: # the permission level (must also be set in config/initializers/hyrax.rb: Hyrax.config.permission_levels = ...). Default available values are edit, read
57
+ # - admin # the group name (currently only groups are supported, not user names)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdm_migrator
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.2
4
+ version: 3.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - sephirothkod
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-26 00:00:00.000000000 Z
11
+ date: 2024-07-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -24,6 +24,26 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hyrax
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '3.2'
34
+ - - "<"
35
+ - !ruby/object:Gem::Version
36
+ version: '4.0'
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '3.2'
44
+ - - "<"
45
+ - !ruby/object:Gem::Version
46
+ version: '4.0'
27
47
  - !ruby/object:Gem::Dependency
28
48
  name: engine_cart
29
49
  requirement: !ruby/object:Gem::Requirement
@@ -98,6 +118,7 @@ files:
98
118
  - app/models/cdm_migrator/application_record.rb
99
119
  - app/models/cdm_migrator/batch_ingest.rb
100
120
  - app/models/cdm_migrator/ingest_work.rb
121
+ - app/services/csv_export_service.rb
101
122
  - app/views/cdm_migrator/cdm/collection.html.erb
102
123
  - app/views/cdm_migrator/cdm/mappings.html.erb
103
124
  - app/views/cdm_migrator/csv/_batches_list.html.erb
@@ -141,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
141
162
  - !ruby/object:Gem::Version
142
163
  version: '0'
143
164
  requirements: []
144
- rubygems_version: 3.1.2
165
+ rubygems_version: 3.5.3
145
166
  signing_key:
146
167
  specification_version: 4
147
168
  summary: ContentDM to Hyrax migrator.