bulkrax 6.0.1 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +7 -7
  3. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  4. data/app/assets/javascripts/bulkrax/datatables.js +139 -0
  5. data/app/assets/javascripts/bulkrax/exporters.js +4 -4
  6. data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
  7. data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
  8. data/app/controllers/bulkrax/entries_controller.rb +52 -3
  9. data/app/controllers/bulkrax/exporters_controller.rb +19 -7
  10. data/app/controllers/bulkrax/importers_controller.rb +29 -11
  11. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
  12. data/app/helpers/bulkrax/application_helper.rb +7 -3
  13. data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
  14. data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
  15. data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
  16. data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
  17. data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
  18. data/app/jobs/bulkrax/delete_job.rb +5 -1
  19. data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
  20. data/app/jobs/bulkrax/import_collection_job.rb +1 -1
  21. data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
  22. data/app/jobs/bulkrax/import_job.rb +7 -0
  23. data/app/jobs/bulkrax/import_work_job.rb +1 -1
  24. data/app/jobs/bulkrax/importer_job.rb +1 -1
  25. data/app/models/bulkrax/csv_collection_entry.rb +1 -3
  26. data/app/models/bulkrax/csv_entry.rb +2 -1
  27. data/app/models/bulkrax/entry.rb +2 -0
  28. data/app/models/bulkrax/exporter.rb +9 -2
  29. data/app/models/bulkrax/importer.rb +48 -7
  30. data/app/models/bulkrax/oai_set_entry.rb +1 -3
  31. data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
  32. data/app/models/bulkrax/status.rb +10 -1
  33. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +4 -2
  34. data/app/models/concerns/bulkrax/import_behavior.rb +4 -16
  35. data/app/models/concerns/bulkrax/status_info.rb +8 -0
  36. data/app/parsers/bulkrax/application_parser.rb +90 -19
  37. data/app/parsers/bulkrax/bagit_parser.rb +0 -23
  38. data/app/parsers/bulkrax/csv_parser.rb +0 -52
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +26 -16
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +2 -2
  41. data/app/parsers/bulkrax/xml_parser.rb +18 -21
  42. data/app/services/bulkrax/factory_class_finder.rb +90 -0
  43. data/app/views/bulkrax/exporters/_form.html.erb +10 -10
  44. data/app/views/bulkrax/exporters/index.html.erb +13 -57
  45. data/app/views/bulkrax/exporters/show.html.erb +2 -10
  46. data/app/views/bulkrax/importers/_csv_fields.html.erb +7 -1
  47. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
  48. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
  49. data/app/views/bulkrax/importers/index.html.erb +20 -64
  50. data/app/views/bulkrax/importers/show.html.erb +7 -13
  51. data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
  52. data/config/routes.rb +8 -2
  53. data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
  54. data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
  55. data/lib/bulkrax/engine.rb +6 -0
  56. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +27 -0
  57. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +8 -0
  58. data/lib/bulkrax/persistence_layer.rb +38 -0
  59. data/lib/bulkrax/version.rb +1 -1
  60. data/lib/bulkrax.rb +88 -2
  61. data/lib/tasks/bulkrax_tasks.rake +12 -0
  62. metadata +46 -6
  63. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
  64. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
  65. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Bulkrax
4
+ # rubocop:disable Metrics/ModuleLength
5
+ module DatatablesBehavior
6
+ extend ActiveSupport::Concern
7
+
8
# Page size requested by the table, read from the `length` request param.
# Falls back to 30 when the param is missing, non-numeric, or below 1.
#
# @return [Integer]
def table_per_page
  requested = params[:length].to_i
  return 30 if requested < 1

  requested
end
12
+
13
# Reads the `data` attribute of one column definition in params['columns'].
#
# @param column [Object] key of the column definition inside params['columns']
# @return [Object, nil] the column's `data` value, or nil when either the
#   columns param or that particular column is absent
def order_value(column)
  columns = params['columns']
  return if columns.nil?

  definition = columns[column]
  definition.nil? ? nil : definition['data']
end
16
+
17
# Builds the "<column> <direction>" ordering fragment from the first entry of
# params['order'].
#
# Both pieces originate from the request, so they are constrained before being
# returned: the direction is reduced to 'asc' or 'desc' (anything else becomes
# 'asc'), and the column must look like a plain, optionally table-qualified,
# identifier.
#
# @return [String, nil] e.g. "name desc"; nil when no ordering was requested
#   or the requested column does not resolve to a safe identifier
def table_order
  ordering = params['order']&.[]('0')
  column_index = ordering&.[]('column')
  return if column_index.nil? || column_index.to_s.strip.empty?

  column_name = order_value(column_index).to_s
  # Reject anything that is not a bare identifier so raw request text can
  # never end up inside an ORDER BY clause.
  return unless column_name.match?(/\A\w+(\.\w+)?\z/)

  direction = ordering['dir'].to_s.casecmp?('desc') ? 'desc' : 'asc'
  "#{column_name} #{direction}"
end
20
+
21
# Converts the row offset in params[:start] into a 1-based page number.
#
# The page size comes from #table_per_page, so a missing, zero or negative
# `length` param uses the same fallback as the page size itself instead of
# raising ZeroDivisionError. Blank, zero or negative offsets map to page 1.
#
# @return [Integer] page number, never lower than 1
def table_page
  offset = params[:start].to_i
  return 1 unless offset.positive?

  (offset / table_per_page) + 1
end
25
+
26
# Builds, and memoizes in @entry_table_search, the search condition for
# Bulkrax::Entry rows.
#
# Each searched column is cast to text, lower-cased and compared with a
# "%value%" pattern built from the lower-cased params['search']['value'];
# the per-column comparisons are combined with OR.
#
# @return [Object, false] the combined Arel condition, or false when the
#   search value is blank
def entry_table_search
  return @entry_table_search if @entry_table_search
  return @entry_table_search = false if params['search']&.[]('value').blank?

  pattern = "%#{params['search']&.[]('value')&.downcase}%"
  table = Bulkrax::Entry.arel_table

  conditions = ['identifier', 'id', 'status_message', 'type', 'updated_at'].map do |name|
    as_text = Arel::Nodes::NamedFunction.new('CAST', [table[name].as('text')])
    Arel::Nodes::NamedFunction.new('LOWER', [as_text]).matches(pattern)
  end

  @entry_table_search = conditions.reduce { |combined, condition| combined.or(condition) }
end
45
+
46
# Builds, and memoizes in @importer_table_search, the search condition for
# Bulkrax::Importer rows.
#
# Each searched column is cast to text, lower-cased and compared with a
# "%value%" pattern built from the lower-cased params['search']['value'];
# the per-column comparisons are combined with OR.
#
# @return [Object, false] the combined Arel condition, or false when the
#   search value is blank
def importer_table_search
  return @importer_table_search if @importer_table_search
  return @importer_table_search = false if params['search']&.[]('value').blank?

  pattern = "%#{params['search']&.[]('value')&.downcase}%"
  table = Bulkrax::Importer.arel_table

  conditions = ['name', 'id', 'status_message', 'last_error_at', 'last_succeeded_at', 'updated_at'].map do |name|
    as_text = Arel::Nodes::NamedFunction.new('CAST', [table[name].as('text')])
    Arel::Nodes::NamedFunction.new('LOWER', [as_text]).matches(pattern)
  end

  @importer_table_search = conditions.reduce { |combined, condition| combined.or(condition) }
end
65
+
66
# Builds, and memoizes in @exporter_table_search, the search condition for
# Bulkrax::Exporter rows.
#
# Each searched column is cast to text, lower-cased and compared with a
# "%value%" pattern built from the lower-cased params['search']['value'];
# the per-column comparisons are combined with OR.
#
# @return [Object, false] the combined Arel condition, or false when the
#   search value is blank
def exporter_table_search
  return @exporter_table_search if @exporter_table_search
  return @exporter_table_search = false if params['search']&.[]('value').blank?

  pattern = "%#{params['search']&.[]('value')&.downcase}%"
  table = Bulkrax::Exporter.arel_table

  conditions = ['name', 'status_message', 'created_at'].map do |name|
    as_text = Arel::Nodes::NamedFunction.new('CAST', [table[name].as('text')])
    Arel::Nodes::NamedFunction.new('LOWER', [as_text]).matches(pattern)
  end

  @exporter_table_search = conditions.reduce { |combined, condition| combined.or(condition) }
end
85
+
86
# Serializes importers into the hash returned to the table: one row hash per
# importer under :data, plus the :recordsTotal / :recordsFiltered counters.
#
# @param importers [Enumerable] importers to render; must respond to #map and #size
# @return [Hash] with keys :data, :recordsTotal and :recordsFiltered
def format_importers(importers)
  rows = importers.map do |importer|
    run = importer.last_run # nil when the importer has no run yet

    {
      name: view_context.link_to(importer.name, view_context.importer_path(importer)),
      status_message: status_message_for(importer),
      last_imported_at: importer.last_imported_at&.strftime("%b %d, %Y"),
      next_import_at: importer.next_import_at&.strftime("%b %d, %Y"),
      enqueued_records: run&.enqueued_records,
      processed_records: run&.processed_records || 0,
      failed_records: run&.failed_records || 0,
      deleted_records: run&.deleted_records,
      total_collection_entries: run&.total_collection_entries,
      total_work_entries: run&.total_work_entries,
      total_file_set_entries: run&.total_file_set_entries,
      actions: importer_util_links(importer)
    }
  end

  { data: rows, recordsTotal: Bulkrax::Importer.count, recordsFiltered: importers.size }
end
109
+
110
# Serializes exporters into the hash returned to the table: one row hash per
# exporter under :data, plus the :recordsTotal / :recordsFiltered counters.
#
# @param exporters [Enumerable] exporters to render; must respond to #map and #size
# @return [Hash] with keys :data, :recordsTotal and :recordsFiltered
def format_exporters(exporters)
  rows = exporters.map do |exporter|
    {
      name: view_context.link_to(exporter.name, view_context.exporter_path(exporter)),
      status_message: status_message_for(exporter),
      created_at: exporter.created_at,
      download: download_zip(exporter),
      actions: exporter_util_links(exporter)
    }
  end

  { data: rows, recordsTotal: Bulkrax::Exporter.count, recordsFiltered: exporters.size }
end
126
+
127
# Serializes entries of an importer/exporter into the hash returned to the
# table: one row hash per entry under :data, plus the counters.
#
# Both :recordsTotal and :recordsFiltered report item.entries.size.
#
# @param entries [Enumerable] entries to render; must respond to #map
# @param item [Object] the importer or exporter owning the entries
# @return [Hash] with keys :data, :recordsTotal and :recordsFiltered
def format_entries(entries, item)
  rows = entries.map do |entry|
    entry_path = view_context.item_entry_path(item, entry)
    latest = entry.latest_status

    # Link to the entry using the error class as text when the latest status has one.
    error_link =
      if latest&.error_class&.present?
        view_context.link_to(latest.error_class, entry_path, title: latest.error_message)
      else
        ""
      end

    {
      identifier: view_context.link_to(entry.identifier, entry_path),
      id: entry.id,
      status_message: status_message_for(entry),
      type: entry.type,
      updated_at: entry.updated_at,
      errors: error_link,
      actions: entry_util_links(entry, item)
    }
  end

  total = item.entries.size
  { data: rows, recordsTotal: total, recordsFiltered: total }
end
145
+
146
# Renders the action links for one entry row: an info link, a re-run trigger
# (only when the item is an importer) and a delete link with a confirmation.
#
# @param e [Object] the entry; must respond to #id
# @param item [Object] the importer or exporter owning the entry
# @return [String] the links joined by a single space
def entry_util_links(e, item)
  info_icon = view_context.raw('<span class="glyphicon glyphicon-info-sign"></span>')
  links = [view_context.link_to(info_icon, view_context.item_entry_path(item, e))]

  if view_context.an_importer?(item)
    links << "<a class='glyphicon glyphicon-repeat' data-toggle='modal' data-target='#bulkraxItemModal' data-entry-id='#{e.id}'></a>"
  end

  trash_icon = view_context.raw('<span class="glyphicon glyphicon-trash"></span>')
  links << view_context.link_to(
    trash_icon,
    view_context.item_entry_path(item, e),
    method: :delete,
    data: { confirm: 'This will delete the entry and any work associated with it. Are you sure?' }
  )

  links.join(" ")
end
153
+
154
# Renders the status cell for a record: a glyphicon whose icon and colour
# depend on the status message, followed by the message itself.
#
# "Complete", "Pending" and "Skipped" each have their own icon. Every other
# message gets the "remove" icon, coloured green when e.status is 'Deleted'
# and red otherwise.
#
# @param e [Object] record responding to #status_message and #status
# @return [String] HTML fragment wrapped in <td>…</td>
def status_message_for(e)
  icon, color =
    case e.status_message
    when "Complete" then ['glyphicon-ok', 'green']
    when "Pending" then ['glyphicon-option-horizontal', 'blue']
    when "Skipped" then ['glyphicon-step-forward', 'yellow']
    else ['glyphicon-remove', e.status == 'Deleted' ? 'green' : 'red']
    end

  "<td><span class='glyphicon #{icon}' style='color: #{color};'></span> #{e.status_message}</td>"
end
165
+
166
# Renders the action links for one importer row: show, edit and delete
# (the delete link asks for confirmation).
#
# @param i [Object] the importer the links point at
# @return [String] the links joined by a single space
def importer_util_links(i)
  icon = ->(glyph) { view_context.raw("<span class=\"glyphicon #{glyph}\"></span>") }

  [
    view_context.link_to(icon.call('glyphicon-info-sign'), importer_path(i)),
    view_context.link_to(icon.call('glyphicon-pencil'), edit_importer_path(i)),
    view_context.link_to(icon.call('glyphicon-remove'), i, method: :delete, data: { confirm: 'Are you sure?' })
  ].join(" ")
end
173
+
174
# Renders the action links for one exporter row: show, edit (with turbolinks
# disabled on the link) and delete (which asks for confirmation).
#
# @param i [Object] the exporter the links point at
# @return [String] the links joined by a single space
def exporter_util_links(i)
  icon = ->(glyph) { view_context.raw("<span class=\"glyphicon #{glyph}\"></span>") }

  [
    view_context.link_to(icon.call('glyphicon-info-sign'), exporter_path(i)),
    view_context.link_to(icon.call('glyphicon-pencil'), edit_exporter_path(i), data: { turbolinks: false }),
    view_context.link_to(icon.call('glyphicon-remove'), i, method: :delete, data: { confirm: 'Are you sure?' })
  ].join(" ")
end
181
+
182
# Renders the download form for an exporter: a GET form containing a select
# with one option per exported zip file and a "Download" submit button.
# File names are HTML-escaped before being placed in the options.
#
# @param e [Object] exporter responding to #id, #exporter_export_zip_path and
#   #exporter_export_zip_files
# @return [String, nil] the form HTML, or nil when the exporter's zip path
#   does not exist on disk
def download_zip(e)
  return unless File.exist?(e.exporter_export_zip_path)

  options_html = e.exporter_export_zip_files.flatten.map { |file_name|
    escaped = CGI.escapeHTML(file_name)
    "<option value='#{escaped}'>#{escaped}</option>"
  }.join

  [
    "<form class='simple_form edit_exporter' id='edit_exporter_#{e.id}' action='#{view_context.exporter_download_path(e)}' accept-charset='UTF-8' method='get'>",
    "<input name='utf8' type='hidden' value='✓'>",
    "<select class='btn btn-default form-control' style='width: 200px' name='exporter[exporter_export_zip_files]' id='exporter_#{e.id}_exporter_export_zip_files'>",
    options_html,
    "</select>\n", # the newline renders as a space between the dropdown and the button
    "<input type='submit' name='commit' value='Download' class='btn btn-default'>",
    "</form>"
  ].join
end
199
+ end
200
+ # rubocop:enable Metrics/ModuleLength
201
+ end
@@ -1,9 +1,13 @@
1
1
  # frozen_string_literal: true
2
- require 'coderay'
3
-
4
2
  module Bulkrax
5
3
  module ApplicationHelper
6
- include ::Hyrax::HyraxHelperBehavior if defined?(::Hyrax)
4
# Path to an entry nested under its owning item, using the importer route
# when the item is an importer and the exporter route otherwise.
#
# @param item [Object] importer or exporter; must respond to #id
# @param e [Object] the entry; must respond to #id
# @param opts [Hash] extra options forwarded to the route helper
# @return [String] whatever the selected route helper returns
def item_entry_path(item, e, opts = {})
  if an_importer?(item)
    bulkrax.importer_entry_path(item.id, e.id, opts)
  else
    bulkrax.exporter_entry_path(item.id, e.id, opts)
  end
end
7
+
8
# Whether the item's class name contains "Importer".
#
# @param item [Object]
# @return [Boolean]
def an_importer?(item)
  class_name = item.class.to_s
  class_name.include?('Importer')
end
7
11
 
8
12
  def coderay(value, opts)
9
13
  CodeRay
@@ -40,7 +40,7 @@ module Bulkrax
40
40
 
41
41
  include DynamicRecordLookup
42
42
 
43
- queue_as :import
43
+ queue_as Bulkrax.config.ingest_queue_name
44
44
 
45
45
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
46
46
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # DeleteAndImportJob specialisation for collections: names the job classes
  # the parent looks up through self.class::DELETE_CLASS / IMPORT_CLASS.
  class DeleteAndImportCollectionJob < DeleteAndImportJob
    IMPORT_CLASS = Bulkrax::ImportCollectionJob
    DELETE_CLASS = Bulkrax::DeleteCollectionJob
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # DeleteAndImportJob specialisation for file sets: names the job classes
  # the parent looks up through self.class::DELETE_CLASS / IMPORT_CLASS.
  class DeleteAndImportFileSetJob < DeleteAndImportJob
    IMPORT_CLASS = Bulkrax::ImportFileSetJob
    DELETE_CLASS = Bulkrax::DeleteFileSetJob
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Deletes the object behind an entry and, when the delete reports "Deleted",
  # imports the entry again.
  #
  # Subclasses supply the concrete jobs through the DELETE_CLASS and
  # IMPORT_CLASS constants, which are looked up on the running class.
  class DeleteAndImportJob < ApplicationJob
    # Use the configured ingest queue, like the other Bulkrax import jobs,
    # rather than a hard-coded queue name.
    queue_as Bulkrax.config.ingest_queue_name

    # @param entry [Bulkrax::Entry] entry whose object is deleted and re-imported
    # @param importer_run [Bulkrax::ImporterRun] run handed to the delete job;
    #   its id is handed to the import job
    # @raise [StandardError] any error raised while deleting or importing is
    #   recorded on the entry and then re-raised
    def perform(entry, importer_run)
      status = self.class::DELETE_CLASS.perform_now(entry, importer_run)
      if status.status_message == "Deleted"
        entry = Bulkrax::Entry.find(entry.id) # maximum reload
        self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id)
      end
    rescue => e
      entry.set_status_info(e)
      # this causes caught exception to be reraised
      raise
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # DeleteAndImportJob specialisation for works: names the job classes
  # the parent looks up through self.class::DELETE_CLASS / IMPORT_CLASS.
  class DeleteAndImportWorkJob < DeleteAndImportJob
    IMPORT_CLASS = Bulkrax::ImportWorkJob
    DELETE_CLASS = Bulkrax::DeleteWorkJob
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class DeleteJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(entry, importer_run)
8
8
  obj = entry.factory.find
@@ -15,6 +15,10 @@ module Bulkrax
15
15
  entry.importer.current_run = ImporterRun.find(importer_run.id)
16
16
  entry.importer.record_status
17
17
  entry.set_status_info("Deleted", ImporterRun.find(importer_run.id))
18
+ rescue => e
19
+ entry.set_status_info(e)
20
+ # this causes caught exception to be reraised
21
+ raise
18
22
  end
19
23
  end
20
24
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class DownloadCloudFileJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # Retrieve cloud file and write to the imports directory
8
8
  # Note: if using the file system, the mounted directory in
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImportCollectionJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
8
  def perform(*args)
@@ -6,7 +6,7 @@ module Bulkrax
6
6
  class ImportFileSetJob < ApplicationJob
7
7
  include DynamicRecordLookup
8
8
 
9
- queue_as :import
9
+ queue_as Bulkrax.config.ingest_queue_name
10
10
 
11
11
  attr_reader :importer_run_id
12
12
 
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Bare import job that only declares which queue it runs on.
  class ImportJob < ApplicationJob
    # Use the configured ingest queue, like the other Bulkrax import jobs,
    # rather than a hard-coded queue name.
    queue_as Bulkrax.config.ingest_queue_name
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImportWorkJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
8
  #
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImporterJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvCollectionEntry < CsvEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = "Collection"
8
6
 
9
7
  # Use identifier set by CsvParser#unique_collection_identifier, which falls back
10
8
  # on the Collection's first title if record[source_identifier] is not present
@@ -16,11 +16,12 @@ module Bulkrax
16
16
  class_attribute(:csv_read_data_options, default: {})
17
17
 
18
18
  # there's a risk that this reads the whole file into memory and could cause a memory leak
19
+ # we strip any special characters out of the headers. looking at you Excel
19
20
  def self.read_data(path)
20
21
  raise StandardError, 'CSV path empty' if path.blank?
21
22
  options = {
22
23
  headers: true,
23
- header_converters: ->(h) { h.to_s.strip.to_sym },
24
+ header_converters: ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym },
24
25
  encoding: 'utf-8'
25
26
  }.merge(csv_read_data_options)
26
27
 
@@ -8,6 +8,8 @@ module Bulkrax
8
8
  class Entry < ApplicationRecord
9
9
  include Bulkrax::HasMatchers
10
10
  include Bulkrax::ImportBehavior
11
+ self.class_attribute :default_work_type, default: Bulkrax.default_work_type
12
+
11
13
  include Bulkrax::ExportBehavior
12
14
  include Bulkrax::StatusInfo
13
15
  include Bulkrax::HasLocalProcessing
@@ -23,6 +23,10 @@ module Bulkrax
23
23
  set_status_info(e)
24
24
  end
25
25
 
26
+ def remove_and_rerun
27
+ self.parser_fields['remove_and_rerun']
28
+ end
29
+
26
30
  # #export_source accessors
27
31
  # Used in form to prevent it from getting confused as to which value to populate #export_source with.
28
32
  # Also, used to display the correct selected value when rendering edit form.
@@ -102,9 +106,12 @@ module Bulkrax
102
106
  Importer.all.map { |i| [i.name, i.id] }
103
107
  end
104
108
 
105
- def current_run
109
+ def current_run(skip_counts: false)
110
+ @current_run ||= self.exporter_runs.create! if skip_counts
111
+ return @current_run if @current_run
112
+
106
113
  total = self.limit || parser.total
107
- @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
114
+ @current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
108
115
  end
109
116
 
110
117
  def last_run
@@ -3,7 +3,7 @@
3
3
  require 'iso8601'
4
4
 
5
5
  module Bulkrax
6
- class Importer < ApplicationRecord
6
+ class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
7
  include Bulkrax::ImporterExporterBehavior
8
8
  include Bulkrax::StatusInfo
9
9
 
@@ -103,11 +103,12 @@ module Bulkrax
103
103
  frequency.to_seconds != 0
104
104
  end
105
105
 
106
- def current_run
106
+ def current_run(skip_counts: false)
107
107
  return @current_run if @current_run.present?
108
108
 
109
109
  @current_run = self.importer_runs.create!
110
110
  return @current_run if file? && zip?
111
+ return @current_run if skip_counts
111
112
 
112
113
  entry_counts = {
113
114
  total_work_entries: self.limit || parser.works_total,
@@ -123,6 +124,29 @@ module Bulkrax
123
124
  @last_run ||= self.importer_runs.last
124
125
  end
125
126
 
127
+ def failed_entries?
128
+ entries.failed.any?
129
+ end
130
+
131
# Latest statuses (one per statusable, via Status.latest_by_statusable) that
# belong to this importer's entries and whose message is 'Failed'.
# Memoized in @failed_statuses.
#
# @return [Object] the status query with :statusable included
def failed_statuses
  @failed_statuses ||= begin
    entry_ids = self.entries.pluck(:id)
    latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
    latest.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Failed')
  end
end
136
+
137
# Groups the failed statuses by their error message.
#
# @return [Hash{Object => Array}] error message => ids of the statuses that
#   carry it, keyed in order of first appearance
def failed_messages
  failed_statuses
    .group_by(&:error_message)
    .transform_values { |statuses| statuses.map(&:id) }
end
143
+
144
# Latest statuses (one per statusable, via Status.latest_by_statusable) that
# belong to this importer's entries and whose message is 'Complete'.
# Memoized in @completed_statuses.
#
# @return [Object] the status query with :statusable included
def completed_statuses
  @completed_statuses ||= begin
    entry_ids = self.entries.pluck(:id)
    latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
    latest.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
  end
end
149
+
126
150
  def seen
127
151
  @seen ||= {}
128
152
  end
@@ -135,6 +159,18 @@ module Bulkrax
135
159
  self.parser_fields['update_files']
136
160
  end
137
161
 
162
+ def remove_and_rerun
163
+ self.parser_fields['remove_and_rerun']
164
+ end
165
+
166
+ def metadata_only?
167
+ parser.parser_fields['metadata_only'] == true
168
+ end
169
+
170
+ def existing_entries?
171
+ parser.parser_fields['file_style']&.match(/Existing Entries/)
172
+ end
173
+
138
174
  def import_works
139
175
  import_objects(['work'])
140
176
  end
@@ -157,11 +193,20 @@ module Bulkrax
157
193
  self.only_updates ||= false
158
194
  self.save if self.new_record? # Object needs to be saved for statuses
159
195
  types = types_array || DEFAULT_OBJECT_TYPES
160
- parser.create_objects(types)
196
+ existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types)
197
+ mark_unseen_as_skipped
161
198
  rescue StandardError => e
162
199
  set_status_info(e)
163
200
  end
164
201
 
202
# Flags every entry whose identifier is not among the keys of `seen` as
# 'Skipped'. Entries left untouched by an import are neither pending,
# complete for the last run, nor failed.
def mark_unseen_as_skipped
  untouched = entries.where.not(identifier: seen.keys)
  untouched.find_each { |entry| entry.set_status_info('Skipped') }
end
209
+
165
210
  # Prepend the base_url to ensure unique set identifiers
166
211
  # @todo - move to parser, as this is OAI specific
167
212
  def unique_collection_identifier(id)
@@ -192,9 +237,5 @@ module Bulkrax
192
237
  rescue
193
238
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
194
239
  end
195
-
196
- def metadata_only?
197
- parser.parser_fields['metadata_only'] == true
198
- end
199
240
  end
200
241
  end
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = "Collection"
8
6
 
9
7
  def build_metadata
10
8
  self.parsed_metadata = self.raw_metadata
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
+ self.default_work_type = "Collection"
5
6
  def record
6
7
  @record ||= self.raw_metadata
7
8
  end
@@ -11,9 +12,5 @@ module Bulkrax
11
12
  add_local
12
13
  return self.parsed_metadata
13
14
  end
14
-
15
- def factory_class
16
- Collection
17
- end
18
15
  end
19
16
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class Status < ApplicationRecord
5
- belongs_to :statusable, polymorphic: true
5
+ belongs_to :statusable, polymorphic: true, denormalize: { fields: %i[status_message], if: :latest? }
6
6
  belongs_to :runnable, polymorphic: true
7
7
  serialize :error_backtrace, Array
8
8
 
@@ -21,5 +21,14 @@ module Bulkrax
21
21
  status_table.join(latest_status_query.as(latest_status_table.name.to_s), Arel::Nodes::InnerJoin)
22
22
  .on(status_table[:id].eq(latest_status_table[:latest_status_id]))
23
23
  end
24
+
25
# Whether this status has the highest id among the statuses recorded for the
# same statusable (same statusable_id and statusable_type).
#
# @return [Boolean]
def latest?
  siblings = self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc')

  # TODO: remove the version branch when we stop supporting Hyrax < 4
  newest_id =
    if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
      siblings.pick(:id)
    else
      siblings.pluck(:id).first # rubocop:disable Rails/Pick
    end

  self.id == newest_id
end
24
33
  end
25
34
  end
@@ -2,8 +2,10 @@
2
2
 
3
3
  module Bulkrax
4
4
  module FileSetEntryBehavior
5
- def factory_class
6
- ::FileSet
5
+ extend ActiveSupport::Concern
6
+
7
+ included do
8
+ self.default_work_type = "::FileSet"
7
9
  end
8
10
 
9
11
  def file_reference
@@ -189,22 +189,10 @@ module Bulkrax
189
189
  end
190
190
 
191
191
  def factory_class
192
- fc = if self.parsed_metadata&.[]('model').present?
193
- self.parsed_metadata&.[]('model').is_a?(Array) ? self.parsed_metadata&.[]('model')&.first : self.parsed_metadata&.[]('model')
194
- elsif self.mapping&.[]('work_type').present?
195
- self.parsed_metadata&.[]('work_type').is_a?(Array) ? self.parsed_metadata&.[]('work_type')&.first : self.parsed_metadata&.[]('work_type')
196
- else
197
- Bulkrax.default_work_type
198
- end
199
-
200
- # return the name of the collection or work
201
- fc.tr!(' ', '_')
202
- fc.downcase! if fc.match?(/[-_]/)
203
- fc.camelcase.constantize
204
- rescue NameError
205
- nil
206
- rescue
207
- Bulkrax.default_work_type.constantize
192
+ # ATTENTION: Do not memoize this here; tests should catch the problem, but throughout the
193
+ # lifecycle of parsing a CSV row or what not, we end up having different factory classes based
194
+ # on the encountered metadata.
195
+ FactoryClassFinder.find(entry: self)
208
196
  end
209
197
  end
210
198
  end
@@ -10,6 +10,10 @@ module Bulkrax
10
10
  as: :statusable,
11
11
  class_name: "Bulkrax::Status",
12
12
  inverse_of: :statusable
13
+ scope :failed, -> { where(status_message: 'Failed') }
14
+ scope :complete, -> { where(status_message: 'Complete') }
15
+ scope :pending, -> { where(status_message: 'Pending') }
16
+ scope :skipped, -> { where(status_message: 'Skipped') }
13
17
  end
14
18
 
15
19
  def current_status
@@ -25,6 +29,10 @@ module Bulkrax
25
29
  current_status&.status_message&.match(/^Complete$/)
26
30
  end
27
31
 
32
+ def skipped?
33
+ current_status&.status_message&.match('Skipped')
34
+ end
35
+
28
36
  def status
29
37
  current_status&.status_message || 'Pending'
30
38
  end