bulkrax 6.0.1 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +7 -7
  3. data/app/assets/javascripts/bulkrax/bulkrax.js +11 -0
  4. data/app/assets/javascripts/bulkrax/datatables.js +139 -0
  5. data/app/assets/javascripts/bulkrax/exporters.js +4 -4
  6. data/app/assets/javascripts/bulkrax/importers.js.erb +15 -1
  7. data/app/assets/stylesheets/bulkrax/import_export.scss +6 -1
  8. data/app/controllers/bulkrax/entries_controller.rb +52 -3
  9. data/app/controllers/bulkrax/exporters_controller.rb +19 -7
  10. data/app/controllers/bulkrax/importers_controller.rb +29 -11
  11. data/app/controllers/concerns/bulkrax/datatables_behavior.rb +201 -0
  12. data/app/helpers/bulkrax/application_helper.rb +7 -3
  13. data/app/jobs/bulkrax/create_relationships_job.rb +1 -1
  14. data/app/jobs/bulkrax/delete_and_import_collection_job.rb +8 -0
  15. data/app/jobs/bulkrax/delete_and_import_file_set_job.rb +8 -0
  16. data/app/jobs/bulkrax/delete_and_import_job.rb +20 -0
  17. data/app/jobs/bulkrax/delete_and_import_work_job.rb +8 -0
  18. data/app/jobs/bulkrax/delete_job.rb +5 -1
  19. data/app/jobs/bulkrax/download_cloud_file_job.rb +1 -1
  20. data/app/jobs/bulkrax/import_collection_job.rb +1 -1
  21. data/app/jobs/bulkrax/import_file_set_job.rb +1 -1
  22. data/app/jobs/bulkrax/import_job.rb +7 -0
  23. data/app/jobs/bulkrax/import_work_job.rb +1 -1
  24. data/app/jobs/bulkrax/importer_job.rb +1 -1
  25. data/app/models/bulkrax/csv_collection_entry.rb +1 -3
  26. data/app/models/bulkrax/csv_entry.rb +2 -1
  27. data/app/models/bulkrax/entry.rb +2 -0
  28. data/app/models/bulkrax/exporter.rb +9 -2
  29. data/app/models/bulkrax/importer.rb +48 -7
  30. data/app/models/bulkrax/oai_set_entry.rb +1 -3
  31. data/app/models/bulkrax/rdf_collection_entry.rb +1 -4
  32. data/app/models/bulkrax/status.rb +10 -1
  33. data/app/models/concerns/bulkrax/file_set_entry_behavior.rb +4 -2
  34. data/app/models/concerns/bulkrax/import_behavior.rb +4 -16
  35. data/app/models/concerns/bulkrax/status_info.rb +8 -0
  36. data/app/parsers/bulkrax/application_parser.rb +90 -19
  37. data/app/parsers/bulkrax/bagit_parser.rb +0 -23
  38. data/app/parsers/bulkrax/csv_parser.rb +0 -52
  39. data/app/parsers/bulkrax/oai_dc_parser.rb +26 -16
  40. data/app/parsers/bulkrax/parser_export_record_set.rb +2 -2
  41. data/app/parsers/bulkrax/xml_parser.rb +18 -21
  42. data/app/services/bulkrax/factory_class_finder.rb +90 -0
  43. data/app/views/bulkrax/exporters/_form.html.erb +10 -10
  44. data/app/views/bulkrax/exporters/index.html.erb +13 -57
  45. data/app/views/bulkrax/exporters/show.html.erb +2 -10
  46. data/app/views/bulkrax/importers/_csv_fields.html.erb +7 -1
  47. data/app/views/bulkrax/importers/_edit_form_buttons.html.erb +8 -1
  48. data/app/views/bulkrax/importers/_edit_item_buttons.html.erb +18 -0
  49. data/app/views/bulkrax/importers/index.html.erb +20 -64
  50. data/app/views/bulkrax/importers/show.html.erb +7 -13
  51. data/app/views/bulkrax/shared/_entries_tab.html.erb +16 -0
  52. data/config/routes.rb +8 -2
  53. data/db/migrate/20240208005801_denormalize_status_message.rb +7 -0
  54. data/db/migrate/20240209070952_update_identifier_index.rb +6 -0
  55. data/lib/bulkrax/engine.rb +6 -0
  56. data/lib/bulkrax/persistence_layer/active_fedora_adapter.rb +27 -0
  57. data/lib/bulkrax/persistence_layer/valkyrie_adapter.rb +8 -0
  58. data/lib/bulkrax/persistence_layer.rb +38 -0
  59. data/lib/bulkrax/version.rb +1 -1
  60. data/lib/bulkrax.rb +88 -2
  61. data/lib/tasks/bulkrax_tasks.rake +12 -0
  62. metadata +46 -6
  63. data/app/views/bulkrax/shared/_collection_entries_tab.html.erb +0 -39
  64. data/app/views/bulkrax/shared/_file_set_entries_tab.html.erb +0 -39
  65. data/app/views/bulkrax/shared/_work_entries_tab.html.erb +0 -39
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Controller helpers for the server-side importer, exporter and entry tables.
  # They read the paging (`start`, `length`), ordering (`order`, `columns`) and
  # search (`search`) request parameters and turn records into row hashes.
  #
  # rubocop:disable Metrics/ModuleLength
  module DatatablesBehavior
    extend ActiveSupport::Concern

    # Number of rows requested per page.
    #
    # @return [Integer] `params[:length]` as an integer, or 30 when it is missing, zero or negative
    def table_per_page
      per_page = params[:length].to_i
      per_page < 1 ? 30 : per_page
    end

    # Looks up the field name of the column at the given index.
    #
    # @param column [String, nil] key into `params['columns']`
    # @return [String, nil] that column's `data` value, or nil when any level is missing
    def order_value(column)
      params['columns']&.[](column)&.[]('data')
    end

    # Builds a "<column> <direction>" string from the first entry of `params['order']`.
    #
    # Both parts come straight from the request, so they are constrained before being
    # interpolated: the column must consist of word characters and dots only, and the
    # direction is reduced to `asc` or `desc` (anything else falls back to `asc`).
    # NOTE(review): presumably the result is handed to an ActiveRecord `order` call — confirm in the controllers.
    #
    # @return [String, nil] the ordering string, or nil when no usable order column was requested
    def table_order
      column_index = first_order_param('column')
      return if column_index.blank?

      column = order_value(column_index).to_s
      return unless column.match?(/\A[\w.]+\z/)

      direction = first_order_param('dir').to_s.casecmp('desc').zero? ? 'desc' : 'asc'
      "#{column} #{direction}"
    end

    # Converts the row offset in `params[:start]` to a 1-based page number.
    #
    # The offset is divided by #table_per_page rather than the raw `length` parameter,
    # so a missing, zero or negative `length` can neither raise ZeroDivisionError nor
    # yield a negative page.
    #
    # @return [Integer]
    def table_page
      return 1 if params[:start].blank?

      (params[:start].to_i / table_per_page) + 1
    end

    # Search condition for the entries table.
    #
    # @return [Object, false] an Arel condition matching the search term against the entry
    #   columns, or false when no search term was given
    def entry_table_search
      @entry_table_search ||= table_search_for(Bulkrax::Entry, ['identifier', 'id', 'status_message', 'type', 'updated_at'])
    end

    # Search condition for the importers table.
    #
    # @return [Object, false] an Arel condition, or false when no search term was given
    def importer_table_search
      @importer_table_search ||= table_search_for(Bulkrax::Importer, ['name', 'id', 'status_message', 'last_error_at', 'last_succeeded_at', 'updated_at'])
    end

    # Search condition for the exporters table.
    #
    # @return [Object, false] an Arel condition, or false when no search term was given
    def exporter_table_search
      @exporter_table_search ||= table_search_for(Bulkrax::Exporter, ['name', 'status_message', 'created_at'])
    end

    # Builds the table response for a set of importers.
    #
    # @param importers [Enumerable<Bulkrax::Importer>] the importers to render; must respond to #size
    # @return [Hash] `data` (one row hash per importer), `recordsTotal` and `recordsFiltered`
    def format_importers(importers)
      result = importers.map do |i|
        run = i.last_run
        {
          name: view_context.link_to(i.name, view_context.importer_path(i)),
          status_message: status_message_for(i),
          last_imported_at: i.last_imported_at&.strftime("%b %d, %Y"),
          next_import_at: i.next_import_at&.strftime("%b %d, %Y"),
          enqueued_records: run&.enqueued_records,
          processed_records: run&.processed_records || 0,
          failed_records: run&.failed_records || 0,
          deleted_records: run&.deleted_records,
          total_collection_entries: run&.total_collection_entries,
          total_work_entries: run&.total_work_entries,
          total_file_set_entries: run&.total_file_set_entries,
          actions: importer_util_links(i)
        }
      end
      {
        data: result,
        recordsTotal: Bulkrax::Importer.count,
        recordsFiltered: importers.size
      }
    end

    # Builds the table response for a set of exporters.
    #
    # @param exporters [Enumerable<Bulkrax::Exporter>] the exporters to render; must respond to #size
    # @return [Hash] `data` (one row hash per exporter), `recordsTotal` and `recordsFiltered`
    def format_exporters(exporters)
      result = exporters.map do |e|
        {
          name: view_context.link_to(e.name, view_context.exporter_path(e)),
          status_message: status_message_for(e),
          created_at: e.created_at,
          download: download_zip(e),
          actions: exporter_util_links(e)
        }
      end
      {
        data: result,
        recordsTotal: Bulkrax::Exporter.count,
        recordsFiltered: exporters.size
      }
    end

    # Builds the table response for the entries of an importer or exporter.
    #
    # NOTE(review): `recordsFiltered` is taken from `item.entries`, not from `entries`,
    # so it does not shrink when a search is active — confirm this is intended.
    #
    # @param entries [Enumerable<Bulkrax::Entry>] the entries to render
    # @param item [Bulkrax::Importer, Bulkrax::Exporter] the owner of the entries
    # @return [Hash] `data` (one row hash per entry), `recordsTotal` and `recordsFiltered`
    def format_entries(entries, item)
      result = entries.map do |e|
        entry_path = view_context.item_entry_path(item, e)
        latest = e.latest_status
        error_class = latest&.error_class
        {
          identifier: view_context.link_to(e.identifier, entry_path),
          id: e.id,
          status_message: status_message_for(e),
          type: e.type,
          updated_at: e.updated_at,
          # link the error class to the entry page, with the message as hover text
          errors: error_class.present? ? view_context.link_to(error_class, entry_path, title: latest.error_message) : "",
          actions: entry_util_links(e, item)
        }
      end
      {
        data: result,
        recordsTotal: item.entries.size,
        recordsFiltered: item.entries.size
      }
    end

    # Action links for one entry row: show, re-run (importers only) and delete.
    #
    # @param e [Bulkrax::Entry]
    # @param item [Bulkrax::Importer, Bulkrax::Exporter]
    # @return [String] the links joined by a single space
    def entry_util_links(e, item)
      entry_path = view_context.item_entry_path(item, e)
      links = [view_context.link_to(glyphicon_span('info-sign'), entry_path)]
      # the re-run control opens the #bulkraxItemModal dialog and is only offered for importer entries
      links << "<a class='glyphicon glyphicon-repeat' data-toggle='modal' data-target='#bulkraxItemModal' data-entry-id='#{e.id}'></a>" if view_context.an_importer?(item)
      links << view_context.link_to(glyphicon_span('trash'), entry_path, method: :delete, data: { confirm: 'This will delete the entry and any work associated with it. Are you sure?' })
      links.join(" ")
    end

    # Renders a record's status message with a coloured icon.
    #
    # "Complete", "Pending" and "Skipped" each get their own icon; every other message
    # gets the "remove" icon, green when the record's status is 'Deleted' and red otherwise.
    #
    # @param e [#status_message, #status] an importer, exporter or entry
    # @return [String] HTML fragment
    def status_message_for(e)
      message = e.status_message
      icon, color =
        case message
        when "Complete" then ['glyphicon-ok', 'green']
        when "Pending" then ['glyphicon-option-horizontal', 'blue']
        when "Skipped" then ['glyphicon-step-forward', 'yellow']
        else ['glyphicon-remove', e.status == 'Deleted' ? 'green' : 'red']
        end
      "<td><span class='glyphicon #{icon}' style='color: #{color};'></span> #{message}</td>"
    end

    # Action links for one importer row: show, edit and delete.
    #
    # @param i [Bulkrax::Importer]
    # @return [String] the links joined by a single space
    def importer_util_links(i)
      [
        view_context.link_to(glyphicon_span('info-sign'), importer_path(i)),
        view_context.link_to(glyphicon_span('pencil'), edit_importer_path(i)),
        view_context.link_to(glyphicon_span('remove'), i, method: :delete, data: { confirm: 'Are you sure?' })
      ].join(" ")
    end

    # Action links for one exporter row: show, edit and delete.
    #
    # @param i [Bulkrax::Exporter]
    # @return [String] the links joined by a single space
    def exporter_util_links(i)
      [
        view_context.link_to(glyphicon_span('info-sign'), exporter_path(i)),
        view_context.link_to(glyphicon_span('pencil'), edit_exporter_path(i), data: { turbolinks: false }),
        view_context.link_to(glyphicon_span('remove'), i, method: :delete, data: { confirm: 'Are you sure?' })
      ].join(" ")
    end

    # Builds the download form for an exporter: a drop-down of its zip files plus a submit button.
    #
    # @param e [Bulkrax::Exporter]
    # @return [String, nil] the form HTML, or nil when the exporter's zip path does not exist
    def download_zip(e)
      return unless File.exist?(e.exporter_export_zip_path)

      # file names are escaped because they are placed in attribute values and element text
      options_html = e.exporter_export_zip_files.flatten.map do |file_name|
        "<option value='#{CGI.escapeHTML(file_name)}'>#{CGI.escapeHTML(file_name)}</option>"
      end.join

      [
        "<form class='simple_form edit_exporter' id='edit_exporter_#{e.id}' action='#{view_context.exporter_download_path(e)}' accept-charset='UTF-8' method='get'>",
        "<input name='utf8' type='hidden' value='✓'>",
        "<select class='btn btn-default form-control' style='width: 200px' name='exporter[exporter_export_zip_files]' id='exporter_#{e.id}_exporter_export_zip_files'>",
        options_html,
        "</select>\n", # add newline here to add a space between the dropdown and the download button
        "<input type='submit' name='commit' value='Download' class='btn btn-default'>",
        "</form>"
      ].join
    end

    private

    # Reads one key of the first requested ordering, `params['order']['0'][key]`.
    def first_order_param(key)
      params['order']&.[]('0')&.[](key)
    end

    # ORs together a case-insensitive substring match of the search term against each
    # column, with every column cast to text and lower-cased first.
    # Returns false when no search term was submitted.
    def table_search_for(model, column_names)
      term = params['search']&.[]('value')
      return false if term.blank?

      pattern = "%#{term.downcase}%"
      conditions = column_names.map do |name|
        as_text = Arel::Nodes::NamedFunction.new('CAST', [model.arel_table[name].as('text')])
        Arel::Nodes::NamedFunction.new('LOWER', [as_text]).matches(pattern)
      end
      conditions.reduce { |query, condition| query.or(condition) }
    end

    # Icon markup shared by the action links.
    def glyphicon_span(name)
      view_context.raw("<span class=\"glyphicon glyphicon-#{name}\"></span>")
    end
  end
  # rubocop:enable Metrics/ModuleLength
end
@@ -1,9 +1,13 @@
1
1
  # frozen_string_literal: true
2
- require 'coderay'
3
-
4
2
  module Bulkrax
5
3
  module ApplicationHelper
6
- include ::Hyrax::HyraxHelperBehavior if defined?(::Hyrax)
4
+ def item_entry_path(item, e, opts = {})
5
+ an_importer?(item) ? bulkrax.importer_entry_path(item.id, e.id, opts) : bulkrax.exporter_entry_path(item.id, e.id, opts)
6
+ end
7
+
8
+ def an_importer?(item)
9
+ item.class.to_s.include?('Importer')
10
+ end
7
11
 
8
12
  def coderay(value, opts)
9
13
  CodeRay
@@ -40,7 +40,7 @@ module Bulkrax
40
40
 
41
41
  include DynamicRecordLookup
42
42
 
43
- queue_as :import
43
+ queue_as Bulkrax.config.ingest_queue_name
44
44
 
45
45
  # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers
46
46
  # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters)
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Delete-and-reimport job for collections: supplies the collection-specific
  # job classes that DeleteAndImportJob#perform looks up on the subclass.
  class DeleteAndImportCollectionJob < DeleteAndImportJob
    # Run first, to delete the existing object.
    DELETE_CLASS = Bulkrax::DeleteCollectionJob
    # Run afterwards, once the delete step reports "Deleted".
    IMPORT_CLASS = Bulkrax::ImportCollectionJob
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Delete-and-reimport job for file sets: supplies the file-set-specific
  # job classes that DeleteAndImportJob#perform looks up on the subclass.
  class DeleteAndImportFileSetJob < DeleteAndImportJob
    # Run first, to delete the existing object.
    DELETE_CLASS = Bulkrax::DeleteFileSetJob
    # Run afterwards, once the delete step reports "Deleted".
    IMPORT_CLASS = Bulkrax::ImportFileSetJob
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Deletes the object behind an entry and, when the delete step reports
  # "Deleted", imports the entry again within the same job.
  #
  # The job classes to run are read from the DELETE_CLASS and IMPORT_CLASS
  # constants of the concrete subclass.
  class DeleteAndImportJob < ApplicationJob
    # Use the configurable ingest queue rather than a hard-coded :import queue,
    # so these jobs follow the same queue setting as the other import and delete jobs.
    queue_as Bulkrax.config.ingest_queue_name

    # @param entry [Bulkrax::Entry] the entry whose object is deleted and re-imported
    # @param importer_run [Bulkrax::ImporterRun] the run passed on to both steps
    # @raise [StandardError] any error from either step, re-raised after being recorded on the entry
    def perform(entry, importer_run)
      # both steps run inline (perform_now); the import only follows a delete that reported "Deleted"
      status = self.class::DELETE_CLASS.perform_now(entry, importer_run)
      if status.status_message == "Deleted"
        entry = Bulkrax::Entry.find(entry.id) # maximum reload
        self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id)
      end
    rescue => e
      entry.set_status_info(e)
      # this causes caught exception to be reraised
      raise
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Delete-and-reimport job for works: supplies the work-specific job classes
  # that DeleteAndImportJob#perform looks up on the subclass.
  class DeleteAndImportWorkJob < DeleteAndImportJob
    # Run first, to delete the existing object.
    DELETE_CLASS = Bulkrax::DeleteWorkJob
    # Run afterwards, once the delete step reports "Deleted".
    IMPORT_CLASS = Bulkrax::ImportWorkJob
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class DeleteJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(entry, importer_run)
8
8
  obj = entry.factory.find
@@ -15,6 +15,10 @@ module Bulkrax
15
15
  entry.importer.current_run = ImporterRun.find(importer_run.id)
16
16
  entry.importer.record_status
17
17
  entry.set_status_info("Deleted", ImporterRun.find(importer_run.id))
18
+ rescue => e
19
+ entry.set_status_info(e)
20
+ # this causes caught exception to be reraised
21
+ raise
18
22
  end
19
23
  end
20
24
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class DownloadCloudFileJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # Retrieve cloud file and write to the imports directory
8
8
  # Note: if using the file system, the mounted directory in
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImportCollectionJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
8
  def perform(*args)
@@ -6,7 +6,7 @@ module Bulkrax
6
6
  class ImportFileSetJob < ApplicationJob
7
7
  include DynamicRecordLookup
8
8
 
9
- queue_as :import
9
+ queue_as Bulkrax.config.ingest_queue_name
10
10
 
11
11
  attr_reader :importer_run_id
12
12
 
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Bulkrax
  # Bare import job class; it only declares which queue it runs on.
  class ImportJob < ApplicationJob
    # Use the configurable ingest queue rather than a hard-coded :import queue,
    # matching the queue setting used by the other import jobs.
    queue_as Bulkrax.config.ingest_queue_name
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImportWorkJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  # rubocop:disable Rails/SkipsModelValidations
8
8
  #
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class ImporterJob < ApplicationJob
5
- queue_as :import
5
+ queue_as Bulkrax.config.ingest_queue_name
6
6
 
7
7
  def perform(importer_id, only_updates_since_last_import = false)
8
8
  importer = Importer.find(importer_id)
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class CsvCollectionEntry < CsvEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = "Collection"
8
6
 
9
7
  # Use identifier set by CsvParser#unique_collection_identifier, which falls back
10
8
  # on the Collection's first title if record[source_identifier] is not present
@@ -16,11 +16,12 @@ module Bulkrax
16
16
  class_attribute(:csv_read_data_options, default: {})
17
17
 
18
18
  # there's a risk that this reads the whole file into memory and could cause a memory leak
19
+ # we strip any special characters out of the headers. looking at you Excel
19
20
  def self.read_data(path)
20
21
  raise StandardError, 'CSV path empty' if path.blank?
21
22
  options = {
22
23
  headers: true,
23
- header_converters: ->(h) { h.to_s.strip.to_sym },
24
+ header_converters: ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym },
24
25
  encoding: 'utf-8'
25
26
  }.merge(csv_read_data_options)
26
27
 
@@ -8,6 +8,8 @@ module Bulkrax
8
8
  class Entry < ApplicationRecord
9
9
  include Bulkrax::HasMatchers
10
10
  include Bulkrax::ImportBehavior
11
+ self.class_attribute :default_work_type, default: Bulkrax.default_work_type
12
+
11
13
  include Bulkrax::ExportBehavior
12
14
  include Bulkrax::StatusInfo
13
15
  include Bulkrax::HasLocalProcessing
@@ -23,6 +23,10 @@ module Bulkrax
23
23
  set_status_info(e)
24
24
  end
25
25
 
26
+ def remove_and_rerun
27
+ self.parser_fields['remove_and_rerun']
28
+ end
29
+
26
30
  # #export_source accessors
27
31
  # Used in form to prevent it from getting confused as to which value to populate #export_source with.
28
32
  # Also, used to display the correct selected value when rendering edit form.
@@ -102,9 +106,12 @@ module Bulkrax
102
106
  Importer.all.map { |i| [i.name, i.id] }
103
107
  end
104
108
 
105
- def current_run
109
+ def current_run(skip_counts: false)
110
+ @current_run ||= self.exporter_runs.create! if skip_counts
111
+ return @current_run if @current_run
112
+
106
113
  total = self.limit || parser.total
107
- @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
114
+ @current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total)
108
115
  end
109
116
 
110
117
  def last_run
@@ -3,7 +3,7 @@
3
3
  require 'iso8601'
4
4
 
5
5
  module Bulkrax
6
- class Importer < ApplicationRecord
6
+ class Importer < ApplicationRecord # rubocop:disable Metrics/ClassLength
7
7
  include Bulkrax::ImporterExporterBehavior
8
8
  include Bulkrax::StatusInfo
9
9
 
@@ -103,11 +103,12 @@ module Bulkrax
103
103
  frequency.to_seconds != 0
104
104
  end
105
105
 
106
- def current_run
106
+ def current_run(skip_counts: false)
107
107
  return @current_run if @current_run.present?
108
108
 
109
109
  @current_run = self.importer_runs.create!
110
110
  return @current_run if file? && zip?
111
+ return @current_run if skip_counts
111
112
 
112
113
  entry_counts = {
113
114
  total_work_entries: self.limit || parser.works_total,
@@ -123,6 +124,29 @@ module Bulkrax
123
124
  @last_run ||= self.importer_runs.last
124
125
  end
125
126
 
127
+ def failed_entries?
128
+ entries.failed.any?
129
+ end
130
+
131
+ def failed_statuses
132
+ @failed_statuses ||= Bulkrax::Status.latest_by_statusable
133
+ .includes(:statusable)
134
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Failed')
135
+ end
136
+
137
+ def failed_messages
138
+ failed_statuses.each_with_object({}) do |e, i|
139
+ i[e.error_message] ||= []
140
+ i[e.error_message] << e.id
141
+ end
142
+ end
143
+
144
+ def completed_statuses
145
+ @completed_statuses ||= Bulkrax::Status.latest_by_statusable
146
+ .includes(:statusable)
147
+ .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Complete')
148
+ end
149
+
126
150
  def seen
127
151
  @seen ||= {}
128
152
  end
@@ -135,6 +159,18 @@ module Bulkrax
135
159
  self.parser_fields['update_files']
136
160
  end
137
161
 
162
+ def remove_and_rerun
163
+ self.parser_fields['remove_and_rerun']
164
+ end
165
+
166
+ def metadata_only?
167
+ parser.parser_fields['metadata_only'] == true
168
+ end
169
+
170
+ def existing_entries?
171
+ parser.parser_fields['file_style']&.match(/Existing Entries/)
172
+ end
173
+
138
174
  def import_works
139
175
  import_objects(['work'])
140
176
  end
@@ -157,11 +193,20 @@ module Bulkrax
157
193
  self.only_updates ||= false
158
194
  self.save if self.new_record? # Object needs to be saved for statuses
159
195
  types = types_array || DEFAULT_OBJECT_TYPES
160
- parser.create_objects(types)
196
+ existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types)
197
+ mark_unseen_as_skipped
161
198
  rescue StandardError => e
162
199
  set_status_info(e)
163
200
  end
164
201
 
202
+ # After an import any entries we did not touch are skipped.
203
+ # They are not really pending, complete for the last run, or failed
204
+ def mark_unseen_as_skipped
205
+ entries.where.not(identifier: seen.keys).find_each do |entry|
206
+ entry.set_status_info('Skipped')
207
+ end
208
+ end
209
+
165
210
  # Prepend the base_url to ensure unique set identifiers
166
211
  # @todo - move to parser, as this is OAI specific
167
212
  def unique_collection_identifier(id)
@@ -192,9 +237,5 @@ module Bulkrax
192
237
  rescue
193
238
  "#{self.id}_#{self.created_at.strftime('%Y%m%d%H%M%S')}"
194
239
  end
195
-
196
- def metadata_only?
197
- parser.parser_fields['metadata_only'] == true
198
- end
199
240
  end
200
241
  end
@@ -2,9 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class OaiSetEntry < OaiEntry
5
- def factory_class
6
- Collection
7
- end
5
+ self.default_work_type = "Collection"
8
6
 
9
7
  def build_metadata
10
8
  self.parsed_metadata = self.raw_metadata
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class RdfCollectionEntry < RdfEntry
5
+ self.default_work_type = "Collection"
5
6
  def record
6
7
  @record ||= self.raw_metadata
7
8
  end
@@ -11,9 +12,5 @@ module Bulkrax
11
12
  add_local
12
13
  return self.parsed_metadata
13
14
  end
14
-
15
- def factory_class
16
- Collection
17
- end
18
15
  end
19
16
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bulkrax
4
4
  class Status < ApplicationRecord
5
- belongs_to :statusable, polymorphic: true
5
+ belongs_to :statusable, polymorphic: true, denormalize: { fields: %i[status_message], if: :latest? }
6
6
  belongs_to :runnable, polymorphic: true
7
7
  serialize :error_backtrace, Array
8
8
 
@@ -21,5 +21,14 @@ module Bulkrax
21
21
  status_table.join(latest_status_query.as(latest_status_table.name.to_s), Arel::Nodes::InnerJoin)
22
22
  .on(status_table[:id].eq(latest_status_table[:latest_status_id]))
23
23
  end
24
+
25
+ def latest?
26
+ # TODO: remove if statement when we stop supporting Hyrax < 4
27
+ self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0')
28
+ self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id)
29
+ else
30
+ self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick
31
+ end
32
+ end
24
33
  end
25
34
  end
@@ -2,8 +2,10 @@
2
2
 
3
3
  module Bulkrax
4
4
  module FileSetEntryBehavior
5
- def factory_class
6
- ::FileSet
5
+ extend ActiveSupport::Concern
6
+
7
+ included do
8
+ self.default_work_type = "::FileSet"
7
9
  end
8
10
 
9
11
  def file_reference
@@ -189,22 +189,10 @@ module Bulkrax
189
189
  end
190
190
 
191
191
  def factory_class
192
- fc = if self.parsed_metadata&.[]('model').present?
193
- self.parsed_metadata&.[]('model').is_a?(Array) ? self.parsed_metadata&.[]('model')&.first : self.parsed_metadata&.[]('model')
194
- elsif self.mapping&.[]('work_type').present?
195
- self.parsed_metadata&.[]('work_type').is_a?(Array) ? self.parsed_metadata&.[]('work_type')&.first : self.parsed_metadata&.[]('work_type')
196
- else
197
- Bulkrax.default_work_type
198
- end
199
-
200
- # return the name of the collection or work
201
- fc.tr!(' ', '_')
202
- fc.downcase! if fc.match?(/[-_]/)
203
- fc.camelcase.constantize
204
- rescue NameError
205
- nil
206
- rescue
207
- Bulkrax.default_work_type.constantize
192
+ # ATTENTION: Do not memoize this here; tests should catch the problem, but throughout the
193
+ # lifecycle of parsing a CSV row or what not, we end up having different factory classes based
194
+ # on the encountered metadata.
195
+ FactoryClassFinder.find(entry: self)
208
196
  end
209
197
  end
210
198
  end
@@ -10,6 +10,10 @@ module Bulkrax
10
10
  as: :statusable,
11
11
  class_name: "Bulkrax::Status",
12
12
  inverse_of: :statusable
13
+ scope :failed, -> { where(status_message: 'Failed') }
14
+ scope :complete, -> { where(status_message: 'Complete') }
15
+ scope :pending, -> { where(status_message: 'Pending') }
16
+ scope :skipped, -> { where(status_message: 'Skipped') }
13
17
  end
14
18
 
15
19
  def current_status
@@ -25,6 +29,10 @@ module Bulkrax
25
29
  current_status&.status_message&.match(/^Complete$/)
26
30
  end
27
31
 
32
+ def skipped?
33
+ current_status&.status_message&.match('Skipped')
34
+ end
35
+
28
36
  def status
29
37
  current_status&.status_message || 'Pending'
30
38
  end