blacklight-spotlight 3.0.0.rc3 → 3.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/spotlight/admin/reindex_monitor.js +1 -0
  3. data/app/assets/stylesheets/spotlight/browse_group_categories_block.scss +23 -0
  4. data/app/controllers/spotlight/catalog_controller.rb +4 -1
  5. data/app/controllers/spotlight/dashboards_controller.rb +1 -1
  6. data/app/controllers/spotlight/exhibits_controller.rb +1 -1
  7. data/app/helpers/spotlight/application_helper.rb +19 -0
  8. data/app/helpers/spotlight/pages_helper.rb +1 -1
  9. data/app/jobs/concerns/spotlight/job_tracking.rb +47 -0
  10. data/app/jobs/concerns/spotlight/limit_concurrency.rb +33 -0
  11. data/app/jobs/spotlight/add_uploads_from_csv.rb +6 -3
  12. data/app/jobs/spotlight/application_job.rb +8 -0
  13. data/app/jobs/spotlight/cleanup_job_trackers_job.rb +13 -0
  14. data/app/jobs/spotlight/default_thumbnail_job.rb +1 -3
  15. data/app/jobs/spotlight/reindex_exhibit_job.rb +36 -0
  16. data/app/jobs/spotlight/reindex_job.rb +49 -41
  17. data/app/jobs/spotlight/rename_sidecar_field_job.rb +2 -2
  18. data/app/jobs/spotlight/update_job_trackers_job.rb +20 -0
  19. data/app/models/concerns/spotlight/user.rb +2 -1
  20. data/app/models/spotlight/event.rb +13 -0
  21. data/app/models/spotlight/exhibit.rb +4 -14
  22. data/app/models/spotlight/job_tracker.rb +105 -0
  23. data/app/models/spotlight/reindex_progress.rb +44 -27
  24. data/app/models/spotlight/resource.rb +24 -58
  25. data/app/models/spotlight/resources/iiif_harvester.rb +10 -1
  26. data/app/models/spotlight/resources/iiif_manifest.rb +3 -1
  27. data/app/models/spotlight/resources/iiif_service.rb +1 -1
  28. data/app/models/spotlight/resources/json_upload.rb +12 -0
  29. data/app/models/spotlight/resources/upload.rb +25 -2
  30. data/app/models/spotlight/solr_document_sidecar.rb +2 -1
  31. data/app/services/spotlight/etl.rb +7 -0
  32. data/app/services/spotlight/etl/context.rb +52 -0
  33. data/app/services/spotlight/etl/executor.rb +194 -0
  34. data/app/services/spotlight/etl/loaders.rb +12 -0
  35. data/app/services/spotlight/etl/pipeline.rb +81 -0
  36. data/app/services/spotlight/etl/solr_loader.rb +96 -0
  37. data/app/services/spotlight/etl/sources.rb +25 -0
  38. data/app/services/spotlight/etl/step.rb +82 -0
  39. data/app/services/spotlight/etl/transforms.rb +64 -0
  40. data/app/services/spotlight/validity_checker.rb +5 -5
  41. data/app/views/spotlight/dashboards/_reindexing_activity.html.erb +6 -6
  42. data/app/views/spotlight/shared/_locale_picker.html.erb +1 -1
  43. data/app/views/spotlight/sir_trevor/blocks/_browse_group_categories_block.html.erb +4 -3
  44. data/config/locales/spotlight.ar.yml +11 -1
  45. data/config/locales/spotlight.en.yml +3 -2
  46. data/db/migrate/20210122082032_create_job_trackers.rb +22 -0
  47. data/db/migrate/20210126123041_create_events.rb +15 -0
  48. data/lib/generators/spotlight/scaffold_resource_generator.rb +5 -13
  49. data/lib/spotlight/engine.rb +8 -1
  50. data/lib/spotlight/version.rb +1 -1
  51. data/spec/controllers/spotlight/catalog_controller_spec.rb +3 -1
  52. data/spec/examples.txt +1448 -1437
  53. data/spec/factories/job_trackers.rb +9 -0
  54. data/spec/features/add_items_spec.rb +9 -4
  55. data/spec/features/javascript/reindex_monitor_spec.rb +1 -1
  56. data/spec/features/site_users_management_spec.rb +4 -4
  57. data/spec/helpers/spotlight/pages_helper_spec.rb +8 -0
  58. data/spec/jobs/spotlight/reindex_exhibit_job_spec.rb +43 -0
  59. data/spec/jobs/spotlight/reindex_job_spec.rb +30 -59
  60. data/spec/models/spotlight/exhibit_spec.rb +3 -57
  61. data/spec/models/spotlight/reindex_progress_spec.rb +89 -87
  62. data/spec/models/spotlight/resource_spec.rb +69 -90
  63. data/spec/models/spotlight/resources/iiif_harvester_spec.rb +9 -10
  64. data/spec/models/spotlight/solr_document_sidecar_spec.rb +1 -0
  65. data/spec/services/spotlight/etl/context_spec.rb +66 -0
  66. data/spec/services/spotlight/etl/executor_spec.rb +149 -0
  67. data/spec/services/spotlight/etl/pipeline_spec.rb +22 -0
  68. data/spec/services/spotlight/etl/solr_loader_spec.rb +76 -0
  69. data/spec/services/spotlight/etl/step_spec.rb +70 -0
  70. data/spec/spec_helper.rb +2 -5
  71. data/spec/views/spotlight/dashboards/_reindexing_activity.html.erb_spec.rb +22 -19
  72. metadata +55 -15
  73. data/app/models/concerns/spotlight/resources/open_graph.rb +0 -36
  74. data/app/models/spotlight/reindexing_log_entry.rb +0 -42
  75. data/app/services/spotlight/resources/iiif_builder.rb +0 -19
  76. data/app/services/spotlight/solr_document_builder.rb +0 -77
  77. data/app/services/spotlight/upload_solr_document_builder.rb +0 -57
  78. data/spec/factories/reindexing_log_entries.rb +0 -54
  79. data/spec/models/spotlight/reindexing_log_entry_spec.rb +0 -129
  80. data/spec/models/spotlight/resources/open_graph_spec.rb +0 -65
  81. data/spec/services/spotlight/solr_document_builder_spec.rb +0 -66
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spotlight
4
+ ###
5
+ # Updates the parent job tracker (the one this tracker reports on)
6
+ # with the combined status and progress of its child job trackers
7
+ ###
8
+ class UpdateJobTrackersJob < Spotlight::ApplicationJob
9
+ def perform(job_tracker)
10
+ reports_on = job_tracker.on
11
+
12
+ return unless reports_on.is_a? Spotlight::JobTracker
13
+
14
+ reports_on.update(status: 'completed') if reports_on.job_trackers.all?(&:completed?)
15
+ reports_on.update(status: 'failed') if reports_on.job_trackers.any?(&:failed?)
16
+
17
+ reports_on.update(data: { progress: reports_on.job_trackers.sum(&:progress), total: reports_on.job_trackers.sum(&:total) })
18
+ end
19
+ end
20
+ end
@@ -8,7 +8,6 @@ module Spotlight
8
8
  included do
9
9
  has_many :roles, class_name: 'Spotlight::Role', dependent: :destroy
10
10
  has_many :exhibits, class_name: 'Spotlight::Exhibit', through: :roles, source: 'resource', source_type: 'Spotlight::Exhibit'
11
- has_many :reindexing_log_entries, class_name: 'Spotlight::ReindexingLogEntry'
12
11
 
13
12
  scope :with_roles, -> { where(id: Spotlight::Role.distinct.pluck(:user_id)) }
14
13
 
@@ -28,6 +27,8 @@ module Spotlight
28
27
  end
29
28
 
30
29
  def add_default_roles
30
+ return unless Spotlight::Engine.config.assign_default_roles_to_first_user
31
+
31
32
  roles.build role: 'admin', resource: Spotlight::Site.instance unless self.class.any?
32
33
  end
33
34
 
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spotlight
4
+ # Logged events for Spotlight exhibits
5
+ class Event < ActiveRecord::Base
6
+ belongs_to :resource, polymorphic: true
7
+ belongs_to :exhibit, optional: true
8
+
9
+ serialize :data
10
+
11
+ self.inheritance_column = :event_class
12
+ end
13
+ end
@@ -4,7 +4,6 @@ require 'mail'
4
4
  module Spotlight
5
5
  ##
6
6
  # Spotlight exhibit
7
- # rubocop:disable Metrics/ClassLength
8
7
  class Exhibit < ActiveRecord::Base
9
8
  class_attribute :themes_selector
10
9
  include Spotlight::ExhibitAnalytics
@@ -57,8 +56,8 @@ module Spotlight
57
56
 
58
57
  has_many :feature_pages, -> { for_default_locale }, extend: FriendlyId::FinderMethods
59
58
  has_many :groups, dependent: :delete_all
59
+ has_many :job_trackers, as: :on, dependent: :delete_all
60
60
  has_many :main_navigations, dependent: :delete_all
61
- has_many :reindexing_log_entries, dependent: :destroy
62
61
  has_many :resources
63
62
  has_many :roles, as: :resource, dependent: :delete_all
64
63
  has_many :searches, dependent: :destroy, extend: FriendlyId::FinderMethods
@@ -119,8 +118,8 @@ module Spotlight
119
118
  end
120
119
  end
121
120
 
122
- def reindex_later(user = nil)
123
- Spotlight::ReindexJob.perform_later(self, new_reindexing_log_entry(user))
121
+ def reindex_later(current_user = nil)
122
+ Spotlight::ReindexExhibitJob.perform_later(self, user: current_user)
124
123
  end
125
124
 
126
125
  def uploaded_resource_fields
@@ -136,7 +135,7 @@ module Spotlight
136
135
  end
137
136
 
138
137
  def reindex_progress
139
- @reindex_progress ||= ReindexProgress.new(current_reindexing_log_entry)
138
+ @reindex_progress ||= ReindexProgress.new(self)
140
139
  end
141
140
 
142
141
  def available_locales
@@ -149,19 +148,10 @@ module Spotlight
149
148
  self.description = ::Rails::Html::FullSanitizer.new.sanitize(description)
150
149
  end
151
150
 
152
- def new_reindexing_log_entry(user = nil)
153
- Spotlight::ReindexingLogEntry.create(exhibit: self, user: user, items_reindexed_count: 0, job_status: 'unstarted')
154
- end
155
-
156
151
  private
157
152
 
158
- def current_reindexing_log_entry
159
- reindexing_log_entries.started_or_completed.first || reindexing_log_entries.build
160
- end
161
-
162
153
  def move_friendly_id_error_to_slug
163
154
  errors.add :slug, *errors.delete(:friendly_id) if errors[:friendly_id].present?
164
155
  end
165
156
  end
166
- # rubocop:enable Metrics/ClassLength
167
157
  end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spotlight
4
+ # Associate background jobs with records
5
+ class JobTracker < ActiveRecord::Base
6
+ scope :recent, -> { order('updated_at DESC').limit(5) }
7
+ scope :in_progress, -> { where.not(status: %w[completed failed]) }
8
+ scope :completed, -> { where(status: %w[completed failed]) }
9
+
10
+ belongs_to :on, polymorphic: true
11
+ belongs_to :resource, polymorphic: true
12
+ belongs_to :user, optional: true, class_name: Spotlight::Engine.config.user_class # rubocop:disable Rails/ReflectionClassName
13
+ has_many :events, as: :resource, dependent: :delete_all
14
+ has_many :job_trackers, as: :on, dependent: Rails.version > '6.1' ? :destroy_async : :destroy
15
+
16
+ serialize :data
17
+
18
+ after_initialize do
19
+ self.data ||= {}
20
+ end
21
+
22
+ after_commit do
23
+ next unless on.is_a? Spotlight::JobTracker
24
+
25
+ UpdateJobTrackersJob.perform_later(self)
26
+ end
27
+
28
+ def label
29
+ "[#{job_class.titleize}] #{resource_label}"
30
+ end
31
+
32
+ def resource_label
33
+ return resource.filename if resource.is_a? ActiveStorage::Blob
34
+ return resource.name if resource.is_a? Upload
35
+
36
+ resource_id
37
+ end
38
+
39
+ def job_status
40
+ return {} unless job_id
41
+
42
+ @job_status ||= ActiveJob::Status.get(job_id)
43
+ end
44
+
45
+ def progress_label
46
+ return number_with_delimiter(progress) unless total?
47
+
48
+ "#{number_with_delimiter(progress)} / #{number_with_delimiter(total)}"
49
+ end
50
+
51
+ def progress
52
+ data[:progress] || job_status[:progress] || 0
53
+ end
54
+
55
+ def total(default: progress)
56
+ [progress, data[:total] || job_status[:total] || default].max
57
+ end
58
+
59
+ def total?
60
+ total(default: 0).positive?
61
+ end
62
+
63
+ def percent
64
+ return nil unless total?
65
+
66
+ (100.0 * progress) / total
67
+ end
68
+
69
+ def enqueued?
70
+ status == 'enqueued'
71
+ end
72
+
73
+ def in_progress?
74
+ status == 'in_progress'
75
+ end
76
+
77
+ def completed?
78
+ status == 'completed'
79
+ end
80
+
81
+ def failed?
82
+ status == 'failed'
83
+ end
84
+
85
+ def append_log_entry(type:, **args)
86
+ events.create(type: type, data: args)
87
+ rescue StandardError => e
88
+ Rails.logger.error("Unable to create log entry for job tracker #{id}: #{e}")
89
+ end
90
+
91
+ def top_level_job_tracker
92
+ if on.is_a?(Spotlight::JobTracker)
93
+ on.top_level_job_tracker
94
+ else
95
+ self
96
+ end
97
+ end
98
+
99
+ private
100
+
101
+ def number_with_delimiter(*args)
102
+ ActiveSupport::NumberHelper.number_to_delimited(*args)
103
+ end
104
+ end
105
+ end
@@ -4,59 +4,76 @@ module Spotlight
4
4
  ##
5
5
  # ReindexProgress is a class that models the progress of reindexing a list of resources
6
6
  class ReindexProgress
7
- attr_reader :current_log_entry
7
+ attr_reader :exhibit
8
8
 
9
- delegate :updated_at, to: :current_log_entry
9
+ delegate :updated_at, to: :most_relevant_job_tracker
10
10
 
11
- def initialize(current_log_entry)
12
- @current_log_entry = current_log_entry
11
+ def initialize(exhibit)
12
+ @exhibit = exhibit
13
+ end
14
+
15
+ def as_json(*)
16
+ {
17
+ recently_in_progress: recently_in_progress?,
18
+ started_at: localized_start_time,
19
+ finished_at: localized_finish_time,
20
+ updated_at: localized_updated_time,
21
+ total: [total, completed].max,
22
+ completed: completed,
23
+ finished: finished?,
24
+ errored: errored?
25
+ }
26
+ end
27
+
28
+ private
29
+
30
+ def job_trackers
31
+ @job_trackers ||= exhibit.job_trackers.where(job_class: 'Spotlight::ReindexExhibitJob').recent
32
+ end
33
+
34
+ def most_relevant_job_tracker
35
+ return @most_relevant_job_tracker if @most_relevant_job_tracker
36
+
37
+ @most_relevant_job_tracker ||= job_trackers.in_progress.first || job_trackers.completed.first || job_trackers.first || Spotlight::JobTracker.new
13
38
  end
14
39
 
15
40
  def recently_in_progress?
16
- return true if current_log_entry.in_progress?
41
+ return false unless most_relevant_job_tracker.persisted?
42
+ return true if most_relevant_job_tracker.in_progress?
17
43
 
18
- current_log_entry.end_time.present? && (current_log_entry.end_time > Spotlight::Engine.config.reindex_progress_window.minutes.ago)
44
+ finished? && most_relevant_job_tracker.updated_at >= Spotlight::Engine.config.reindex_progress_window.ago
19
45
  end
20
46
 
21
47
  def started_at
22
- current_log_entry.start_time
48
+ most_relevant_job_tracker.created_at
23
49
  end
24
50
 
25
51
  def finished?
26
- current_log_entry.succeeded? || current_log_entry.failed?
52
+ most_relevant_job_tracker.completed? || (errored? && most_relevant_job_tracker.job_trackers.none?(&:in_progress?))
27
53
  end
28
54
 
29
55
  def finished_at
30
- current_log_entry.end_time
56
+ return unless finished?
57
+
58
+ most_relevant_job_tracker.updated_at
31
59
  end
32
60
 
33
61
  def total
34
- current_log_entry.items_reindexed_estimate
62
+ return most_relevant_job_tracker.total if finished?
63
+
64
+ most_relevant_job_tracker.job_trackers.sum(&:total)
35
65
  end
36
66
 
37
67
  def completed
38
- current_log_entry.items_reindexed_count
39
- end
68
+ return most_relevant_job_tracker.progress if finished?
40
69
 
41
- def errored?
42
- current_log_entry.failed?
70
+ most_relevant_job_tracker.job_trackers.sum(&:progress)
43
71
  end
44
72
 
45
- def as_json(*)
46
- {
47
- recently_in_progress: recently_in_progress?,
48
- started_at: localized_start_time,
49
- finished_at: localized_finish_time,
50
- updated_at: localized_updated_time,
51
- total: total,
52
- completed: completed,
53
- finished: finished?,
54
- errored: errored?
55
- }
73
+ def errored?
74
+ most_relevant_job_tracker.failed?
56
75
  end
57
76
 
58
- private
59
-
60
77
  def localized_start_time
61
78
  return unless started_at
62
79
 
@@ -4,10 +4,17 @@ module Spotlight
4
4
  ##
5
5
  # Exhibit resources
6
6
  class Resource < ActiveRecord::Base
7
- include ActiveSupport::Benchmarkable
8
-
9
- class_attribute :document_builder_class
10
- self.document_builder_class = SolrDocumentBuilder
7
+ class_attribute :indexing_pipeline, default: (Spotlight::Etl::Pipeline.new do |pipeline|
8
+ pipeline.sources = [Spotlight::Etl::Sources::IdentitySource]
9
+ pipeline.transforms = [
10
+ reject_blank: Spotlight::Etl::Transforms::RejectBlank,
11
+ reject_missing: Spotlight::Etl::Transforms::RejectMissingUniqueId,
12
+ apply_exhibit_metadata: Spotlight::Etl::Transforms::ApplyExhibitMetadata,
13
+ apply_application_metadata: Spotlight::Etl::Transforms::ApplyApplicationMetadata,
14
+ apply_pipeline_metadata: Spotlight::Etl::Transforms::ApplyPipelineMetadata
15
+ ]
16
+ pipeline.loaders = [Spotlight::Etl::SolrLoader]
17
+ end)
11
18
 
12
19
  extend ActiveModel::Callbacks
13
20
  define_model_callbacks :index
@@ -16,12 +23,10 @@ module Spotlight
16
23
 
17
24
  belongs_to :exhibit
18
25
  has_many :solr_document_sidecars
26
+ has_many :events, as: :resource
19
27
 
20
28
  serialize :data, Hash
21
29
 
22
- after_index :commit
23
- after_index :touch_exhibit!
24
-
25
30
  ##
26
31
  # Persist the record to the database, and trigger a reindex to solr
27
32
  #
@@ -45,67 +50,28 @@ module Spotlight
45
50
  # Run this resource through its indexing pipeline (with commit: true) to write it to the index
46
51
  #
47
52
  # @return [Integer] number of records indexed
48
- def reindex(reindexing_log_entry = nil)
49
- benchmark "Reindexing #{self} (batch size: #{batch_size})" do
50
- count = 0
51
-
52
- run_callbacks :index do
53
- document_builder.documents_to_index.each_slice(batch_size) do |batch|
54
- write_to_index(batch)
55
- count += batch.length
56
- reindexing_log_entry&.update(items_reindexed_count: count)
57
- end
58
-
59
- count
53
+ def reindex(touch: true, **args, &block)
54
+ i = 0
55
+ run_callbacks :index do
56
+ indexing_pipeline.call(Spotlight::Etl::Context.new(self, commit: true, **args)) do |data|
57
+ i += 1
58
+ block&.call(data)
60
59
  end
61
60
  end
62
- end
63
-
64
- def document_builder
65
- @document_builder ||= document_builder_class.new(self)
66
- end
67
-
68
- private
69
-
70
- def blacklight_solr
71
- @solr ||= RSolr.connect(connection_config.merge(adapter: connection_config[:http_adapter]))
72
- end
73
61
 
74
- def connection_config
75
- Blacklight.connection_config
76
- end
62
+ touch_exhibit! if touch
77
63
 
78
- def batch_size
79
- Spotlight::Engine.config.solr_batch_size
64
+ i
80
65
  end
81
66
 
82
- def write_to_index(batch)
83
- documents = documents_that_have_ids(batch)
84
- return unless write? && documents.present?
85
-
86
- blacklight_solr.update params: { commitWithin: 500 },
87
- data: documents.to_json,
88
- headers: { 'Content-Type' => 'application/json' }
67
+ def estimated_size(**args)
68
+ indexing_pipeline.estimated_size(Spotlight::Etl::Context.new(self, **args))
89
69
  end
90
70
 
91
- def commit
92
- return unless write?
93
-
94
- blacklight_solr.commit
95
- rescue StandardError => e
96
- Rails.logger.warn "Unable to commit to solr: #{e}"
97
- end
71
+ private
98
72
 
99
73
  def touch_exhibit!
100
- exhibit.touch
101
- end
102
-
103
- def write?
104
- Spotlight::Engine.config.writable_index
105
- end
106
-
107
- def documents_that_have_ids(document_list)
108
- document_list.reject { |d| d[document_builder.document_model.unique_key.to_sym].blank? }
74
+ exhibit&.touch
109
75
  end
110
76
  end
111
77
  end
@@ -7,7 +7,6 @@ module Spotlight
7
7
  # harvest Images from IIIF Manifest and turn them into a Spotlight::Resource
8
8
  # Note: IIIF API : http://iiif.io/api/presentation/2.0
9
9
  class IiifHarvester < Spotlight::Resource
10
- self.document_builder_class = Spotlight::Resources::IiifBuilder
11
10
  self.weight = -5000
12
11
 
13
12
  validate :valid_url?
@@ -16,6 +15,16 @@ module Spotlight
16
15
  @iiif_manifests ||= IiifService.parse(url)
17
16
  end
18
17
 
18
+ def self.indexing_pipeline
19
+ @indexing_pipeline ||= super.dup.tap do |pipeline|
20
+ pipeline.sources = [Spotlight::Etl::Sources::SourceMethodSource(:iiif_manifests)]
21
+
22
+ pipeline.transforms = [
23
+ ->(data, p) { data.merge(p.source.to_solr(exhibit: p.context.resource.exhibit)) }
24
+ ] + pipeline.transforms
25
+ end
26
+ end
27
+
19
28
  private
20
29
 
21
30
  def valid_url?