blacklight-spotlight 3.0.0.rc3 → 3.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/spotlight/admin/reindex_monitor.js +1 -0
  3. data/app/assets/stylesheets/spotlight/browse_group_categories_block.scss +23 -0
  4. data/app/controllers/spotlight/catalog_controller.rb +4 -1
  5. data/app/controllers/spotlight/dashboards_controller.rb +1 -1
  6. data/app/controllers/spotlight/exhibits_controller.rb +1 -1
  7. data/app/helpers/spotlight/application_helper.rb +19 -0
  8. data/app/helpers/spotlight/pages_helper.rb +1 -1
  9. data/app/jobs/concerns/spotlight/job_tracking.rb +47 -0
  10. data/app/jobs/concerns/spotlight/limit_concurrency.rb +33 -0
  11. data/app/jobs/spotlight/add_uploads_from_csv.rb +6 -3
  12. data/app/jobs/spotlight/application_job.rb +8 -0
  13. data/app/jobs/spotlight/cleanup_job_trackers_job.rb +13 -0
  14. data/app/jobs/spotlight/default_thumbnail_job.rb +1 -3
  15. data/app/jobs/spotlight/reindex_exhibit_job.rb +36 -0
  16. data/app/jobs/spotlight/reindex_job.rb +49 -41
  17. data/app/jobs/spotlight/rename_sidecar_field_job.rb +2 -2
  18. data/app/jobs/spotlight/update_job_trackers_job.rb +20 -0
  19. data/app/models/concerns/spotlight/user.rb +2 -1
  20. data/app/models/spotlight/event.rb +13 -0
  21. data/app/models/spotlight/exhibit.rb +4 -14
  22. data/app/models/spotlight/job_tracker.rb +105 -0
  23. data/app/models/spotlight/reindex_progress.rb +44 -27
  24. data/app/models/spotlight/resource.rb +24 -58
  25. data/app/models/spotlight/resources/iiif_harvester.rb +10 -1
  26. data/app/models/spotlight/resources/iiif_manifest.rb +3 -1
  27. data/app/models/spotlight/resources/iiif_service.rb +1 -1
  28. data/app/models/spotlight/resources/json_upload.rb +12 -0
  29. data/app/models/spotlight/resources/upload.rb +25 -2
  30. data/app/models/spotlight/solr_document_sidecar.rb +2 -1
  31. data/app/services/spotlight/etl.rb +7 -0
  32. data/app/services/spotlight/etl/context.rb +52 -0
  33. data/app/services/spotlight/etl/executor.rb +194 -0
  34. data/app/services/spotlight/etl/loaders.rb +12 -0
  35. data/app/services/spotlight/etl/pipeline.rb +81 -0
  36. data/app/services/spotlight/etl/solr_loader.rb +96 -0
  37. data/app/services/spotlight/etl/sources.rb +25 -0
  38. data/app/services/spotlight/etl/step.rb +82 -0
  39. data/app/services/spotlight/etl/transforms.rb +64 -0
  40. data/app/services/spotlight/validity_checker.rb +5 -5
  41. data/app/views/spotlight/dashboards/_reindexing_activity.html.erb +6 -6
  42. data/app/views/spotlight/shared/_locale_picker.html.erb +1 -1
  43. data/app/views/spotlight/sir_trevor/blocks/_browse_group_categories_block.html.erb +4 -3
  44. data/config/locales/spotlight.ar.yml +11 -1
  45. data/config/locales/spotlight.en.yml +3 -2
  46. data/db/migrate/20210122082032_create_job_trackers.rb +22 -0
  47. data/db/migrate/20210126123041_create_events.rb +15 -0
  48. data/lib/generators/spotlight/scaffold_resource_generator.rb +5 -13
  49. data/lib/spotlight/engine.rb +8 -1
  50. data/lib/spotlight/version.rb +1 -1
  51. data/spec/controllers/spotlight/catalog_controller_spec.rb +3 -1
  52. data/spec/examples.txt +1448 -1437
  53. data/spec/factories/job_trackers.rb +9 -0
  54. data/spec/features/add_items_spec.rb +9 -4
  55. data/spec/features/javascript/reindex_monitor_spec.rb +1 -1
  56. data/spec/features/site_users_management_spec.rb +4 -4
  57. data/spec/helpers/spotlight/pages_helper_spec.rb +8 -0
  58. data/spec/jobs/spotlight/reindex_exhibit_job_spec.rb +43 -0
  59. data/spec/jobs/spotlight/reindex_job_spec.rb +30 -59
  60. data/spec/models/spotlight/exhibit_spec.rb +3 -57
  61. data/spec/models/spotlight/reindex_progress_spec.rb +89 -87
  62. data/spec/models/spotlight/resource_spec.rb +69 -90
  63. data/spec/models/spotlight/resources/iiif_harvester_spec.rb +9 -10
  64. data/spec/models/spotlight/solr_document_sidecar_spec.rb +1 -0
  65. data/spec/services/spotlight/etl/context_spec.rb +66 -0
  66. data/spec/services/spotlight/etl/executor_spec.rb +149 -0
  67. data/spec/services/spotlight/etl/pipeline_spec.rb +22 -0
  68. data/spec/services/spotlight/etl/solr_loader_spec.rb +76 -0
  69. data/spec/services/spotlight/etl/step_spec.rb +70 -0
  70. data/spec/spec_helper.rb +2 -5
  71. data/spec/views/spotlight/dashboards/_reindexing_activity.html.erb_spec.rb +22 -19
  72. metadata +55 -15
  73. data/app/models/concerns/spotlight/resources/open_graph.rb +0 -36
  74. data/app/models/spotlight/reindexing_log_entry.rb +0 -42
  75. data/app/services/spotlight/resources/iiif_builder.rb +0 -19
  76. data/app/services/spotlight/solr_document_builder.rb +0 -77
  77. data/app/services/spotlight/upload_solr_document_builder.rb +0 -57
  78. data/spec/factories/reindexing_log_entries.rb +0 -54
  79. data/spec/models/spotlight/reindexing_log_entry_spec.rb +0 -129
  80. data/spec/models/spotlight/resources/open_graph_spec.rb +0 -65
  81. data/spec/services/spotlight/solr_document_builder_spec.rb +0 -66
@@ -1,36 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Spotlight
4
- module Resources
5
- ##
6
- # OpenGraph metadata harvester
7
- module OpenGraph
8
- extend ActiveSupport::Concern
9
- include Spotlight::Resources::Web
10
-
11
- def opengraph
12
- @opengraph ||= begin
13
- page = {}
14
-
15
- body.css('meta').select { |m| m.attribute('property') }.each do |m|
16
- page[m.attribute('property').to_s] = m.attribute('content').to_s
17
- end
18
-
19
- page
20
- end
21
- end
22
-
23
- def opengraph_properties
24
- Hash[opengraph.map do |k, v|
25
- ["#{opengraph_solr_field_name(k)}_tesim", v]
26
- end]
27
- end
28
-
29
- private
30
-
31
- def opengraph_solr_field_name(field)
32
- field.parameterize(separator: '_')
33
- end
34
- end
35
- end
36
- end
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Spotlight
4
- ##
5
- # a log entry representing an attempt to reindex some number of records in an exhibit
6
- class ReindexingLogEntry < ActiveRecord::Base
7
- enum job_status: { unstarted: 0, in_progress: 1, succeeded: 2, failed: 3 }
8
-
9
- belongs_to :exhibit, class_name: 'Spotlight::Exhibit'
10
- belongs_to :user, class_name: '::User', optional: true
11
-
12
- # null start times sort to the top, to more easily surface pending reindexing
13
- default_scope { order(Arel.sql('start_time IS NOT NULL, start_time DESC')) }
14
- scope :recent, -> { limit(5) }
15
- scope :started_or_completed, -> { where.not(job_status: 'unstarted') }
16
-
17
- def duration
18
- end_time - start_time if end_time
19
- end
20
-
21
- def in_progress!
22
- self.start_time = Time.zone.now
23
- super
24
- rescue StandardError
25
- Rails.logger.error "unexpected error updating log entry to :in_progress from #{caller}"
26
- end
27
-
28
- def succeeded!
29
- self.end_time = Time.zone.now
30
- super
31
- rescue StandardError
32
- Rails.logger.error "unexpected error updating log entry to :succeeded from #{caller}"
33
- end
34
-
35
- def failed!
36
- self.end_time = Time.zone.now
37
- super
38
- rescue StandardError
39
- Rails.logger.error "unexpected error updating log entry to :failed from #{caller}"
40
- end
41
- end
42
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Spotlight
4
- module Resources
5
- # transforms a IiifHarvester into solr documents
6
- class IiifBuilder < Spotlight::SolrDocumentBuilder
7
- def to_solr
8
- return to_enum(:to_solr) { 0 } unless block_given?
9
-
10
- base_doc = super
11
- resource.iiif_manifests.each do |manifest|
12
- manifest.with_exhibit(exhibit)
13
- manifest_solr = manifest.to_solr
14
- yield base_doc.merge(manifest_solr) if manifest_solr.present?
15
- end
16
- end
17
- end
18
- end
19
- end
@@ -1,77 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Spotlight
4
- # Creates solr documents for the documents in a resource
5
- class SolrDocumentBuilder
6
- def initialize(resource)
7
- @resource = resource
8
- end
9
-
10
- attr_reader :resource
11
-
12
- delegate :exhibit, :document_model, to: :resource
13
-
14
- ##
15
- # @return an enumerator of all the indexable documents for this resource
16
- def documents_to_index
17
- data = to_solr
18
- return [] if data.blank?
19
-
20
- data &&= [data] if data.is_a? Hash
21
-
22
- return to_enum(:documents_to_index) { data.size } unless block_given?
23
-
24
- data.lazy.reject(&:blank?).each do |doc|
25
- yield doc.reverse_merge(exhibit_solr_doc(doc[unique_key]).to_solr)
26
- end
27
- end
28
-
29
- protected
30
-
31
- ##
32
- # @abstract
33
- # Convert this resource into zero-to-many new solr documents. The data here
34
- # should be merged into the resource-specific {#to_solr} data.
35
- #
36
- # @return [Hash] a single solr document hash
37
- # @return [Enumerator<Hash>] multiple solr document hashes. This can be a
38
- # simple array, or an lazy enumerator
39
- def to_solr
40
- spotlight_resource_metadata_for_solr
41
- end
42
-
43
- private
44
-
45
- # Null object for SolrDocument
46
- module NilSolrDocument
47
- def self.to_solr
48
- {}
49
- end
50
- end
51
-
52
- ##
53
- # Get any exhibit-specific metadata stored in e.g. sidecars, tags, etc
54
- # This needs the generated solr document
55
- # @returns [#to_solr] something that responds to `to_solr'
56
- def exhibit_solr_doc(id)
57
- return NilSolrDocument unless document_model || id.present?
58
-
59
- document_model.build_for_exhibit(id, exhibit, resource: (resource if resource.persisted?))
60
- end
61
-
62
- def unique_key
63
- if document_model
64
- document_model.unique_key.to_sym
65
- else
66
- :id
67
- end
68
- end
69
-
70
- def spotlight_resource_metadata_for_solr
71
- {
72
- Spotlight::Engine.config.resource_global_id_field => (resource.to_global_id.to_s if resource.persisted?),
73
- document_model.resource_type_field => resource.class.to_s.tableize
74
- }
75
- end
76
- end
77
- end
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Spotlight
4
- # Creates solr documents for the uploaded documents in a resource
5
- class UploadSolrDocumentBuilder < SolrDocumentBuilder
6
- delegate :compound_id, to: :resource
7
-
8
- def to_solr
9
- super.tap do |solr_hash|
10
- add_default_solr_fields solr_hash
11
- add_sidecar_fields solr_hash
12
-
13
- if attached_file?
14
- add_image_dimensions solr_hash
15
- add_file_versions solr_hash
16
- add_manifest_path solr_hash
17
- end
18
- end
19
- end
20
-
21
- private
22
-
23
- def add_default_solr_fields(solr_hash)
24
- solr_hash[exhibit.blacklight_config.document_model.unique_key.to_sym] = compound_id
25
- end
26
-
27
- def add_image_dimensions(solr_hash)
28
- dimensions = Riiif::Image.new(resource.upload_id).info
29
- solr_hash[:spotlight_full_image_width_ssm] = dimensions.width
30
- solr_hash[:spotlight_full_image_height_ssm] = dimensions.height
31
- end
32
-
33
- def add_file_versions(solr_hash)
34
- solr_hash[Spotlight::Engine.config.thumbnail_field] = riiif.image_path(resource.upload_id, size: '!400,400')
35
- end
36
-
37
- def add_sidecar_fields(solr_hash)
38
- solr_hash.merge! resource.sidecar.to_solr
39
- end
40
-
41
- def add_manifest_path(solr_hash)
42
- solr_hash[Spotlight::Engine.config.iiif_manifest_field] = spotlight_routes.manifest_exhibit_solr_document_path(exhibit, resource.compound_id)
43
- end
44
-
45
- def spotlight_routes
46
- Spotlight::Engine.routes.url_helpers
47
- end
48
-
49
- def riiif
50
- Riiif::Engine.routes.url_helpers
51
- end
52
-
53
- def attached_file?
54
- resource.upload&.file_present?
55
- end
56
- end
57
- end
@@ -1,54 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- FactoryBot.define do
4
- factory :unstarted_reindexing_log_entry, class: 'Spotlight::ReindexingLogEntry' do
5
- items_reindexed_count { 15 }
6
- job_status { 'unstarted' }
7
- exhibit
8
- user
9
- end
10
-
11
- factory :reindexing_log_entry, class: 'Spotlight::ReindexingLogEntry' do
12
- items_reindexed_count { 10 }
13
- start_time { Time.zone.parse('2017-01-05 23:00:00') }
14
- end_time { Time.zone.parse('2017-01-05 23:05:00') }
15
- job_status { 'succeeded' }
16
- exhibit
17
- user
18
- end
19
-
20
- factory :reindexing_log_entry_no_user, class: 'Spotlight::ReindexingLogEntry' do
21
- items_reindexed_count { 10 }
22
- start_time { Time.zone.parse('2017-01-05 23:00:00') }
23
- end_time { Time.zone.parse('2017-01-05 23:05:00') }
24
- job_status { 'succeeded' }
25
- exhibit
26
- end
27
-
28
- factory :in_progress_reindexing_log_entry, class: 'Spotlight::ReindexingLogEntry' do
29
- items_reindexed_count { 100 }
30
- start_time { Time.zone.now - 300 }
31
- end_time { nil }
32
- job_status { 'in_progress' }
33
- exhibit
34
- user
35
- end
36
-
37
- factory :recent_reindexing_log_entry, class: 'Spotlight::ReindexingLogEntry' do
38
- sequence(:items_reindexed_count)
39
- start_time { Time.zone.now - 86_400 }
40
- end_time { Time.zone.now - 86_100 }
41
- job_status { 'succeeded' }
42
- exhibit
43
- user
44
- end
45
-
46
- factory :failed_reindexing_log_entry, class: 'Spotlight::ReindexingLogEntry' do
47
- items_reindexed_count { 10 }
48
- start_time { Time.zone.parse('2017-01-10 23:00:00') }
49
- end_time { Time.zone.parse('2017-01-10 23:05:00') }
50
- job_status { 'failed' }
51
- exhibit
52
- user
53
- end
54
- end
@@ -1,129 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- describe Spotlight::ReindexingLogEntry, type: :model do
4
- subject { FactoryBot.build(:reindexing_log_entry) }
5
-
6
- describe 'scope' do
7
- before do
8
- (0..10).to_a.each { FactoryBot.create(:recent_reindexing_log_entry) }
9
- FactoryBot.create(:unstarted_reindexing_log_entry)
10
- (0..10).to_a.each { FactoryBot.create(:recent_reindexing_log_entry) }
11
- end
12
-
13
- let(:sorted_log_entry_list) do
14
- unstarted_entries = described_class.where(start_time: nil).to_a
15
- started_entries = described_class.where.not(start_time: nil).to_a.sort_by(&:start_time).reverse
16
- unstarted_entries + started_entries # null start times should be first
17
- end
18
-
19
- context 'default' do
20
- it 'sorts by start_time in descending order' do
21
- default_log_entry_list = described_class.all.to_a
22
- expect(default_log_entry_list).to eq sorted_log_entry_list
23
- end
24
- end
25
-
26
- context 'recent' do
27
- it 'returns the most recent 5 entries (sorted by start_time descending)' do
28
- recent_log_entry_list = described_class.recent.to_a
29
- expect(recent_log_entry_list).to eq sorted_log_entry_list[0..4]
30
- end
31
- end
32
- end
33
-
34
- describe '#duration' do
35
- context 'when end_time is present' do
36
- it 'is calculated as difference between end_time and start_time' do
37
- expect(subject.duration).to eq 300
38
- end
39
- end
40
-
41
- context 'when end_time is not present' do
42
- subject { FactoryBot.build(:in_progress_reindexing_log_entry) }
43
-
44
- it 'is nil' do
45
- expect(subject.duration).to be nil
46
- end
47
- end
48
- end
49
-
50
- describe 'state updating methods' do
51
- describe '#in_progress!' do
52
- subject { FactoryBot.build(:unstarted_reindexing_log_entry) }
53
-
54
- context 'executes normally' do
55
- it 'sets start_time and job_status' do
56
- lower_time_bound = Time.zone.now
57
- subject.in_progress!
58
- upper_time_bound = Time.zone.now
59
-
60
- expect(subject.start_time).to be_between(lower_time_bound, upper_time_bound)
61
- expect(subject.job_status).to eq 'in_progress'
62
- end
63
- end
64
-
65
- context 'encounters an unexpected error' do
66
- it "traps the exception and logs an error so that the caller doesn't have to deal with it" do
67
- expect(subject).to receive(:'start_time=').and_raise StandardError.new # try to blow up the in_progress! call
68
- expect(Rails.logger).to receive(:error) do |arg|
69
- expect(arg).to match(/^unexpected error updating log entry to :in_progress from \[".*reindexing_log_entry.rb/)
70
- end
71
-
72
- expect { subject.in_progress! }.not_to raise_error
73
- end
74
- end
75
- end
76
-
77
- describe '#succeeded!' do
78
- subject { FactoryBot.build(:in_progress_reindexing_log_entry) }
79
-
80
- context 'executes normally' do
81
- it 'sets end_time and job_status' do
82
- lower_time_bound = Time.zone.now
83
- subject.succeeded!
84
- upper_time_bound = Time.zone.now
85
-
86
- expect(subject.end_time).to be_between(lower_time_bound, upper_time_bound)
87
- expect(subject.job_status).to eq 'succeeded'
88
- end
89
- end
90
-
91
- context 'encounters an unexpected error' do
92
- it "traps the exception and logs an error so that the caller doesn't have to deal with it" do
93
- expect(subject).to receive(:'end_time=').and_raise StandardError.new # try to blow up the succeeded! call
94
- expect(Rails.logger).to receive(:error) do |arg|
95
- expect(arg).to match(/^unexpected error updating log entry to :succeeded from \[".*reindexing_log_entry.rb/)
96
- end
97
-
98
- expect { subject.succeeded! }.not_to raise_error
99
- end
100
- end
101
- end
102
-
103
- describe '#failed!' do
104
- subject { FactoryBot.build(:in_progress_reindexing_log_entry) }
105
-
106
- context 'executes normally' do
107
- it 'sets end_time and job_status' do
108
- lower_time_bound = Time.zone.now
109
- subject.failed!
110
- upper_time_bound = Time.zone.now
111
-
112
- expect(subject.end_time).to be_between(lower_time_bound, upper_time_bound)
113
- expect(subject.job_status).to eq 'failed'
114
- end
115
- end
116
-
117
- context 'encounters an unexpected error' do
118
- it "traps the exception and logs an error so that the caller doesn't have to deal with it" do
119
- expect(subject).to receive(:'end_time=').and_raise StandardError.new # try to blow up the failed! call
120
- expect(Rails.logger).to receive(:error) do |arg|
121
- expect(arg).to match(/^unexpected error updating log entry to :failed from \[".*reindexing_log_entry.rb/)
122
- end
123
-
124
- expect { subject.failed! }.not_to raise_error
125
- end
126
- end
127
- end
128
- end
129
- end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- describe Spotlight::Resources::OpenGraph, type: :model do
4
- class TestDocBuilder < Spotlight::SolrDocumentBuilder
5
- def to_solr
6
- super.merge(resource.opengraph_properties)
7
- end
8
- end
9
-
10
- class TestResource < Spotlight::Resource
11
- self.document_builder_class = TestDocBuilder
12
- include Spotlight::Resources::OpenGraph
13
- end
14
-
15
- subject { TestResource.new url: 'info:url' }
16
-
17
- let(:exhibit) { double(solr_data: {}, blacklight_config: Blacklight::Configuration.new) }
18
-
19
- describe '#to_solr' do
20
- before do
21
- allow(subject).to receive_messages id: 15, opengraph_properties: {}, exhibit: exhibit, persisted?: true
22
- end
23
-
24
- let(:solr_doc) { subject.document_builder.to_solr }
25
-
26
- it 'includes this record id' do
27
- expect(solr_doc).to include spotlight_resource_id_ssim: subject.to_global_id.to_s
28
- end
29
-
30
- it 'includes opengraph properties' do
31
- allow(subject).to receive_messages opengraph_properties: { a: 1, b: 2 }
32
-
33
- expect(solr_doc).to include a: 1, b: 2
34
- end
35
- end
36
-
37
- describe '#opengraph_properties' do
38
- it 'maps opengraph properties to solr fields' do
39
- allow(subject).to receive_messages opengraph: { 'og_title' => 'title', 'og_description' => 'description' }
40
- expect(subject.opengraph_properties).to include 'og_title_tesim' => 'title', 'og_description_tesim' => 'description'
41
- end
42
- end
43
-
44
- describe '#opengraph' do
45
- let(:body) do
46
- Nokogiri::HTML.parse <<-EOF
47
- <html><head>
48
- <meta property="og:title" content="The Ground Truth: The Human Cost of War"/>
49
- <meta property="og:description" content="The Ground Truth: The Human Cost of War is our soldiers' perspective of the Iraq War"/>
50
- <meta property="og:type" content="video.movie"/>
51
- <meta property="og:site_name" content="Internet Archive"/>
52
- <meta property="og:video" content="https://archive.org/download/Ground_Truth/GroundTruth1_bb_512kb.mp4"/>
53
- <meta property="og:video:width" content="320"/>
54
- <meta property="og:video:height" content="240"/>
55
- </head></html>
56
- EOF
57
- end
58
-
59
- it 'extracts opengraph <meta> tags' do
60
- allow(subject).to receive_messages(body: body)
61
- expect(subject.opengraph).to include 'og:title', 'og:description', 'og:type', 'og:type', 'og:site_name', 'og:video', 'og:video:width', 'og:video:height'
62
- expect(subject.opengraph['og:title']).to eq 'The Ground Truth: The Human Cost of War'
63
- end
64
- end
65
- end