blacklight-spotlight 3.0.0.rc3 → 3.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/app/assets/javascripts/spotlight/admin/reindex_monitor.js +1 -0
  3. data/app/assets/stylesheets/spotlight/browse_group_categories_block.scss +23 -0
  4. data/app/controllers/spotlight/catalog_controller.rb +4 -1
  5. data/app/controllers/spotlight/dashboards_controller.rb +1 -1
  6. data/app/controllers/spotlight/exhibits_controller.rb +1 -1
  7. data/app/helpers/spotlight/application_helper.rb +19 -0
  8. data/app/helpers/spotlight/pages_helper.rb +1 -1
  9. data/app/jobs/concerns/spotlight/job_tracking.rb +47 -0
  10. data/app/jobs/concerns/spotlight/limit_concurrency.rb +33 -0
  11. data/app/jobs/spotlight/add_uploads_from_csv.rb +6 -3
  12. data/app/jobs/spotlight/application_job.rb +8 -0
  13. data/app/jobs/spotlight/cleanup_job_trackers_job.rb +13 -0
  14. data/app/jobs/spotlight/default_thumbnail_job.rb +1 -3
  15. data/app/jobs/spotlight/reindex_exhibit_job.rb +36 -0
  16. data/app/jobs/spotlight/reindex_job.rb +49 -41
  17. data/app/jobs/spotlight/rename_sidecar_field_job.rb +2 -2
  18. data/app/jobs/spotlight/update_job_trackers_job.rb +20 -0
  19. data/app/models/concerns/spotlight/user.rb +2 -1
  20. data/app/models/spotlight/event.rb +13 -0
  21. data/app/models/spotlight/exhibit.rb +4 -14
  22. data/app/models/spotlight/job_tracker.rb +105 -0
  23. data/app/models/spotlight/reindex_progress.rb +44 -27
  24. data/app/models/spotlight/resource.rb +24 -58
  25. data/app/models/spotlight/resources/iiif_harvester.rb +10 -1
  26. data/app/models/spotlight/resources/iiif_manifest.rb +3 -1
  27. data/app/models/spotlight/resources/iiif_service.rb +1 -1
  28. data/app/models/spotlight/resources/json_upload.rb +12 -0
  29. data/app/models/spotlight/resources/upload.rb +25 -2
  30. data/app/models/spotlight/solr_document_sidecar.rb +2 -1
  31. data/app/services/spotlight/etl.rb +7 -0
  32. data/app/services/spotlight/etl/context.rb +52 -0
  33. data/app/services/spotlight/etl/executor.rb +194 -0
  34. data/app/services/spotlight/etl/loaders.rb +12 -0
  35. data/app/services/spotlight/etl/pipeline.rb +81 -0
  36. data/app/services/spotlight/etl/solr_loader.rb +96 -0
  37. data/app/services/spotlight/etl/sources.rb +25 -0
  38. data/app/services/spotlight/etl/step.rb +82 -0
  39. data/app/services/spotlight/etl/transforms.rb +64 -0
  40. data/app/services/spotlight/validity_checker.rb +5 -5
  41. data/app/views/spotlight/dashboards/_reindexing_activity.html.erb +6 -6
  42. data/app/views/spotlight/shared/_locale_picker.html.erb +1 -1
  43. data/app/views/spotlight/sir_trevor/blocks/_browse_group_categories_block.html.erb +4 -3
  44. data/config/locales/spotlight.ar.yml +11 -1
  45. data/config/locales/spotlight.en.yml +3 -2
  46. data/db/migrate/20210122082032_create_job_trackers.rb +22 -0
  47. data/db/migrate/20210126123041_create_events.rb +15 -0
  48. data/lib/generators/spotlight/scaffold_resource_generator.rb +5 -13
  49. data/lib/spotlight/engine.rb +8 -1
  50. data/lib/spotlight/version.rb +1 -1
  51. data/spec/controllers/spotlight/catalog_controller_spec.rb +3 -1
  52. data/spec/examples.txt +1448 -1437
  53. data/spec/factories/job_trackers.rb +9 -0
  54. data/spec/features/add_items_spec.rb +9 -4
  55. data/spec/features/javascript/reindex_monitor_spec.rb +1 -1
  56. data/spec/features/site_users_management_spec.rb +4 -4
  57. data/spec/helpers/spotlight/pages_helper_spec.rb +8 -0
  58. data/spec/jobs/spotlight/reindex_exhibit_job_spec.rb +43 -0
  59. data/spec/jobs/spotlight/reindex_job_spec.rb +30 -59
  60. data/spec/models/spotlight/exhibit_spec.rb +3 -57
  61. data/spec/models/spotlight/reindex_progress_spec.rb +89 -87
  62. data/spec/models/spotlight/resource_spec.rb +69 -90
  63. data/spec/models/spotlight/resources/iiif_harvester_spec.rb +9 -10
  64. data/spec/models/spotlight/solr_document_sidecar_spec.rb +1 -0
  65. data/spec/services/spotlight/etl/context_spec.rb +66 -0
  66. data/spec/services/spotlight/etl/executor_spec.rb +149 -0
  67. data/spec/services/spotlight/etl/pipeline_spec.rb +22 -0
  68. data/spec/services/spotlight/etl/solr_loader_spec.rb +76 -0
  69. data/spec/services/spotlight/etl/step_spec.rb +70 -0
  70. data/spec/spec_helper.rb +2 -5
  71. data/spec/views/spotlight/dashboards/_reindexing_activity.html.erb_spec.rb +22 -19
  72. metadata +55 -15
  73. data/app/models/concerns/spotlight/resources/open_graph.rb +0 -36
  74. data/app/models/spotlight/reindexing_log_entry.rb +0 -42
  75. data/app/services/spotlight/resources/iiif_builder.rb +0 -19
  76. data/app/services/spotlight/solr_document_builder.rb +0 -77
  77. data/app/services/spotlight/upload_solr_document_builder.rb +0 -57
  78. data/spec/factories/reindexing_log_entries.rb +0 -54
  79. data/spec/models/spotlight/reindexing_log_entry_spec.rb +0 -129
  80. data/spec/models/spotlight/resources/open_graph_spec.rb +0 -65
  81. data/spec/services/spotlight/solr_document_builder_spec.rb +0 -66
@@ -1,126 +1,105 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  describe Spotlight::Resource, type: :model do
4
- before do
5
- allow_any_instance_of(described_class).to receive(:update_index)
6
- end
4
+ subject(:resource) { described_class.create(id: 123, exhibit: exhibit) }
7
5
 
8
6
  let(:exhibit) { FactoryBot.create(:exhibit) }
9
7
 
10
- describe '#reindex' do
11
- context 'with a provider that generates ids' do
12
- subject do
13
- Class.new(described_class).new(exhibit: exhibit)
14
- end
8
+ describe '#save_and_index' do
9
+ before do
10
+ allow(subject).to receive(:save)
11
+ allow(subject).to receive(:reindex_later)
12
+ end
15
13
 
16
- let(:solr_response) { { id: 123 } }
14
+ it 'saves the object' do
15
+ expect(subject).to receive(:save).and_return(true)
16
+ subject.save_and_index
17
+ end
17
18
 
18
- before do
19
- SolrDocument.new(id: 123).sidecars.create!(exhibit: exhibit, data: { document_data: true })
20
- allow(subject).to receive_messages(to_global_id: '')
19
+ it 'reindexes after save' do
20
+ expect(subject).to receive(:save).and_return(true)
21
+ expect(subject).to receive(:reindex_later)
22
+ subject.save_and_index
23
+ end
21
24
 
22
- allow(subject.document_builder).to receive(:to_solr).and_return(solr_response)
25
+ context 'if the save fails' do
26
+ it 'does not reindex' do
27
+ expect(subject).to receive(:save).and_return(false)
28
+ expect(subject).not_to receive(:reindex_later)
29
+ subject.save_and_index
23
30
  end
31
+ end
32
+ end
24
33
 
25
- it 'includes exhibit document-specific data' do
26
- allow(subject.send(:blacklight_solr)).to receive(:update) do |options|
27
- data = JSON.parse(options[:data], symbolize_names: true)
28
-
29
- expect(data.length).to eq 1
30
- doc = data.first
31
-
32
- break if doc.first == :commit
33
-
34
- expect(doc).to include document_data: true
35
- end
36
-
37
- subject.reindex
38
- end
34
+ describe '#reindex' do
35
+ before do
36
+ # sneak some data into the pipeline
37
+ subject.indexing_pipeline.transforms = [->(*) { { id: '123' } }] + subject.indexing_pipeline.transforms
38
+ end
39
39
 
40
- context 'when a document does not have an identifier' do
41
- let(:solr_response) { { other_field: 'Content' } }
40
+ let(:indexed_document) do
41
+ result = nil
42
42
 
43
- it 'is not indexed (but a commit can be sent)' do
44
- allow(subject.send(:blacklight_solr)).to receive(:commit)
45
- expect(subject.send(:blacklight_solr)).not_to receive(:update)
43
+ subject.reindex(**index_args) do |data, *|
44
+ result = data
46
45
 
47
- subject.reindex
48
- end
46
+ # skip actually indexing the document into the solr index
47
+ throw :skip
49
48
  end
50
49
 
51
- context 'reindexing_log_entry is provided' do
52
- before do
53
- allow(subject.send(:blacklight_solr)).to receive(:update)
54
- end
55
-
56
- it 'updates the count of reindexed items in the log entry' do
57
- reindexing_log_entry = double(Spotlight::ReindexingLogEntry)
58
- expect(reindexing_log_entry).to receive(:update).with(items_reindexed_count: 1)
59
- subject.reindex reindexing_log_entry
60
- end
61
- end
50
+ result
51
+ end
62
52
 
63
- context 'when the index is not writable' do
64
- before do
65
- allow(Spotlight::Engine.config).to receive_messages(writable_index: false)
66
- end
53
+ let(:index_args) { {} }
67
54
 
68
- it "doesn't write" do
69
- expect(subject.send(:blacklight_solr)).not_to receive(:update)
70
- subject.reindex
71
- end
72
- end
55
+ it 'returns the number of items indexed' do
56
+ expect(subject.reindex { |*| throw :skip }).to eq 1
57
+ end
73
58
 
74
- context 'with a resource that creates multiple solr documents' do
75
- let(:solr_response) { [{ id: 1 }, { id: 2 }] }
59
+ it 'applies exhibit-specific metadata from the sidecar' do
60
+ expect(indexed_document).to include Spotlight::SolrDocumentSidecar.new(document: SolrDocument.new(id: '123'), exhibit: exhibit).to_solr
61
+ end
76
62
 
77
- before do
78
- allow(subject.send(:blacklight_solr)).to receive(:update)
79
- end
63
+ it 'includes metata from each sidecar' do
64
+ a = Spotlight::SolrDocumentSidecar.create(document: SolrDocument.new(id: '123'), exhibit: exhibit)
65
+ b = Spotlight::SolrDocumentSidecar.create(document: SolrDocument.new(id: '123'), exhibit: FactoryBot.build(:exhibit))
80
66
 
81
- it 'returns the number of indexed objects' do
82
- expect(subject.reindex).to eq 2
83
- end
67
+ expect(indexed_document).to include(a.to_solr).and(include(b.to_solr))
68
+ end
84
69
 
85
- it 'triggers a solr commit' do
86
- expect(subject.send(:blacklight_solr)).to receive(:commit).once
70
+ it 'persists a sidecar document' do
71
+ expect { indexed_document }.to change(Spotlight::SolrDocumentSidecar, :count).by(1)
87
72
 
88
- subject.reindex
89
- end
73
+ expect(Spotlight::SolrDocumentSidecar.last).to have_attributes(document_id: '123', exhibit: exhibit)
74
+ end
90
75
 
91
- it 'touches the exhibit to clear any caches' do
92
- allow(subject.exhibit).to receive(:touch)
76
+ it 'applies application metadata' do
77
+ expect(indexed_document).to include(spotlight_resource_id_ssim: resource.to_global_id.to_s, spotlight_resource_type_ssim: 'spotlight/resources')
78
+ end
93
79
 
94
- subject.reindex
80
+ context 'with some provided metadata' do
81
+ let(:index_args) { { additional_metadata: { a: 1 } } }
95
82
 
96
- expect(subject.exhibit).to have_received(:touch)
97
- end
83
+ it 'applies externally provided metadata' do
84
+ expect(indexed_document).to include a: 1
98
85
  end
99
86
  end
100
- end
101
87
 
102
- describe '#save_and_index' do
103
- before do
104
- allow(subject.send(:blacklight_solr)).to receive(:update)
105
- allow(subject).to receive(:reindex_later)
106
- end
88
+ it 'touches the exhibit to bust any caches' do
89
+ allow(exhibit).to receive(:touch)
107
90
 
108
- it 'saves the object' do
109
- expect(subject).to receive(:save).and_return(true)
110
- subject.save_and_index
111
- end
91
+ indexed_document
112
92
 
113
- it 'reindexes after save' do
114
- expect(subject).to receive(:save).and_return(true)
115
- expect(subject).to receive(:reindex_later)
116
- subject.save_and_index
93
+ expect(exhibit).to have_received(:touch)
117
94
  end
118
95
 
119
- context 'if the save fails' do
120
- it 'does not reindex' do
121
- expect(subject).to receive(:save).and_return(false)
122
- expect(subject).not_to receive(:reindex_later)
123
- subject.save_and_index
96
+ context 'with touch: false' do
97
+ it 'does not touch the exhibit' do
98
+ allow(exhibit).to receive(:touch)
99
+
100
+ expect(subject.reindex(touch: false) { |*| throw :skip }).to eq 1
101
+
102
+ expect(exhibit).not_to have_received(:touch)
124
103
  end
125
104
  end
126
105
  end
@@ -3,12 +3,11 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Spotlight::Resources::IiifHarvester do
6
+ subject(:harvester) { described_class.create(exhibit_id: exhibit.id, url: url) }
7
+
6
8
  let(:exhibit) { FactoryBot.create(:exhibit) }
7
- let(:harvester) { described_class.create(exhibit_id: exhibit.id, url: url) }
8
9
 
9
10
  describe 'Validation' do
10
- subject { harvester }
11
-
12
11
  context 'when given an invalid URL' do
13
12
  before do
14
13
  stub_request(:head, 'http://example.com').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
@@ -38,16 +37,16 @@ describe Spotlight::Resources::IiifHarvester do
38
37
  end
39
38
  end
40
39
 
41
- describe '#documents_to_index' do
42
- subject { harvester.document_builder }
43
-
40
+ describe '#reindex' do
44
41
  let(:url) { 'uri://for-top-level-collection' }
45
42
 
46
- before { stub_default_collection }
43
+ before do
44
+ stub_default_collection
45
+ allow(Spotlight::Engine.config).to receive(:writable_index).and_return(false)
46
+ end
47
47
 
48
- it 'returns an Enumerator of all the solr documents' do
49
- expect(subject.documents_to_index).to be_a(Enumerator)
50
- expect(subject.documents_to_index.count).to eq 8
48
+ it 'indexes all the solr documents' do
49
+ expect(subject.reindex).to eq 8
51
50
  end
52
51
  end
53
52
  end
@@ -20,6 +20,7 @@ describe Spotlight::SolrDocumentSidecar, type: :model do
20
20
  context 'with an uploaded item' do
21
21
  before do
22
22
  subject.data = { 'configured_fields' => { 'some_configured_field' => 'some value' } }
23
+ subject.resource = Spotlight::Resources::Upload.new
23
24
  allow(Spotlight::Resources::Upload).to receive(:fields).with(exhibit).and_return([uploaded_field_config])
24
25
  end
25
26
 
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ describe Spotlight::Etl::Context do
4
+ subject(:context) { described_class.new(resource, **context_args) }
5
+
6
+ let(:resource) { instance_double(Spotlight::Resource, id: 123, document_model: SolrDocument) }
7
+ let(:context_args) { {} }
8
+
9
+ describe '#resource' do
10
+ it 'extracts the Spotlight::Resource from the argument list' do
11
+ expect(context.resource).to eq resource
12
+ end
13
+ end
14
+
15
+ describe '#unique_key' do
16
+ let(:data) { { id: '123' } }
17
+
18
+ it 'tries to get a usable unique key for a transformed document' do
19
+ expect(context.unique_key(data)).to eq '123'
20
+ end
21
+ end
22
+
23
+ describe '#on_error' do
24
+ context 'with a class-level handler' do
25
+ let(:handler) { instance_double(Proc, call: nil) }
26
+
27
+ it 'calls the class-level handler' do
28
+ allow(described_class).to receive(:error_reporter).and_return(handler)
29
+
30
+ subject.on_error(nil, nil, {})
31
+
32
+ expect(handler).to have_received(:call)
33
+ end
34
+ end
35
+
36
+ context 'with an instance-level handler' do
37
+ let(:context_args) { { on_error: handler } }
38
+ let(:handler) { instance_double(Proc, call: nil) }
39
+
40
+ it 'calls the instance-level handler' do
41
+ subject.on_error(nil, nil, {})
42
+
43
+ expect(handler).to have_received(:call)
44
+ end
45
+ end
46
+
47
+ context 'with :log' do
48
+ it 'logs an error' do
49
+ allow(Rails.logger).to receive(:error)
50
+
51
+ subject.on_error(nil, nil, {})
52
+
53
+ expect(Rails.logger).to have_received(:error).with(/Pipeline error/)
54
+ end
55
+ end
56
+
57
+ context 'with :exception' do
58
+ let(:context_args) { { on_error: :exception } }
59
+ let(:e) { StandardError.new('asdf') }
60
+
61
+ it 'raises an exception' do
62
+ expect { subject.on_error(nil, e, {}) }.to raise_exception(e)
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ describe Spotlight::Etl::Executor do
4
+ subject(:executor) { described_class.new(pipeline, context) }
5
+
6
+ let(:pipeline) do
7
+ Spotlight::Etl::Pipeline.new do |pipeline|
8
+ pipeline.sources = [Spotlight::Etl::Sources::IdentitySource]
9
+ pipeline.transforms = [Spotlight::Etl::Transforms::IdentityTransform]
10
+ pipeline.loaders = [->(result, *) { arr << result }]
11
+ end
12
+ end
13
+ let(:context) { Spotlight::Etl::Context.new(resource) }
14
+ let(:resource) { Spotlight::Resource.new }
15
+ let(:arr) { [] }
16
+
17
+ describe '#call' do
18
+ it 'provides the context to the sources' do
19
+ pipeline.sources = [->(context, *) { [{ context: context }] }]
20
+
21
+ executor.call
22
+
23
+ expect(arr).to eq [{ context: context }]
24
+ end
25
+
26
+ it 'makes the current source available to transforms' do
27
+ pipeline.transforms = [->(_data, pipeline) { pipeline.source }]
28
+
29
+ executor.call
30
+
31
+ expect(arr).to eq [resource]
32
+ end
33
+
34
+ it 'chains the calls to the transforms' do
35
+ pipeline.transforms = [
36
+ ->(data, *) { data.merge(a: 1) },
37
+ ->(data, *) { data.merge(b: 2) },
38
+ ->(data, *) { data.merge(c: 3) }
39
+ ]
40
+
41
+ executor.call
42
+
43
+ expect(arr).to eq [{ a: 1, b: 2, c: 3 }]
44
+ end
45
+
46
+ it 'caches the step instances across sources' do
47
+ pipeline.sources = [->(*) { [1, 2, 3] }]
48
+
49
+ pipeline.transforms = [
50
+ Class.new do
51
+ def initialize
52
+ @sum = 0
53
+ end
54
+
55
+ def call(data, pipeline)
56
+ @sum += pipeline.source
57
+
58
+ data.merge(sum: @sum)
59
+ end
60
+ end
61
+ ]
62
+
63
+ executor.call
64
+
65
+ expect(arr).to eq [{ sum: 1 }, { sum: 3 }, { sum: 6 }]
66
+ end
67
+
68
+ it 'loads resulting documents' do
69
+ loader = Spotlight::Etl::SolrLoader.new
70
+ pipeline.sources = [->(*) { [{}, {}, {}] }]
71
+ pipeline.loaders = [loader]
72
+
73
+ allow(loader).to receive(:call)
74
+
75
+ executor.call
76
+
77
+ expect(loader).to have_received(:call).exactly(3).times
78
+ end
79
+
80
+ it 'allows steps to throw :skip to skip the current source' do
81
+ pipeline.pre_processes = [
82
+ ->(*) { throw(:skip) }
83
+ ]
84
+
85
+ executor.call
86
+
87
+ expect(arr).to be_blank
88
+ end
89
+
90
+ it 'forwards errors to the context error handler' do
91
+ e = StandardError.new
92
+ pipeline.transforms = [->(*) { raise e }]
93
+
94
+ allow(context).to receive(:on_error)
95
+ executor.call
96
+
97
+ expect(context).to have_received(:on_error).with(executor, e, {})
98
+ end
99
+
100
+ context 'after processing the data' do
101
+ it 'calls finalize on the loaders' do
102
+ loader = Spotlight::Etl::SolrLoader.new
103
+ pipeline.sources = [->(*) { [{}, {}, {}] }]
104
+ pipeline.loaders = [loader]
105
+
106
+ allow(loader).to receive(:call)
107
+ allow(loader).to receive(:finalize)
108
+
109
+ executor.call
110
+
111
+ expect(loader).to have_received(:call).exactly(3).times
112
+ expect(loader).to have_received(:finalize).once
113
+ end
114
+
115
+ it 'resets the step cache' do
116
+ pipeline.transforms = [
117
+ Class.new do
118
+ def initialize
119
+ @count = 0
120
+ end
121
+
122
+ def call(data, *)
123
+ @count += 1
124
+
125
+ data.merge(count: @count)
126
+ end
127
+ end
128
+ ]
129
+
130
+ executor.call
131
+ executor.call
132
+
133
+ expect(arr).to eq [{ count: 1 }, { count: 1 }]
134
+ end
135
+ end
136
+ end
137
+
138
+ describe '#estimated_size' do
139
+ let(:source) { ->(*) { [1, 2, 3, 4] } }
140
+
141
+ before do
142
+ pipeline.sources = [source]
143
+ end
144
+
145
+ it 'estimates the final size from the sources' do
146
+ expect(executor.estimated_size).to eq 4
147
+ end
148
+ end
149
+ end