spotlight-dor-resources 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.hound.yml +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.rubocop_todo.yml +191 -0
  6. data/.travis.yml +17 -0
  7. data/Gemfile +32 -0
  8. data/LICENSE.txt +13 -0
  9. data/README.md +65 -0
  10. data/Rakefile +48 -0
  11. data/app/models/spotlight/resources/dor_resource.rb +17 -0
  12. data/app/models/spotlight/resources/harvestdor.rb +4 -0
  13. data/app/models/spotlight/resources/purl.rb +14 -0
  14. data/app/models/spotlight/resources/searchworks.rb +15 -0
  15. data/lib/spotlight/dor/indexer.rb +160 -0
  16. data/lib/spotlight/dor/resources.rb +16 -0
  17. data/lib/spotlight/dor/resources/engine.rb +13 -0
  18. data/lib/spotlight/dor/resources/version.rb +7 -0
  19. data/solr_conf/conf/schema.xml +346 -0
  20. data/solr_conf/conf/solrconfig.xml +180 -0
  21. data/spec/integration/gdor_integration_spec.rb +30 -0
  22. data/spec/integration/indexer_integration_spec.rb +28 -0
  23. data/spec/models/spotlight/resources/purl_spec.rb +115 -0
  24. data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
  25. data/spec/spec_helper.rb +60 -0
  26. data/spec/test_app_templates/catalog_controller.rb +96 -0
  27. data/spec/test_app_templates/gdor.yml +9 -0
  28. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  29. data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
  30. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
  31. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
  32. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
  33. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
  34. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
  35. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
  36. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
  37. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
  38. data/spotlight-dor-resources.gemspec +37 -0
  39. metadata +336 -0
@@ -0,0 +1,180 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <config>
3
+ <!-- NOTE: Various comments and unused configuration possibilities have been purged
4
+ from this file. For details, please refer to http://wiki.apache.org/solr/SolrConfigXml
5
+ as well as the default solrconfig file included with Solr. -->
6
+
7
+ <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
8
+
9
+ <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
10
+
11
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
12
+
13
+ <updateHandler class="solr.DirectUpdateHandler2">
14
+ <updateLog>
15
+ <str name="dir">${solr.core0.data.dir:}</str>
16
+ </updateLog>
17
+ </updateHandler>
18
+
19
+ <!-- solr lib dirs -->
20
+ <lib dir="../lib/contrib/analysis-extras/lib" />
21
+ <lib dir="../lib/contrib/analysis-extras/lucene-libs" />
22
+
23
+ <dataDir>${solr.data.dir:}</dataDir>
24
+
25
+ <requestHandler name="search" class="solr.SearchHandler" default="true">
26
+ <!-- Default values for query parameters can be specified here; they
27
+ will be overridden by parameters in the request.
28
+ -->
29
+ <lst name="defaults">
30
+ <str name="defType">edismax</str>
31
+ <str name="echoParams">explicit</str>
32
+ <str name="q.alt">*:*</str>
33
+ <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
34
+ <int name="qs">1</int>
35
+ <int name="ps">2</int>
36
+ <float name="tie">0.01</float>
37
+ <!-- These qf and pf values are used by default unless otherwise specified by
38
+ the client. The default blacklight_config will use these for the
39
+ "keywords" search. See the author_qf/author_pf, title_qf, etc.
40
+ below, which the default blacklight_config will specify for
41
+ those searches. You may also be interested in:
42
+ http://wiki.apache.org/solr/LocalParams
43
+ -->
44
+ <str name="qf">
45
+ id
46
+ full_title_tesim
47
+ short_title_tesim
48
+ alternative_title_tesim
49
+ active_fedora_model_ssi
50
+ title_tesim
51
+ author_tesim
52
+ subject_tesim
53
+ all_text_timv
54
+ </str>
55
+ <str name="pf">
56
+ all_text_timv^10
57
+ </str>
58
+
59
+ <str name="author_qf">
60
+ author_tesim
61
+ </str>
62
+ <str name="author_pf">
63
+ </str>
64
+ <str name="title_qf">
65
+ title_tesim
66
+ full_title_tesim
67
+ short_title_tesim
68
+ alternative_title_tesim
69
+ </str>
70
+ <str name="title_pf">
71
+ </str>
72
+ <str name="subject_qf">
73
+ subject_tesim
74
+ </str>
75
+ <str name="subject_pf">
76
+ </str>
77
+
78
+ <str name="fl">
79
+ *,
80
+ score
81
+ </str>
82
+
83
+ <str name="facet">true</str>
84
+ <str name="facet.mincount">1</str>
85
+ <str name="facet.limit">10</str>
86
+ <str name="facet.field">active_fedora_model_ssi</str>
87
+ <str name="facet.field">subject_ssim</str>
88
+
89
+ <str name="spellcheck">true</str>
90
+ <str name="spellcheck.dictionary">default</str>
91
+ <str name="spellcheck.onlyMorePopular">true</str>
92
+ <str name="spellcheck.extendedResults">true</str>
93
+ <str name="spellcheck.collate">false</str>
94
+ <str name="spellcheck.count">5</str>
95
+
96
+ </lst>
97
+ <arr name="last-components">
98
+ <str>spellcheck</str>
99
+ </arr>
100
+ </requestHandler>
101
+
102
+ <requestHandler name="permissions" class="solr.SearchHandler" >
103
+ <lst name="defaults">
104
+ <str name="facet">off</str>
105
+ <str name="echoParams">all</str>
106
+ <str name="rows">1</str>
107
+ <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
108
+ <str name="fl">
109
+ id,
110
+ access_ssim,
111
+ discover_access_group_ssim,discover_access_person_ssim,
112
+ read_access_group_ssim,read_access_person_ssim,
113
+ edit_access_group_ssim,edit_access_person_ssim,
114
+ depositor_ti,
115
+ embargo_release_date_dtsi
116
+ inheritable_access_ssim,
117
+ inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim,
118
+ inheritable_read_access_group_ssim,inheritable_read_access_person_ssim,
119
+ inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim,
120
+ inheritable_embargo_release_date_dtsi
121
+ </str>
122
+ </lst>
123
+ </requestHandler>
124
+
125
+ <requestHandler name="standard" class="solr.SearchHandler">
126
+ <lst name="defaults">
127
+ <str name="echoParams">explicit</str>
128
+ <str name="defType">lucene</str>
129
+ </lst>
130
+ </requestHandler>
131
+
132
+ <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
133
+ <requestHandler name="document" class="solr.SearchHandler" >
134
+ <lst name="defaults">
135
+ <str name="echoParams">all</str>
136
+ <str name="fl">*</str>
137
+ <str name="rows">1</str>
138
+ <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
139
+ </lst>
140
+ </requestHandler>
141
+
142
+
143
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
144
+ <str name="queryAnalyzerFieldType">textSpell</str>
145
+ <!-- Multiple "Spell Checkers" can be declared and used by this component
146
+ (e.g. for a title_spell field).
147
+ -->
148
+ <lst name="spellchecker">
149
+ <str name="name">default</str>
150
+ <str name="field">spell</str>
151
+ <str name="spellcheckIndexDir">./spell</str>
152
+ <str name="buildOnOptimize">true</str>
153
+ </lst>
154
+ </searchComponent>
155
+
156
+ <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
157
+
158
+ <requestDispatcher handleSelect="true" >
159
+ <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
160
+ </requestDispatcher>
161
+
162
+ <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
163
+ <requestHandler name="/update" class="solr.UpdateRequestHandler" />
164
+ <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
165
+
166
+ <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
167
+ <lst name="invariants">
168
+ <str name="q">solrpingquery</str>
169
+ </lst>
170
+ <lst name="defaults">
171
+ <str name="echoParams">all</str>
172
+ </lst>
173
+ </requestHandler>
174
+
175
+ <!-- config for the admin interface -->
176
+ <admin>
177
+ <defaultQuery>search</defaultQuery>
178
+ </admin>
179
+
180
+ </config>
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ describe "gdor indexing integration test", :vcr do
4
+ let :exhibit do
5
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
6
+ end
7
+
8
+ subject do
9
+ r = Spotlight::Resources::Purl.new(url: "https://purl.stanford.edu/xf680rd3068")
10
+ allow(r).to receive(:to_global_id).and_return('x')
11
+ allow(r).to receive(:exhibit).and_return(exhibit)
12
+ r.to_solr.first
13
+ end
14
+
15
+ it "should have a doc id" do
16
+ expect(subject[:id]).to eq "xf680rd3068"
17
+ end
18
+
19
+ it "should have the gdor data" do
20
+ expect(subject).to include :collection, :modsxml, :url_fulltext
21
+ end
22
+
23
+ it "should have spotlight data" do
24
+ expect(subject).to include :spotlight_resource_id_ssim
25
+ end
26
+
27
+ it "should have exhibit-specific indexing" do
28
+ expect(subject).to include "full_image_url_ssm"
29
+ end
30
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'indexer integration tests', :vcr do
4
+ describe 'donor tags' do
5
+ it 'solr_doc has donor_tags_ssim field when <note displayLabel="Donor tags"> is in MODS' do
6
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/vw282gv1740') # Feigenbaum PURL with donor tags
7
+ solr_doc = r.to_solr.first
8
+ expect(solr_doc['donor_tags_ssim']).to eq ['Knowledge Systems Laboratory', 'medical applications', 'Publishing', 'Stanford', 'Stanford Computer Science Department']
9
+ end
10
+ it 'no donor_tags_ssim field in solr doc when <note displayLabel="Donor tags"> not in MODS' do
11
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/bd955gr0721') # Revs PURL without donor tags
12
+ solr_doc = r.to_solr.first
13
+ expect(solr_doc['donor_tags_ssim']).to be_nil
14
+ end
15
+ end
16
+ describe 'genre' do
17
+ it 'solr_doc has genre_ssim field when <genre> in MODS' do
18
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/vw282gv1740') # Feigenbaum PURL with genre
19
+ solr_doc = r.to_solr.first
20
+ expect(solr_doc['genre_ssim']).to eq ['manuscripts for publication']
21
+ end
22
+ it 'no genre_ssim field when <genre> not in MODS' do
23
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/pz816zm7931') # Road & Track PURL without genre
24
+ solr_doc = r.to_solr.first
25
+ expect(solr_doc['genre_ssim']).to be_nil
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ describe Spotlight::Resources::Purl do
4
+ let :exhibit do
5
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
6
+ end
7
+ let :blacklight_solr do
8
+ double
9
+ end
10
+
11
+ subject { Spotlight::Resources::Purl.new url: "http://purl.stanford.edu/xf680rd3068" }
12
+
13
+ before do
14
+ allow(subject).to receive(:exhibit).and_return(exhibit)
15
+ allow(subject).to receive(:blacklight_solr).and_return(blacklight_solr)
16
+ allow(subject).to receive(:to_global_id).and_return('x')
17
+ end
18
+
19
+ describe ".can_provide?" do
20
+ subject { Spotlight::Resources::Purl }
21
+ it "should be able to provide any purl URL" do
22
+ expect(subject.can_provide?(double(url: "https://purl.stanford.edu/xyz"))).to eq true
23
+ expect(subject.can_provide?(double(url: "http://purl.stanford.edu/xyz"))).to eq true
24
+ end
25
+ end
26
+
27
+ describe "#doc_id" do
28
+ it "should be able to extract DRUIDs from a PURL url" do
29
+ subject.url = "http://purl.stanford.edu/xyz"
30
+ expect(subject.doc_id).to eq "xyz"
31
+ end
32
+
33
+ it "should be able to extract DRUIDs from a PURL format url" do
34
+ subject.url = "http://purl.stanford.edu/xf680rd3068.xml"
35
+ expect(subject.doc_id).to eq "xf680rd3068"
36
+ end
37
+
38
+ it "should be able to extract DRUIDs from a PURL's viewer url" do
39
+ subject.url = "http://purl.stanford.edu/xf680rd3068#image/1/small"
40
+ expect(subject.doc_id).to eq "xf680rd3068"
41
+ end
42
+ end
43
+
44
+ describe "#resource" do
45
+ it "should be a Harvestdor::Indexer resource" do
46
+ expect(subject.resource).to be_a_kind_of Harvestdor::Indexer::Resource
47
+ end
48
+
49
+ it "should have the correct druid" do
50
+ expect(subject.resource.druid).to eq "xf680rd3068"
51
+ end
52
+
53
+ it "should have the correct indexer" do
54
+ expect(subject.resource.indexer).to eq Spotlight::Dor::Resources.indexer.harvestdor
55
+ end
56
+ end
57
+
58
+ describe "#reindex" do
59
+ before do
60
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document).and_return({upstream: true})
61
+ allow(subject.resource).to receive(:collection?).and_return(false)
62
+ end
63
+
64
+ it "should add a document to solr" do
65
+ solr_data = [{spotlight_resource_id_ssim: nil, spotlight_resource_type_ssim: "spotlight/resources/purls", upstream: true}]
66
+ expect(blacklight_solr).to receive(:update).with({params: {commitWithin: 500}, data: solr_data.to_json, headers: {"Content-Type" => "application/json"}})
67
+ expect(subject).to receive(:update_index_time!)
68
+ subject.reindex
69
+ end
70
+ end
71
+
72
+ describe "#to_solr" do
73
+ before do
74
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document)
75
+ end
76
+ context "with a collection" do
77
+ before do
78
+ allow(subject.resource).to receive(:collection?).and_return(true)
79
+ end
80
+
81
+ it "should provide a solr document for the collection" do
82
+ allow(subject.resource).to receive(:items).and_return([])
83
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
84
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
85
+ end
86
+
87
+ it "should provide a solr document for the items too" do
88
+ item = double
89
+ allow(subject.resource).to receive(:items).and_return([item])
90
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({collection: true})
91
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(item).and_return({item: true})
92
+ solr_doc = subject.to_solr.to_a
93
+ expect(solr_doc.first).to include :collection
94
+ expect(solr_doc.last).to include :item
95
+ end
96
+ end
97
+
98
+ context "with a single item" do
99
+ before do
100
+ allow(subject.resource).to receive(:collection?).and_return(false)
101
+ end
102
+
103
+ it "should provide a solr document for the resource" do
104
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
105
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
106
+ end
107
+
108
+ it "should index outside the context of an exhibit" do
109
+ allow(subject).to receive(:exhibit).and_return(nil)
110
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
111
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,91 @@
1
+ require 'spec_helper'
2
+
3
+ describe Spotlight::Resources::Searchworks do
4
+
5
+ let :exhibit do
6
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
7
+ end
8
+
9
+ subject { Spotlight::Resources::Searchworks.new url: "http://searchworks.stanford.edu/view/xf680rd3068" }
10
+
11
+ before do
12
+ allow(subject).to receive(:exhibit).and_return(exhibit)
13
+ allow(subject).to receive(:to_global_id).and_return('x')
14
+ end
15
+
16
+ describe ".can_provide?" do
17
+ subject { Spotlight::Resources::Searchworks }
18
+ it "should be able to provide any searchworks URL" do
19
+ expect(subject.can_provide?(double(url: "https://searchworks.stanford.edu/xyz"))).to eq true
20
+ expect(subject.can_provide?(double(url: "http://searchworks.stanford.edu/xyz"))).to eq true
21
+ end
22
+ it "should also work with searchworks-test URLs" do
23
+ expect(subject.can_provide?(double(url: "https://searchworks-test.stanford.edu/xyz"))).to eq true
24
+ expect(subject.can_provide?(double(url: "http://searchworks-test.stanford.edu/xyz"))).to eq true
25
+ end
26
+ end
27
+
28
+ describe "#doc_id" do
29
+ it "should be able to extract DRUIDs from a searchworks url" do
30
+ subject.url = "http://searchworks.stanford.edu/view/xyz"
31
+ expect(subject.doc_id).to eq "xyz"
32
+ end
33
+
34
+ it "should be able to extract DRUIDs from a searchworks format url" do
35
+ subject.url = "http://searchworks.stanford.edu/view/xf680rd3068.xml"
36
+ expect(subject.doc_id).to eq "xf680rd3068"
37
+ end
38
+ end
39
+
40
+ describe "#resource" do
41
+ it "should be a Harvestdor::Indexer resource" do
42
+ expect(subject.resource).to be_a_kind_of Harvestdor::Indexer::Resource
43
+ end
44
+
45
+ it "should have the correct druid" do
46
+ expect(subject.resource.druid).to eq "xf680rd3068"
47
+ end
48
+
49
+ it "should have the correct indexer" do
50
+ expect(subject.resource.indexer).to eq Spotlight::Dor::Resources.indexer.harvestdor
51
+ end
52
+ end
53
+
54
+ describe "#to_solr" do
55
+ before do
56
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document)
57
+ end
58
+ context "with a collection" do
59
+ before do
60
+ allow(subject.resource).to receive(:collection?).and_return(true)
61
+ end
62
+
63
+ it "should provide a solr document for the collection" do
64
+ allow(subject.resource).to receive(:items).and_return([])
65
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
66
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
67
+ end
68
+
69
+ it "should provide a solr document for the items too" do
70
+ item = double
71
+ allow(subject.resource).to receive(:items).and_return([item])
72
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({collection: true})
73
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(item).and_return({item: true})
74
+ solr_doc = subject.to_solr.to_a
75
+ expect(solr_doc.first).to include :collection
76
+ expect(solr_doc.last).to include :item
77
+ end
78
+ end
79
+
80
+ context "with a single item" do
81
+ before do
82
+ allow(subject.resource).to receive(:collection?).and_return(false)
83
+ end
84
+
85
+ it "should provide a solr document for the resource" do
86
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
87
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
88
+ end
89
+ end
90
+ end
91
+ end