spotlight-dor-resources 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.hound.yml +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.rubocop_todo.yml +191 -0
  6. data/.travis.yml +17 -0
  7. data/Gemfile +32 -0
  8. data/LICENSE.txt +13 -0
  9. data/README.md +65 -0
  10. data/Rakefile +48 -0
  11. data/app/models/spotlight/resources/dor_resource.rb +17 -0
  12. data/app/models/spotlight/resources/harvestdor.rb +4 -0
  13. data/app/models/spotlight/resources/purl.rb +14 -0
  14. data/app/models/spotlight/resources/searchworks.rb +15 -0
  15. data/lib/spotlight/dor/indexer.rb +160 -0
  16. data/lib/spotlight/dor/resources.rb +16 -0
  17. data/lib/spotlight/dor/resources/engine.rb +13 -0
  18. data/lib/spotlight/dor/resources/version.rb +7 -0
  19. data/solr_conf/conf/schema.xml +346 -0
  20. data/solr_conf/conf/solrconfig.xml +180 -0
  21. data/spec/integration/gdor_integration_spec.rb +30 -0
  22. data/spec/integration/indexer_integration_spec.rb +28 -0
  23. data/spec/models/spotlight/resources/purl_spec.rb +115 -0
  24. data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
  25. data/spec/spec_helper.rb +60 -0
  26. data/spec/test_app_templates/catalog_controller.rb +96 -0
  27. data/spec/test_app_templates/gdor.yml +9 -0
  28. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  29. data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
  30. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
  31. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
  32. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
  33. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
  34. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
  35. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
  36. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
  37. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
  38. data/spotlight-dor-resources.gemspec +37 -0
  39. metadata +336 -0
@@ -0,0 +1,180 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <config>
3
+ <!-- NOTE: various comments and unused configuration possibilities have been purged
4
+ from this file. Please refer to http://wiki.apache.org/solr/SolrConfigXml,
5
+ as well as the default solrconfig file included with Solr -->
6
+
7
+ <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
8
+
9
+ <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
10
+
11
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
12
+
13
+ <updateHandler class="solr.DirectUpdateHandler2">
14
+ <updateLog>
15
+ <str name="dir">${solr.core0.data.dir:}</str>
16
+ </updateLog>
17
+ </updateHandler>
18
+
19
+ <!-- solr lib dirs -->
20
+ <lib dir="../lib/contrib/analysis-extras/lib" />
21
+ <lib dir="../lib/contrib/analysis-extras/lucene-libs" />
22
+
23
+ <dataDir>${solr.data.dir:}</dataDir>
24
+
25
+ <requestHandler name="search" class="solr.SearchHandler" default="true">
26
+ <!-- default values for query parameters can be specified; these
27
+ will be overridden by parameters in the request
28
+ -->
29
+ <lst name="defaults">
30
+ <str name="defType">edismax</str>
31
+ <str name="echoParams">explicit</str>
32
+ <str name="q.alt">*:*</str>
33
+ <str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
34
+ <int name="qs">1</int>
35
+ <int name="ps">2</int>
36
+ <float name="tie">0.01</float>
37
+ <!-- these qf and pf values are used by default, if not otherwise specified by
38
+ client. The default blacklight_config will use these for the
39
+ "keywords" search. See the author_qf/author_pf, title_qf, etc
40
+ below, which the default blacklight_config will specify for
41
+ those searches. You may also be interested in:
42
+ http://wiki.apache.org/solr/LocalParams
43
+ -->
44
+ <str name="qf">
45
+ id
46
+ full_title_tesim
47
+ short_title_tesim
48
+ alternative_title_tesim
49
+ active_fedora_model_ssi
50
+ title_tesim
51
+ author_tesim
52
+ subject_tesim
53
+ all_text_timv
54
+ </str>
55
+ <str name="pf">
56
+ all_text_timv^10
57
+ </str>
58
+
59
+ <str name="author_qf">
60
+ author_tesim
61
+ </str>
62
+ <str name="author_pf">
63
+ </str>
64
+ <str name="title_qf">
65
+ title_tesim
66
+ full_title_tesim
67
+ short_title_tesim
68
+ alternative_title_tesim
69
+ </str>
70
+ <str name="title_pf">
71
+ </str>
72
+ <str name="subject_qf">
73
+ subject_tesim
74
+ </str>
75
+ <str name="subject_pf">
76
+ </str>
77
+
78
+ <str name="fl">
79
+ *,
80
+ score
81
+ </str>
82
+
83
+ <str name="facet">true</str>
84
+ <str name="facet.mincount">1</str>
85
+ <str name="facet.limit">10</str>
86
+ <str name="facet.field">active_fedora_model_ssi</str>
87
+ <str name="facet.field">subject_ssim</str>
88
+
89
+ <str name="spellcheck">true</str>
90
+ <str name="spellcheck.dictionary">default</str>
91
+ <str name="spellcheck.onlyMorePopular">true</str>
92
+ <str name="spellcheck.extendedResults">true</str>
93
+ <str name="spellcheck.collate">false</str>
94
+ <str name="spellcheck.count">5</str>
95
+
96
+ </lst>
97
+ <arr name="last-components">
98
+ <str>spellcheck</str>
99
+ </arr>
100
+ </requestHandler>
101
+
102
+ <requestHandler name="permissions" class="solr.SearchHandler" >
103
+ <lst name="defaults">
104
+ <str name="facet">off</str>
105
+ <str name="echoParams">all</str>
106
+ <str name="rows">1</str>
107
+ <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
108
+ <str name="fl">
109
+ id,
110
+ access_ssim,
111
+ discover_access_group_ssim,discover_access_person_ssim,
112
+ read_access_group_ssim,read_access_person_ssim,
113
+ edit_access_group_ssim,edit_access_person_ssim,
114
+ depositor_ti,
115
+ embargo_release_date_dtsi,
116
+ inheritable_access_ssim,
117
+ inheritable_discover_access_group_ssim,inheritable_discover_access_person_ssim,
118
+ inheritable_read_access_group_ssim,inheritable_read_access_person_ssim,
119
+ inheritable_edit_access_group_ssim,inheritable_edit_access_person_ssim,
120
+ inheritable_embargo_release_date_dtsi
121
+ </str>
122
+ </lst>
123
+ </requestHandler>
124
+
125
+ <requestHandler name="standard" class="solr.SearchHandler">
126
+ <lst name="defaults">
127
+ <str name="echoParams">explicit</str>
128
+ <str name="defType">lucene</str>
129
+ </lst>
130
+ </requestHandler>
131
+
132
+ <!-- for requests to get a single document; use id=666 instead of q=id:666 -->
133
+ <requestHandler name="document" class="solr.SearchHandler" >
134
+ <lst name="defaults">
135
+ <str name="echoParams">all</str>
136
+ <str name="fl">*</str>
137
+ <str name="rows">1</str>
138
+ <str name="q">{!raw f=id v=$id}</str> <!-- use id=666 instead of q=id:666 -->
139
+ </lst>
140
+ </requestHandler>
141
+
142
+
143
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
144
+ <str name="queryAnalyzerFieldType">textSpell</str>
145
+ <!-- Multiple "Spell Checkers" can be declared and used by this component
146
+ (e.g. for title_spell field)
147
+ -->
148
+ <lst name="spellchecker">
149
+ <str name="name">default</str>
150
+ <str name="field">spell</str>
151
+ <str name="spellcheckIndexDir">./spell</str>
152
+ <str name="buildOnOptimize">true</str>
153
+ </lst>
154
+ </searchComponent>
155
+
156
+ <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
157
+
158
+ <requestDispatcher handleSelect="true" >
159
+ <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
160
+ </requestDispatcher>
161
+
162
+ <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
163
+ <requestHandler name="/update" class="solr.UpdateRequestHandler" />
164
+ <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
165
+
166
+ <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
167
+ <lst name="invariants">
168
+ <str name="q">solrpingquery</str>
169
+ </lst>
170
+ <lst name="defaults">
171
+ <str name="echoParams">all</str>
172
+ </lst>
173
+ </requestHandler>
174
+
175
+ <!-- config for the admin interface -->
176
+ <admin>
177
+ <defaultQuery>search</defaultQuery>
178
+ </admin>
179
+
180
+ </config>
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ describe "gdor indexing integration test", :vcr do
4
+ let :exhibit do
5
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
6
+ end
7
+
8
+ subject do
9
+ r = Spotlight::Resources::Purl.new(url: "https://purl.stanford.edu/xf680rd3068")
10
+ allow(r).to receive(:to_global_id).and_return('x')
11
+ allow(r).to receive(:exhibit).and_return(exhibit)
12
+ r.to_solr.first
13
+ end
14
+
15
+ it "should have a doc id" do
16
+ expect(subject[:id]).to eq "xf680rd3068"
17
+ end
18
+
19
+ it "should have the gdor data" do
20
+ expect(subject).to include :collection, :modsxml, :url_fulltext
21
+ end
22
+
23
+ it "should have spotlight data" do
24
+ expect(subject).to include :spotlight_resource_id_ssim
25
+ end
26
+
27
+ it "should have exhibit-specific indexing" do
28
+ expect(subject).to include "full_image_url_ssm"
29
+ end
30
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'indexer integration tests', :vcr do
4
+ describe 'donor tags' do
5
+ it 'solr_doc has donor_tags_ssim field when <note displayLabel="Donor tags"> is in MODS' do
6
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/vw282gv1740') # Feigenbaum PURL with donor tags
7
+ solr_doc = r.to_solr.first
8
+ expect(solr_doc['donor_tags_ssim']).to eq ['Knowledge Systems Laboratory', 'medical applications', 'Publishing', 'Stanford', 'Stanford Computer Science Department']
9
+ end
10
+ it 'no donor_tags_ssim field in solr doc when <note displayLabel="Donor tags"> not in MODS' do
11
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/bd955gr0721') # Revs PURL without donor tags
12
+ solr_doc = r.to_solr.first
13
+ expect(solr_doc['donor_tags_ssim']).to be_nil
14
+ end
15
+ end
16
+ describe 'genre' do
17
+ it 'solr_doc has genre_ssim field when <genre> in MODS' do
18
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/vw282gv1740') # Feigenbaum PURL with genre
19
+ solr_doc = r.to_solr.first
20
+ expect(solr_doc['genre_ssim']).to eq ['manuscripts for publication']
21
+ end
22
+ it 'no genre_ssim field when <genre> not in MODS' do
23
+ r = Spotlight::Resources::Purl.new(url: 'https://purl.stanford.edu/pz816zm7931') # Road & Track PURL without genre
24
+ solr_doc = r.to_solr.first
25
+ expect(solr_doc['genre_ssim']).to be_nil
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ describe Spotlight::Resources::Purl do
4
+ let :exhibit do
5
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
6
+ end
7
+ let :blacklight_solr do
8
+ double
9
+ end
10
+
11
+ subject { Spotlight::Resources::Purl.new url: "http://purl.stanford.edu/xf680rd3068" }
12
+
13
+ before do
14
+ allow(subject).to receive(:exhibit).and_return(exhibit)
15
+ allow(subject).to receive(:blacklight_solr).and_return(blacklight_solr)
16
+ allow(subject).to receive(:to_global_id).and_return('x')
17
+ end
18
+
19
+ describe ".can_provide?" do
20
+ subject { Spotlight::Resources::Purl }
21
+ it "should be able to provide any purl URL" do
22
+ expect(subject.can_provide?(double(url: "https://purl.stanford.edu/xyz"))).to eq true
23
+ expect(subject.can_provide?(double(url: "http://purl.stanford.edu/xyz"))).to eq true
24
+ end
25
+ end
26
+
27
+ describe "#doc_id" do
28
+ it "should be able to extract DRUIDs from a PURL url" do
29
+ subject.url = "http://purl.stanford.edu/xyz"
30
+ expect(subject.doc_id).to eq "xyz"
31
+ end
32
+
33
+ it "should be able to extract DRUIDs from a PURL format url" do
34
+ subject.url = "http://purl.stanford.edu/xf680rd3068.xml"
35
+ expect(subject.doc_id).to eq "xf680rd3068"
36
+ end
37
+
38
+ it "should be able to extract DRUIDs from a PURL's viewer url" do
39
+ subject.url = "http://purl.stanford.edu/xf680rd3068#image/1/small"
40
+ expect(subject.doc_id).to eq "xf680rd3068"
41
+ end
42
+ end
43
+
44
+ describe "#resource" do
45
+ it "should be a Harvestdor::Indexer resource" do
46
+ expect(subject.resource).to be_a_kind_of Harvestdor::Indexer::Resource
47
+ end
48
+
49
+ it "should have the correct druid" do
50
+ expect(subject.resource.druid).to eq "xf680rd3068"
51
+ end
52
+
53
+ it "should have the correct indexer" do
54
+ expect(subject.resource.indexer).to eq Spotlight::Dor::Resources.indexer.harvestdor
55
+ end
56
+ end
57
+
58
+ describe "#reindex" do
59
+ before do
60
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document).and_return({upstream: true})
61
+ allow(subject.resource).to receive(:collection?).and_return(false)
62
+ end
63
+
64
+ it "should add a document to solr" do
65
+ solr_data = [{spotlight_resource_id_ssim: nil, spotlight_resource_type_ssim: "spotlight/resources/purls", upstream: true}]
66
+ expect(blacklight_solr).to receive(:update).with({params: {commitWithin: 500}, data: solr_data.to_json, headers: {"Content-Type" => "application/json"}})
67
+ expect(subject).to receive(:update_index_time!)
68
+ subject.reindex
69
+ end
70
+ end
71
+
72
+ describe "#to_solr" do
73
+ before do
74
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document)
75
+ end
76
+ context "with a collection" do
77
+ before do
78
+ allow(subject.resource).to receive(:collection?).and_return(true)
79
+ end
80
+
81
+ it "should provide a solr document for the collection" do
82
+ allow(subject.resource).to receive(:items).and_return([])
83
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
84
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
85
+ end
86
+
87
+ it "should provide a solr document for the items too" do
88
+ item = double
89
+ allow(subject.resource).to receive(:items).and_return([item])
90
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({collection: true})
91
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(item).and_return({item: true})
92
+ solr_doc = subject.to_solr.to_a
93
+ expect(solr_doc.first).to include :collection
94
+ expect(solr_doc.last).to include :item
95
+ end
96
+ end
97
+
98
+ context "with a single item" do
99
+ before do
100
+ allow(subject.resource).to receive(:collection?).and_return(false)
101
+ end
102
+
103
+ it "should provide a solr document for the resource" do
104
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
105
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
106
+ end
107
+
108
+ it "should index outside the context of an exhibit" do
109
+ allow(subject).to receive(:exhibit).and_return(nil)
110
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
111
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,91 @@
1
+ require 'spec_helper'
2
+
3
+ describe Spotlight::Resources::Searchworks do
4
+
5
+ let :exhibit do
6
+ double(solr_data: { }, blacklight_config: Blacklight::Configuration.new)
7
+ end
8
+
9
+ subject { Spotlight::Resources::Searchworks.new url: "http://searchworks.stanford.edu/view/xf680rd3068" }
10
+
11
+ before do
12
+ allow(subject).to receive(:exhibit).and_return(exhibit)
13
+ allow(subject).to receive(:to_global_id).and_return('x')
14
+ end
15
+
16
+ describe ".can_provide?" do
17
+ subject { Spotlight::Resources::Searchworks }
18
+ it "should be able to provide any searchworks URL" do
19
+ expect(subject.can_provide?(double(url: "https://searchworks.stanford.edu/xyz"))).to eq true
20
+ expect(subject.can_provide?(double(url: "http://searchworks.stanford.edu/xyz"))).to eq true
21
+ end
22
+ it "should also work with searchworks-test URLs" do
23
+ expect(subject.can_provide?(double(url: "https://searchworks-test.stanford.edu/xyz"))).to eq true
24
+ expect(subject.can_provide?(double(url: "http://searchworks-test.stanford.edu/xyz"))).to eq true
25
+ end
26
+ end
27
+
28
+ describe "#doc_id" do
29
+ it "should be able to extract DRUIDs from a searchworks url" do
30
+ subject.url = "http://searchworks.stanford.edu/view/xyz"
31
+ expect(subject.doc_id).to eq "xyz"
32
+ end
33
+
34
+ it "should be able to extract DRUIDs from a searchworks format url" do
35
+ subject.url = "http://searchworks.stanford.edu/view/xf680rd3068.xml"
36
+ expect(subject.doc_id).to eq "xf680rd3068"
37
+ end
38
+ end
39
+
40
+ describe "#resource" do
41
+ it "should be a Harvestdor::Indexer resource" do
42
+ expect(subject.resource).to be_a_kind_of Harvestdor::Indexer::Resource
43
+ end
44
+
45
+ it "should have the correct druid" do
46
+ expect(subject.resource.druid).to eq "xf680rd3068"
47
+ end
48
+
49
+ it "should have the correct indexer" do
50
+ expect(subject.resource.indexer).to eq Spotlight::Dor::Resources.indexer.harvestdor
51
+ end
52
+ end
53
+
54
+ describe "#to_solr" do
55
+ before do
56
+ allow(Spotlight::Dor::Resources.indexer).to receive(:solr_document)
57
+ end
58
+ context "with a collection" do
59
+ before do
60
+ allow(subject.resource).to receive(:collection?).and_return(true)
61
+ end
62
+
63
+ it "should provide a solr document for the collection" do
64
+ allow(subject.resource).to receive(:items).and_return([])
65
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
66
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
67
+ end
68
+
69
+ it "should provide a solr document for the items too" do
70
+ item = double
71
+ allow(subject.resource).to receive(:items).and_return([item])
72
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({collection: true})
73
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(item).and_return({item: true})
74
+ solr_doc = subject.to_solr.to_a
75
+ expect(solr_doc.first).to include :collection
76
+ expect(solr_doc.last).to include :item
77
+ end
78
+ end
79
+
80
+ context "with a single item" do
81
+ before do
82
+ allow(subject.resource).to receive(:collection?).and_return(false)
83
+ end
84
+
85
+ it "should provide a solr document for the resource" do
86
+ expect(Spotlight::Dor::Resources.indexer).to receive(:solr_document).with(subject.resource).and_return({upstream: true})
87
+ expect(subject.to_solr.first).to include :upstream, :spotlight_resource_id_ssim, :spotlight_resource_type_ssim
88
+ end
89
+ end
90
+ end
91
+ end