gdor-indexer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -3
- data/.rubocop_todo.yml +26 -19
- data/Gemfile +1 -1
- data/gdor-indexer.gemspec +1 -1
- data/lib/gdor/indexer/mods_fields.rb +26 -37
- data/lib/gdor/indexer/version.rb +1 -1
- data/spec/unit/mods_fields_spec.rb +318 -0
- data/spec/unit/mods_pub_fields_spec.rb +257 -0
- data/spec/unit/mods_subject_fields_spec.rb +279 -0
- metadata +10 -6
- data/spec/unit/gdor_mods_fields_spec.rb +0 -813
@@ -0,0 +1,257 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the publication date Solr fields (pub_date_sort, the single valued
# pub year facets, pub_year_tisim, creation_year_isi, publication_year_isi) and
# for imprint_display, all read from SolrDocBuilder#doc_hash_from_mods.
describe GDor::Indexer::ModsFields do
  let(:fake_druid) { 'oo000oo0000' }
  let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
  let(:mods_xml) { "<mods #{ns_decl}><note>gdor_mods_fields testing</note></mods>" }
  let(:mods_origin_info_start_str) { "<mods #{ns_decl}><originInfo>" }
  let(:mods_origin_info_end_str) { '</originInfo></mods>' }

  # Builds a GDor::Indexer::SolrDocBuilder for the MODS XML string +m+.
  # The Harvestdor resource is stubbed: #public_xml returns nil, #mods returns
  # the Nokogiri-parsed string, and #indexer returns an indexer whose logger
  # is set to WARN. The builder itself logs to an in-memory StringIO at WARN.
  def sdb_for_mods(m)
    resource = Harvestdor::Indexer::Resource.new(double, fake_druid)
    allow(resource).to receive(:public_xml).and_return(nil)
    allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
    indexer = Harvestdor::Indexer.new.tap { |idx| idx.logger.level = Logger::WARN }
    allow(resource).to receive(:indexer).and_return(indexer)
    quiet_logger = Logger.new(StringIO.new).tap { |log| log.level = Logger::WARN }
    GDor::Indexer::SolrDocBuilder.new(resource, quiet_logger)
  end

  let(:sdb) { sdb_for_mods(mods_xml) }

  context 'publication date fields' do
    # Wraps mods_field_val in a single <dateIssued> element and checks the
    # value of solr_field_sym in the resulting doc hash.
    # The including group must provide mods_origin_info_start_str,
    # mods_origin_info_end_str and sdb_for_mods.
    RSpec.shared_examples 'expected' do |solr_field_sym, mods_field_val, exp_val|
      it "#{exp_val} for #{mods_field_val}" do
        date_el = "<dateIssued>#{mods_field_val}</dateIssued>"
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods[solr_field_sym]).to eq exp_val
      end
    end

    context 'pub_date_sort' do
      it 'calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
        expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
        sdb.doc_hash_from_mods[:pub_date_sort]
      end
      it 'includes approx dates' do
        date_el = "<dateIssued qualifier='approximate'>1945</dateIssued>"
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods[:pub_date_sort]).to eq('1945')
      end
      it 'takes single dateCreated' do
        date_el = '<dateCreated>1904</dateCreated>'
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods[:pub_date_sort]).to eq('1904')
      end

      # [dateIssued value, expected pub_date_sort value]
      [
        ['1945', '1945'],
        ['945', '0945'],
        ['66', '0066'],
        ['5', '0005'],
        ['0', '0000'],
        # these negative values are for String lexical sorting as this is a string
        ['-4', '-996'],
        ['-15', '-985'],
        ['-666', '-334'],
        ['16--', '1600'],
        ['9--', '0900'],
        ['19th century', '1800'],
        ['9th century', '0800'],
        # -(1000 - |yyy|) for BC dates
        ['300 B.C.', '-700']
      ].each do |mods_val, sort_val|
        it_behaves_like 'expected', :pub_date_sort, mods_val, sort_val
      end
    end

    context 'single valued pub year facets' do
      let(:mods) do
        date_els = [
          '<dateIssued qualifier="approximate">1500</dateIssued>',
          '<dateIssued>2000</dateIssued>'
        ].join("\n")
        "#{mods_origin_info_start_str}#{date_els}#{mods_origin_info_end_str}"
      end
      it 'pub_year_no_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(true)' do
        builder = sdb_for_mods(mods)
        expect(builder.smods_rec).to receive(:pub_date_facet_single_value).with(true).and_call_original
        allow(builder.smods_rec).to receive(:pub_date_facet_single_value).with(false) # for other flavor
        expect(builder.doc_hash_from_mods[:pub_year_no_approx_isi]).to eq '2000'
      end
      it 'pub_year_w_approx_isi calls Stanford::Mods::Record instance pub_date_facet_single_value(false)' do
        builder = sdb_for_mods(mods)
        expect(builder.smods_rec).to receive(:pub_date_facet_single_value).with(false).and_call_original
        allow(builder.smods_rec).to receive(:pub_date_facet_single_value).with(true) # for other flavor
        expect(builder.doc_hash_from_mods[:pub_year_w_approx_isi]).to eq '1500'
      end

      # Runs the same dateIssued => facet value table against field_sym.
      RSpec.shared_examples "single pub year facet" do |field_sym|
        [
          ['1945', '1945'],
          ['945', '945'],
          ['66', '66'],
          ['5', '5'],
          ['0', '0'],
          ['-4', '4 B.C.'],
          ['-15', '15 B.C.'],
          ['-666', '666 B.C.'],
          ['16--', '17th century'],
          ['8--', '9th century'],
          ['19th century', '19th century'],
          ['9th century', '9th century'],
          ['300 B.C.', '300 B.C.']
        ].each do |mods_val, facet_val|
          it_behaves_like 'expected', field_sym, mods_val, facet_val
        end
      end
      it_behaves_like "single pub year facet", :pub_year_no_approx_isi
      it_behaves_like "single pub year facet", :pub_year_w_approx_isi
    end

    context 'pub_year_tisim for date slider' do
      it 'should handle multiple pub dates (to be implemented - esp for date slider)'

      # FIXME: it should be using a method approp for date slider values, not single value
      it 'pub_year_tisim calls Stanford::Mods::Record instance pub_date_sortable_string(false)' do
        expect(sdb.smods_rec).to receive(:pub_date_sortable_string).with(false)
        sdb.doc_hash_from_mods[:pub_year_tisim]
      end
      it 'includes approx dates' do
        date_el = "<dateIssued qualifier='approximate'>1945</dateIssued>"
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods[:pub_year_tisim]).to eq('1945')
      end
      it 'takes single dateCreated' do
        date_el = '<dateCreated>1904</dateCreated>'
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods[:pub_year_tisim]).to eq('1904')
      end
      it 'ignores B.C. dates' do
        date_el = '<dateCreated>300 B.C.</dateCreated>'
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
        expect(builder.doc_hash_from_mods).not_to have_key(:pub_year_tisim)
        expect(builder.doc_hash_from_mods[:pub_year_tisim]).to be_nil
      end

      # [dateIssued value, expected pub_year_tisim value]
      [
        ['1945', '1945'],
        ['945', '0945'],
        ['66', '0066'],
        ['5', '0005'],
        ['0', '0000'],
        ['-4', nil],
        ['-15', nil],
        ['-666', nil],
        ['16--', '1600'],
        ['9--', '0900'],
        ['19th century', '1800'],
        ['9th century', '0800'],
        ['Text dated June 4, 1594; miniatures added by 1596', '1594'],
        ['Aug. 3rd, 1886', '1886'],
        ['Aug. 3rd, [18]86?', '1886'],
        ['early 1890s', '1890'],
        ['1865-6', '1865'] # FIXME: should be both years
      ].each do |mods_val, year_val|
        it_behaves_like 'expected', :pub_year_tisim, mods_val, year_val
      end
    end

    context 'creation_year_isi' do
      it 'creation_year_isi calls Stanford::Mods::Record pub_date_best_sort_str_value for dateCreated elements' do
        date_els = [
          "<dateCreated qualifier='approximate'>1500</dateCreated>",
          "<dateIssued qualifier='approximate'>2000</dateIssued>"
        ].join("\n")
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_els}#{mods_origin_info_end_str}")
        expect(builder.smods_rec).to receive(:pub_date_best_sort_str_value).at_least(2).times.and_call_original
        expect(builder.doc_hash_from_mods[:creation_year_isi]).to eq '1500'
      end

      # Same shape as 'expected', but wraps the value in <dateCreated> and
      # always checks :creation_year_isi.
      RSpec.shared_examples 'expected for dateCreated' do |mods_field_val, exp_val|
        it "#{exp_val} for #{mods_field_val}" do
          date_el = "<dateCreated>#{mods_field_val}</dateCreated>"
          builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_el}#{mods_origin_info_end_str}")
          expect(builder.doc_hash_from_mods[:creation_year_isi]).to eq exp_val
        end
      end

      # [dateCreated value, expected creation_year_isi value]
      [
        ['1945', '1945'],
        # note that it removes leading zeros
        ['945', '945'],
        ['66', '66'],
        ['5', '5'],
        ['0', '0'],
        ['-4', '-4'],
        ['-15', '-15'],
        ['-666', '-666'],
        ['16--', '1600'],
        ['9--', '900'],
        ['19th century', '1800'],
        ['9th century', '800'],
        ['blah June 4, 1594; blah 1596', '1594'],
        ['Aug. 3rd, 1886', '1886'],
        ['Aug. 3rd, [18]86?', '1886'],
        ['early 1890s', '1890'],
        ['1865-6', '1865'],
        # note: B.C. becomes a regular negative number
        ['300 B.C.', '-300']
      ].each do |mods_val, year_val|
        it_behaves_like 'expected for dateCreated', mods_val, year_val
      end
    end

    context 'publication_year_isi' do
      it 'publication_year_isi calls Stanford::Mods::Record pub_date_best_sort_str_value for dateIssued elements' do
        date_els = [
          "<dateCreated qualifier='approximate'>1500</dateCreated>",
          "<dateIssued qualifier='approximate'>2000</dateIssued>"
        ].join("\n")
        builder = sdb_for_mods("#{mods_origin_info_start_str}#{date_els}#{mods_origin_info_end_str}")
        expect(builder.smods_rec).to receive(:pub_date_best_sort_str_value).at_least(2).times.and_call_original
        expect(builder.doc_hash_from_mods[:publication_year_isi]).to eq '2000'
      end

      # [dateIssued value, expected publication_year_isi value]
      [
        ['1945', '1945'],
        # note that it removes leading zeros
        ['945', '945'],
        ['66', '66'],
        ['5', '5'],
        ['0', '0'],
        ['-4', '-4'],
        ['-15', '-15'],
        ['-666', '-666'],
        ['16--', '1600'],
        ['9--', '900'],
        ['19th century', '1800'],
        ['9th century', '800'],
        ['blah June 4, 1594; blah 1596', '1594'],
        ['Aug. 3rd, 1886', '1886'],
        ['Aug. 3rd, [18]86?', '1886'],
        ['early 1890s', '1890'],
        ['1865-6', '1865'],
        # note: B.C. becomes a regular negative number
        ['300 B.C.', '-300']
      ].each do |mods_val, year_val|
        it_behaves_like 'expected', :publication_year_isi, mods_val, year_val
      end
    end
  end # publication date fields

  context 'imprint_display' do
    # FIXME: it should be using a method returning a better string than just year
    it 'imprint_display calls deprecated Stanford::Mods::Record instance pub_date_display' do
      expect(sdb.smods_rec).to receive(:pub_date_display)
      sdb.doc_hash_from_mods[:imprint_display]
    end

    # imprint_display is expected to equal the dateIssued text unchanged
    [
      '1945',
      '945',
      '66',
      '5',
      '0',
      '-4',
      '-15',
      '-666',
      '16--',
      '9--',
      '19th century',
      '9th century',
      'blah June 4, 1594; blah 1596',
      'Aug. 3rd, 1886',
      'Aug. 3rd, [18]86?',
      'early 1890s',
      '1865-6'
    ].each do |date_str|
      it_behaves_like 'expected', :imprint_display, date_str, date_str
    end
  end
end
|
@@ -0,0 +1,279 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the subject related Solr fields (topic_search, geographic_search,
# subject_other_search, subject_other_subvy_search, subject_all_search,
# topic_facet, geographic_facet, era_facet), all read from
# SolrDocBuilder#doc_hash_from_mods.
describe GDor::Indexer::ModsFields do
  let(:fake_druid) { 'oo000oo0000' }
  let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
  let(:mods_xml) { "<mods #{ns_decl}><note>gdor_mods_fields testing</note></mods>" }

  # Builds a GDor::Indexer::SolrDocBuilder for the MODS XML string +m+.
  # The Harvestdor resource is stubbed: #public_xml returns nil, #mods returns
  # the Nokogiri-parsed string, and #indexer returns an indexer whose logger
  # is set to WARN. The builder itself logs to an in-memory StringIO at WARN.
  def sdb_for_mods(m)
    resource = Harvestdor::Indexer::Resource.new(double, fake_druid)
    allow(resource).to receive(:public_xml).and_return(nil)
    allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
    i = Harvestdor::Indexer.new
    i.logger.level = Logger::WARN
    allow(resource).to receive(:indexer).and_return(i)
    lgr = Logger.new(StringIO.new)
    lgr.level = Logger::WARN
    GDor::Indexer::SolrDocBuilder.new(resource, lgr)
  end

  context 'subject fields' do
    let(:genre) { 'genre top level' }
    let(:cart_coord) { '6 00 S, 71 30 E' }
    let(:s_genre) { 'genre in subject' }
    let(:geo) { 'Somewhere' }
    let(:geo_code) { 'us' }
    let(:hier_geo_country) { 'France' }
    let(:s_name) { 'name in subject' }
    let(:occupation) { 'worker bee' }
    let(:temporal) { 'temporal' }
    let(:s_title) { 'title in subject' }
    let(:topic) { 'topic' }
    # MODS record with a top level <genre> and one <subject> per kind of
    # subelement, each carrying one of the values defined above.
    let(:m) do
      [
        "<mods #{ns_decl}>",
        "<genre>#{genre}</genre>",
        "<subject><cartographics><coordinates>#{cart_coord}</coordinates></cartographics></subject>",
        "<subject><genre>#{s_genre}</genre></subject>",
        "<subject><geographic>#{geo}</geographic></subject>",
        "<subject><geographicCode authority='iso3166'>#{geo_code}</geographicCode></subject>",
        "<subject><hierarchicalGeographic><country>#{hier_geo_country}</country></hierarchicalGeographic></subject>",
        "<subject><name><namePart>#{s_name}</namePart></name></subject>",
        "<subject><occupation>#{occupation}</occupation></subject>",
        "<subject><temporal>#{temporal}</temporal></subject>",
        "<subject><titleInfo><title>#{s_title}</title></titleInfo></subject>",
        "<subject><topic>#{topic}</topic></subject>",
        '<typeOfResource>still image</typeOfResource>',
        '</mods>'
      ].join("\n")
    end
    let(:m_no_subject) { "<mods #{ns_decl}><note>notit</note></mods>" }
    let(:sdb) { sdb_for_mods(m) }
    let(:subject_doc_hash) { sdb.doc_hash_from_mods }

    it 'calls the appropriate methods in stanford-mods to populate the Solr fields' do
      expect(sdb.smods_rec).to receive(:topic_search)
      expect(sdb.smods_rec).to receive(:geographic_search)
      expect(sdb.smods_rec).to receive(:subject_other_search)
      expect(sdb.smods_rec).to receive(:subject_other_subvy_search)
      expect(sdb.smods_rec).to receive(:subject_all_search)
      expect(sdb.smods_rec).to receive(:topic_facet)
      expect(sdb.smods_rec).to receive(:geographic_facet)
      expect(sdb.smods_rec).to receive(:era_facet)
      sdb.doc_hash_from_mods
    end
    context 'search fields' do
      context 'topic_search' do
        it 'only includes genre and topic' do
          expect(subject_doc_hash[:topic_search]).to match_array [genre, topic]
        end
        context 'functional tests checking results from stanford-mods methods' do
          it 'is nil if there are no values in the MODS' do
            sdb = sdb_for_mods(m_no_subject)
            expect(sdb.doc_hash_from_mods[:topic_search]).to be_nil
          end
          it 'is not nil if there are only subject/topic elements (no <genre>)' do
            m = "<mods #{ns_decl}><subject><topic>#{topic}</topic></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [topic]
          end
          it 'is not nil if there are only <genre> elements (no subject/topic elements)' do
            m = "<mods #{ns_decl}><genre>#{genre}</genre></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:topic_search]).to match_array [genre]
          end
          it 'has a separate value for each topic subelement' do
            m = [
              "<mods #{ns_decl}>",
              '<subject>',
              '<topic>first</topic>',
              '<topic>second</topic>',
              '</subject>',
              '<subject><topic>third</topic></subject>',
              '</mods>'
            ].join("\n")
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:topic_search]).to match_array %w(first second third)
          end
        end # functional tests checking results from stanford-mods methods
      end # topic_search

      context 'geographic_search' do
        it 'includes geographic and hierarchicalGeographic' do
          expect(subject_doc_hash[:geographic_search]).to match_array [geo, hier_geo_country]
        end
        it 'calls sw_geographic_search (from stanford-mods gem)' do
          m = "<mods #{ns_decl}><subject><geographic>#{geo}</geographic></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.smods_rec).to receive(:sw_geographic_search).at_least(1).times
          sdb.doc_hash_from_mods
        end
        it "logs an info message when it encounters a geographicCode encoding it doesn't translate" do
          m = "<mods #{ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
          sdb = sdb_for_mods(m)
          expect(sdb.smods_rec.sw_logger).to receive(:info).with(/#{fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/).at_least(1).times
          sdb.doc_hash_from_mods
        end
      end # geographic_search

      context 'subject_other_search' do
        it 'includes occupation, subject names, and subject titles' do
          expect(subject_doc_hash[:subject_other_search]).to match_array [occupation, s_name, s_title]
        end
        context 'functional tests checking results from stanford-mods methods' do
          it 'is nil if there are no values in the MODS' do
            sdb = sdb_for_mods(mods_xml)
            expect(sdb.doc_hash_from_mods[:subject_other_search]).to be_nil
          end
          it 'is not nil if there are only subject/name elements' do
            m = "<mods #{ns_decl}><subject><name><namePart>#{s_name}</namePart></name></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [s_name]
          end
          it 'is not nil if there are only subject/occupation elements' do
            m = "<mods #{ns_decl}><subject><occupation>#{occupation}</occupation></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [occupation]
          end
          it 'is not nil if there are only subject/titleInfo elements' do
            m = "<mods #{ns_decl}><subject><titleInfo><title>#{s_title}</title></titleInfo></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array [s_title]
          end
          it 'has a separate value for each occupation subelement' do
            m = [
              "<mods #{ns_decl}>",
              '<subject>',
              '<occupation>first</occupation>',
              '<occupation>second</occupation>',
              '</subject>',
              '<subject><occupation>third</occupation></subject>',
              '</mods>'
            ].join("\n")
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_search]).to match_array %w(first second third)
          end
        end # functional tests checking results from stanford-mods methods
      end # subject_other_search

      context 'subject_other_subvy_search' do
        it 'includes temporal and genre SUBelement' do
          expect(subject_doc_hash[:subject_other_subvy_search]).to match_array [temporal, s_genre]
        end
        context 'functional tests checking results from stanford-mods methods' do
          it 'is nil if there are no values in the MODS' do
            sdb = sdb_for_mods(mods_xml)
            expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to be_nil
          end
          it 'is not nil if there are only subject/temporal elements (no subject/genre)' do
            m = "<mods #{ns_decl}><subject><temporal>#{temporal}</temporal></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [temporal]
          end
          it 'is not nil if there are only subject/genre elements (no subject/temporal)' do
            m = "<mods #{ns_decl}><subject><genre>#{s_genre}</genre></subject></mods>"
            sdb = sdb_for_mods(m)
            expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array [s_genre]
          end
          context 'genre subelement' do
            it 'has a separate value for each genre element' do
              m = [
                "<mods #{ns_decl}>",
                '<subject>',
                '<genre>first</genre>',
                '<genre>second</genre>',
                '</subject>',
                '<subject><genre>third</genre></subject>',
                '</mods>'
              ].join("\n")
              sdb = sdb_for_mods(m)
              expect(sdb.doc_hash_from_mods[:subject_other_subvy_search]).to match_array %w(first second third)
            end
          end # genre subelement
        end # "functional tests checking results from stanford-mods methods"
      end # subject_other_subvy_search

      context 'subject_all_search' do
        it 'contains top level <genre> element data' do
          expect(subject_doc_hash[:subject_all_search]).to include(genre)
        end
        it 'does not contain cartographic sub element' do
          expect(subject_doc_hash[:subject_all_search]).not_to include(cart_coord)
        end
        # geo_code is the value of the <geographicCode> element in the fixture
        it 'does not include codes from geographicCode sub element' do
          expect(subject_doc_hash[:subject_all_search]).not_to include(geo_code)
        end
        it 'contains all other subject subelement data' do
          expect(subject_doc_hash[:subject_all_search]).to include(s_genre)
          expect(subject_doc_hash[:subject_all_search]).to include(geo)
          expect(subject_doc_hash[:subject_all_search]).to include(hier_geo_country)
          expect(subject_doc_hash[:subject_all_search]).to include(s_name)
          expect(subject_doc_hash[:subject_all_search]).to include(occupation)
          expect(subject_doc_hash[:subject_all_search]).to include(temporal)
          expect(subject_doc_hash[:subject_all_search]).to include(s_title)
          expect(subject_doc_hash[:subject_all_search]).to include(topic)
        end
      end # subject_all_search
    end # search fields

    context 'facet fields' do
      context 'topic_facet' do
        it 'includes topic subelement' do
          expect(subject_doc_hash[:topic_facet]).to include(topic)
        end
        it 'includes sw_subject_names' do
          expect(subject_doc_hash[:topic_facet]).to include(s_name)
        end
        it 'includes sw_subject_titles' do
          expect(subject_doc_hash[:topic_facet]).to include(s_title)
        end
        it 'includes occupation subelement' do
          expect(subject_doc_hash[:topic_facet]).to include(occupation)
        end
        it 'has the trailing punctuation removed' do
          # '\\' inside single quotes is one literal backslash in the XML
          m = [
            "<mods #{ns_decl}><subject>",
            '<topic>comma,</topic>',
            '<occupation>semicolon;</occupation>',
            '<titleInfo><title>backslash \\</title></titleInfo>',
            '<name><namePart>internal, punct;uation</namePart></name>',
            '</subject></mods>'
          ].join("\n")
          sdb = sdb_for_mods(m)
          doc_hash = sdb.doc_hash_from_mods
          expect(doc_hash[:topic_facet]).to include('comma')
          expect(doc_hash[:topic_facet]).to include('semicolon')
          expect(doc_hash[:topic_facet]).to include('backslash')
          expect(doc_hash[:topic_facet]).to include('internal, punct;uation')
        end
      end # topic_facet

      context 'geographic_facet' do
        it 'includes geographic subelement' do
          expect(subject_doc_hash[:geographic_facet]).to include(geo)
        end
        it 'is like geographic_search with the trailing punctuation (and preceding spaces) removed' do
          # '\\' inside single quotes is one literal backslash in the XML
          m = [
            "<mods #{ns_decl}><subject>",
            '<geographic>comma,</geographic>',
            '<geographic>semicolon;</geographic>',
            '<geographic>backslash \\</geographic>',
            '<geographic>internal, punct;uation</geographic>',
            '</subject></mods>'
          ].join("\n")
          sdb = sdb_for_mods(m)
          doc_hash = sdb.doc_hash_from_mods
          expect(doc_hash[:geographic_facet]).to include('comma')
          expect(doc_hash[:geographic_facet]).to include('semicolon')
          expect(doc_hash[:geographic_facet]).to include('backslash')
          expect(doc_hash[:geographic_facet]).to include('internal, punct;uation')
        end
      end

      it 'era_facet should be temporal subelement with the trailing punctuation removed' do
        # '\\' inside single quotes is one literal backslash in the XML
        m = [
          "<mods #{ns_decl}><subject>",
          '<temporal>comma,</temporal>',
          '<temporal>semicolon;</temporal>',
          '<temporal>backslash \\</temporal>',
          '<temporal>internal, punct;uation</temporal>',
          '</subject></mods>'
        ].join("\n")
        sdb = sdb_for_mods(m)
        doc_hash = sdb.doc_hash_from_mods
        expect(doc_hash[:era_facet]).to include('comma')
        expect(doc_hash[:era_facet]).to include('semicolon')
        expect(doc_hash[:era_facet]).to include('backslash')
        expect(doc_hash[:era_facet]).to include('internal, punct;uation')
      end
    end # facet fields
  end # subject fields

  # publication fields moved to mods_pub_fields_spec.rb
end
|