gdor-indexer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -3
- data/.rubocop_todo.yml +26 -19
- data/Gemfile +1 -1
- data/gdor-indexer.gemspec +1 -1
- data/lib/gdor/indexer/mods_fields.rb +26 -37
- data/lib/gdor/indexer/version.rb +1 -1
- data/spec/unit/mods_fields_spec.rb +318 -0
- data/spec/unit/mods_pub_fields_spec.rb +257 -0
- data/spec/unit/mods_subject_fields_spec.rb +279 -0
- metadata +10 -6
- data/spec/unit/gdor_mods_fields_spec.rb +0 -813
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7d3fef57a3dcc4df36487bead9f96a691ae385d
|
4
|
+
data.tar.gz: f724d1e3bfc0aee77798a10feb3c06939b29c30a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbed1557cd19168a20ca5c26656186206ef80ea83817715fea27fb79218f45ece95f24186f9eeb4fdb17c87b2ccf147556398dcba2fe5bed02db6983951dbe7d
|
7
|
+
data.tar.gz: 578889a4be5005e8a6504313e46b082685a393937a46cd4fe4315756b8da1191ef7a383ec122b5586f6da6ab76ba3157ee325fedf9c87cf67edb24d1cacf0e67
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2016-01-06 15:25:10 -0800 using RuboCop version 0.34.2.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
@@ -17,34 +17,27 @@ Lint/AmbiguousRegexpLiteral:
|
|
17
17
|
Lint/EndAlignment:
|
18
18
|
Enabled: false
|
19
19
|
|
20
|
-
# Offense count:
|
20
|
+
# Offense count: 4
|
21
21
|
Lint/UselessAssignment:
|
22
22
|
Exclude:
|
23
23
|
- 'lib/gdor/indexer.rb'
|
24
|
-
- 'spec/unit/gdor_mods_fields_spec.rb'
|
25
24
|
- 'spec/unit/indexer_spec.rb'
|
26
25
|
- 'spec/unit/solr_doc_builder_spec.rb'
|
27
26
|
|
28
|
-
# Offense count:
|
29
|
-
Lint/Void:
|
30
|
-
Exclude:
|
31
|
-
- 'spec/unit/gdor_mods_fields_spec.rb'
|
32
|
-
|
33
|
-
# Offense count: 16
|
27
|
+
# Offense count: 21
|
34
28
|
Metrics/AbcSize:
|
35
|
-
Max:
|
29
|
+
Max: 108
|
36
30
|
|
37
31
|
# Offense count: 1
|
38
32
|
# Configuration parameters: CountComments.
|
39
33
|
Metrics/ClassLength:
|
40
|
-
|
41
|
-
- 'lib/gdor/indexer.rb'
|
34
|
+
Max: 243
|
42
35
|
|
43
36
|
# Offense count: 5
|
44
37
|
Metrics/CyclomaticComplexity:
|
45
38
|
Max: 9
|
46
39
|
|
47
|
-
# Offense count:
|
40
|
+
# Offense count: 307
|
48
41
|
# Configuration parameters: AllowURI, URISchemes.
|
49
42
|
Metrics/LineLength:
|
50
43
|
Max: 258
|
@@ -52,26 +45,27 @@ Metrics/LineLength:
|
|
52
45
|
# Offense count: 15
|
53
46
|
# Configuration parameters: CountComments.
|
54
47
|
Metrics/MethodLength:
|
55
|
-
Max:
|
48
|
+
Max: 51
|
56
49
|
|
57
50
|
# Offense count: 3
|
58
51
|
Metrics/PerceivedComplexity:
|
59
52
|
Max: 10
|
60
53
|
|
61
|
-
# Offense count:
|
54
|
+
# Offense count: 7
|
62
55
|
# Configuration parameters: CustomTransform.
|
63
56
|
RSpec/FilePath:
|
64
57
|
Exclude:
|
65
|
-
- 'spec/unit/gdor_mods_fields_spec.rb'
|
66
58
|
- 'spec/unit/indexer_spec.rb'
|
59
|
+
- 'spec/unit/mods_fields_spec.rb'
|
60
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
61
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
67
62
|
- 'spec/unit/public_xml_fields_spec.rb'
|
68
63
|
- 'spec/unit/solr_doc_builder_spec.rb'
|
69
64
|
- 'spec/unit/solr_doc_hash_spec.rb'
|
70
65
|
|
71
|
-
# Offense count:
|
66
|
+
# Offense count: 193
|
72
67
|
RSpec/InstanceVariable:
|
73
68
|
Exclude:
|
74
|
-
- 'spec/unit/gdor_mods_fields_spec.rb'
|
75
69
|
- 'spec/unit/indexer_spec.rb'
|
76
70
|
- 'spec/unit/public_xml_fields_spec.rb'
|
77
71
|
- 'spec/unit/solr_doc_builder_spec.rb'
|
@@ -106,12 +100,25 @@ Style/DoubleNegation:
|
|
106
100
|
Exclude:
|
107
101
|
- 'lib/gdor/indexer/solr_doc_hash.rb'
|
108
102
|
|
103
|
+
# Offense count: 1
|
104
|
+
# Cop supports --auto-correct.
|
105
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
106
|
+
Style/EmptyLinesAroundBlockBody:
|
107
|
+
Exclude:
|
108
|
+
- 'spec/unit/mods_pub_fields_spec.rb'
|
109
|
+
|
109
110
|
# Offense count: 3
|
110
111
|
# Configuration parameters: MinBodyLength.
|
111
112
|
Style/GuardClause:
|
112
113
|
Exclude:
|
113
114
|
- 'lib/gdor/indexer.rb'
|
114
115
|
|
116
|
+
# Offense count: 12
|
117
|
+
# Cop supports --auto-correct.
|
118
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
119
|
+
Style/MultilineOperationIndentation:
|
120
|
+
Enabled: false
|
121
|
+
|
115
122
|
# Offense count: 1
|
116
123
|
# Configuration parameters: NamePrefix, NamePrefixBlacklist.
|
117
124
|
Style/PredicateName:
|
@@ -123,8 +130,8 @@ Style/PredicateName:
|
|
123
130
|
# Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
|
124
131
|
Style/RegexpLiteral:
|
125
132
|
Exclude:
|
126
|
-
- 'spec/unit/gdor_mods_fields_spec.rb'
|
127
133
|
- 'spec/unit/indexer_spec.rb'
|
134
|
+
- 'spec/unit/mods_subject_fields_spec.rb'
|
128
135
|
|
129
136
|
# Offense count: 1
|
130
137
|
Style/UnlessElse:
|
data/Gemfile
CHANGED
data/gdor-indexer.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.require_paths = ['lib']
|
19
19
|
|
20
20
|
spec.add_dependency 'harvestdor-indexer'
|
21
|
-
spec.add_dependency 'stanford-mods'
|
21
|
+
spec.add_dependency 'stanford-mods', '>= 1.3.4' # for new pub date methods
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
23
|
spec.add_dependency 'rsolr'
|
24
24
|
spec.add_dependency 'activesupport'
|
data/lib/gdor/indexer/mods_fields.rb
CHANGED
@@ -36,7 +36,6 @@ module GDor::Indexer::ModsFields
|
|
36
36
|
era_facet: smods_rec.era_facet,
|
37
37
|
|
38
38
|
format_main_ssim: format_main_ssim,
|
39
|
-
format: format, # for backwards compatibility
|
40
39
|
|
41
40
|
language: smods_rec.sw_language_facet,
|
42
41
|
physical: smods_rec.term_values([:physical_description, :extent]),
|
@@ -46,36 +45,38 @@ module GDor::Indexer::ModsFields
|
|
46
45
|
|
47
46
|
# publication fields
|
48
47
|
pub_search: smods_rec.place,
|
49
|
-
pub_date_sort: smods_rec.
|
50
|
-
|
48
|
+
pub_date_sort: smods_rec.pub_date_sortable_string(false), # include approx dates
|
49
|
+
# these are for single value facet display (in lieu of date slider (pub_year_tisim) and deprecated pub_date)
|
50
|
+
pub_year_no_approx_isi: smods_rec.pub_date_facet_single_value(true),
|
51
|
+
pub_year_w_approx_isi: smods_rec.pub_date_facet_single_value(false),
|
52
|
+
# TODO: remove pub_date after reindexing existing colls; deprecated in favor of pub_year_xxx_approx_isi ...
|
51
53
|
pub_date: smods_rec.pub_date_facet,
|
52
|
-
|
54
|
+
# display fields
|
55
|
+
imprint_display: smods_rec.pub_date_display,
|
56
|
+
# pub_date_best_sort_str_value is protected ...
|
57
|
+
creation_year_isi: smods_rec.send(:pub_date_best_sort_str_value, smods_rec.date_created_elements(false)),
|
58
|
+
publication_year_isi: smods_rec.send(:pub_date_best_sort_str_value, smods_rec.date_issued_elements(false)),
|
53
59
|
|
54
60
|
all_search: smods_rec.text.gsub(/\s+/, ' ')
|
55
61
|
}
|
56
62
|
|
57
|
-
# more pub date
|
58
|
-
|
59
|
-
if is_positive_int?
|
60
|
-
doc_hash[:pub_year_tisim] =
|
61
|
-
#
|
62
|
-
doc_hash[
|
63
|
+
# more pub date field processing
|
64
|
+
pub_date_sort_val = doc_hash[:pub_date_sort]
|
65
|
+
if is_positive_int? pub_date_sort_val
|
66
|
+
doc_hash[:pub_year_tisim] = pub_date_sort_val # for date slider
|
67
|
+
# remove leading zeros
|
68
|
+
doc_hash[:creation_year_isi] = remove_leading_zeros(doc_hash[:creation_year_isi]) if doc_hash[:creation_year_isi]
|
69
|
+
doc_hash[:publication_year_isi] = remove_leading_zeros(doc_hash[:publication_year_isi]) if doc_hash[:publication_year_isi]
|
70
|
+
else
|
71
|
+
# turn B.C. into -yyy for display fields
|
72
|
+
doc_hash[:creation_year_isi] = '-' + (1000 + doc_hash[:creation_year_isi].to_i).to_s if doc_hash[:creation_year_isi]
|
73
|
+
doc_hash[:publication_year_isi] = '-' + (1000 + doc_hash[:publication_year_isi].to_i).to_s if doc_hash[:publication_year_isi]
|
63
74
|
end
|
64
75
|
|
65
76
|
doc_hash
|
66
77
|
end
|
67
78
|
|
68
|
-
|
69
|
-
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
70
|
-
# via stanford-mods gem
|
71
|
-
# @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
|
72
|
-
def format
|
73
|
-
vals = smods_rec.format
|
74
|
-
if vals.empty?
|
75
|
-
logger.warn "#{druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements"
|
76
|
-
end
|
77
|
-
vals
|
78
|
-
end
|
79
|
+
private
|
79
80
|
|
80
81
|
# call stanford-mods format_main to get results
|
81
82
|
# @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
|
@@ -87,14 +88,6 @@ module GDor::Indexer::ModsFields
|
|
87
88
|
vals
|
88
89
|
end
|
89
90
|
|
90
|
-
# call stanford-mods sw_genre to get results
|
91
|
-
# @return [Array<String>] value(s)
|
92
|
-
def genre_ssim
|
93
|
-
smods_rec.sw_genre
|
94
|
-
end
|
95
|
-
|
96
|
-
protected
|
97
|
-
|
98
91
|
# @return true if the string parses into an int, and if so, the int is >= 0
|
99
92
|
def is_positive_int?(str)
|
100
93
|
str.to_i >= 0
|
@@ -102,13 +95,9 @@ module GDor::Indexer::ModsFields
|
|
102
95
|
false
|
103
96
|
end
|
104
97
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
return :publication_year_isi if vals && vals.length > 0
|
110
|
-
vals = smods_rec.term_values([:origin_info, :dateCreated])
|
111
|
-
return :creation_year_isi if vals && vals.length > 0
|
112
|
-
nil
|
98
|
+
def remove_leading_zeros(year_sort_str)
|
99
|
+
return '0' if year_sort_str == '0000'
|
100
|
+
return year_sort_str.sub(/^[0:]*/, '') if year_sort_str.start_with?('0')
|
101
|
+
year_sort_str
|
113
102
|
end
|
114
103
|
end
|
data/lib/gdor/indexer/version.rb
CHANGED
data/spec/unit/mods_fields_spec.rb
ADDED
@@ -0,0 +1,318 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe GDor::Indexer::ModsFields do
|
4
|
+
let(:fake_druid) { 'oo000oo0000' }
|
5
|
+
let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
|
6
|
+
let(:mods_xml) { "<mods #{ns_decl}><note>gdor_mods_fields testing</note></mods>" }
|
7
|
+
|
8
|
+
def sdb_for_mods(m)
|
9
|
+
resource = Harvestdor::Indexer::Resource.new(double, fake_druid)
|
10
|
+
allow(resource).to receive(:public_xml).and_return(nil)
|
11
|
+
allow(resource).to receive(:mods).and_return(Nokogiri::XML(m))
|
12
|
+
i = Harvestdor::Indexer.new
|
13
|
+
i.logger.level = Logger::WARN
|
14
|
+
allow(resource).to receive(:indexer).and_return(i)
|
15
|
+
lgr = Logger.new(StringIO.new)
|
16
|
+
lgr.level = Logger::WARN
|
17
|
+
GDor::Indexer::SolrDocBuilder.new(resource, lgr)
|
18
|
+
end
|
19
|
+
|
20
|
+
# see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
|
21
|
+
|
22
|
+
context 'summary_search solr field from <abstract>' do
|
23
|
+
it 'is populated when the MODS has a top level <abstract> element' do
|
24
|
+
m = "<mods #{ns_decl}><abstract>blah blah</abstract></mods>"
|
25
|
+
sdb = sdb_for_mods(m)
|
26
|
+
expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
|
27
|
+
end
|
28
|
+
it 'has a value for each abstract element' do
|
29
|
+
m = "<mods #{ns_decl}>
|
30
|
+
<abstract>one</abstract>
|
31
|
+
<abstract>two</abstract>
|
32
|
+
</mods>"
|
33
|
+
sdb = sdb_for_mods(m)
|
34
|
+
expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
|
35
|
+
end
|
36
|
+
it 'does not be present when there is no top level <abstract> element' do
|
37
|
+
m = "<mods #{ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
|
38
|
+
sdb = sdb_for_mods(m)
|
39
|
+
expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
|
40
|
+
end
|
41
|
+
it 'does not be present if there are only empty abstract elements in the MODS' do
|
42
|
+
m = "<mods #{ns_decl}><abstract/><note>notit</note></mods>"
|
43
|
+
sdb = sdb_for_mods(m)
|
44
|
+
expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
|
45
|
+
end
|
46
|
+
it 'summary_display should not be populated - it is a copy field' do
|
47
|
+
m = "<mods #{ns_decl}><abstract>blah blah</abstract></mods>"
|
48
|
+
sdb = sdb_for_mods(m)
|
49
|
+
expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
|
50
|
+
end
|
51
|
+
end # summary_search / <abstract>
|
52
|
+
|
53
|
+
it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
|
54
|
+
sdb = sdb_for_mods(mods_xml)
|
55
|
+
smr = sdb.smods_rec
|
56
|
+
expect(smr).to receive(:sw_language_facet)
|
57
|
+
sdb.doc_hash_from_mods
|
58
|
+
end
|
59
|
+
|
60
|
+
context 'physical solr field from <physicalDescription><extent>' do
|
61
|
+
it 'is populated when the MODS has mods/physicalDescription/extent element' do
|
62
|
+
m = "<mods #{ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
|
63
|
+
sdb = sdb_for_mods(m)
|
64
|
+
expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
|
65
|
+
end
|
66
|
+
it 'has a value for each extent element' do
|
67
|
+
m = "<mods #{ns_decl}>
|
68
|
+
<physicalDescription>
|
69
|
+
<extent>one</extent>
|
70
|
+
<extent>two</extent>
|
71
|
+
</physicalDescription>
|
72
|
+
<physicalDescription><extent>three</extent></physicalDescription>
|
73
|
+
</mods>"
|
74
|
+
sdb = sdb_for_mods(m)
|
75
|
+
expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
|
76
|
+
end
|
77
|
+
it 'does not be present when there is no top level <physicalDescription> element' do
|
78
|
+
m = "<mods #{ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
|
79
|
+
sdb = sdb_for_mods(m)
|
80
|
+
expect(sdb.doc_hash_from_mods[:physical]).to be_nil
|
81
|
+
end
|
82
|
+
it 'does not be present if there are only empty physicalDescription or extent elements in the MODS' do
|
83
|
+
m = "<mods #{ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
|
84
|
+
sdb = sdb_for_mods(m)
|
85
|
+
expect(sdb.doc_hash_from_mods[:physical]).to be_nil
|
86
|
+
end
|
87
|
+
end # physical field from physicalDescription/extent
|
88
|
+
|
89
|
+
context 'url_suppl solr field from /mods/relatedItem/location/url' do
|
90
|
+
it 'is populated when the MODS has mods/relatedItem/location/url' do
|
91
|
+
m = "<mods #{ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
|
92
|
+
sdb = sdb_for_mods(m)
|
93
|
+
expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
|
94
|
+
end
|
95
|
+
it 'has a value for each mods/relatedItem/location/url element' do
|
96
|
+
m = "<mods #{ns_decl}>
|
97
|
+
<relatedItem>
|
98
|
+
<location><url>one</url></location>
|
99
|
+
<location>
|
100
|
+
<url>two</url>
|
101
|
+
<url>three</url>
|
102
|
+
</location>
|
103
|
+
</relatedItem>
|
104
|
+
<relatedItem><location><url>four</url></location></relatedItem>
|
105
|
+
</mods>"
|
106
|
+
sdb = sdb_for_mods(m)
|
107
|
+
expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
|
108
|
+
end
|
109
|
+
it 'does not be populated from /mods/location/url element' do
|
110
|
+
m = "<mods #{ns_decl}><location><url>hi</url></location></mods>"
|
111
|
+
sdb = sdb_for_mods(m)
|
112
|
+
expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
|
113
|
+
end
|
114
|
+
it 'does not be present if there are only empty relatedItem/location/url elements in the MODS' do
|
115
|
+
m = "<mods #{ns_decl}>
|
116
|
+
<relatedItem><location><url/></location></relatedItem>
|
117
|
+
<relatedItem><location/></relatedItem>
|
118
|
+
<relatedItem/><note>notit</note></mods>"
|
119
|
+
sdb = sdb_for_mods(m)
|
120
|
+
expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
context 'toc_search solr field from <tableOfContents>' do
|
125
|
+
it 'has a value for each tableOfContents element' do
|
126
|
+
m = "<mods #{ns_decl}>
|
127
|
+
<tableOfContents>one</tableOfContents>
|
128
|
+
<tableOfContents>two</tableOfContents>
|
129
|
+
</mods>"
|
130
|
+
sdb = sdb_for_mods(m)
|
131
|
+
expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
|
132
|
+
end
|
133
|
+
it 'does not be present when there is no top level <tableOfContents> element' do
|
134
|
+
m = "<mods #{ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
|
135
|
+
sdb = sdb_for_mods(m)
|
136
|
+
expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
|
137
|
+
end
|
138
|
+
it 'does not be present if there are only empty tableOfContents elements in the MODS' do
|
139
|
+
m = "<mods #{ns_decl}><tableOfContents/><note>notit</note></mods>"
|
140
|
+
sdb = sdb_for_mods(m)
|
141
|
+
expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
context '#format_main_ssim' do
|
146
|
+
it 'doc_hash_from_mods calls #format_main_ssim' do
|
147
|
+
m = "<mods #{ns_decl}><note>nope</typeOfResource></mods>"
|
148
|
+
sdb = sdb_for_mods(m)
|
149
|
+
expect(sdb).to receive(:format_main_ssim)
|
150
|
+
sdb.doc_hash_from_mods[:format_main_ssim]
|
151
|
+
end
|
152
|
+
it '#format_main_ssim calls stanford-mods.format_main' do
|
153
|
+
m = "<mods #{ns_decl}><note>nope</typeOfResource></mods>"
|
154
|
+
sdb = sdb_for_mods(m)
|
155
|
+
expect(sdb.smods_rec).to receive(:format_main).and_return([])
|
156
|
+
sdb.send(:format_main_ssim)
|
157
|
+
end
|
158
|
+
it 'has a value when MODS data provides' do
|
159
|
+
m = "<mods #{ns_decl}><typeOfResource>still image</typeOfResouce></mods>"
|
160
|
+
sdb = sdb_for_mods(m)
|
161
|
+
expect(sdb.send(:format_main_ssim)).to match_array ['Image']
|
162
|
+
end
|
163
|
+
it 'returns empty Array and logs warning if there is no value' do
|
164
|
+
sdb = sdb_for_mods(mods_xml)
|
165
|
+
expect(sdb.logger).to receive(:warn).with("#{fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
|
166
|
+
expect(sdb.send(:format_main_ssim)).to eq([])
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
context 'title fields' do
|
171
|
+
let(:title_mods) do
|
172
|
+
"<mods #{ns_decl}>
|
173
|
+
<titleInfo>
|
174
|
+
<title>Jerk</title>
|
175
|
+
<nonSort>The</nonSort>
|
176
|
+
<subTitle>is whom?</subTitle>
|
177
|
+
</titleInfo>
|
178
|
+
<titleInfo>
|
179
|
+
<title>Joke</title>
|
180
|
+
</titleInfo>
|
181
|
+
<titleInfo type='alternative'>
|
182
|
+
<title>Alternative</title>
|
183
|
+
</titleInfo>
|
184
|
+
</mods>"
|
185
|
+
end
|
186
|
+
let(:sdb) { sdb_for_mods(title_mods) }
|
187
|
+
let(:title_doc_hash) { sdb.doc_hash_from_mods }
|
188
|
+
|
189
|
+
it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
|
190
|
+
smr = sdb.smods_rec
|
191
|
+
expect(smr).to receive(:sw_short_title).at_least(:once)
|
192
|
+
expect(smr).to receive(:sw_full_title).at_least(:once)
|
193
|
+
expect(smr).to receive(:sw_title_display)
|
194
|
+
expect(smr).to receive(:sw_addl_titles)
|
195
|
+
expect(smr).to receive(:sw_sort_title)
|
196
|
+
sdb.doc_hash_from_mods
|
197
|
+
end
|
198
|
+
context 'search fields' do
|
199
|
+
it 'title_245a_search' do
|
200
|
+
expect(title_doc_hash[:title_245a_search]).to eq('The Jerk')
|
201
|
+
end
|
202
|
+
it 'title_245_search' do
|
203
|
+
expect(title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
|
204
|
+
end
|
205
|
+
it 'title_variant_search' do
|
206
|
+
expect(title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
|
207
|
+
end
|
208
|
+
it 'title_related_search should not be populated from MODS' do
|
209
|
+
expect(title_doc_hash[:title_related_search]).to be_nil
|
210
|
+
end
|
211
|
+
end
|
212
|
+
context 'display fields' do
|
213
|
+
it 'title_display' do
|
214
|
+
expect(title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
|
215
|
+
end
|
216
|
+
it 'title_245a_display' do
|
217
|
+
expect(title_doc_hash[:title_245a_display]).to eq('The Jerk')
|
218
|
+
end
|
219
|
+
it 'title_245c_display should not be populated from MODS' do
|
220
|
+
expect(title_doc_hash[:title_245c_display]).to be_nil
|
221
|
+
end
|
222
|
+
it 'title_full_display' do
|
223
|
+
expect(title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
|
224
|
+
end
|
225
|
+
it 'removes trailing commas in title_display' do
|
226
|
+
title_mods = "<mods #{ns_decl}>
|
227
|
+
<titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
|
228
|
+
<titleInfo><title>Joke</title></titleInfo>
|
229
|
+
<titleInfo type='alternative'><title>Alternative</title></titleInfo>
|
230
|
+
</mods>"
|
231
|
+
sdb = sdb_for_mods(title_mods)
|
232
|
+
title_doc_hash = sdb.doc_hash_from_mods
|
233
|
+
expect(title_doc_hash[:title_display]).to eq('The Jerk : is whom')
|
234
|
+
end
|
235
|
+
it 'title_variant_display should not be populated - it is a copy field' do
|
236
|
+
expect(title_doc_hash[:title_variant_display]).to be_nil
|
237
|
+
end
|
238
|
+
end
|
239
|
+
it 'title_sort' do
|
240
|
+
expect(title_doc_hash[:title_sort]).to eq('Jerk is whom')
|
241
|
+
end
|
242
|
+
end # title fields
|
243
|
+
|
244
|
+
context 'author fields' do
|
245
|
+
let(:name_mods) do
|
246
|
+
"<mods #{ns_decl}>
|
247
|
+
<name type='personal'>
|
248
|
+
<namePart type='given'>John</namePart>
|
249
|
+
<namePart type='family'>Huston</namePart>
|
250
|
+
<role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
|
251
|
+
<displayForm>q</displayForm>
|
252
|
+
</name>
|
253
|
+
<name type='personal'><namePart>Crusty The Clown</namePart></name>
|
254
|
+
<name type='corporate'><namePart>Watchful Eye</namePart></name>
|
255
|
+
<name type='corporate'>
|
256
|
+
<namePart>Exciting Prints</namePart>
|
257
|
+
<role><roleTerm type='text'>lithographer</roleTerm></role>
|
258
|
+
</name>
|
259
|
+
<name type='conference'><namePart>conference</namePart></name>
|
260
|
+
</mods>"
|
261
|
+
end
|
262
|
+
let(:sdb) { sdb_for_mods(name_mods) }
|
263
|
+
let(:author_doc_hash) { sdb.doc_hash_from_mods }
|
264
|
+
|
265
|
+
it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
|
266
|
+
smr = sdb.smods_rec
|
267
|
+
expect(smr).to receive(:sw_main_author)
|
268
|
+
expect(smr).to receive(:sw_addl_authors)
|
269
|
+
expect(smr).to receive(:sw_person_authors).exactly(3).times
|
270
|
+
expect(smr).to receive(:sw_impersonal_authors)
|
271
|
+
expect(smr).to receive(:sw_corporate_authors)
|
272
|
+
expect(smr).to receive(:sw_meeting_authors)
|
273
|
+
expect(smr).to receive(:sw_sort_author)
|
274
|
+
sdb.doc_hash_from_mods
|
275
|
+
end
|
276
|
+
context 'search fields' do
|
277
|
+
it 'author_1xx_search' do
|
278
|
+
expect(author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
|
279
|
+
end
|
280
|
+
it 'author_7xx_search' do
|
281
|
+
skip 'Should this return all authors? or only 7xx authors?'
|
282
|
+
expect(author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
|
283
|
+
end
|
284
|
+
it 'author_8xx_search should not be populated from MODS' do
|
285
|
+
expect(author_doc_hash[:author_8xx_search]).to be_nil
|
286
|
+
end
|
287
|
+
end
|
288
|
+
context 'facet fields' do
|
289
|
+
it 'author_person_facet' do
|
290
|
+
expect(author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
|
291
|
+
end
|
292
|
+
it 'author_other_facet' do
|
293
|
+
expect(author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
|
294
|
+
end
|
295
|
+
end
|
296
|
+
context 'display fields' do
|
297
|
+
it 'author_person_display' do
|
298
|
+
expect(author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
|
299
|
+
end
|
300
|
+
it 'author_person_full_display' do
|
301
|
+
expect(author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
|
302
|
+
end
|
303
|
+
it 'author_corp_display' do
|
304
|
+
expect(author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
|
305
|
+
end
|
306
|
+
it 'author_meeting_display' do
|
307
|
+
expect(author_doc_hash[:author_meeting_display]).to match_array ['conference']
|
308
|
+
end
|
309
|
+
end
|
310
|
+
it 'author_sort' do
|
311
|
+
expect(author_doc_hash[:author_sort]).to eq('Crusty The Clown')
|
312
|
+
end
|
313
|
+
end # author fields
|
314
|
+
|
315
|
+
# subject fields moved to mods_subject_fields_spec.rb
|
316
|
+
|
317
|
+
# publication fields moved to mods_pub_fields_spec.rb
|
318
|
+
end
|