spotlight-dor-resources 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +17 -2
  3. data/.rubocop_todo.yml +1 -186
  4. data/app/models/spotlight/resources/dor_resource.rb +1 -0
  5. data/app/models/spotlight/resources/purl.rb +4 -4
  6. data/app/models/spotlight/resources/searchworks.rb +4 -5
  7. data/lib/spotlight/dor/indexer.rb +187 -125
  8. data/lib/spotlight/dor/resources.rb +19 -7
  9. data/lib/spotlight/dor/resources/engine.rb +3 -2
  10. data/lib/spotlight/dor/resources/version.rb +2 -1
  11. data/spec/integration/gdor_integration_spec.rb +9 -9
  12. data/spec/{unit → lib}/spotlight/dor/indexer_spec.rb +209 -10
  13. data/spec/models/spotlight/resources/purl_spec.rb +45 -39
  14. data/spec/models/spotlight/resources/searchworks_spec.rb +47 -44
  15. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_a_doc_id.yml → has_a_doc_id.yml} +60 -40
  16. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_spotlight_data.yml → has_exhibit-specific_indexing.yml} +48 -32
  17. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_the_gdor_data.yml → has_spotlight_data.yml} +48 -32
  18. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_exhibit-specific_indexing.yml → has_the_gdor_data.yml} +48 -32
  19. data/spotlight-dor-resources.gemspec +22 -22
  20. metadata +11 -21
  21. data/spec/integration/indexer_integration_spec.rb +0 -28
  22. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +0 -1382
  23. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +0 -1602
  24. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +0 -6822
  25. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +0 -1390
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5b035c7c9d1aa81f28332f8304294beb43a960ab
4
- data.tar.gz: 1658a13bf5ff55aa7f0d67cf7770c89e17648f2d
3
+ metadata.gz: 8b1e1f0ddb963f81f8fd302a284ef53592146ef9
4
+ data.tar.gz: 03687ee4c19706fa434993ce33ad59beac3b522a
5
5
  SHA512:
6
- metadata.gz: fa6d70d3f6530b159e9bec06a3c8b2219bb836a5a3608d291c047ae7d405d653ccf02d0f4b028523a0a942848d1b9cd2070fe42cd28e786496a8059c670c28df
7
- data.tar.gz: a99351ec2d515332d76582c6b178be12021834c499d3725225b2cf2727871dd794c800a913dd87a40091cf807b76145489797cddf00ad04b1f7f97209c04ec03
6
+ metadata.gz: 01970798dc91a7dd7b3f451244e71573d6c1129eb2c59a7f7758b3fdfb4d7d70c3d5bf8e3d99d1a7300b404dbc7142869898b1792729f8ab1099beb1f7113cd1
7
+ data.tar.gz: 121941bdd33d142d20f27bcacd25d112327324d5e1f84e0f190896bb9fa0c932562cdaeea2a29dae846d84b940085e3bcdd3b92a4f832854e74c24f46269621d
@@ -4,9 +4,24 @@ require: rubocop-rspec
4
4
 
5
5
  AllCops:
6
6
  Exclude:
7
+ - 'Gemfile'
8
+ - 'bin/**/*'
9
+ - 'db/**/*'
10
+ - 'config/**/*'
7
11
  - 'spec/internal/**/*'
8
12
  - 'spec/test_app_templates/**/*'
13
+ - 'spec/spec_helper.rb'
14
+ - 'spec/teaspoon_env.rb'
15
+ - 'vendor/**/*'
16
+ RunRailsCops: true
9
17
 
10
- Metrics/ClassLength:
18
+ Metrics/LineLength:
19
+ Max: 130
20
+
21
+ Style/StringLiterals:
22
+ Enabled: true
23
+ EnforcedStyle: single_quotes
24
+
25
+ RSpec/DescribeClass:
11
26
  Exclude:
12
- - 'lib/spotlight/dor/indexer.rb'
27
+ - 'spec/integration/*'
@@ -1,188 +1,3 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2015-10-29 09:20:45 -0700 using RuboCop version 0.34.2.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
1
 
9
-
10
- # Offense count: 1
11
- # Configuration parameters: CountComments.
12
- Metrics/ClassLength:
13
- Max: 102
14
-
15
- # Offense count: 84
16
- # Configuration parameters: AllowURI, URISchemes.
17
- Metrics/LineLength:
18
- Max: 171
19
-
20
- # Offense count: 1
21
- # Configuration parameters: CountComments.
22
- Metrics/MethodLength:
23
- Max: 13
24
-
25
- # Offense count: 1
26
- # Cop supports --auto-correct.
27
- Performance/StringReplacement:
28
- Exclude:
29
- - 'lib/spotlight/dor/indexer.rb'
30
-
31
- # Offense count: 2
32
- RSpec/DescribeClass:
33
- Exclude:
34
- - 'spec/integration/gdor_integration_spec.rb'
35
- - 'spec/integration/indexer_integration_spec.rb'
36
-
37
- # Offense count: 4
38
- RSpec/DescribedClass:
39
- Exclude:
40
- - 'spec/models/spotlight/resources/purl_spec.rb'
41
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
42
-
43
- # Offense count: 26
44
- # Configuration parameters: CustomTransform, IgnoredWords.
45
- RSpec/ExampleWording:
46
- Exclude:
47
- - 'spec/integration/gdor_integration_spec.rb'
48
- - 'spec/models/spotlight/resources/purl_spec.rb'
49
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
50
-
51
- # Offense count: 4
52
- # Cop supports --auto-correct.
53
- # Configuration parameters: EnforcedStyle, SupportedStyles.
54
- Style/AndOr:
55
- Exclude:
56
- - 'lib/spotlight/dor/indexer.rb'
57
- - 'spec/spec_helper.rb'
58
-
59
- # Offense count: 11
60
- # Cop supports --auto-correct.
61
- # Configuration parameters: EnforcedStyle, SupportedStyles.
62
- Style/BracesAroundHashParameters:
63
- Exclude:
64
- - 'spec/models/spotlight/resources/purl_spec.rb'
65
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
66
-
67
- # Offense count: 6
68
- # Configuration parameters: EnforcedStyle, SupportedStyles.
69
2
  Style/ClassAndModuleChildren:
70
- Exclude:
71
- - 'app/models/spotlight/resources/dor_resource.rb'
72
- - 'app/models/spotlight/resources/harvestdor.rb'
73
- - 'app/models/spotlight/resources/purl.rb'
74
- - 'app/models/spotlight/resources/searchworks.rb'
75
- - 'lib/spotlight/dor/indexer.rb'
76
- - 'lib/spotlight/dor/resources/engine.rb'
77
-
78
- # Offense count: 7
79
- # Configuration parameters: Exclude.
80
- Style/Documentation:
81
- Exclude:
82
- - 'app/models/spotlight/resources/dor_resource.rb'
83
- - 'app/models/spotlight/resources/purl.rb'
84
- - 'app/models/spotlight/resources/searchworks.rb'
85
- - 'lib/spotlight/dor/indexer.rb'
86
- - 'lib/spotlight/dor/resources.rb'
87
- - 'lib/spotlight/dor/resources/engine.rb'
88
- - 'lib/spotlight/dor/resources/version.rb'
89
-
90
- # Offense count: 2
91
- Style/DoubleNegation:
92
- Exclude:
93
- - 'app/models/spotlight/resources/purl.rb'
94
- - 'app/models/spotlight/resources/searchworks.rb'
95
-
96
- # Offense count: 1
97
- # Cop supports --auto-correct.
98
- Style/EmptyLines:
99
- Exclude:
100
- - 'spec/spec_helper.rb'
101
-
102
- # Offense count: 4
103
- # Cop supports --auto-correct.
104
- # Configuration parameters: EnforcedStyle, SupportedStyles.
105
- Style/EmptyLinesAroundClassBody:
106
- Exclude:
107
- - 'app/models/spotlight/resources/purl.rb'
108
- - 'app/models/spotlight/resources/searchworks.rb'
109
- - 'lib/spotlight/dor/resources/engine.rb'
110
-
111
- # Offense count: 1
112
- # Cop supports --auto-correct.
113
- # Configuration parameters: EnforcedStyle, SupportedStyles.
114
- Style/EmptyLinesAroundModuleBody:
115
- Exclude:
116
- - 'lib/spotlight/dor/resources.rb'
117
-
118
- # Offense count: 9
119
- # Cop supports --auto-correct.
120
- # Configuration parameters: EnforcedStyle, SupportedStyles.
121
- Style/MethodDefParentheses:
122
- Enabled: false
123
-
124
- # Offense count: 4
125
- # Cop supports --auto-correct.
126
- # Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
127
- Style/RegexpLiteral:
128
- Exclude:
129
- - 'app/models/spotlight/resources/purl.rb'
130
- - 'app/models/spotlight/resources/searchworks.rb'
131
-
132
- # Offense count: 2
133
- # Cop supports --auto-correct.
134
- Style/SpaceAfterComma:
135
- Exclude:
136
- - 'lib/spotlight/dor/indexer.rb'
137
-
138
- # Offense count: 2
139
- # Cop supports --auto-correct.
140
- # Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
141
- Style/SpaceInsideBlockBraces:
142
- Enabled: false
143
-
144
- # Offense count: 31
145
- # Cop supports --auto-correct.
146
- # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
147
- Style/SpaceInsideHashLiteralBraces:
148
- Enabled: false
149
-
150
- # Offense count: 122
151
- # Cop supports --auto-correct.
152
- # Configuration parameters: EnforcedStyle, SupportedStyles.
153
- Style/StringLiterals:
154
- Enabled: false
155
-
156
- # Offense count: 1
157
- # Cop supports --auto-correct.
158
- # Configuration parameters: IgnoredMethods.
159
- Style/SymbolProc:
160
- Exclude:
161
- - 'lib/spotlight/dor/indexer.rb'
162
-
163
- # Offense count: 2
164
- # Cop supports --auto-correct.
165
- # Configuration parameters: EnforcedStyle, SupportedStyles.
166
- Style/TrailingBlankLines:
167
- Exclude:
168
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
169
- - 'spec/spec_helper.rb'
170
-
171
- # Offense count: 12
172
- # Cop supports --auto-correct.
173
- Style/TrailingWhitespace:
174
- Exclude:
175
- - 'lib/spotlight/dor/resources.rb'
176
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
177
-
178
- # Offense count: 1
179
- # Cop supports --auto-correct.
180
- # Configuration parameters: ExactNameMatch, AllowPredicates, AllowDSLWriters, IgnoreClassMethods, Whitelist.
181
- Style/TrivialAccessors:
182
- Exclude:
183
- - 'lib/spotlight/dor/indexer.rb'
184
-
185
- # Offense count: 4
186
- # Configuration parameters: EnforcedStyle, SupportedStyles.
187
- Style/VariableName:
188
- Enabled: false
3
+ Enabled: false
@@ -1,4 +1,5 @@
1
1
  module Spotlight::Resources
2
+ # Base Resource indexer for objects in DOR
2
3
  class DorResource < Spotlight::Resource
3
4
  ##
4
5
  # Generate solr documents for the DOR resources identified by this object
@@ -1,14 +1,14 @@
1
1
  module Spotlight::Resources
2
+ # Resource provider for PURL pages
2
3
  class Purl < Spotlight::Resources::DorResource
3
4
  self.weight = -1000
4
5
 
5
- def self.can_provide? res
6
- !!(res.url =~ /^https?:\/\/purl.stanford.edu/)
6
+ def self.can_provide?(res)
7
+ res.url.match(%r{^https?://purl.stanford.edu/}).present?
7
8
  end
8
9
 
9
10
  def doc_id
10
- url.match(/^https?:\/\/purl.stanford.edu\/([^#\/\.]+)/)[1]
11
+ url.match(%r{^https?://purl.stanford.edu/([^#/\.]+)})[1]
11
12
  end
12
-
13
13
  end
14
14
  end
@@ -1,15 +1,14 @@
1
1
  module Spotlight::Resources
2
+ # Resource provider for searchworks pages
2
3
  class Searchworks < Spotlight::Resources::DorResource
3
-
4
4
  self.weight = -1000
5
5
 
6
- def self.can_provide? res
7
- !!(res.url =~ /^https?:\/\/searchworks[^\.]*.stanford.edu/)
6
+ def self.can_provide?(res)
7
+ res.url.match(%r{^https?://searchworks[^\.]*.stanford.edu/}).present?
8
8
  end
9
9
 
10
10
  def doc_id
11
- url.match(/^https?:\/\/searchworks[^\.]*.stanford.edu\/.*view\/([^\/\.#]+)/)[1]
11
+ url.match(%r{^https?://searchworks[^\.]*.stanford.edu/.*view/([^/\.#]+)})[1]
12
12
  end
13
-
14
13
  end
15
14
  end
@@ -1,171 +1,233 @@
1
+ # rubocop:disable Metrics/ClassLength
1
2
  # external gems
2
3
  require 'gdor/indexer'
3
4
  require 'solrizer'
4
- # Base class to harvest from DOR via harvestdor gem
5
+
5
6
  module Spotlight::Dor
7
+ # Base class to harvest from DOR via harvestdor gem
6
8
  class Indexer < GDor::Indexer
7
- # add contentMetadata fields
8
- before_index do |sdb, solr_doc|
9
- Solrizer.insert_field(solr_doc, 'content_metadata_type', sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text, :symbol, :displayable)
10
-
11
- sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]').each do |node|
12
- file_id = node.attr('id').gsub(".jp2", '')
13
-
14
- if node.attr('id') =~ /jp2$/ and !solr_doc[Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)]
15
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
16
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', node.xpath('./imageData/@width').text, :displayable)
17
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', node.xpath('./imageData/@height').text, :displayable)
18
- end
19
-
20
- Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
21
- Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
22
- Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
23
- Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
24
- Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
25
- end
26
- end
27
-
28
9
  # tweak author_sort field from stanford-mods
29
10
  before_index do |_sdb, solr_doc|
30
- solr_doc[:author_sort] &&= solr_doc[:author_sort].gsub("\uFFFF", "\uFFFD")
11
+ solr_doc[:author_sort] &&= solr_doc[:author_sort].tr("\uFFFF", "\uFFFD")
31
12
  end
32
13
 
33
14
  # add fields from raw mods
34
- before_index :add_box
35
- # see comment with add_donor_tags about Feigenbaum specific donor tags data
36
- before_index :add_donor_tags
37
15
  before_index :add_genre
38
- before_index :add_folder
39
- before_index :add_folder_name
40
- before_index :add_series
41
- before_index :mods_cartographics_indexing
42
16
 
43
- def solr_client
44
- @solr_client
45
- end
46
-
47
- def solr_document resource
17
+ def solr_document(resource)
48
18
  doc_hash = super
49
19
  run_hook :before_index, resource, doc_hash
50
20
  doc_hash
51
21
  end
52
22
 
53
- def resource druid
23
+ def resource(druid)
54
24
  Harvestdor::Indexer::Resource.new harvestdor, druid
55
25
  end
56
26
 
57
27
  private
58
28
 
59
- # add the box number to solr_doc as box_ssi field (note: single valued!)
60
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
61
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
62
- def add_box(sdb, solr_doc)
63
- # see spec for data from actual collections
64
- # _location.physicalLocation should find top level and relatedItem
65
- box_num = sdb.smods_rec._location.physicalLocation.map do |node|
66
- val = node.text
67
- # note that this will also find Flatbox or Flat-box
68
- match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
69
- match_data[1].strip if match_data.present?
70
- end.compact
71
-
72
- solr_doc['box_ssi'] = box_num.first
29
+ # add plain MODS <genre> element data, not the SearchWorks genre values
30
+ def add_genre(sdb, solr_doc)
31
+ insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
73
32
  end
74
33
 
75
- # This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
76
- # it will go ununsed by other projects, but should be benign (since this field will not be created if this specific MODs note is not found.)
77
- # Later refactoring could include exhibit specific fields. Peter Mangiafico
78
- def add_donor_tags sdb, solr_doc
79
- donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
80
- insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
81
- end
34
+ concerning :PhysicalLocation do
35
+ included do
36
+ before_index :add_box
37
+ before_index :add_folder
38
+ before_index :add_location
39
+ before_index :add_series
40
+ end
82
41
 
83
- # add the folder number to solr_doc as folder_ssi field (note: single valued!)
84
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
85
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
86
- def add_folder(sdb, solr_doc)
87
- # see spec for data from actual collections
88
- # _location.physicalLocation should find top level and relatedItem
89
- folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
90
- val = node.text
91
-
92
- match_data = if val =~ /\|/
93
- # we assume the data is pipe-delimited, and may contain commas within values
94
- val.match(/Folder ?:? ?([^|]+)/)
95
- else
96
- # the data should be comma-delimited, and may not contain commas within values
97
- val.match(/Folder ?:? ?([^,]+)/)
98
- end
99
-
100
- match_data[1].strip if match_data.present?
101
- end.compact
102
-
103
- solr_doc['folder_ssi'] = folder_num.first
104
- end
42
+ # add the box number to solr_doc as box_ssi field (note: single valued!)
43
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
44
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
45
+ def add_box(sdb, solr_doc)
46
+ # see spec for data from actual collections
47
+ # _location.physicalLocation should find top level and relatedItem
48
+ box_num = sdb.smods_rec._location.physicalLocation.map do |node|
49
+ val = node.text
50
+ # note that this will also find Flatbox or Flat-box
51
+ match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
52
+ match_data[1].strip if match_data.present?
53
+ end.compact
54
+
55
+ solr_doc['box_ssi'] = box_num.first
56
+ end
57
+
58
+ # add the folder number to solr_doc as folder_ssi field (note: single valued!)
59
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
60
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
61
+ def add_folder(sdb, solr_doc)
62
+ # see spec for data from actual collections
63
+ # _location.physicalLocation should find top level and relatedItem
64
+ folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
65
+ val = node.text
66
+
67
+ match_data = if val =~ /\|/
68
+ # we assume the data is pipe-delimited, and may contain commas within values
69
+ val.match(/Folder ?:? ?([^|]+)/)
70
+ else
71
+ # the data should be comma-delimited, and may not contain commas within values
72
+ val.match(/Folder ?:? ?([^,]+)/)
73
+ end
74
+
75
+ match_data[1].strip if match_data.present?
76
+ end.compact
77
+
78
+ solr_doc['folder_ssi'] = folder_num.first
79
+ end
105
80
 
106
- # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
107
- # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
108
- def add_folder_name(sdb, solr_doc)
109
- # see spec for data examples
110
- preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
111
- match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
112
- solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
81
+ # add the physicalLocation as location_ssi field (note: single valued!)
82
+ # but only if it has series, box or folder data
83
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
84
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
85
+ def add_location(sdb, solr_doc)
86
+ # see spec for data from actual collections
87
+ # _location.physicalLocation should find top level and relatedItem
88
+ loc = sdb.smods_rec._location.physicalLocation.map do |node|
89
+ node.text if node.text.match(/.*(Series)|(Accession)|(Folder)|(Box).*/i)
90
+ end.compact
91
+
92
+ solr_doc['location_ssi'] = loc.first
93
+ end
94
+
95
+ # add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
96
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
97
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
98
+ def add_series(sdb, solr_doc)
99
+ # see spec for data from actual collections
100
+ # _location.physicalLocation should find top level and relatedItem
101
+ series_num = sdb.smods_rec._location.physicalLocation.map do |node|
102
+ val = node.text
103
+ # feigenbaum uses 'Accession'
104
+ match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
105
+ match_data[1].strip if match_data.present?
106
+ end.compact
107
+
108
+ solr_doc['series_ssi'] = series_num.first
109
+ end
113
110
  end
114
111
 
115
- # add plain MODS <genre> element data, not the SearchWorks genre values
116
- def add_genre sdb, solr_doc
117
- insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
112
+ concerning :ContentMetadata do
113
+ included do
114
+ before_index :add_content_metadata_fields
115
+ end
116
+
117
+ def add_content_metadata_fields(sdb, solr_doc)
118
+ content_metadata = sdb.public_xml.at_xpath('/publicObject/contentMetadata')
119
+ return unless content_metadata.present?
120
+
121
+ Solrizer.insert_field(solr_doc, 'content_metadata_type', content_metadata['type'], :symbol, :displayable)
122
+
123
+ images = content_metadata.xpath('resource/file[@mimetype="image/jp2"]').select { |node| node.attr('id') =~ /jp2$/ }
124
+
125
+ add_thumbnail_fields(images.first, solr_doc) if images.first
126
+
127
+ images.each do |image|
128
+ add_image_fields(image, solr_doc)
129
+ end
130
+ end
131
+
132
+ private
133
+
134
+ def add_thumbnail_fields(node, solr_doc)
135
+ file_id = node.attr('id').gsub('.jp2', '')
136
+ image_data = node.at_xpath('./imageData')
137
+
138
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
139
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', image_data['width'], :displayable)
140
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', image_data['height'], :displayable)
141
+ end
142
+
143
+ def add_image_fields(node, solr_doc)
144
+ file_id = node.attr('id').gsub('.jp2', '')
145
+ base_url = stacks_iiif_url(solr_doc[:id], file_id)
146
+
147
+ Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "#{base_url}/info.json", :displayable)
148
+ Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "#{base_url}/square/100,100/0/default.jpg", :displayable)
149
+ Solrizer.insert_field(solr_doc, 'thumbnail_url', "#{base_url}/full/!400,400/0/default.jpg", :displayable)
150
+ Solrizer.insert_field(solr_doc, 'large_image_url', "#{base_url}/full/pct:25/0/default.jpg", :displayable)
151
+ Solrizer.insert_field(solr_doc, 'full_image_url', "#{base_url}/full/full/0/default.jpg", :displayable)
152
+ end
153
+
154
+ def stacks_iiif_url(druid, file_name)
155
+ "#{Spotlight::Dor::Resources::Engine.config.stacks_iiif_url}/#{druid}%2F#{file_name}"
156
+ end
118
157
  end
119
158
 
120
- # add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
121
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
122
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
123
- def add_series(sdb, solr_doc)
124
- # see spec for data from actual collections
125
- # _location.physicalLocation should find top level and relatedItem
126
- series_num = sdb.smods_rec._location.physicalLocation.map do |node|
127
- val = node.text
128
- # feigenbaum uses 'Accession'
129
- match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
130
- match_data[1].strip if match_data.present?
131
- end.compact
132
-
133
- solr_doc['series_ssi'] = series_num.first
159
+ concerning :FeigenbaumSpecificFields do
160
+ # These fields were specifically for the Feigenbaum exhibit. It is very
161
+ # likely they will go unused by other projects, but should be benign (since these fields will not be created if
162
+ # this specific MODS note is not found). Future work could refactor this to
163
+ # only create these fields on an as-needed basis.
164
+
165
+ included do
166
+ before_index :add_donor_tags
167
+ before_index :add_folder_name
168
+ end
169
+
170
+ def add_donor_tags(sdb, solr_doc)
171
+ donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
172
+ insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
173
+ end
174
+
175
+ # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
176
+ # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
177
+ def add_folder_name(sdb, solr_doc)
178
+ # see spec for data examples
179
+ preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
180
+ match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
181
+ solr_doc['folder_name_ssi'] = match_data[1].strip if match_data.present?
182
+ end
134
183
  end
135
184
 
136
- # rubocop:disable Metrics/AbcSize
137
- def mods_cartographics_indexing sdb, solr_doc
138
- insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
185
+ concerning :CartographicIndexing do
186
+ included do
187
+ before_index :mods_cartographics_indexing
188
+ end
139
189
 
140
- Array(sdb.smods_rec.subject.cartographics.coordinates).map do |n|
141
- next unless n.text =~ /^\(/ and n.text =~ /\)$/
190
+ def mods_cartographics_indexing(sdb, solr_doc)
191
+ coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
142
192
 
143
- bbox = n.text.gsub(/[\(\)]/, '')
193
+ insert_field(solr_doc, 'coordinates', coordinates.map(&:text), :stored_searchable)
144
194
 
145
- lng, lat = bbox.split('/')
195
+ solr_doc['point_bbox'] ||= []
196
+ solr_doc['point_bbox'] += coords_to_bboxes(coordinates)
197
+ end
198
+
199
+ private
146
200
 
147
- minX,maxX = lng.split('--').map { |x| coord_to_decimal(x) }
148
- maxY,minY = lat.split('--').map { |x| coord_to_decimal(x) }
201
+ def coords_to_bboxes(coordinates)
202
+ coordinates.select { |n| n.text =~ /^\(.*\)$/ }.map do |n|
203
+ coord_to_bbox(n.text)
204
+ end
205
+ end
149
206
 
150
- solr_doc["point_bbox"] ||= []
151
- solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
207
+ def coord_to_bbox(coord)
208
+ bbox = coord.delete('(').delete(')')
209
+
210
+ lng, lat = bbox.split('/')
211
+
212
+ min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
213
+ max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
214
+ "#{min_x} #{min_y} #{max_x} #{max_y}"
152
215
  end
153
- end
154
- # rubocop:enable Metrics/AbcSize
155
216
 
156
- def coord_to_decimal point
157
- regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
158
- match = regex.match(point)
159
- dec = 0
217
+ def coord_to_decimal(point)
218
+ regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
219
+ match = regex.match(point)
220
+ dec = 0
160
221
 
161
- dec += match['deg'].to_i
162
- dec += match['sec'].to_f / 60
163
- dec = -1 * dec if match['dir'] == "W" or match['dir'] == "S"
222
+ dec += match['deg'].to_i
223
+ dec += match['sec'].to_f / 60
224
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
164
225
 
165
- dec
226
+ dec
227
+ end
166
228
  end
167
229
 
168
- def insert_field solr_doc, field, values, *args
230
+ def insert_field(solr_doc, field, values, *args)
169
231
  Array(values).each do |v|
170
232
  Solrizer.insert_field solr_doc, field, v, *args
171
233
  end