spotlight-dor-resources 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +17 -2
  3. data/.rubocop_todo.yml +1 -186
  4. data/app/models/spotlight/resources/dor_resource.rb +1 -0
  5. data/app/models/spotlight/resources/purl.rb +4 -4
  6. data/app/models/spotlight/resources/searchworks.rb +4 -5
  7. data/lib/spotlight/dor/indexer.rb +187 -125
  8. data/lib/spotlight/dor/resources.rb +19 -7
  9. data/lib/spotlight/dor/resources/engine.rb +3 -2
  10. data/lib/spotlight/dor/resources/version.rb +2 -1
  11. data/spec/integration/gdor_integration_spec.rb +9 -9
  12. data/spec/{unit → lib}/spotlight/dor/indexer_spec.rb +209 -10
  13. data/spec/models/spotlight/resources/purl_spec.rb +45 -39
  14. data/spec/models/spotlight/resources/searchworks_spec.rb +47 -44
  15. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_a_doc_id.yml → has_a_doc_id.yml} +60 -40
  16. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_spotlight_data.yml → has_exhibit-specific_indexing.yml} +48 -32
  17. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_the_gdor_data.yml → has_spotlight_data.yml} +48 -32
  18. data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_exhibit-specific_indexing.yml → has_the_gdor_data.yml} +48 -32
  19. data/spotlight-dor-resources.gemspec +22 -22
  20. metadata +11 -21
  21. data/spec/integration/indexer_integration_spec.rb +0 -28
  22. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +0 -1382
  23. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +0 -1602
  24. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +0 -6822
  25. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +0 -1390
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5b035c7c9d1aa81f28332f8304294beb43a960ab
4
- data.tar.gz: 1658a13bf5ff55aa7f0d67cf7770c89e17648f2d
3
+ metadata.gz: 8b1e1f0ddb963f81f8fd302a284ef53592146ef9
4
+ data.tar.gz: 03687ee4c19706fa434993ce33ad59beac3b522a
5
5
  SHA512:
6
- metadata.gz: fa6d70d3f6530b159e9bec06a3c8b2219bb836a5a3608d291c047ae7d405d653ccf02d0f4b028523a0a942848d1b9cd2070fe42cd28e786496a8059c670c28df
7
- data.tar.gz: a99351ec2d515332d76582c6b178be12021834c499d3725225b2cf2727871dd794c800a913dd87a40091cf807b76145489797cddf00ad04b1f7f97209c04ec03
6
+ metadata.gz: 01970798dc91a7dd7b3f451244e71573d6c1129eb2c59a7f7758b3fdfb4d7d70c3d5bf8e3d99d1a7300b404dbc7142869898b1792729f8ab1099beb1f7113cd1
7
+ data.tar.gz: 121941bdd33d142d20f27bcacd25d112327324d5e1f84e0f190896bb9fa0c932562cdaeea2a29dae846d84b940085e3bcdd3b92a4f832854e74c24f46269621d
data/.rubocop.yml CHANGED
@@ -4,9 +4,24 @@ require: rubocop-rspec
4
4
 
5
5
  AllCops:
6
6
  Exclude:
7
+ - 'Gemfile'
8
+ - 'bin/**/*'
9
+ - 'db/**/*'
10
+ - 'config/**/*'
7
11
  - 'spec/internal/**/*'
8
12
  - 'spec/test_app_templates/**/*'
13
+ - 'spec/spec_helper.rb'
14
+ - 'spec/teaspoon_env.rb'
15
+ - 'vendor/**/*'
16
+ RunRailsCops: true
9
17
 
10
- Metrics/ClassLength:
18
+ Metrics/LineLength:
19
+ Max: 130
20
+
21
+ Style/StringLiterals:
22
+ Enabled: true
23
+ EnforcedStyle: single_quotes
24
+
25
+ RSpec/DescribeClass:
11
26
  Exclude:
12
- - 'lib/spotlight/dor/indexer.rb'
27
+ - 'spec/integration/*'
data/.rubocop_todo.yml CHANGED
@@ -1,188 +1,3 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2015-10-29 09:20:45 -0700 using RuboCop version 0.34.2.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
1
 
9
-
10
- # Offense count: 1
11
- # Configuration parameters: CountComments.
12
- Metrics/ClassLength:
13
- Max: 102
14
-
15
- # Offense count: 84
16
- # Configuration parameters: AllowURI, URISchemes.
17
- Metrics/LineLength:
18
- Max: 171
19
-
20
- # Offense count: 1
21
- # Configuration parameters: CountComments.
22
- Metrics/MethodLength:
23
- Max: 13
24
-
25
- # Offense count: 1
26
- # Cop supports --auto-correct.
27
- Performance/StringReplacement:
28
- Exclude:
29
- - 'lib/spotlight/dor/indexer.rb'
30
-
31
- # Offense count: 2
32
- RSpec/DescribeClass:
33
- Exclude:
34
- - 'spec/integration/gdor_integration_spec.rb'
35
- - 'spec/integration/indexer_integration_spec.rb'
36
-
37
- # Offense count: 4
38
- RSpec/DescribedClass:
39
- Exclude:
40
- - 'spec/models/spotlight/resources/purl_spec.rb'
41
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
42
-
43
- # Offense count: 26
44
- # Configuration parameters: CustomTransform, IgnoredWords.
45
- RSpec/ExampleWording:
46
- Exclude:
47
- - 'spec/integration/gdor_integration_spec.rb'
48
- - 'spec/models/spotlight/resources/purl_spec.rb'
49
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
50
-
51
- # Offense count: 4
52
- # Cop supports --auto-correct.
53
- # Configuration parameters: EnforcedStyle, SupportedStyles.
54
- Style/AndOr:
55
- Exclude:
56
- - 'lib/spotlight/dor/indexer.rb'
57
- - 'spec/spec_helper.rb'
58
-
59
- # Offense count: 11
60
- # Cop supports --auto-correct.
61
- # Configuration parameters: EnforcedStyle, SupportedStyles.
62
- Style/BracesAroundHashParameters:
63
- Exclude:
64
- - 'spec/models/spotlight/resources/purl_spec.rb'
65
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
66
-
67
- # Offense count: 6
68
- # Configuration parameters: EnforcedStyle, SupportedStyles.
69
2
  Style/ClassAndModuleChildren:
70
- Exclude:
71
- - 'app/models/spotlight/resources/dor_resource.rb'
72
- - 'app/models/spotlight/resources/harvestdor.rb'
73
- - 'app/models/spotlight/resources/purl.rb'
74
- - 'app/models/spotlight/resources/searchworks.rb'
75
- - 'lib/spotlight/dor/indexer.rb'
76
- - 'lib/spotlight/dor/resources/engine.rb'
77
-
78
- # Offense count: 7
79
- # Configuration parameters: Exclude.
80
- Style/Documentation:
81
- Exclude:
82
- - 'app/models/spotlight/resources/dor_resource.rb'
83
- - 'app/models/spotlight/resources/purl.rb'
84
- - 'app/models/spotlight/resources/searchworks.rb'
85
- - 'lib/spotlight/dor/indexer.rb'
86
- - 'lib/spotlight/dor/resources.rb'
87
- - 'lib/spotlight/dor/resources/engine.rb'
88
- - 'lib/spotlight/dor/resources/version.rb'
89
-
90
- # Offense count: 2
91
- Style/DoubleNegation:
92
- Exclude:
93
- - 'app/models/spotlight/resources/purl.rb'
94
- - 'app/models/spotlight/resources/searchworks.rb'
95
-
96
- # Offense count: 1
97
- # Cop supports --auto-correct.
98
- Style/EmptyLines:
99
- Exclude:
100
- - 'spec/spec_helper.rb'
101
-
102
- # Offense count: 4
103
- # Cop supports --auto-correct.
104
- # Configuration parameters: EnforcedStyle, SupportedStyles.
105
- Style/EmptyLinesAroundClassBody:
106
- Exclude:
107
- - 'app/models/spotlight/resources/purl.rb'
108
- - 'app/models/spotlight/resources/searchworks.rb'
109
- - 'lib/spotlight/dor/resources/engine.rb'
110
-
111
- # Offense count: 1
112
- # Cop supports --auto-correct.
113
- # Configuration parameters: EnforcedStyle, SupportedStyles.
114
- Style/EmptyLinesAroundModuleBody:
115
- Exclude:
116
- - 'lib/spotlight/dor/resources.rb'
117
-
118
- # Offense count: 9
119
- # Cop supports --auto-correct.
120
- # Configuration parameters: EnforcedStyle, SupportedStyles.
121
- Style/MethodDefParentheses:
122
- Enabled: false
123
-
124
- # Offense count: 4
125
- # Cop supports --auto-correct.
126
- # Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
127
- Style/RegexpLiteral:
128
- Exclude:
129
- - 'app/models/spotlight/resources/purl.rb'
130
- - 'app/models/spotlight/resources/searchworks.rb'
131
-
132
- # Offense count: 2
133
- # Cop supports --auto-correct.
134
- Style/SpaceAfterComma:
135
- Exclude:
136
- - 'lib/spotlight/dor/indexer.rb'
137
-
138
- # Offense count: 2
139
- # Cop supports --auto-correct.
140
- # Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
141
- Style/SpaceInsideBlockBraces:
142
- Enabled: false
143
-
144
- # Offense count: 31
145
- # Cop supports --auto-correct.
146
- # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
147
- Style/SpaceInsideHashLiteralBraces:
148
- Enabled: false
149
-
150
- # Offense count: 122
151
- # Cop supports --auto-correct.
152
- # Configuration parameters: EnforcedStyle, SupportedStyles.
153
- Style/StringLiterals:
154
- Enabled: false
155
-
156
- # Offense count: 1
157
- # Cop supports --auto-correct.
158
- # Configuration parameters: IgnoredMethods.
159
- Style/SymbolProc:
160
- Exclude:
161
- - 'lib/spotlight/dor/indexer.rb'
162
-
163
- # Offense count: 2
164
- # Cop supports --auto-correct.
165
- # Configuration parameters: EnforcedStyle, SupportedStyles.
166
- Style/TrailingBlankLines:
167
- Exclude:
168
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
169
- - 'spec/spec_helper.rb'
170
-
171
- # Offense count: 12
172
- # Cop supports --auto-correct.
173
- Style/TrailingWhitespace:
174
- Exclude:
175
- - 'lib/spotlight/dor/resources.rb'
176
- - 'spec/models/spotlight/resources/searchworks_spec.rb'
177
-
178
- # Offense count: 1
179
- # Cop supports --auto-correct.
180
- # Configuration parameters: ExactNameMatch, AllowPredicates, AllowDSLWriters, IgnoreClassMethods, Whitelist.
181
- Style/TrivialAccessors:
182
- Exclude:
183
- - 'lib/spotlight/dor/indexer.rb'
184
-
185
- # Offense count: 4
186
- # Configuration parameters: EnforcedStyle, SupportedStyles.
187
- Style/VariableName:
188
- Enabled: false
3
+ Enabled: false
data/app/models/spotlight/resources/dor_resource.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  module Spotlight::Resources
2
+ # Base Resource indexer for objects in DOR
2
3
  class DorResource < Spotlight::Resource
3
4
  ##
4
5
  # Generate solr documents for the DOR resources identified by this object
data/app/models/spotlight/resources/purl.rb CHANGED
@@ -1,14 +1,14 @@
1
1
  module Spotlight::Resources
2
+ # Resource provider for PURL pages
2
3
  class Purl < Spotlight::Resources::DorResource
3
4
  self.weight = -1000
4
5
 
5
- def self.can_provide? res
6
- !!(res.url =~ /^https?:\/\/purl.stanford.edu/)
6
+ def self.can_provide?(res)
7
+ res.url.match(%r{^https?://purl.stanford.edu/}).present?
7
8
  end
8
9
 
9
10
  def doc_id
10
- url.match(/^https?:\/\/purl.stanford.edu\/([^#\/\.]+)/)[1]
11
+ url.match(%r{^https?://purl.stanford.edu/([^#/\.]+)})[1]
11
12
  end
12
-
13
13
  end
14
14
  end
data/app/models/spotlight/resources/searchworks.rb CHANGED
@@ -1,15 +1,14 @@
1
1
  module Spotlight::Resources
2
+ # Resource provider for searchworks pages
2
3
  class Searchworks < Spotlight::Resources::DorResource
3
-
4
4
  self.weight = -1000
5
5
 
6
- def self.can_provide? res
7
- !!(res.url =~ /^https?:\/\/searchworks[^\.]*.stanford.edu/)
6
+ def self.can_provide?(res)
7
+ res.url.match(%r{^https?://searchworks[^\.]*.stanford.edu/}).present?
8
8
  end
9
9
 
10
10
  def doc_id
11
- url.match(/^https?:\/\/searchworks[^\.]*.stanford.edu\/.*view\/([^\/\.#]+)/)[1]
11
+ url.match(%r{^https?://searchworks[^\.]*.stanford.edu/.*view/([^/\.#]+)})[1]
12
12
  end
13
-
14
13
  end
15
14
  end
data/lib/spotlight/dor/indexer.rb CHANGED
@@ -1,171 +1,233 @@
1
+ # rubocop:disable Metrics/ClassLength
1
2
  # external gems
2
3
  require 'gdor/indexer'
3
4
  require 'solrizer'
4
- # Base class to harvest from DOR via harvestdor gem
5
+
5
6
  module Spotlight::Dor
7
+ # Base class to harvest from DOR via harvestdor gem
6
8
  class Indexer < GDor::Indexer
7
- # add contentMetadata fields
8
- before_index do |sdb, solr_doc|
9
- Solrizer.insert_field(solr_doc, 'content_metadata_type', sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text, :symbol, :displayable)
10
-
11
- sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]').each do |node|
12
- file_id = node.attr('id').gsub(".jp2", '')
13
-
14
- if node.attr('id') =~ /jp2$/ and !solr_doc[Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)]
15
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
16
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', node.xpath('./imageData/@width').text, :displayable)
17
- Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', node.xpath('./imageData/@height').text, :displayable)
18
- end
19
-
20
- Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
21
- Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
22
- Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
23
- Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
24
- Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
25
- end
26
- end
27
-
28
9
  # tweak author_sort field from stanford-mods
29
10
  before_index do |_sdb, solr_doc|
30
- solr_doc[:author_sort] &&= solr_doc[:author_sort].gsub("\uFFFF", "\uFFFD")
11
+ solr_doc[:author_sort] &&= solr_doc[:author_sort].tr("\uFFFF", "\uFFFD")
31
12
  end
32
13
 
33
14
  # add fields from raw mods
34
- before_index :add_box
35
- # see comment with add_donor_tags about Feigenbaum specific donor tags data
36
- before_index :add_donor_tags
37
15
  before_index :add_genre
38
- before_index :add_folder
39
- before_index :add_folder_name
40
- before_index :add_series
41
- before_index :mods_cartographics_indexing
42
16
 
43
- def solr_client
44
- @solr_client
45
- end
46
-
47
- def solr_document resource
17
+ def solr_document(resource)
48
18
  doc_hash = super
49
19
  run_hook :before_index, resource, doc_hash
50
20
  doc_hash
51
21
  end
52
22
 
53
- def resource druid
23
+ def resource(druid)
54
24
  Harvestdor::Indexer::Resource.new harvestdor, druid
55
25
  end
56
26
 
57
27
  private
58
28
 
59
- # add the box number to solr_doc as box_ssi field (note: single valued!)
60
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
61
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
62
- def add_box(sdb, solr_doc)
63
- # see spec for data from actual collections
64
- # _location.physicalLocation should find top level and relatedItem
65
- box_num = sdb.smods_rec._location.physicalLocation.map do |node|
66
- val = node.text
67
- # note that this will also find Flatbox or Flat-box
68
- match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
69
- match_data[1].strip if match_data.present?
70
- end.compact
71
-
72
- solr_doc['box_ssi'] = box_num.first
29
+ # add plain MODS <genre> element data, not the SearchWorks genre values
30
+ def add_genre(sdb, solr_doc)
31
+ insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
73
32
  end
74
33
 
75
- # This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
76
- # it will go ununsed by other projects, but should be benign (since this field will not be created if this specific MODs note is not found.)
77
- # Later refactoring could include exhibit specific fields. Peter Mangiafico
78
- def add_donor_tags sdb, solr_doc
79
- donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
80
- insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
81
- end
34
+ concerning :PhysicalLocation do
35
+ included do
36
+ before_index :add_box
37
+ before_index :add_folder
38
+ before_index :add_location
39
+ before_index :add_series
40
+ end
82
41
 
83
- # add the folder number to solr_doc as folder_ssi field (note: single valued!)
84
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
85
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
86
- def add_folder(sdb, solr_doc)
87
- # see spec for data from actual collections
88
- # _location.physicalLocation should find top level and relatedItem
89
- folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
90
- val = node.text
91
-
92
- match_data = if val =~ /\|/
93
- # we assume the data is pipe-delimited, and may contain commas within values
94
- val.match(/Folder ?:? ?([^|]+)/)
95
- else
96
- # the data should be comma-delimited, and may not contain commas within values
97
- val.match(/Folder ?:? ?([^,]+)/)
98
- end
99
-
100
- match_data[1].strip if match_data.present?
101
- end.compact
102
-
103
- solr_doc['folder_ssi'] = folder_num.first
104
- end
42
+ # add the box number to solr_doc as box_ssi field (note: single valued!)
43
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
44
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
45
+ def add_box(sdb, solr_doc)
46
+ # see spec for data from actual collections
47
+ # _location.physicalLocation should find top level and relatedItem
48
+ box_num = sdb.smods_rec._location.physicalLocation.map do |node|
49
+ val = node.text
50
+ # note that this will also find Flatbox or Flat-box
51
+ match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
52
+ match_data[1].strip if match_data.present?
53
+ end.compact
54
+
55
+ solr_doc['box_ssi'] = box_num.first
56
+ end
57
+
58
+ # add the folder number to solr_doc as folder_ssi field (note: single valued!)
59
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
60
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
61
+ def add_folder(sdb, solr_doc)
62
+ # see spec for data from actual collections
63
+ # _location.physicalLocation should find top level and relatedItem
64
+ folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
65
+ val = node.text
66
+
67
+ match_data = if val =~ /\|/
68
+ # we assume the data is pipe-delimited, and may contain commas within values
69
+ val.match(/Folder ?:? ?([^|]+)/)
70
+ else
71
+ # the data should be comma-delimited, and may not contain commas within values
72
+ val.match(/Folder ?:? ?([^,]+)/)
73
+ end
74
+
75
+ match_data[1].strip if match_data.present?
76
+ end.compact
77
+
78
+ solr_doc['folder_ssi'] = folder_num.first
79
+ end
105
80
 
106
- # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
107
- # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
108
- def add_folder_name(sdb, solr_doc)
109
- # see spec for data examples
110
- preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
111
- match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
112
- solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
81
+ # add the physicalLocation as location_ssi field (note: single valued!)
82
+ # but only if it has series, box or folder data
83
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
84
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
85
+ def add_location(sdb, solr_doc)
86
+ # see spec for data from actual collections
87
+ # _location.physicalLocation should find top level and relatedItem
88
+ loc = sdb.smods_rec._location.physicalLocation.map do |node|
89
+ node.text if node.text.match(/.*(Series)|(Accession)|(Folder)|(Box).*/i)
90
+ end.compact
91
+
92
+ solr_doc['location_ssi'] = loc.first
93
+ end
94
+
95
+ # add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
96
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
97
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
98
+ def add_series(sdb, solr_doc)
99
+ # see spec for data from actual collections
100
+ # _location.physicalLocation should find top level and relatedItem
101
+ series_num = sdb.smods_rec._location.physicalLocation.map do |node|
102
+ val = node.text
103
+ # feigenbaum uses 'Accession'
104
+ match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
105
+ match_data[1].strip if match_data.present?
106
+ end.compact
107
+
108
+ solr_doc['series_ssi'] = series_num.first
109
+ end
113
110
  end
114
111
 
115
- # add plain MODS <genre> element data, not the SearchWorks genre values
116
- def add_genre sdb, solr_doc
117
- insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
112
+ concerning :ContentMetadata do
113
+ included do
114
+ before_index :add_content_metadata_fields
115
+ end
116
+
117
+ def add_content_metadata_fields(sdb, solr_doc)
118
+ content_metadata = sdb.public_xml.at_xpath('/publicObject/contentMetadata')
119
+ return unless content_metadata.present?
120
+
121
+ Solrizer.insert_field(solr_doc, 'content_metadata_type', content_metadata['type'], :symbol, :displayable)
122
+
123
+ images = content_metadata.xpath('resource/file[@mimetype="image/jp2"]').select { |node| node.attr('id') =~ /jp2$/ }
124
+
125
+ add_thumbnail_fields(images.first, solr_doc) if images.first
126
+
127
+ images.each do |image|
128
+ add_image_fields(image, solr_doc)
129
+ end
130
+ end
131
+
132
+ private
133
+
134
+ def add_thumbnail_fields(node, solr_doc)
135
+ file_id = node.attr('id').gsub('.jp2', '')
136
+ image_data = node.at_xpath('./imageData')
137
+
138
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
139
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', image_data['width'], :displayable)
140
+ Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', image_data['height'], :displayable)
141
+ end
142
+
143
+ def add_image_fields(node, solr_doc)
144
+ file_id = node.attr('id').gsub('.jp2', '')
145
+ base_url = stacks_iiif_url(solr_doc[:id], file_id)
146
+
147
+ Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "#{base_url}/info.json", :displayable)
148
+ Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "#{base_url}/square/100,100/0/default.jpg", :displayable)
149
+ Solrizer.insert_field(solr_doc, 'thumbnail_url', "#{base_url}/full/!400,400/0/default.jpg", :displayable)
150
+ Solrizer.insert_field(solr_doc, 'large_image_url', "#{base_url}/full/pct:25/0/default.jpg", :displayable)
151
+ Solrizer.insert_field(solr_doc, 'full_image_url', "#{base_url}/full/full/0/default.jpg", :displayable)
152
+ end
153
+
154
+ def stacks_iiif_url(druid, file_name)
155
+ "#{Spotlight::Dor::Resources::Engine.config.stacks_iiif_url}/#{druid}%2F#{file_name}"
156
+ end
118
157
  end
119
158
 
120
- # add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
121
- # data in location/physicalLocation or in relatedItem/location/physicalLocation
122
- # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
123
- def add_series(sdb, solr_doc)
124
- # see spec for data from actual collections
125
- # _location.physicalLocation should find top level and relatedItem
126
- series_num = sdb.smods_rec._location.physicalLocation.map do |node|
127
- val = node.text
128
- # feigenbaum uses 'Accession'
129
- match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
130
- match_data[1].strip if match_data.present?
131
- end.compact
132
-
133
- solr_doc['series_ssi'] = series_num.first
159
+ concerning :FeigenbaumSpecificFields do
160
+ # These fields were specifically for the Feigenbaum exhibit. It is very
161
+ # likely it will go ununsed by other projects, but should be benign (since this field will not be created if
162
+ # this specific MODs note is not found.). Future work could refactor this to
163
+ # only create these fields on an as-needed basis.
164
+
165
+ included do
166
+ before_index :add_donor_tags
167
+ before_index :add_folder_name
168
+ end
169
+
170
+ def add_donor_tags(sdb, solr_doc)
171
+ donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
172
+ insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
173
+ end
174
+
175
+ # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
176
+ # data is specific to Feigenbaum collection and is in <note type='preferred citation'>
177
+ def add_folder_name(sdb, solr_doc)
178
+ # see spec for data examples
179
+ preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
180
+ match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
181
+ solr_doc['folder_name_ssi'] = match_data[1].strip if match_data.present?
182
+ end
134
183
  end
135
184
 
136
- # rubocop:disable Metrics/AbcSize
137
- def mods_cartographics_indexing sdb, solr_doc
138
- insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
185
+ concerning :CartographicIndexing do
186
+ included do
187
+ before_index :mods_cartographics_indexing
188
+ end
139
189
 
140
- Array(sdb.smods_rec.subject.cartographics.coordinates).map do |n|
141
- next unless n.text =~ /^\(/ and n.text =~ /\)$/
190
+ def mods_cartographics_indexing(sdb, solr_doc)
191
+ coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
142
192
 
143
- bbox = n.text.gsub(/[\(\)]/, '')
193
+ insert_field(solr_doc, 'coordinates', coordinates.map(&:text), :stored_searchable)
144
194
 
145
- lng, lat = bbox.split('/')
195
+ solr_doc['point_bbox'] ||= []
196
+ solr_doc['point_bbox'] += coords_to_bboxes(coordinates)
197
+ end
198
+
199
+ private
146
200
 
147
- minX,maxX = lng.split('--').map { |x| coord_to_decimal(x) }
148
- maxY,minY = lat.split('--').map { |x| coord_to_decimal(x) }
201
+ def coords_to_bboxes(coordinates)
202
+ coordinates.select { |n| n.text =~ /^\(.*\)$/ }.map do |n|
203
+ coord_to_bbox(n.text)
204
+ end
205
+ end
149
206
 
150
- solr_doc["point_bbox"] ||= []
151
- solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
207
+ def coord_to_bbox(coord)
208
+ bbox = coord.delete('(').delete(')')
209
+
210
+ lng, lat = bbox.split('/')
211
+
212
+ min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
213
+ max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
214
+ "#{min_x} #{min_y} #{max_x} #{max_y}"
152
215
  end
153
- end
154
- # rubocop:enable Metrics/AbcSize
155
216
 
156
- def coord_to_decimal point
157
- regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
158
- match = regex.match(point)
159
- dec = 0
217
+ def coord_to_decimal(point)
218
+ regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
219
+ match = regex.match(point)
220
+ dec = 0
160
221
 
161
- dec += match['deg'].to_i
162
- dec += match['sec'].to_f / 60
163
- dec = -1 * dec if match['dir'] == "W" or match['dir'] == "S"
222
+ dec += match['deg'].to_i
223
+ dec += match['sec'].to_f / 60
224
+ dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
164
225
 
165
- dec
226
+ dec
227
+ end
166
228
  end
167
229
 
168
- def insert_field solr_doc, field, values, *args
230
+ def insert_field(solr_doc, field, values, *args)
169
231
  Array(values).each do |v|
170
232
  Solrizer.insert_field solr_doc, field, v, *args
171
233
  end