spotlight-dor-resources 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +17 -2
- data/.rubocop_todo.yml +1 -186
- data/app/models/spotlight/resources/dor_resource.rb +1 -0
- data/app/models/spotlight/resources/purl.rb +4 -4
- data/app/models/spotlight/resources/searchworks.rb +4 -5
- data/lib/spotlight/dor/indexer.rb +187 -125
- data/lib/spotlight/dor/resources.rb +19 -7
- data/lib/spotlight/dor/resources/engine.rb +3 -2
- data/lib/spotlight/dor/resources/version.rb +2 -1
- data/spec/integration/gdor_integration_spec.rb +9 -9
- data/spec/{unit → lib}/spotlight/dor/indexer_spec.rb +209 -10
- data/spec/models/spotlight/resources/purl_spec.rb +45 -39
- data/spec/models/spotlight/resources/searchworks_spec.rb +47 -44
- data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_a_doc_id.yml → has_a_doc_id.yml} +60 -40
- data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_spotlight_data.yml → has_exhibit-specific_indexing.yml} +48 -32
- data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_the_gdor_data.yml → has_spotlight_data.yml} +48 -32
- data/spec/vcr_cassettes/gdor_indexing_integration_test/{should_have_exhibit-specific_indexing.yml → has_the_gdor_data.yml} +48 -32
- data/spotlight-dor-resources.gemspec +22 -22
- metadata +11 -21
- data/spec/integration/indexer_integration_spec.rb +0 -28
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +0 -1382
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +0 -1602
- data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +0 -6822
- data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +0 -1390
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b1e1f0ddb963f81f8fd302a284ef53592146ef9
|
4
|
+
data.tar.gz: 03687ee4c19706fa434993ce33ad59beac3b522a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01970798dc91a7dd7b3f451244e71573d6c1129eb2c59a7f7758b3fdfb4d7d70c3d5bf8e3d99d1a7300b404dbc7142869898b1792729f8ab1099beb1f7113cd1
|
7
|
+
data.tar.gz: 121941bdd33d142d20f27bcacd25d112327324d5e1f84e0f190896bb9fa0c932562cdaeea2a29dae846d84b940085e3bcdd3b92a4f832854e74c24f46269621d
|
data/.rubocop.yml
CHANGED
@@ -4,9 +4,24 @@ require: rubocop-rspec
|
|
4
4
|
|
5
5
|
AllCops:
|
6
6
|
Exclude:
|
7
|
+
- 'Gemfile'
|
8
|
+
- 'bin/**/*'
|
9
|
+
- 'db/**/*'
|
10
|
+
- 'config/**/*'
|
7
11
|
- 'spec/internal/**/*'
|
8
12
|
- 'spec/test_app_templates/**/*'
|
13
|
+
- 'spec/spec_helper.rb'
|
14
|
+
- 'spec/teaspoon_env.rb'
|
15
|
+
- 'vendor/**/*'
|
16
|
+
RunRailsCops: true
|
9
17
|
|
10
|
-
Metrics/
|
18
|
+
Metrics/LineLength:
|
19
|
+
Max: 130
|
20
|
+
|
21
|
+
Style/StringLiterals:
|
22
|
+
Enabled: true
|
23
|
+
EnforcedStyle: single_quotes
|
24
|
+
|
25
|
+
RSpec/DescribeClass:
|
11
26
|
Exclude:
|
12
|
-
- '
|
27
|
+
- 'spec/integration/*'
|
data/.rubocop_todo.yml
CHANGED
@@ -1,188 +1,3 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2015-10-29 09:20:45 -0700 using RuboCop version 0.34.2.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
1
|
|
9
|
-
|
10
|
-
# Offense count: 1
|
11
|
-
# Configuration parameters: CountComments.
|
12
|
-
Metrics/ClassLength:
|
13
|
-
Max: 102
|
14
|
-
|
15
|
-
# Offense count: 84
|
16
|
-
# Configuration parameters: AllowURI, URISchemes.
|
17
|
-
Metrics/LineLength:
|
18
|
-
Max: 171
|
19
|
-
|
20
|
-
# Offense count: 1
|
21
|
-
# Configuration parameters: CountComments.
|
22
|
-
Metrics/MethodLength:
|
23
|
-
Max: 13
|
24
|
-
|
25
|
-
# Offense count: 1
|
26
|
-
# Cop supports --auto-correct.
|
27
|
-
Performance/StringReplacement:
|
28
|
-
Exclude:
|
29
|
-
- 'lib/spotlight/dor/indexer.rb'
|
30
|
-
|
31
|
-
# Offense count: 2
|
32
|
-
RSpec/DescribeClass:
|
33
|
-
Exclude:
|
34
|
-
- 'spec/integration/gdor_integration_spec.rb'
|
35
|
-
- 'spec/integration/indexer_integration_spec.rb'
|
36
|
-
|
37
|
-
# Offense count: 4
|
38
|
-
RSpec/DescribedClass:
|
39
|
-
Exclude:
|
40
|
-
- 'spec/models/spotlight/resources/purl_spec.rb'
|
41
|
-
- 'spec/models/spotlight/resources/searchworks_spec.rb'
|
42
|
-
|
43
|
-
# Offense count: 26
|
44
|
-
# Configuration parameters: CustomTransform, IgnoredWords.
|
45
|
-
RSpec/ExampleWording:
|
46
|
-
Exclude:
|
47
|
-
- 'spec/integration/gdor_integration_spec.rb'
|
48
|
-
- 'spec/models/spotlight/resources/purl_spec.rb'
|
49
|
-
- 'spec/models/spotlight/resources/searchworks_spec.rb'
|
50
|
-
|
51
|
-
# Offense count: 4
|
52
|
-
# Cop supports --auto-correct.
|
53
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
54
|
-
Style/AndOr:
|
55
|
-
Exclude:
|
56
|
-
- 'lib/spotlight/dor/indexer.rb'
|
57
|
-
- 'spec/spec_helper.rb'
|
58
|
-
|
59
|
-
# Offense count: 11
|
60
|
-
# Cop supports --auto-correct.
|
61
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
62
|
-
Style/BracesAroundHashParameters:
|
63
|
-
Exclude:
|
64
|
-
- 'spec/models/spotlight/resources/purl_spec.rb'
|
65
|
-
- 'spec/models/spotlight/resources/searchworks_spec.rb'
|
66
|
-
|
67
|
-
# Offense count: 6
|
68
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
69
2
|
Style/ClassAndModuleChildren:
|
70
|
-
|
71
|
-
- 'app/models/spotlight/resources/dor_resource.rb'
|
72
|
-
- 'app/models/spotlight/resources/harvestdor.rb'
|
73
|
-
- 'app/models/spotlight/resources/purl.rb'
|
74
|
-
- 'app/models/spotlight/resources/searchworks.rb'
|
75
|
-
- 'lib/spotlight/dor/indexer.rb'
|
76
|
-
- 'lib/spotlight/dor/resources/engine.rb'
|
77
|
-
|
78
|
-
# Offense count: 7
|
79
|
-
# Configuration parameters: Exclude.
|
80
|
-
Style/Documentation:
|
81
|
-
Exclude:
|
82
|
-
- 'app/models/spotlight/resources/dor_resource.rb'
|
83
|
-
- 'app/models/spotlight/resources/purl.rb'
|
84
|
-
- 'app/models/spotlight/resources/searchworks.rb'
|
85
|
-
- 'lib/spotlight/dor/indexer.rb'
|
86
|
-
- 'lib/spotlight/dor/resources.rb'
|
87
|
-
- 'lib/spotlight/dor/resources/engine.rb'
|
88
|
-
- 'lib/spotlight/dor/resources/version.rb'
|
89
|
-
|
90
|
-
# Offense count: 2
|
91
|
-
Style/DoubleNegation:
|
92
|
-
Exclude:
|
93
|
-
- 'app/models/spotlight/resources/purl.rb'
|
94
|
-
- 'app/models/spotlight/resources/searchworks.rb'
|
95
|
-
|
96
|
-
# Offense count: 1
|
97
|
-
# Cop supports --auto-correct.
|
98
|
-
Style/EmptyLines:
|
99
|
-
Exclude:
|
100
|
-
- 'spec/spec_helper.rb'
|
101
|
-
|
102
|
-
# Offense count: 4
|
103
|
-
# Cop supports --auto-correct.
|
104
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
105
|
-
Style/EmptyLinesAroundClassBody:
|
106
|
-
Exclude:
|
107
|
-
- 'app/models/spotlight/resources/purl.rb'
|
108
|
-
- 'app/models/spotlight/resources/searchworks.rb'
|
109
|
-
- 'lib/spotlight/dor/resources/engine.rb'
|
110
|
-
|
111
|
-
# Offense count: 1
|
112
|
-
# Cop supports --auto-correct.
|
113
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
114
|
-
Style/EmptyLinesAroundModuleBody:
|
115
|
-
Exclude:
|
116
|
-
- 'lib/spotlight/dor/resources.rb'
|
117
|
-
|
118
|
-
# Offense count: 9
|
119
|
-
# Cop supports --auto-correct.
|
120
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
121
|
-
Style/MethodDefParentheses:
|
122
|
-
Enabled: false
|
123
|
-
|
124
|
-
# Offense count: 4
|
125
|
-
# Cop supports --auto-correct.
|
126
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles, AllowInnerSlashes.
|
127
|
-
Style/RegexpLiteral:
|
128
|
-
Exclude:
|
129
|
-
- 'app/models/spotlight/resources/purl.rb'
|
130
|
-
- 'app/models/spotlight/resources/searchworks.rb'
|
131
|
-
|
132
|
-
# Offense count: 2
|
133
|
-
# Cop supports --auto-correct.
|
134
|
-
Style/SpaceAfterComma:
|
135
|
-
Exclude:
|
136
|
-
- 'lib/spotlight/dor/indexer.rb'
|
137
|
-
|
138
|
-
# Offense count: 2
|
139
|
-
# Cop supports --auto-correct.
|
140
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
141
|
-
Style/SpaceInsideBlockBraces:
|
142
|
-
Enabled: false
|
143
|
-
|
144
|
-
# Offense count: 31
|
145
|
-
# Cop supports --auto-correct.
|
146
|
-
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SupportedStyles.
|
147
|
-
Style/SpaceInsideHashLiteralBraces:
|
148
|
-
Enabled: false
|
149
|
-
|
150
|
-
# Offense count: 122
|
151
|
-
# Cop supports --auto-correct.
|
152
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
153
|
-
Style/StringLiterals:
|
154
|
-
Enabled: false
|
155
|
-
|
156
|
-
# Offense count: 1
|
157
|
-
# Cop supports --auto-correct.
|
158
|
-
# Configuration parameters: IgnoredMethods.
|
159
|
-
Style/SymbolProc:
|
160
|
-
Exclude:
|
161
|
-
- 'lib/spotlight/dor/indexer.rb'
|
162
|
-
|
163
|
-
# Offense count: 2
|
164
|
-
# Cop supports --auto-correct.
|
165
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
166
|
-
Style/TrailingBlankLines:
|
167
|
-
Exclude:
|
168
|
-
- 'spec/models/spotlight/resources/searchworks_spec.rb'
|
169
|
-
- 'spec/spec_helper.rb'
|
170
|
-
|
171
|
-
# Offense count: 12
|
172
|
-
# Cop supports --auto-correct.
|
173
|
-
Style/TrailingWhitespace:
|
174
|
-
Exclude:
|
175
|
-
- 'lib/spotlight/dor/resources.rb'
|
176
|
-
- 'spec/models/spotlight/resources/searchworks_spec.rb'
|
177
|
-
|
178
|
-
# Offense count: 1
|
179
|
-
# Cop supports --auto-correct.
|
180
|
-
# Configuration parameters: ExactNameMatch, AllowPredicates, AllowDSLWriters, IgnoreClassMethods, Whitelist.
|
181
|
-
Style/TrivialAccessors:
|
182
|
-
Exclude:
|
183
|
-
- 'lib/spotlight/dor/indexer.rb'
|
184
|
-
|
185
|
-
# Offense count: 4
|
186
|
-
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
187
|
-
Style/VariableName:
|
188
|
-
Enabled: false
|
3
|
+
Enabled: false
|
@@ -1,14 +1,14 @@
|
|
1
1
|
module Spotlight::Resources
|
2
|
+
# Resource provider for PURL pages
|
2
3
|
class Purl < Spotlight::Resources::DorResource
|
3
4
|
self.weight = -1000
|
4
5
|
|
5
|
-
def self.can_provide?
|
6
|
-
|
6
|
+
def self.can_provide?(res)
|
7
|
+
res.url.match(%r{^https?://purl.stanford.edu/}).present?
|
7
8
|
end
|
8
9
|
|
9
10
|
def doc_id
|
10
|
-
url.match(
|
11
|
+
url.match(%r{^https?://purl.stanford.edu/([^#/\.]+)})[1]
|
11
12
|
end
|
12
|
-
|
13
13
|
end
|
14
14
|
end
|
@@ -1,15 +1,14 @@
|
|
1
1
|
module Spotlight::Resources
|
2
|
+
# Resource provider for searchworks pages
|
2
3
|
class Searchworks < Spotlight::Resources::DorResource
|
3
|
-
|
4
4
|
self.weight = -1000
|
5
5
|
|
6
|
-
def self.can_provide?
|
7
|
-
|
6
|
+
def self.can_provide?(res)
|
7
|
+
res.url.match(%r{^https?://searchworks[^\.]*.stanford.edu/}).present?
|
8
8
|
end
|
9
9
|
|
10
10
|
def doc_id
|
11
|
-
url.match(
|
11
|
+
url.match(%r{^https?://searchworks[^\.]*.stanford.edu/.*view/([^/\.#]+)})[1]
|
12
12
|
end
|
13
|
-
|
14
13
|
end
|
15
14
|
end
|
@@ -1,171 +1,233 @@
|
|
1
|
+
# rubocop:disable Metrics/ClassLength
|
1
2
|
# external gems
|
2
3
|
require 'gdor/indexer'
|
3
4
|
require 'solrizer'
|
4
|
-
|
5
|
+
|
5
6
|
module Spotlight::Dor
|
7
|
+
# Base class to harvest from DOR via harvestdor gem
|
6
8
|
class Indexer < GDor::Indexer
|
7
|
-
# add contentMetadata fields
|
8
|
-
before_index do |sdb, solr_doc|
|
9
|
-
Solrizer.insert_field(solr_doc, 'content_metadata_type', sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text, :symbol, :displayable)
|
10
|
-
|
11
|
-
sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]').each do |node|
|
12
|
-
file_id = node.attr('id').gsub(".jp2", '')
|
13
|
-
|
14
|
-
if node.attr('id') =~ /jp2$/ and !solr_doc[Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)]
|
15
|
-
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
|
16
|
-
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', node.xpath('./imageData/@width').text, :displayable)
|
17
|
-
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', node.xpath('./imageData/@height').text, :displayable)
|
18
|
-
end
|
19
|
-
|
20
|
-
Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
|
21
|
-
Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
|
22
|
-
Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
|
23
|
-
Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
|
24
|
-
Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
9
|
# tweak author_sort field from stanford-mods
|
29
10
|
before_index do |_sdb, solr_doc|
|
30
|
-
solr_doc[:author_sort] &&= solr_doc[:author_sort].
|
11
|
+
solr_doc[:author_sort] &&= solr_doc[:author_sort].tr("\uFFFF", "\uFFFD")
|
31
12
|
end
|
32
13
|
|
33
14
|
# add fields from raw mods
|
34
|
-
before_index :add_box
|
35
|
-
# see comment with add_donor_tags about Feigenbaum specific donor tags data
|
36
|
-
before_index :add_donor_tags
|
37
15
|
before_index :add_genre
|
38
|
-
before_index :add_folder
|
39
|
-
before_index :add_folder_name
|
40
|
-
before_index :add_series
|
41
|
-
before_index :mods_cartographics_indexing
|
42
16
|
|
43
|
-
def
|
44
|
-
@solr_client
|
45
|
-
end
|
46
|
-
|
47
|
-
def solr_document resource
|
17
|
+
def solr_document(resource)
|
48
18
|
doc_hash = super
|
49
19
|
run_hook :before_index, resource, doc_hash
|
50
20
|
doc_hash
|
51
21
|
end
|
52
22
|
|
53
|
-
def resource
|
23
|
+
def resource(druid)
|
54
24
|
Harvestdor::Indexer::Resource.new harvestdor, druid
|
55
25
|
end
|
56
26
|
|
57
27
|
private
|
58
28
|
|
59
|
-
# add
|
60
|
-
|
61
|
-
|
62
|
-
def add_box(sdb, solr_doc)
|
63
|
-
# see spec for data from actual collections
|
64
|
-
# _location.physicalLocation should find top level and relatedItem
|
65
|
-
box_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
66
|
-
val = node.text
|
67
|
-
# note that this will also find Flatbox or Flat-box
|
68
|
-
match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
|
69
|
-
match_data[1].strip if match_data.present?
|
70
|
-
end.compact
|
71
|
-
|
72
|
-
solr_doc['box_ssi'] = box_num.first
|
29
|
+
# add plain MODS <genre> element data, not the SearchWorks genre values
|
30
|
+
def add_genre(sdb, solr_doc)
|
31
|
+
insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
|
73
32
|
end
|
74
33
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
34
|
+
concerning :PhysicalLocation do
|
35
|
+
included do
|
36
|
+
before_index :add_box
|
37
|
+
before_index :add_folder
|
38
|
+
before_index :add_location
|
39
|
+
before_index :add_series
|
40
|
+
end
|
82
41
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
42
|
+
# add the box number to solr_doc as box_ssi field (note: single valued!)
|
43
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
44
|
+
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
|
45
|
+
def add_box(sdb, solr_doc)
|
46
|
+
# see spec for data from actual collections
|
47
|
+
# _location.physicalLocation should find top level and relatedItem
|
48
|
+
box_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
49
|
+
val = node.text
|
50
|
+
# note that this will also find Flatbox or Flat-box
|
51
|
+
match_data = val.match(/Box ?:? ?([^,|(Folder)]+)/i)
|
52
|
+
match_data[1].strip if match_data.present?
|
53
|
+
end.compact
|
54
|
+
|
55
|
+
solr_doc['box_ssi'] = box_num.first
|
56
|
+
end
|
57
|
+
|
58
|
+
# add the folder number to solr_doc as folder_ssi field (note: single valued!)
|
59
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
60
|
+
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
|
61
|
+
def add_folder(sdb, solr_doc)
|
62
|
+
# see spec for data from actual collections
|
63
|
+
# _location.physicalLocation should find top level and relatedItem
|
64
|
+
folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
65
|
+
val = node.text
|
66
|
+
|
67
|
+
match_data = if val =~ /\|/
|
68
|
+
# we assume the data is pipe-delimited, and may contain commas within values
|
69
|
+
val.match(/Folder ?:? ?([^|]+)/)
|
70
|
+
else
|
71
|
+
# the data should be comma-delimited, and may not contain commas within values
|
72
|
+
val.match(/Folder ?:? ?([^,]+)/)
|
73
|
+
end
|
74
|
+
|
75
|
+
match_data[1].strip if match_data.present?
|
76
|
+
end.compact
|
77
|
+
|
78
|
+
solr_doc['folder_ssi'] = folder_num.first
|
79
|
+
end
|
105
80
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
#
|
110
|
-
|
111
|
-
|
112
|
-
|
81
|
+
# add the physicalLocation as location_ssi field (note: single valued!)
|
82
|
+
# but only if it has series, box or folder data
|
83
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
84
|
+
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
|
85
|
+
def add_location(sdb, solr_doc)
|
86
|
+
# see spec for data from actual collections
|
87
|
+
# _location.physicalLocation should find top level and relatedItem
|
88
|
+
loc = sdb.smods_rec._location.physicalLocation.map do |node|
|
89
|
+
node.text if node.text.match(/.*(Series)|(Accession)|(Folder)|(Box).*/i)
|
90
|
+
end.compact
|
91
|
+
|
92
|
+
solr_doc['location_ssi'] = loc.first
|
93
|
+
end
|
94
|
+
|
95
|
+
# add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
|
96
|
+
# data in location/physicalLocation or in relatedItem/location/physicalLocation
|
97
|
+
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
|
98
|
+
def add_series(sdb, solr_doc)
|
99
|
+
# see spec for data from actual collections
|
100
|
+
# _location.physicalLocation should find top level and relatedItem
|
101
|
+
series_num = sdb.smods_rec._location.physicalLocation.map do |node|
|
102
|
+
val = node.text
|
103
|
+
# feigenbaum uses 'Accession'
|
104
|
+
match_data = val.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
|
105
|
+
match_data[1].strip if match_data.present?
|
106
|
+
end.compact
|
107
|
+
|
108
|
+
solr_doc['series_ssi'] = series_num.first
|
109
|
+
end
|
113
110
|
end
|
114
111
|
|
115
|
-
|
116
|
-
|
117
|
-
|
112
|
+
concerning :ContentMetadata do
|
113
|
+
included do
|
114
|
+
before_index :add_content_metadata_fields
|
115
|
+
end
|
116
|
+
|
117
|
+
def add_content_metadata_fields(sdb, solr_doc)
|
118
|
+
content_metadata = sdb.public_xml.at_xpath('/publicObject/contentMetadata')
|
119
|
+
return unless content_metadata.present?
|
120
|
+
|
121
|
+
Solrizer.insert_field(solr_doc, 'content_metadata_type', content_metadata['type'], :symbol, :displayable)
|
122
|
+
|
123
|
+
images = content_metadata.xpath('resource/file[@mimetype="image/jp2"]').select { |node| node.attr('id') =~ /jp2$/ }
|
124
|
+
|
125
|
+
add_thumbnail_fields(images.first, solr_doc) if images.first
|
126
|
+
|
127
|
+
images.each do |image|
|
128
|
+
add_image_fields(image, solr_doc)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
def add_thumbnail_fields(node, solr_doc)
|
135
|
+
file_id = node.attr('id').gsub('.jp2', '')
|
136
|
+
image_data = node.at_xpath('./imageData')
|
137
|
+
|
138
|
+
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
|
139
|
+
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', image_data['width'], :displayable)
|
140
|
+
Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', image_data['height'], :displayable)
|
141
|
+
end
|
142
|
+
|
143
|
+
def add_image_fields(node, solr_doc)
|
144
|
+
file_id = node.attr('id').gsub('.jp2', '')
|
145
|
+
base_url = stacks_iiif_url(solr_doc[:id], file_id)
|
146
|
+
|
147
|
+
Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "#{base_url}/info.json", :displayable)
|
148
|
+
Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "#{base_url}/square/100,100/0/default.jpg", :displayable)
|
149
|
+
Solrizer.insert_field(solr_doc, 'thumbnail_url', "#{base_url}/full/!400,400/0/default.jpg", :displayable)
|
150
|
+
Solrizer.insert_field(solr_doc, 'large_image_url', "#{base_url}/full/pct:25/0/default.jpg", :displayable)
|
151
|
+
Solrizer.insert_field(solr_doc, 'full_image_url', "#{base_url}/full/full/0/default.jpg", :displayable)
|
152
|
+
end
|
153
|
+
|
154
|
+
def stacks_iiif_url(druid, file_name)
|
155
|
+
"#{Spotlight::Dor::Resources::Engine.config.stacks_iiif_url}/#{druid}%2F#{file_name}"
|
156
|
+
end
|
118
157
|
end
|
119
158
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
#
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
159
|
+
concerning :FeigenbaumSpecificFields do
|
160
|
+
# These fields were specifically for the Feigenbaum exhibit. It is very
|
161
|
+
# likely it will go unused by other projects, but should be benign (since this field will not be created if
|
162
|
+
# this specific MODS note is not found). Future work could refactor this to
|
163
|
+
# only create these fields on an as-needed basis.
|
164
|
+
|
165
|
+
included do
|
166
|
+
before_index :add_donor_tags
|
167
|
+
before_index :add_folder_name
|
168
|
+
end
|
169
|
+
|
170
|
+
def add_donor_tags(sdb, solr_doc)
|
171
|
+
donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
|
172
|
+
insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
|
173
|
+
end
|
174
|
+
|
175
|
+
# add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
|
176
|
+
# data is specific to Feigenbaum collection and is in <note type='preferred citation'>
|
177
|
+
def add_folder_name(sdb, solr_doc)
|
178
|
+
# see spec for data examples
|
179
|
+
preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
|
180
|
+
match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
|
181
|
+
solr_doc['folder_name_ssi'] = match_data[1].strip if match_data.present?
|
182
|
+
end
|
134
183
|
end
|
135
184
|
|
136
|
-
|
137
|
-
|
138
|
-
|
185
|
+
concerning :CartographicIndexing do
|
186
|
+
included do
|
187
|
+
before_index :mods_cartographics_indexing
|
188
|
+
end
|
139
189
|
|
140
|
-
|
141
|
-
|
190
|
+
def mods_cartographics_indexing(sdb, solr_doc)
|
191
|
+
coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
|
142
192
|
|
143
|
-
|
193
|
+
insert_field(solr_doc, 'coordinates', coordinates.map(&:text), :stored_searchable)
|
144
194
|
|
145
|
-
|
195
|
+
solr_doc['point_bbox'] ||= []
|
196
|
+
solr_doc['point_bbox'] += coords_to_bboxes(coordinates)
|
197
|
+
end
|
198
|
+
|
199
|
+
private
|
146
200
|
|
147
|
-
|
148
|
-
|
201
|
+
def coords_to_bboxes(coordinates)
|
202
|
+
coordinates.select { |n| n.text =~ /^\(.*\)$/ }.map do |n|
|
203
|
+
coord_to_bbox(n.text)
|
204
|
+
end
|
205
|
+
end
|
149
206
|
|
150
|
-
|
151
|
-
|
207
|
+
def coord_to_bbox(coord)
|
208
|
+
bbox = coord.delete('(').delete(')')
|
209
|
+
|
210
|
+
lng, lat = bbox.split('/')
|
211
|
+
|
212
|
+
min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
|
213
|
+
max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
|
214
|
+
"#{min_x} #{min_y} #{max_x} #{max_y}"
|
152
215
|
end
|
153
|
-
end
|
154
|
-
# rubocop:enable Metrics/AbcSize
|
155
216
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
217
|
+
def coord_to_decimal(point)
|
218
|
+
regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
|
219
|
+
match = regex.match(point)
|
220
|
+
dec = 0
|
160
221
|
|
161
|
-
|
162
|
-
|
163
|
-
|
222
|
+
dec += match['deg'].to_i
|
223
|
+
dec += match['sec'].to_f / 60
|
224
|
+
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
164
225
|
|
165
|
-
|
226
|
+
dec
|
227
|
+
end
|
166
228
|
end
|
167
229
|
|
168
|
-
def insert_field
|
230
|
+
def insert_field(solr_doc, field, values, *args)
|
169
231
|
Array(values).each do |v|
|
170
232
|
Solrizer.insert_field solr_doc, field, v, *args
|
171
233
|
end
|