discovery-indexer 3.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f64eea714259a5affc67b00288a36ad0748f5b7
|
4
|
+
data.tar.gz: bf28bbd7dfef5326c5b47b67a38ed8993b297d03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0ba91632921efb45a5c0cee754d5681a48420dc7c35799b654b92ff3c3a962b68f5a422c70a89928e6ab1c4f0b0281161828dccbaba5dd91f3d788207d0c648
|
7
|
+
data.tar.gz: a4f79864d64473a0409447c2c8f1d7d791eeea0edd87b9956bafa1571997248a4d0381e3d83bd8af7c8a65f7070ff1f8db15d4d30a43ce8bae09cf123618b27e
|
@@ -69,6 +69,12 @@ module DiscoveryIndexer
|
|
69
69
|
# ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"]
|
70
70
|
attr_accessor :image_ids
|
71
71
|
|
72
|
+
# @!attribute [rw] thumb
|
73
|
+
# @return [String] a thumbnail image if it exists in the PURL
|
74
|
+
# @example
|
75
|
+
# "oo000oo0001/pc0065_b08_f10_i031.jp2"
|
76
|
+
attr_accessor :thumb
|
77
|
+
|
72
78
|
# @!attribute [rw] sw_image_ids
|
73
79
|
# @return [Array] a list of the image ids and corresponding druids with %2F
|
74
80
|
# separator from the file and externalFile tags in the content_metadata
|
@@ -30,6 +30,7 @@ module DiscoveryIndexer
|
|
30
30
|
purlxml_model.dor_content_type = parse_dor_content_type
|
31
31
|
purlxml_model.release_tags_hash = parse_release_tags_hash
|
32
32
|
purlxml_model.file_ids = parse_file_ids
|
33
|
+
purlxml_model.thumb = parse_thumb
|
33
34
|
purlxml_model.image_ids = parse_image_ids
|
34
35
|
purlxml_model.sw_image_ids = parse_sw_image_ids
|
35
36
|
purlxml_model.catkey = parse_catkey
|
@@ -142,16 +143,42 @@ module DiscoveryIndexer
|
|
142
143
|
content_md.xpath('//resource[@type="page" or @type="image" or @type="thumb"]/file[@mimetype="image/jp2"]/@id').map(&:to_s)
|
143
144
|
end
|
144
145
|
|
146
|
+
# the thumbnail in publicXML, falling back to the first image if no thumb node is found
|
147
|
+
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
|
148
|
+
def parse_thumb
|
149
|
+
unless @purlxml_ng_doc.nil?
|
150
|
+
thumb = @purlxml_ng_doc.xpath('//thumb')
|
151
|
+
# first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
|
152
|
+
if thumb.size == 1
|
153
|
+
thumb.first.content
|
154
|
+
elsif thumb.size == 0 && parse_sw_image_ids.size > 0
|
155
|
+
parse_sw_image_ids.first
|
156
|
+
else
|
157
|
+
nil
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
# the thumbnail in publicXML properly URI encoded, including the slash separator
|
163
|
+
# @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
|
164
|
+
def encoded_thumb
|
165
|
+
thumb=parse_thumb
|
166
|
+
return unless thumb
|
167
|
+
thumb_druid=thumb.split('/').first # the druid (before the first slash)
|
168
|
+
thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
|
169
|
+
"#{thumb_druid}%2F#{URI.escape(thumb_filename)}"
|
170
|
+
end
|
171
|
+
|
145
172
|
# the druid and id attribute of resource/file and objectId and fileId of the
|
146
173
|
# resource/externalFile elements that match the image, page, or thumb resource type, including extension
|
147
|
-
# Also, prepends the corresponding druid and
|
174
|
+
# Also, prepends the corresponding druid and / specifically for Searchworks use
|
148
175
|
# @return [Array<String>] filenames
|
149
176
|
def parse_sw_image_ids
|
150
177
|
content_md = parse_content_metadata
|
151
178
|
return [] if content_md.nil?
|
152
179
|
content_md.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
|
153
|
-
node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid}
|
154
|
-
"#{y.attributes['objectId'].text.split(':').last}" + "
|
180
|
+
node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
|
181
|
+
"#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
|
155
182
|
end
|
156
183
|
end.flatten
|
157
184
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: discovery-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ahmed AlSum
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-10-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -232,7 +232,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
232
232
|
version: '0'
|
233
233
|
requirements: []
|
234
234
|
rubyforge_project:
|
235
|
-
rubygems_version: 2.
|
235
|
+
rubygems_version: 2.6.7
|
236
236
|
signing_key:
|
237
237
|
specification_version: 4
|
238
238
|
summary: Shared library for the basic discovery indexing operation for Stanford DLSS.
|