discovery-indexer 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 32f16c9c6a49b290815357fa39f9aad6dd53ed4e
4
- data.tar.gz: e110a3a509c0e8e354c42949418acb6f7ba39eab
3
+ metadata.gz: 9f64eea714259a5affc67b00288a36ad0748f5b7
4
+ data.tar.gz: bf28bbd7dfef5326c5b47b67a38ed8993b297d03
5
5
  SHA512:
6
- metadata.gz: bf0e4464eb5fc84e5a821816502e4c58e526bec8720986fd5e8f979d79d5f034959f444c9331bb201b79a9e78def6ef2c74543e0b4d710d6075e06bd13ce3ee4
7
- data.tar.gz: b43ad5545d7068c9d4e9ea3758b7bff0c7dd593199a42b8fc606f5f61ab72fb9fc000d0fe1d436ed93069dd80113ecabed0f1b8a0d4de85a9f473bb4783130d0
6
+ metadata.gz: d0ba91632921efb45a5c0cee754d5681a48420dc7c35799b654b92ff3c3a962b68f5a422c70a89928e6ab1c4f0b0281161828dccbaba5dd91f3d788207d0c648
7
+ data.tar.gz: a4f79864d64473a0409447c2c8f1d7d791eeea0edd87b9956bafa1571997248a4d0381e3d83bd8af7c8a65f7070ff1f8db15d4d30a43ce8bae09cf123618b27e
@@ -69,6 +69,12 @@ module DiscoveryIndexer
69
69
  # ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"]
70
70
  attr_accessor :image_ids
71
71
 
72
+ # @!attribute [rw] thumb
73
+ # @return [String] a thumbnail image if it exists in the PURL
74
+ # @example
75
+ # "oo000oo0001/pc0065_b08_f10_i031.jp2"
76
+ attr_accessor :thumb
77
+
72
78
  # @!attribute [rw] sw_image_ids
73
79
  # @return [Array] a list of the image ids and corresponding druids with %2F
74
80
  # separator from the file and externalFile tags in the content_metadata
@@ -30,6 +30,7 @@ module DiscoveryIndexer
30
30
  purlxml_model.dor_content_type = parse_dor_content_type
31
31
  purlxml_model.release_tags_hash = parse_release_tags_hash
32
32
  purlxml_model.file_ids = parse_file_ids
33
+ purlxml_model.thumb = parse_thumb
33
34
  purlxml_model.image_ids = parse_image_ids
34
35
  purlxml_model.sw_image_ids = parse_sw_image_ids
35
36
  purlxml_model.catkey = parse_catkey
@@ -142,16 +143,42 @@ module DiscoveryIndexer
142
143
  content_md.xpath('//resource[@type="page" or @type="image" or @type="thumb"]/file[@mimetype="image/jp2"]/@id').map(&:to_s)
143
144
  end
144
145
 
146
+ # the thumbnail in publicXML, falling back to the first image if no thumb node is found
147
+ # @return [String] thumb filename with druid prepended, e.g. oo000oo0001/filename withspace.jp2
148
+ def parse_thumb
149
+ unless @purlxml_ng_doc.nil?
150
+ thumb = @purlxml_ng_doc.xpath('//thumb')
151
+ # first try and parse what is in the thumb node of publicXML, but fallback to the first image if needed
152
+ if thumb.size == 1
153
+ thumb.first.content
154
+ elsif thumb.size == 0 && parse_sw_image_ids.size > 0
155
+ parse_sw_image_ids.first
156
+ else
157
+ nil
158
+ end
159
+ end
160
+ end
161
+
162
+ # the thumbnail in publicXML properly URI encoded, including the slash separator
163
+ # @return [String] thumb filename with druid prepended, e.g. oo000oo0001%2Ffilename%20withspace.jp2
164
+ def encoded_thumb
165
+ thumb=parse_thumb
166
+ return unless thumb
167
+ thumb_druid=thumb.split('/').first # the druid (before the first slash)
168
+ thumb_filename=thumb.split(/[a-zA-Z]{2}[0-9]{3}[a-zA-Z]{2}[0-9]{4}[\/]/).last # everything after the druid
169
+ "#{thumb_druid}%2F#{URI.escape(thumb_filename)}"
170
+ end
171
+
145
172
  # the druid and id attribute of resource/file and objectId and fileId of the
146
173
  # resource/externalFile elements that match the image, page, or thumb resource type, including extension
147
- # Also, prepends the corresponding druid and %2F specifically for Searchworks use
174
+ # Also, prepends the corresponding druid and / specifically for Searchworks use
148
175
  # @return [Array<String>] filenames
149
176
  def parse_sw_image_ids
150
177
  content_md = parse_content_metadata
151
178
  return [] if content_md.nil?
152
179
  content_md.xpath('//resource[@type="page" or @type="image" or @type="thumb"]').map do |node|
153
- node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid}%2F" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
154
- "#{y.attributes['objectId'].text.split(':').last}" + "%2F" + "#{y.attributes['fileId']}"
180
+ node.xpath('./file[@mimetype="image/jp2"]/@id').map{ |x| "#{@druid.gsub('druid:','')}/" + x } << node.xpath('./externalFile[@mimetype="image/jp2"]').map do |y|
181
+ "#{y.attributes['objectId'].text.split(':').last}" + "/" + "#{y.attributes['fileId']}"
155
182
  end
156
183
  end.flatten
157
184
  end
@@ -1,3 +1,3 @@
1
1
  module DiscoveryIndexer
2
- VERSION = '3.0.1'
2
+ VERSION = '3.1.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: discovery-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ahmed AlSum
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-09-22 00:00:00.000000000 Z
12
+ date: 2016-10-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -232,7 +232,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
232
232
  version: '0'
233
233
  requirements: []
234
234
  rubyforge_project:
235
- rubygems_version: 2.4.5.1
235
+ rubygems_version: 2.6.7
236
236
  signing_key:
237
237
  specification_version: 4
238
238
  summary: Shared library for the basic discovery indexing operation for Stanford DLSS.