derivative-rodeo 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +6 -0
  3. data/LICENSE +15 -0
  4. data/README.md +251 -0
  5. data/Rakefile +42 -0
  6. data/derivative_rodeo.gemspec +54 -0
  7. data/lib/derivative/rodeo.rb +3 -0
  8. data/lib/derivative-rodeo.rb +3 -0
  9. data/lib/derivative_rodeo/configuration.rb +95 -0
  10. data/lib/derivative_rodeo/errors.rb +56 -0
  11. data/lib/derivative_rodeo/generators/base_generator.rb +200 -0
  12. data/lib/derivative_rodeo/generators/concerns/copy_file_concern.rb +28 -0
  13. data/lib/derivative_rodeo/generators/copy_generator.rb +14 -0
  14. data/lib/derivative_rodeo/generators/hocr_generator.rb +112 -0
  15. data/lib/derivative_rodeo/generators/monochrome_generator.rb +39 -0
  16. data/lib/derivative_rodeo/generators/pdf_split_generator.rb +61 -0
  17. data/lib/derivative_rodeo/generators/thumbnail_generator.rb +38 -0
  18. data/lib/derivative_rodeo/generators/word_coordinates_generator.rb +39 -0
  19. data/lib/derivative_rodeo/services/base_service.rb +15 -0
  20. data/lib/derivative_rodeo/services/convert_uri_via_template_service.rb +87 -0
  21. data/lib/derivative_rodeo/services/extract_word_coordinates_from_hocr_sgml_service.rb +218 -0
  22. data/lib/derivative_rodeo/services/image_identify_service.rb +89 -0
  23. data/lib/derivative_rodeo/services/image_jp2_service.rb +112 -0
  24. data/lib/derivative_rodeo/services/image_service.rb +73 -0
  25. data/lib/derivative_rodeo/services/pdf_splitter/base.rb +177 -0
  26. data/lib/derivative_rodeo/services/pdf_splitter/jpg_page.rb +14 -0
  27. data/lib/derivative_rodeo/services/pdf_splitter/pages_summary.rb +130 -0
  28. data/lib/derivative_rodeo/services/pdf_splitter/png_page.rb +26 -0
  29. data/lib/derivative_rodeo/services/pdf_splitter/tiff_page.rb +52 -0
  30. data/lib/derivative_rodeo/services/pdf_splitter_service.rb +19 -0
  31. data/lib/derivative_rodeo/services/url_service.rb +42 -0
  32. data/lib/derivative_rodeo/storage_locations/base_location.rb +251 -0
  33. data/lib/derivative_rodeo/storage_locations/concerns/download_concern.rb +67 -0
  34. data/lib/derivative_rodeo/storage_locations/file_location.rb +39 -0
  35. data/lib/derivative_rodeo/storage_locations/http_location.rb +13 -0
  36. data/lib/derivative_rodeo/storage_locations/https_location.rb +13 -0
  37. data/lib/derivative_rodeo/storage_locations/s3_location.rb +103 -0
  38. data/lib/derivative_rodeo/storage_locations/sqs_location.rb +187 -0
  39. data/lib/derivative_rodeo/technical_metadata.rb +23 -0
  40. data/lib/derivative_rodeo/version.rb +5 -0
  41. data/lib/derivative_rodeo.rb +36 -0
  42. metadata +339 -0
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
require 'open3'
require 'shellwords'
require 'mini_magick'
4
+
5
+ module DerivativeRodeo
6
+ module Services
7
+ module PdfSplitter
8
+ # A simple data structure that summarizes the image properties of the given path.
9
PagesSummary = Struct.new(
  :path, :page_count, :width,
  :height, :pixels_per_inch, :color_description,
  :channels, :bits_per_channel, keyword_init: true
) do
  # Zero-based column positions within one data row of `pdfimages -list` output.
  # NOTE: constants assigned inside a block are defined in the lexically enclosing scope,
  # not on the struct itself.
  COL_WIDTH = 3
  COL_HEIGHT = 4
  COL_COLOR_DESC = 5
  COL_CHANNELS = 6
  COL_BITS = 7
  # only poppler 0.25+ has this column in output:
  COL_XPPI = 12

  # @return [Array<String, Integer, Integer>] color description, channels and bits per channel
  def color
    [color_description, channels, bits_per_channel]
  end
  alias_method :ppi, :pixels_per_inch
  alias_method :bits, :bits_per_channel

  # If the underlying extraction couldn't set the various properties, we likely have an
  # invalid PDF.
  #
  # @return [Boolean] false when any of color_description, channels, bits_per_channel or height
  #   is nil, or when page_count is nil/zero; true otherwise
  def valid?
    # Read the struct's own members; there is no `pdf_pages_summary` on this object.
    required = [color_description, channels, bits_per_channel, height]
    return false if required.any?(&:nil?)

    !page_count.to_i.zero?
  end
end
42
+
43
+ ##
44
+ # @api public
45
+ #
46
+ # @param path [String]
47
+ # @return [DerivativeRodeo::Services::PdfSplitter::PagesSummary]
48
+ #
49
+ # Responsible for determining the image properties of the PDF.
50
+ #
51
+ # @note
52
+ #
53
+ # Uses poppler 0.19+ pdfimages command to extract image listing metadata from PDF files.
54
+ # Though we are optimizing for 0.25 or later for poppler.
55
+ #
56
+ # @note
57
+ #
58
+ # For dpi extraction, falls back to calculating using MiniMagick, if necessary.
59
+ #
60
+ # The first two lines are tabular header information:
61
+ #
62
+ # @example Output from PDF Images
63
+ #
64
+ # bash-5.1$ pdfimages -list fmc_color.pdf | head -5
65
+ # page num step width height color comp bpc enc interp object ID x-ppi y-ppi size ratio
66
+ # --------------------------------------------------------------------------------------------
67
+ # 1 0 image 2475 413 rgb 3 8 jpeg no 10 0 300 300 21.8K 0.7%
68
+ # rubocop:disable Metrics/AbcSize - Because this helps us process the results in one loop.
69
+ # rubocop:disable Metrics/MethodLength - Again, to help speed up the processing loop.
70
+ # rubocop:disable Metrics/CyclomaticComplexity
71
+ # rubocop:disable Metrics/PerceivedComplexity
72
def PagesSummary.extract_from(path:)
  # Escape the path so file names containing spaces or shell metacharacters reach pdfimages
  # as a single argument. Warnings are piped to /dev/null; see
  # https://github.com/scientist-softserv/iiif_print/pull/223/files
  command = format('pdfimages -list %<path>s 2>/dev/null', path: Shellwords.escape(path))

  page_count = 0
  color_description = 'gray'
  width = 0
  height = 0
  channels = 0
  bits_per_channel = 0
  pixels_per_inch = 0
  Open3.popen3(command) do |_stdin, stdout, _stderr, _wait_thr|
    # Drop the two tabular header lines; each remaining line is one row of the image listing
    # and is counted towards page_count.
    stdout.read.split("\n").drop(2).each do |line|
      page_count += 1
      cells = line.split

      # A single non-gray row makes the whole summary 'rgb'; the numeric properties keep the
      # largest value seen across all rows.
      color_description = 'rgb' if cells[COL_COLOR_DESC] != 'gray'
      width = [width, cells[COL_WIDTH].to_i].max
      height = [height, cells[COL_HEIGHT].to_i].max
      channels = [channels, cells[COL_CHANNELS].to_i].max
      bits_per_channel = [bits_per_channel, cells[COL_BITS].to_i].max

      # In the case of poppler version < 0.25, we will have no more than 12 columns. As such,
      # we derive the resolution from the first row's pixel width and the width MiniMagick
      # reports for the PDF.
      if page_count == 1 && cells.size <= 12
        pdf = MiniMagick::Image.open(path)
        width_points = pdf.width
        pixels_per_inch = (72 * width / width_points).to_i
      elsif cells[COL_XPPI].to_i > pixels_per_inch
        # nil.to_i is 0, so rows without an x-ppi column never raise the value and this branch
        # won't do much of anything for them.
        pixels_per_inch = cells[COL_XPPI].to_i
      end
    end
  end

  new(
    path: path,
    page_count: page_count,
    pixels_per_inch: pixels_per_inch,
    width: width,
    height: height,
    color_description: color_description,
    channels: channels,
    bits_per_channel: bits_per_channel
  )
end
124
+ # rubocop:enable Metrics/AbcSize
125
+ # rubocop:enable Metrics/MethodLength
126
+ # rubocop:enable Metrics/CyclomaticComplexity
127
+ # rubocop:enable Metrics/PerceivedComplexity
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
+ module PdfSplitter
6
+ # The purpose of this class is to split the PDF into constituent png files.
7
class PngPage < PdfSplitter::Base
  self.image_extension = 'png'

  # Picks the output device name from the PDF's summary and memoizes it (presumably a
  # Ghostscript device, going by the method name -- confirm against PdfSplitter::Base).
  #
  # @return [String] 'pngmonod' for 1-bit gray, 'pnggray' for deeper gray, otherwise 'png16m'
  def gsdevice
    return @gsdevice if defined?(@gsdevice)

    gray = pdf_pages_summary.color_description == 'gray'
    depth = pdf_pages_summary.bits_per_channel

    @gsdevice =
      if gray && depth == 1
        # 1 Bit Grayscale, if applicable:
        'pngmonod'
      elsif gray && depth > 1
        'pnggray'
      else
        'png16m'
      end
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
+ module PdfSplitter
6
+ ##
7
+ # The purpose of this class is to split the PDF into constituent tiff files.
8
class TiffPage < PdfSplitter::Base
  self.image_extension = 'tiff'
  self.compression = 'lzw'

  ##
  # @api private
  #
  # Memoized output device name: a gray device when the summary is gray with at least one bit
  # per channel, otherwise a color device.
  #
  # @return [String]
  def gsdevice
    return @gsdevice if defined?(@gsdevice)

    summary_color = pdf_pages_summary.color_description
    channel_count = pdf_pages_summary.channels
    depth = pdf_pages_summary.bits_per_channel

    @gsdevice = color_bpc(summary_color, depth)

    # otherwise color:
    @gsdevice ||= colordevice(channel_count, depth)
  end

  # @param color [String]
  # @param bpc [Integer] bits per channel
  #
  # @return [String, nil] the gray device name, or nil when the input is not gray (or has a
  #   depth below one bit)
  def color_bpc(color, bpc)
    return nil if color != 'gray'

    if bpc == 1
      # CCITT Group 4 Black and White, if applicable; this also switches the compression.
      self.compression = 'g4'
      return 'tiffg4'
    end

    # 8 Bit Grayscale, if applicable:
    'tiffgray' if bpc > 1
  end

  # @param channels [Integer]
  # @param bpc [Integer] bits per channel
  #
  # @return [String] "tiff24nc" or "tiff48nc"
  def colordevice(channels, bpc)
    total_bits = bpc * channels
    # Only 24 and 48 total bits are passed through; everything else (e.g. a CMYK source)
    # becomes 24, i.e. 8 bits per channel RGB.
    total_bits = 24 unless [24, 48].include?(total_bits)
    "tiff#{total_bits}nc"
  end
end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
module PdfSplitterService
  class << self
    ##
    # @api public
    #
    # Look up the {PdfSplitter::Base} class whose name is built from the given name plus a
    # "Page" suffix (e.g. `:tiff` resolves to `PdfSplitter::TiffPage`).
    #
    # @param name [#to_s]
    # @return [PdfSplitter::Base]
    def for(name)
      page_class_name = "#{name.to_s.classify}_page".classify
      qualified_name = "DerivativeRodeo::Services::PdfSplitter::#{page_class_name}"
      qualified_name.constantize
    end
  end
end
18
+ end
19
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'httparty'
4
+
5
+ module DerivativeRodeo
6
+ module Services
7
+ ##
8
+ # A utility class for handling general URLs. Provided as a means of easing the implementation
9
+ # logic of those that use this class.
10
+ #
11
+ # @note
12
+ # It is a good design idea to wrap a library (in this case HTTParty). The goal is to expose
13
+ # the smallest interface and make it something that would be easy to swap out.
14
+ #
15
+ # @see https://rubygems.org/gems/httparty
16
module UrlService
  ##
  # Fetch the given URL and return the response body.
  #
  # @param url [String]
  #
  # @return [String] the body of the response
  # @raise [StandardError] whatever the request raised, re-raised after being logged
  def self.read(url)
    HTTParty.get(url, logger: DerivativeRodeo.config.logger).body
  rescue StandardError => e
    # This module has no `config` of its own; the logger lives on DerivativeRodeo.config.
    DerivativeRodeo.config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    raise e
  end

  ##
  # Issue a HEAD request for the given URL.
  #
  # @param url [String]
  #
  # @return [Object] whatever HTTParty.head returns when the request does not raise
  # @return [FalseClass] when the head request raises; the error is logged, not re-raised
  def self.exists?(url)
    HTTParty.head(url, logger: DerivativeRodeo.config.logger)
  rescue StandardError => e
    DerivativeRodeo.config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    false
  end
end
41
+ end
42
+ end
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tmpdir'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # When the output location is the same type of location as "this" location, we indicate that via
9
+ # the SAME constant.
10
+ SAME = :same
11
+
12
+ ##
13
+ # The base location for storing files.
14
+ #
15
+ # - dir :: is the directory path
16
+ # - path :: is the full file path
17
+ # - uri :: is the full file path plus the uri prefix parts
18
+ #
19
+ # A location represents a pointer to a storage location. The {#exist?} method can answer if a
20
+ # file exists at the path.
21
+ #
22
+ # rubocop:disable Metrics/ClassLength
23
class BaseLocation
  @locations = []

  ##
  # @return [Array<String>] the location names known to this class
  def self.locations
    @locations ||= []
  end

  ##
  # Adds the name of every new subclass to {.locations}.
  #
  # @param subclass [Class]
  def self.inherited(subclass)
    locations << subclass.location_name
    super
  end

  ##
  # @return [String] the class name without its namespace, underscored, and with a trailing
  #   "_location" removed (e.g. "file" for FileLocation)
  def self.location_name
    to_s.demodulize.underscore.sub(/_location$/, '')
  end

  class << self
    alias scheme location_name
  end

  ##
  # @param location_name [String] a location name, or a URI/template that starts with one
  #   followed by "://"
  #
  # @return [Class]
  # @raise [Errors::StorageLocationNotFoundError] when the name is not in {.locations}
  def self.load_location(location_name)
    location_name = location_name.split("://").first
    raise Errors::StorageLocationNotFoundError.new(location_name: location_name) unless locations.include?(location_name)
    "DerivativeRodeo::StorageLocations::#{location_name.to_s.classify}Location".constantize
  end

  ##
  # @param file_uri [String] of the form scheme://arbitrary-stuff
  #
  # @return [BaseLocation]
  # @raise [Errors::StorageLocationMissing] when nothing precedes the "://"
  def self.from_uri(file_uri)
    location_name = file_uri.split('://').first
    raise Errors::StorageLocationMissing.new(file_uri: file_uri) if location_name.blank?

    load_location(location_name).new(file_uri)
  end

  ##
  # Registers the location name with BaseLocation so it can be used; names that are already
  # registered are not added twice.
  #
  # @param location_name [String]
  def self.register_location(location_name)
    return if DerivativeRodeo::StorageLocations::BaseLocation.locations.include?(location_name.to_s)

    DerivativeRodeo::StorageLocations::BaseLocation.locations << location_name.to_s
  end

  ##
  # Create a new uri of the classes type. Parts argument should have a default in
  # implementing classes. Must support a number or the symbol :all
  #
  # @api public
  #
  # @param path [String]
  # @param parts [Integer, :all]
  # @return [String]
  # @raise [NotImplementedError] always; subclasses provide the implementation
  #
  # @see .file_path_from_parts
  def self.create_uri(path:, parts:)
    # Inside a class method `self` is already the class; `self.class` would report "Class".
    raise NotImplementedError, "#{self}.create_uri"
  end

  ##
  # Build a {StorageLocations::BaseLocation} by converting the :from_uri with the :template via
  # the given :service.
  #
  # @param from_uri [String]
  # @param template [String]
  # @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
  #
  # @return [StorageLocations::BaseLocation]
  def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
    # HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
    from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
    to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
    new(to_uri)
  end

  ##
  # Keep only the trailing "/"-separated segments of the path.
  #
  # @param path [String]
  # @param parts [Integer, :all] how many trailing segments to keep (the sign is ignored);
  #   :all returns the path unchanged
  #
  # @return [String]
  def self.file_path_from_parts(path:, parts:)
    return path if parts == :all

    segments = path.split('/')
    # A negative start index counts back from the end of the segments.
    start = parts.negative? ? parts : -parts
    # Asking for more segments than the path has keeps them all instead of slicing to nil.
    start = 0 if start < -segments.size
    segments[start..-1].join('/')
  end

  ##
  # @param file_uri [String] a URI to the file's location; this is **not** a templated URI (as
  #        described in {DerivativeRodeo::Services::ConvertUriViaTemplateService}
  # @param config [DerivativeRodeo::Configuration]
  def initialize(file_uri, config: DerivativeRodeo.config)
    @file_uri = file_uri
    @config = config
  end

  attr_accessor :tmp_file_path
  private :tmp_file_path=, :tmp_file_path

  attr_reader :config, :file_uri

  ##
  # Yields a temporary path holding a freshly touched, empty file.
  #
  # @param auto_write_file [Boolean] Provided as a testing helper method.
  #
  # @yieldparam tmp_file_path [String]
  #
  # @return [StorageLocations::BaseLocation]
  # @see with_tmp_path
  def with_new_tmp_path(auto_write_file: true, &block)
    with_tmp_path(lambda { |_file_path, tmp_file_path, exist|
      FileUtils.rm_rf(tmp_file_path) if exist
      FileUtils.touch(tmp_file_path)
    }, auto_write_file: auto_write_file, &block)
  end

  ##
  # @yieldparam tmp_file_path [String]
  # @return [StorageLocations::BaseLocation]
  # @raise [NotImplementedError] unless a subclass implements it
  def with_existing_tmp_path
    raise NotImplementedError, "#{self.class}#with_existing_tmp_path"
  end

  ##
  # @param preamble_lambda [Lambda, #call] the "function" we should call to prepare the
  #        temporary location before we yield its location. It receives the file path, the
  #        temporary file path, and the result of {#exist?}.
  #
  # @param auto_write_file [Boolean] Provided as a testing helper method.  Given that we have
  #        both {#with_new_tmp_path} and {#with_existing_tmp_path}, we want the default to not
  #        automatically perform the write.  But this is something we can easily forget when
  #        working with the {#with_new_tmp_path}
  #
  # @yieldparam tmp_file_path [String]
  #
  # @return [StorageLocations::BaseLocation]
  # @raise [ArgumentError] when no block is given
  def with_tmp_path(preamble_lambda, auto_write_file: false)
    raise ArgumentError, 'Expected a block' unless block_given?

    begin
      tmp_file_dir do |tmpdir|
        self.tmp_file_path = File.join(tmpdir, file_dir, file_name)
        FileUtils.mkdir_p(File.dirname(tmp_file_path))
        preamble_lambda.call(file_path, tmp_file_path, exist?)
        yield tmp_file_path
        write if auto_write_file
      end
    ensure
      # Always forget the temporary path, even when the block raised, so a later call never
      # sees a path left over from this one.
      self.tmp_file_path = nil
    end

    # In returning self we again remove the need for those calling #with_new_tmp_path,
    # #with_tmp_path, and #with_existing_tmp_path to remember to return the current Location.
    # In other words removing the jagged edges of the code.
    self
  end

  ##
  # Write the tmp file to the file_uri
  #
  # @raise [NotImplementedError] unless a subclass implements it
  def write
    raise NotImplementedError, "#{self.class}#write"
  end

  ##
  # @return [TrueClass] when the file exists in this storage
  # @return [FalseClass] when the file does not exist in this storage
  # @raise [NotImplementedError] unless a subclass implements it
  def exist?
    raise NotImplementedError, "#{self.class}#exist?"
  end
  alias exists? exist?

  ##
  # @param template [String]
  # @return [StorageLocations::BaseLocation]
  #
  # @see DerivativeRodeo::Services::ConvertUriViaTemplateService
  def derived_file_from(template:)
    klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
    klass.build(from_uri: file_path, template: template)
  end

  ##
  # @param extension [String, StorageLocations::SAME]
  # @return [String] the path for the new extension; when given {StorageLocations::SAME} re-use
  #         the file's extension.
  #
  # @note Everything from the first "." in the path onward is replaced, including dots that
  #   appear in directory names.
  def with_new_extension(extension)
    return file_path if extension == StorageLocations::SAME

    "#{file_path.split('.')[0]}.#{extension}"
  end

  # @return [String] the file_uri without its leading "scheme://"
  def file_path
    @file_path ||= @file_uri.sub(%r{.+://}, '')
  end

  # @return [String] the directory portion of {#file_path}
  def file_dir
    @file_dir ||= File.dirname(file_path)
  end

  # @return [String] the last segment of {#file_path}, extension included
  def file_name
    @file_name ||= File.basename(file_path)
  end

  # @return [String] the extension of {#file_path}, including the leading "."
  def file_extension
    @file_extension ||= File.extname(file_path)
  end

  # @return [String] {#file_name} without the {#file_extension}
  def file_basename
    @file_basename ||= File.basename(file_path, file_extension)
  end

  # Yields a new temporary directory created by Dir.mktmpdir.
  #
  # @yieldparam tmpdir [String]
  # @raise [ArgumentError] when no block is given
  def tmp_file_dir(&block)
    raise ArgumentError, 'Expected a block' unless block_given?

    Dir.mktmpdir(&block)
  end
end
245
+ # rubocop:enable Metrics/ClassLength
246
+ end
247
+ end
248
+
249
# Require every file below this directory, in sorted order, skipping directories and any path
# that matches 'base_location' (this file).
Dir.glob(File.join(__dir__, '**/*')).sort.each do |location|
  next if File.directory?(location)
  next if location.match?('base_location')

  require location
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'httparty'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # A helper module for copying files from one location to another.
9
module DownloadConcern
  extend ActiveSupport::Concern

  class_methods do
    # @param path [String]
    # @param parts [Integer, :all] passed through to file_path_from_parts
    # @param ssl [Boolean] true for an "https://" prefix, false for "http://"
    #
    # @return [String]
    def create_uri(path:, parts: :all, ssl: true)
      file_path = file_path_from_parts(path: path, parts: parts)
      "#{adapter_prefix(ssl: ssl)}#{file_path}"
    end

    # @param ssl [Boolean]
    # @return [String] "https://" or "http://"
    def adapter_prefix(ssl: true)
      ssl ? "https://" : "http://"
    end
  end

  delegate :config, to: DerivativeRodeo

  ##
  # Downloads the file_uri into the temporary path before yielding it.
  #
  # @yieldparam tmp_file_path [String]
  # @raise [Errors::FileMissingError] when {#exist?} is false
  def with_existing_tmp_path(&block)
    with_tmp_path(lambda { |_file_path, tmp_file_path, exist|
      raise Errors::FileMissingError unless exist

      # The download helper in this module is #read; there is no #get.
      response = read(file_uri)
      File.binwrite(tmp_file_path, response.body)
    }, &block)
  end

  ##
  # Answers whether this location's file_uri can be reached, using {#exists?}. Defined here
  # because with_tmp_path calls the zero-argument #exist?.
  #
  # @return [Boolean] false only when the head request raised
  def exist?
    exists?(file_uri) ? true : false
  end

  ##
  # Implemented to complete the interface.
  #
  # @raise [RuntimeError] always
  def write
    raise "#{self.class}#write is deliberately not implemented"
  end

  ##
  # @param url [String]
  #
  # @return [Object] whatever HTTParty.get returns; callers read its #body
  # @raise [StandardError] whatever the request raised, re-raised after being logged
  def read(url)
    HTTParty.get(url, logger: config.logger)
  rescue => e
    config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    raise e
  end

  ##
  # @param url [String]
  #
  # @return [Object] whatever HTTParty.head returns when the request does not raise
  # @return [FalseClass] when the head request raises; the error is logged, not re-raised
  def exists?(url)
    HTTParty.head(url, logger: config.logger)
  rescue => e
    config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    false
  end
end
66
+ end
67
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module StorageLocations
5
+ ##
6
+ # Location for files found on a local disk
7
class FileLocation < BaseLocation
  # @param path [String]
  # @param parts [Integer, :all] passed through to file_path_from_parts
  #
  # @return [String] the selected part of the path prefixed with {.adapter_prefix}
  def self.create_uri(path:, parts: :all)
    file_path = file_path_from_parts(path: path, parts: parts)
    "#{adapter_prefix}#{file_path}"
  end

  # @return [String] the scheme followed by "://"
  def self.adapter_prefix
    "#{scheme}://"
  end

  # Copies the existing file to the temporary path before yielding it.
  #
  # @yieldparam tmp_file_path [String]
  # @raise [Errors::FileMissingError] when the file does not exist
  def with_existing_tmp_path(&block)
    with_tmp_path(lambda { |file_path, tmp_file_path, exist|
      raise Errors::FileMissingError unless exist

      FileUtils.cp(file_path, tmp_file_path)
    }, &block)
  end

  # @return [Boolean] whether file_path exists on disk
  def exist?
    File.exist?(file_path)
  end

  # write the file to the file_uri
  #
  # @return [String] the file_uri
  # @raise [Errors::FileMissingError] when there is no temporary file to copy, e.g. when called
  #   outside of a with_new_tmp_path block
  def write
    # `raise Klass, message` -- the previous `Klass("...")` form was a method call. A nil
    # tmp_file_path is treated the same as a missing file.
    unless tmp_file_path && File.exist?(tmp_file_path)
      raise Errors::FileMissingError, "Use write within a with_new_tmp_path block and fill the tmp file with data before writing"
    end

    FileUtils.mkdir_p(file_dir)
    FileUtils.cp_r(tmp_file_path, file_path)
    file_uri
  end
end
38
+ end
39
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'derivative_rodeo/storage_locations/concerns/download_concern'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # Location for files from the web. Download only, can not write!
9
class HttpLocation < BaseLocation
  # All behaviour specific to this location comes from DownloadConcern.
  include DownloadConcern
end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'derivative_rodeo/storage_locations/concerns/download_concern'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # Location for files from the web. Download only, can not write!
9
class HttpsLocation < BaseLocation
  # All behaviour specific to this location comes from DownloadConcern.
  include DownloadConcern
end
12
+ end
13
+ end