derivative-rodeo 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +6 -0
  3. data/LICENSE +15 -0
  4. data/README.md +251 -0
  5. data/Rakefile +42 -0
  6. data/derivative_rodeo.gemspec +54 -0
  7. data/lib/derivative/rodeo.rb +3 -0
  8. data/lib/derivative-rodeo.rb +3 -0
  9. data/lib/derivative_rodeo/configuration.rb +95 -0
  10. data/lib/derivative_rodeo/errors.rb +56 -0
  11. data/lib/derivative_rodeo/generators/base_generator.rb +200 -0
  12. data/lib/derivative_rodeo/generators/concerns/copy_file_concern.rb +28 -0
  13. data/lib/derivative_rodeo/generators/copy_generator.rb +14 -0
  14. data/lib/derivative_rodeo/generators/hocr_generator.rb +112 -0
  15. data/lib/derivative_rodeo/generators/monochrome_generator.rb +39 -0
  16. data/lib/derivative_rodeo/generators/pdf_split_generator.rb +61 -0
  17. data/lib/derivative_rodeo/generators/thumbnail_generator.rb +38 -0
  18. data/lib/derivative_rodeo/generators/word_coordinates_generator.rb +39 -0
  19. data/lib/derivative_rodeo/services/base_service.rb +15 -0
  20. data/lib/derivative_rodeo/services/convert_uri_via_template_service.rb +87 -0
  21. data/lib/derivative_rodeo/services/extract_word_coordinates_from_hocr_sgml_service.rb +218 -0
  22. data/lib/derivative_rodeo/services/image_identify_service.rb +89 -0
  23. data/lib/derivative_rodeo/services/image_jp2_service.rb +112 -0
  24. data/lib/derivative_rodeo/services/image_service.rb +73 -0
  25. data/lib/derivative_rodeo/services/pdf_splitter/base.rb +177 -0
  26. data/lib/derivative_rodeo/services/pdf_splitter/jpg_page.rb +14 -0
  27. data/lib/derivative_rodeo/services/pdf_splitter/pages_summary.rb +130 -0
  28. data/lib/derivative_rodeo/services/pdf_splitter/png_page.rb +26 -0
  29. data/lib/derivative_rodeo/services/pdf_splitter/tiff_page.rb +52 -0
  30. data/lib/derivative_rodeo/services/pdf_splitter_service.rb +19 -0
  31. data/lib/derivative_rodeo/services/url_service.rb +42 -0
  32. data/lib/derivative_rodeo/storage_locations/base_location.rb +251 -0
  33. data/lib/derivative_rodeo/storage_locations/concerns/download_concern.rb +67 -0
  34. data/lib/derivative_rodeo/storage_locations/file_location.rb +39 -0
  35. data/lib/derivative_rodeo/storage_locations/http_location.rb +13 -0
  36. data/lib/derivative_rodeo/storage_locations/https_location.rb +13 -0
  37. data/lib/derivative_rodeo/storage_locations/s3_location.rb +103 -0
  38. data/lib/derivative_rodeo/storage_locations/sqs_location.rb +187 -0
  39. data/lib/derivative_rodeo/technical_metadata.rb +23 -0
  40. data/lib/derivative_rodeo/version.rb +5 -0
  41. data/lib/derivative_rodeo.rb +36 -0
  42. metadata +339 -0
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+ require 'open3'
3
+ require 'mini_magick'
4
+
5
+ module DerivativeRodeo
6
+ module Services
7
+ module PdfSplitter
8
+ # A simple data structure that summarizes the image properties of the given path.
9
PagesSummary = Struct.new(
  :path, :page_count, :width,
  :height, :pixels_per_inch, :color_description,
  :channels, :bits_per_channel, keyword_init: true
) do
  # Zero-based column positions within a whitespace-split data row of `pdfimages -list`.
  #
  # NOTE: constants assigned inside a block are defined in the enclosing lexical scope (not on
  # the Struct), which is why code outside of this block can reference them unqualified.
  COL_WIDTH = 3
  COL_HEIGHT = 4
  COL_COLOR_DESC = 5
  COL_CHANNELS = 6
  COL_BITS = 7
  # only poppler 0.25+ has this column in output:
  COL_XPPI = 12

  # @return [Array<String, Integer, Integer>] the color description, channel count, and bits
  #   per channel (in that order).
  def color
    [color_description, channels, bits_per_channel]
  end
  alias_method :ppi, :pixels_per_inch
  alias_method :bits, :bits_per_channel

  # If the underlying extraction couldn't set the various properties, we likely have an
  # invalid PDF.
  #
  # The summary inspects its own attributes; it is the Struct that holds them, so there is no
  # other object to ask.
  #
  # @return [Boolean] false when any of color_description, channels, bits_per_channel, or
  #   height is nil, or when the page count is zero (or nil); true otherwise.
  def valid?
    return false if [color_description, channels, bits_per_channel, height].any?(&:nil?)

    !page_count.to_i.zero?
  end
end
42
+
43
+ ##
44
+ # @api public
45
+ #
46
+ # @param path [String]
47
+ # @return [DerivativeRodeo::Services::PdfSplitter::PagesSummary]
48
+ #
49
+ # Responsible for determining the image properties of the PDF.
50
+ #
51
+ # @note
52
+ #
53
+ # Uses poppler 0.19+ pdfimages command to extract image listing metadata from PDF files.
54
+ # Though we are optimizing for 0.25 or later for poppler.
55
+ #
56
+ # @note
57
+ #
58
+ # For dpi extraction, falls back to calculating using MiniMagick, if necessary.
59
+ #
60
+ # The first two lines are tabular header information:
61
+ #
62
+ # @example Output from PDF Images
63
+ #
64
+ # bash-5.1$ pdfimages -list fmc_color.pdf | head -5
65
+ # page num step width height color comp bpc enc interp object ID x-ppi y-ppi size ratio
66
+ # --------------------------------------------------------------------------------------------
67
+ # 1 0 image 2475 413 rgb 3 8 jpeg no 10 0 300 300 21.8K 0.7%
68
+ # rubocop:disable Metrics/AbcSize - Because this helps us process the results in one loop.
69
+ # rubocop:disable Metrics/MethodLength - Again, to help speed up the processing loop.
70
+ # rubocop:disable Metrics/CyclomaticComplexity
71
+ # rubocop:disable Metrics/PerceivedComplexity
72
# Summarize the images embedded in the PDF at the given path by parsing the tabular output of
# `pdfimages -list`.
#
# Across all listed images we keep the largest width, height, channel count, bits per channel,
# and x-ppi; the color description is 'gray' unless any row reports something else, in which
# case it is 'rgb'. Every data row is counted towards page_count.
#
# @param path [#to_s] location of the PDF on disk.
# @return [PagesSummary] when `pdfimages` produces no data rows (including when the executable
#   is not installed) the summary has a page_count of 0.
def PagesSummary.extract_from(path:)
  # The arguments are handed to the process as an argv array rather than interpolated into a
  # shell string: a path containing spaces or shell metacharacters can neither break nor
  # hijack the command. Warnings are discarded via `err: File::NULL`; see
  # https://github.com/scientist-softserv/iiif_print/pull/223/files for why they are ignored.
  listing =
    begin
      Open3.capture2('pdfimages', '-list', path.to_s, err: File::NULL).first
    rescue Errno::ENOENT
      # The shell-based invocation yielded empty output when `pdfimages` was missing; keep
      # that contract so callers continue to receive a summary that reports zero pages.
      ''
    end

  page_count = 0
  color_description = 'gray'
  width = 0
  height = 0
  channels = 0
  bits_per_channel = 0
  pixels_per_inch = 0

  listing.split("\n").each_with_index do |line, index|
    # Skip the two header lines (the column titles and the dashed separator).
    next if index <= 1

    page_count += 1
    cells = line.split

    color_description = 'rgb' if cells[COL_COLOR_DESC] != 'gray'
    width = cells[COL_WIDTH].to_i if cells[COL_WIDTH].to_i > width
    height = cells[COL_HEIGHT].to_i if cells[COL_HEIGHT].to_i > height
    channels = cells[COL_CHANNELS].to_i if cells[COL_CHANNELS].to_i > channels
    bits_per_channel = cells[COL_BITS].to_i if cells[COL_BITS].to_i > bits_per_channel

    # In the case of poppler version < 0.25, we will have no more than 12 columns. As such,
    # we need to do some alternative magic to calculate this.
    if page_count == 1 && cells.size <= 12
      width_points = MiniMagick::Image.open(path).width
      # Guard the division; a zero width leaves pixels_per_inch at its current value.
      pixels_per_inch = (72 * width / width_points).to_i if width_points.to_i.positive?
    elsif cells[COL_XPPI].to_i > pixels_per_inch
      # By the magic of nil#to_i, when a row has no x-ppi column this comparison is false and
      # the branch does not do much of anything.
      pixels_per_inch = cells[COL_XPPI].to_i
    end
  end

  new(
    path: path,
    page_count: page_count,
    pixels_per_inch: pixels_per_inch,
    width: width,
    height: height,
    color_description: color_description,
    channels: channels,
    bits_per_channel: bits_per_channel
  )
end
124
+ # rubocop:enable Metrics/AbcSize
125
+ # rubocop:enable Metrics/MethodLength
126
+ # rubocop:enable Metrics/CyclomaticComplexity
127
+ # rubocop:enable Metrics/PerceivedComplexity
128
+ end
129
+ end
130
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
+ module PdfSplitter
6
# Splits a PDF into its constituent pages, one PNG file per page.
class PngPage < PdfSplitter::Base
  self.image_extension = 'png'

  # Choose the output device name based on the summarized color properties of the PDF. The
  # answer is computed once and remembered in @gsdevice.
  #
  # @return [String] 'pngmonod' for 1-bit gray, 'pnggray' for deeper gray, 'png16m' otherwise.
  def gsdevice
    return @gsdevice if defined?(@gsdevice)

    tone = pdf_pages_summary.color_description
    depth = pdf_pages_summary.bits_per_channel

    @gsdevice =
      if tone == 'gray' && depth == 1
        # 1 Bit Grayscale, if applicable:
        'pngmonod'
      elsif tone == 'gray' && depth > 1
        'pnggray'
      else
        'png16m'
      end
  end
end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
+ module PdfSplitter
6
##
# Splits a PDF into its constituent pages, one TIFF file per page.
class TiffPage < PdfSplitter::Base
  self.image_extension = 'tiff'
  self.compression = 'lzw'

  ##
  # @api private
  #
  # Pick the output device name from the summarized color properties of the PDF; the answer
  # is remembered in @gsdevice.
  #
  # @return [String]
  def gsdevice
    return @gsdevice if defined?(@gsdevice)

    tone = pdf_pages_summary.color_description
    channel_count = pdf_pages_summary.channels
    depth = pdf_pages_summary.bits_per_channel

    # Gray sources get a dedicated device...
    @gsdevice = color_bpc(tone, depth)

    # ...otherwise fall through to a color device.
    @gsdevice ||= colordevice(channel_count, depth)
  end

  # @param color [String] the color description of the PDF
  # @param bpc [Integer] bits per channel
  # @return [String, nil] the gray device name; nil when the color is not 'gray' or the bit
  #   depth is below 1.
  def color_bpc(color, bpc)
    return nil if color != 'gray'

    if bpc == 1
      # CCITT Group 4 Black and White, if applicable:
      self.compression = 'g4'
      return 'tiffg4'
    end

    # 8 Bit Grayscale, if applicable:
    'tiffgray' if bpc > 1
  end

  # @param channels [Integer]
  # @param bpc [Integer] bits per channel
  # @return [String] "tiff24nc" or "tiff48nc"
  def colordevice(channels, bpc)
    requested = bpc * channels
    # Only the 24 and 48 bit devices are selected; every other combination (the original note
    # mentions CMYK sources being transformed to 8bpc RGB) uses the 24 bit device.
    depth = [24, 48].include?(requested) ? requested : 24
    "tiff#{depth}nc"
  end
end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module Services
5
module PdfSplitterService
  class << self
    ##
    # @api public
    #
    # Look up the splitter constant in the {PdfSplitter} namespace that corresponds to the
    # given name; the name is classified and suffixed with "Page" (e.g. :tiff resolves
    # "TiffPage").
    #
    # @param name [#to_s]
    # @return [Class] presumably a {PdfSplitter::Base} subclass - whatever constant the
    #   computed name resolves to.
    def for(name)
      page_class_name = "#{name.to_s.classify}_page".classify
      qualified_name = "DerivativeRodeo::Services::PdfSplitter::#{page_class_name}"
      qualified_name.constantize
    end
  end
end
18
+ end
19
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'httparty'
4
+
5
+ module DerivativeRodeo
6
+ module Services
7
+ ##
8
+ # A utility class for handling general URLs. Provided as a means of easing the implementation
9
+ # logic of those that use this class.
10
+ #
11
+ # @note
12
+ # It is a good design idea to wrap a library (in this case HTTParty). The goal is to expose
13
+ # the smallest interface and make it something that would be easy to swap out.
14
+ #
15
+ # @see https://rubygems.org/gems/httparty
16
module UrlService
  ##
  # Perform a GET request and hand back the body of the response.
  #
  # @param url [String]
  #
  # @return [String] the body of the response
  # @raise [StandardError] whatever the request raised; the error is logged and then re-raised
  #   untouched.
  def self.read(url)
    HTTParty.get(url, logger: DerivativeRodeo.config.logger).body
  rescue StandardError => e
    # This module has no `config` method of its own; the logger must be reached through
    # DerivativeRodeo, otherwise a NameError raised here would mask the original failure.
    DerivativeRodeo.config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    raise
  end

  ##
  # Perform a HEAD request to determine whether the URL resolves.
  #
  # @param url [String]
  #
  # @return [Object] the (truthy) response of the HEAD request when its status code is 2xx
  # @return [FalseClass] when the URL's head request is not successful (non-2xx status) or the
  #   request raised an error (which is logged).
  def self.exists?(url)
    response = HTTParty.head(url, logger: DerivativeRodeo.config.logger)
    # A response object is truthy regardless of its status, so check the code explicitly.
    (200..299).cover?(response.code.to_i) ? response : false
  rescue StandardError => e
    DerivativeRodeo.config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    false
  end
end
41
+ end
42
+ end
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tmpdir'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # When the output location is the same type of location as "this" location, we indicate that via
9
+ # the SAME constant.
10
+ SAME = :same
11
+
12
+ ##
13
+ # The base location for storing files.
14
+ #
15
+ # - dir :: is the directory path
16
+ # - path :: is the full file path
17
+ # - uri :: is the full file path plus the uri prefix parts
18
+ #
19
+ # A location represents a pointer to a storage location. The {#exist?} method can answer if a
20
+ # file exists at the path.
21
+ #
22
+ # rubocop:disable Metrics/ClassLength
23
class BaseLocation
  @locations = []

  ##
  # @return [Array<String>] the registered location names
  def self.locations
    @locations ||= []
  end

  ##
  # Ruby's subclassing hook: every subclass is registered under its {.location_name}. We go
  # through {.register_location} so that all registration shares one code path (and one list).
  def self.inherited(subclass)
    register_location(subclass.location_name)
    super
  end

  ##
  # @return [String] the demodulized, underscored class name without its "_location" suffix
  def self.location_name
    to_s.demodulize.underscore.sub(/_location$/, '')
  end

  class << self
    alias scheme location_name
  end

  ##
  # @param location_name [String] either a bare name or a URI/template; only the portion before
  #   "://" is considered.
  #
  # @return [Class]
  # @raise [Errors::StorageLocationNotFoundError] when the name is not registered
  def self.load_location(location_name)
    location_name = location_name.split("://").first
    raise Errors::StorageLocationNotFoundError.new(location_name: location_name) unless locations.include?(location_name)
    "DerivativeRodeo::StorageLocations::#{location_name.to_s.classify}Location".constantize
  end

  ##
  # @param file_uri [String] of the form scheme://arbitrary-stuff
  #
  # @return [BaseLocation]
  # @raise [Errors::StorageLocationMissing] when no scheme can be determined
  def self.from_uri(file_uri)
    location_name = file_uri.split('://').first
    raise Errors::StorageLocationMissing.new(file_uri: file_uri) if location_name.blank?

    load_location(location_name).new(file_uri)
  end

  ##
  # Registers the location with the main StorageLocation class so it can be used. Registering
  # the same name twice is a no-op.
  #
  # @param location_name [#to_s]
  def self.register_location(location_name)
    return if DerivativeRodeo::StorageLocations::BaseLocation.locations.include?(location_name.to_s)

    DerivativeRodeo::StorageLocations::BaseLocation.locations << location_name.to_s
  end

  ##
  # Create a new uri of the class's type. The parts argument should have a default in
  # implementing classes. Must support a number or the symbol :all
  #
  # @api public
  #
  # @param path [String]
  # @param parts [Integer, :all]
  # @return [String]
  # @raise [NotImplementedError] always; subclasses provide the implementation
  #
  # @see .file_path_from_parts
  def self.create_uri(path:, parts:)
    # Within a class method `self` is the class; `self.class` would merely report "Class".
    raise NotImplementedError, "#{self}.create_uri"
  end

  ##
  # Build a {StorageLocations::BaseLocation} by converting the :from_uri with the :template via
  # the given :service.
  #
  # @param from_uri [String]
  # @param template [String]
  # @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
  #
  # @return [StorageLocations::BaseLocation]
  def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
    # HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
    from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
    to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
    new(to_uri)
  end

  ##
  # Keep only the trailing "/"-separated segments of the path.
  #
  # @param path [String]
  # @param parts [Integer, :all] how many trailing segments to keep; the sign is ignored. Both
  #   :all and 0 keep the whole path, as does a count that exceeds the available segments.
  #
  # @return [String]
  def self.file_path_from_parts(path:, parts:)
    return path if parts == :all

    count = parts.abs
    return path.split('/').join('/') if count.zero?

    # Array#last never returns nil, so asking for more segments than exist is safe.
    path.split('/').last(count).join('/')
  end

  ##
  # @param file_uri [String] a URI to the file's location; this is **not** a templated URI (as
  #        described in {DerivativeRodeo::Services::ConvertUriViaTemplateService}
  # @param config [DerivativeRodeo::Configuration]
  def initialize(file_uri, config: DerivativeRodeo.config)
    @file_uri = file_uri
    @config = config
  end

  attr_accessor :tmp_file_path
  private :tmp_file_path=, :tmp_file_path

  attr_reader :config, :file_uri

  ##
  # Yield a fresh (empty) temporary file path; anything already at that temporary path is
  # removed first.
  #
  # @param auto_write_file [Boolean] Provided as a testing helper method.
  #
  # @yieldparam tmp_file_path [String]
  #
  # @return [StorageLocations::BaseLocation]
  # @see with_tmp_path
  def with_new_tmp_path(auto_write_file: true, &block)
    with_tmp_path(lambda { |_file_path, tmp_file_path, exist|
      FileUtils.rm_rf(tmp_file_path) if exist
      FileUtils.touch(tmp_file_path)
    }, auto_write_file: auto_write_file, &block)
  end

  ##
  # @yieldparam tmp_file_path [String]
  # @return [StorageLocations::BaseLocation]
  # @raise [NotImplementedError] always; subclasses provide the implementation
  def with_existing_tmp_path
    raise NotImplementedError, "#{self.class}#with_existing_tmp_path"
  end

  ##
  # @param preamble_lambda [Lambda, #call] the "function" we should call to prepare the
  #        temporary location before we yield its location. It receives the file path, the
  #        temporary file path, and the result of {#exist?}.
  #
  # @param auto_write_file [Boolean] Provided as a testing helper method.  Given that we have
  #        both {#with_new_tmp_path} and {#with_existing_tmp_path}, we want the default to not
  #        automatically perform the write.  But this is something we can easily forget when
  #        working with the {#with_new_tmp_path}
  #
  # @yieldparam tmp_file_path [String]
  #
  # @return [StorageLocations::BaseLocation]
  # @raise [ArgumentError] when no block is given
  def with_tmp_path(preamble_lambda, auto_write_file: false)
    raise ArgumentError, 'Expected a block' unless block_given?

    begin
      tmp_file_dir do |tmpdir|
        self.tmp_file_path = File.join(tmpdir, file_dir, file_name)
        FileUtils.mkdir_p(File.dirname(tmp_file_path))
        preamble_lambda.call(file_path, tmp_file_path, exist?)
        yield tmp_file_path
        write if auto_write_file
      end
    ensure
      # The temporary directory is gone once the block above exits - even when it raises - so
      # never leave a stale path behind.
      self.tmp_file_path = nil
    end

    # In returning self we again remove the need for those calling #with_new_tmp_path,
    # #with_tmp_path, and #with_existing_tmp_path to remember to return the current Location.
    # In other words removing the jagged edges of the code.
    self
  end

  ##
  # Write the tmp file to the file_uri
  #
  # @raise [NotImplementedError] always; subclasses provide the implementation
  def write
    raise NotImplementedError, "#{self.class}#write"
  end

  ##
  # @return [TrueClass] when the file exists in this storage
  # @return [FalseClass] when the file does not exist in this storage
  # @raise [NotImplementedError] always; subclasses provide the implementation
  def exist?
    raise NotImplementedError, "#{self.class}#exist?"
  end
  alias exists? exist?

  ##
  # @param template [String]
  # @return [StorageLocations::BaseLocation]
  #
  # @see DerivativeRodeo::Services::ConvertUriViaTemplateService
  def derived_file_from(template:)
    klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
    klass.build(from_uri: file_path, template: template)
  end

  ##
  # @param extension [String, StorageLocations::SAME]
  # @return [String] the path for the new extension; when given {StorageLocations::SAME} re-use
  #         the file's extension.
  #
  # NOTE(review): everything after the FIRST "." of the path is replaced; this drops compound
  # extensions (e.g. ".mono.tiff") but also truncates at a dot in a directory name - confirm
  # that callers never hand us such paths.
  def with_new_extension(extension)
    return file_path if extension == StorageLocations::SAME

    "#{file_path.split('.')[0]}.#{extension}"
  end

  # @return [String] the file_uri without its leading "scheme://" portion
  def file_path
    @file_path ||= @file_uri.sub(%r{.+://}, '')
  end

  # @return [String] the directory portion of {#file_path}
  def file_dir
    @file_dir ||= File.dirname(file_path)
  end

  # @return [String] the last segment of {#file_path}
  def file_name
    @file_name ||= File.basename(file_path)
  end

  # @return [String] the extension of {#file_path}, including the leading "."
  def file_extension
    @file_extension ||= File.extname(file_path)
  end

  # @return [String] {#file_name} without its extension
  def file_basename
    @file_basename ||= File.basename(file_path, file_extension)
  end

  # @yieldparam tmpdir [String] a temporary directory that is removed once the block finishes
  # @raise [ArgumentError] when no block is given
  def tmp_file_dir(&block)
    raise ArgumentError, 'Expected a block' unless block_given?

    Dir.mktmpdir(&block)
  end
end
245
+ # rubocop:enable Metrics/ClassLength
246
+ end
247
+ end
248
+
249
# Load every file beneath this directory (in sorted order), skipping directories and anything
# whose path matches 'base_location' (i.e. this file).
Dir.glob(File.join(__dir__, '**/*')).sort.each do |location|
  next if File.directory?(location)
  next if location.match?('base_location')

  require location
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'httparty'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
+ ##
8
+ # A helper module for copying files from one location to another.
9
module DownloadConcern
  extend ActiveSupport::Concern

  class_methods do
    # @param path [String]
    # @param parts [Integer, :all] forwarded to `file_path_from_parts`
    # @param ssl [Boolean] when true use the "https://" prefix, otherwise "http://"
    # @return [String]
    def create_uri(path:, parts: :all, ssl: true)
      file_path = file_path_from_parts(path: path, parts: parts)
      "#{adapter_prefix(ssl: ssl)}#{file_path}"
    end

    # @param ssl [Boolean]
    # @return [String] "https://" or "http://"
    def adapter_prefix(ssl: true)
      ssl ? "https://" : "http://"
    end
  end

  delegate :config, to: DerivativeRodeo

  ##
  # Download the remote file into the temporary path and yield that path.
  #
  # @yieldparam tmp_file_path [String]
  # @raise [Errors::FileMissingError] when the remote file does not exist
  def with_existing_tmp_path(&block)
    with_tmp_path(lambda { |_file_path, tmp_file_path, exist|
      raise Errors::FileMissingError unless exist

      # {#read} is the HTTP GET helper this concern defines.
      response = read(file_uri)
      File.binwrite(tmp_file_path, response.body)
    }, &block)
  end

  ##
  # Implemented to complete the interface.
  #
  # @raise [RuntimeError] always; these locations are download only.
  def write
    raise "#{self.class}#write is deliberately not implemented"
  end

  ##
  # @param url [String] defaults to this location's file_uri
  #
  # @return [Object] the response of the GET request (it responds to `body`)
  # @raise [StandardError] whatever the request raised; the error is logged and re-raised
  def read(url = file_uri)
    HTTParty.get(url, logger: config.logger)
  rescue => e
    config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    raise
  end

  ##
  # @param url [String] defaults to this location's file_uri
  #
  # @return [Object] the (truthy) response of the HEAD request when its status code is 2xx
  # @return [FalseClass] when the URL's head request is not successful (non-2xx status) or the
  #   request raised an error (which is logged).
  def exists?(url = file_uri)
    response = HTTParty.head(url, logger: config.logger)
    # A response object is truthy regardless of its status, so check the code explicitly.
    (200..299).cover?(response.code.to_i) ? response : false
  rescue => e
    config.logger.error(%(#{e.message}\n#{e.backtrace.join("\n")}))
    false
  end

  # `with_tmp_path` asks the location `exist?` (without arguments); answer it with the same
  # HEAD request instead of falling through to an unimplemented base method.
  alias exist? exists?
end
66
+ end
67
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DerivativeRodeo
4
+ module StorageLocations
5
+ ##
6
+ # Location for files found on a local disk
7
class FileLocation < BaseLocation
  # @param path [String]
  # @param parts [Integer, :all] forwarded to `file_path_from_parts`
  # @return [String] the path prefixed with {.adapter_prefix}
  def self.create_uri(path:, parts: :all)
    file_path = file_path_from_parts(path: path, parts: parts)
    "#{adapter_prefix}#{file_path}"
  end

  # @return [String] the scheme followed by "://"
  def self.adapter_prefix
    "#{scheme}://"
  end

  ##
  # Copy the existing file to the temporary path and yield that path.
  #
  # @yieldparam tmp_file_path [String]
  # @raise [Errors::FileMissingError] when the file does not exist on disk
  def with_existing_tmp_path(&block)
    with_tmp_path(lambda { |file_path, tmp_file_path, exist|
      raise Errors::FileMissingError unless exist

      FileUtils.cp(file_path, tmp_file_path)
    }, &block)
  end

  # @return [Boolean] whether something exists on disk at the file path
  def exist?
    File.exist?(file_path)
  end

  ##
  # Write the temporary file to the file_uri, creating the target directory when needed.
  #
  # @return [String] the file_uri
  # @raise [Errors::FileMissingError] when there is no temporary file to write (e.g. when
  #   called outside of a with_new_tmp_path block)
  def write
    # `raise Klass, message` is the raising form; `Errors::FileMissingError("...")` would be
    # parsed as a call to a (non-existent) method. The nil check covers calls made outside of
    # a tmp path block, where File.exist?(nil) would raise a TypeError.
    # NOTE(review): assumes Errors::FileMissingError accepts a message - confirm in errors.rb.
    unless tmp_file_path && File.exist?(tmp_file_path)
      raise Errors::FileMissingError,
            "Use write within a with_new_tmp_path block and fill the tmp file with data before writing"
    end

    FileUtils.mkdir_p(file_dir)
    FileUtils.cp_r(tmp_file_path, file_path)
    file_uri
  end
end
38
+ end
39
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'derivative_rodeo/storage_locations/concerns/download_concern'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
##
# A location for files fetched from the web; all of its behavior comes from {DownloadConcern}.
# These locations are download only: {DownloadConcern#write} deliberately raises.
class HttpLocation < BaseLocation
  include DownloadConcern
end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'derivative_rodeo/storage_locations/concerns/download_concern'
4
+
5
+ module DerivativeRodeo
6
+ module StorageLocations
7
##
# A location for files fetched from the web; all of its behavior comes from {DownloadConcern}.
# These locations are download only: {DownloadConcern#write} deliberately raises.
class HttpsLocation < BaseLocation
  include DownloadConcern
end
12
+ end
13
+ end