derivative-rodeo 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/derivative_rodeo/generators/base_generator.rb +46 -5
- data/lib/derivative_rodeo/generators/monochrome_generator.rb +2 -1
- data/lib/derivative_rodeo/services/convert_uri_via_template_service.rb +18 -6
- data/lib/derivative_rodeo/storage_locations/base_location.rb +5 -4
- data/lib/derivative_rodeo/storage_locations/sqs_location.rb +8 -1
- data/lib/derivative_rodeo/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 62872d16bfd5d73940f87d5c09f61f2a88ee67414f51905ce503f411b9b2fb37
|
|
4
|
+
data.tar.gz: 742d63ca02418b3453824655738e25b47d3cca918f030e2fb5db4c997d52e945
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e43b94745f35474edf4b463cd11b8c7d7bb29391f443c7ef2a9e84966d969aa6cf7c205a92c32a03452ba28c490c790ccd05d6569fd9db59d8c119b7e38f1dde
|
|
7
|
+
data.tar.gz: '07962c3175aed6d77295e473ad8462d0ae634c931a2f3f5bc75be1195977dded3cec013448c9d2fe2653fa83b9d178dd3513aaa1eda8eeafe9da539e3dbf06b0'
|
|
@@ -82,6 +82,7 @@ module DerivativeRodeo
|
|
|
82
82
|
#
|
|
83
83
|
# @see #build_step
|
|
84
84
|
# @see #with_each_requisite_location_and_tmp_file_path
|
|
85
|
+
# rubocop:disable Metrics/MethodLength
|
|
85
86
|
def generated_files
|
|
86
87
|
# TODO: Examples please
|
|
87
88
|
return @generated_files if defined?(@generated_files)
|
|
@@ -101,11 +102,16 @@ module DerivativeRodeo
|
|
|
101
102
|
@generated_files << if generated_file.exist?
|
|
102
103
|
generated_file
|
|
103
104
|
else
|
|
105
|
+
log_message = "#{self.class}#generated_files :: " \
|
|
106
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
|
107
|
+
"Generating output_location file_uri #{generated_file.file_uri} via build_step."
|
|
108
|
+
logger.info(log_message)
|
|
104
109
|
build_step(input_location: input_location, output_location: generated_file, input_tmp_file_path: input_tmp_file_path)
|
|
105
110
|
end
|
|
106
111
|
end
|
|
107
112
|
@generated_files
|
|
108
113
|
end
|
|
114
|
+
# rubocop:enable Metrics/MethodLength
|
|
109
115
|
|
|
110
116
|
##
|
|
111
117
|
# @return [Array<String>]
|
|
@@ -167,20 +173,55 @@ module DerivativeRodeo
|
|
|
167
173
|
# {#output_location_template} or {#preprocessed_location_template}.
|
|
168
174
|
#
|
|
169
175
|
# @see [StorageLocations::BaseLocation#exist?]
|
|
176
|
+
# rubocop:disable Metrics/MethodLength
|
|
177
|
+
# rubocop:disable Metrics/AbcSize
|
|
170
178
|
def destination(input_location)
|
|
171
|
-
output_location = input_location.derived_file_from(template: output_location_template)
|
|
179
|
+
output_location = input_location.derived_file_from(template: output_location_template, extension: output_extension)
|
|
172
180
|
|
|
173
|
-
|
|
174
|
-
|
|
181
|
+
if output_location.exist?
|
|
182
|
+
log_message = "#{self.class}#destination :: " \
|
|
183
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
|
184
|
+
"Found output_location file_uri #{output_location.file_uri}."
|
|
185
|
+
logger.info(log_message)
|
|
175
186
|
|
|
176
|
-
|
|
187
|
+
return output_location
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
unless preprocessed_location_template
|
|
191
|
+
log_message = "#{self.class}#destination :: " \
|
|
192
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
|
193
|
+
"No preprocessed_location_template provided " \
|
|
194
|
+
"nor does a file exist at output_location file_uri #{output_location.file_uri};" \
|
|
195
|
+
" moving on to generation via #{self.class}#build_step."
|
|
196
|
+
logger.info(log_message)
|
|
197
|
+
|
|
198
|
+
return output_location
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template, extension: output_extension)
|
|
177
202
|
# We only want the location if it exists
|
|
178
|
-
|
|
203
|
+
if preprocessed_location&.exist?
|
|
204
|
+
log_message = "#{self.class}#destination :: " \
|
|
205
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
|
206
|
+
"Found preprocessed_location file_uri #{output_location.file_uri}."
|
|
207
|
+
logger.info(log_message)
|
|
208
|
+
|
|
209
|
+
return preprocessed_location
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
log_message = "#{self.class}#destination :: " \
|
|
213
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
|
214
|
+
"No file exists at preprocessed_location file_uri #{preprocessed_location.file_uri} " \
|
|
215
|
+
"nor output_location file_uri #{output_location.file_uri}; " \
|
|
216
|
+
"moving on to generation via #{self.class}#build_step."
|
|
217
|
+
logger.info(log_message)
|
|
179
218
|
|
|
180
219
|
# NOTE: The file does not exist at the output_location; but we pass this information along so
|
|
181
220
|
# that the #build_step knows where to write the file.
|
|
182
221
|
output_location
|
|
183
222
|
end
|
|
223
|
+
# rubocop:enable Metrics/AbcSize
|
|
224
|
+
# rubocop:enable Metrics/MethodLength
|
|
184
225
|
|
|
185
226
|
##
|
|
186
227
|
# A bit of indirection to create a common interface for running a shell command.
|
|
@@ -5,7 +5,8 @@ module DerivativeRodeo
|
|
|
5
5
|
##
|
|
6
6
|
# Take images and ensures that we have a monochrome derivative of those images.
|
|
7
7
|
class MonochromeGenerator < BaseGenerator
|
|
8
|
-
#
|
|
8
|
+
# @see DerivativeRodeo::Services::ConvertUriViaTemplateService for the interaction of the
|
|
9
|
+
# magic ".mono" suffix
|
|
9
10
|
self.output_extension = 'mono.tiff'
|
|
10
11
|
|
|
11
12
|
##
|
|
@@ -46,11 +46,12 @@ module DerivativeRodeo
|
|
|
46
46
|
# from_uris: ["file:///path1/A/file.pdf", "aws:///path2/B/file.pdf"],
|
|
47
47
|
# template: "file:///dest1/{{dir_parts[-1..-1]}}/{{ filename }}")
|
|
48
48
|
# => ["file:///dest1/A/file.pdf", "aws:///dest1/B/file.pdf"]
|
|
49
|
-
def self.call(from_uri:, template:, adapter: nil, separator: "/")
|
|
50
|
-
new(from_uri: from_uri, template: template, adapter: adapter, separator: separator).call
|
|
49
|
+
def self.call(from_uri:, template:, adapter: nil, separator: "/", **options)
|
|
50
|
+
new(from_uri: from_uri, template: template, adapter: adapter, separator: separator, **options).call
|
|
51
51
|
end
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
# rubocop:disable Metrics/MethodLength
|
|
54
|
+
def initialize(from_uri:, template:, adapter: nil, separator: "/", **options)
|
|
54
55
|
@from_uri = from_uri
|
|
55
56
|
@template = template
|
|
56
57
|
@adapter = adapter
|
|
@@ -60,12 +61,23 @@ module DerivativeRodeo
|
|
|
60
61
|
@from_scheme, @path = uri.split("://")
|
|
61
62
|
@parts = @path.split(separator)
|
|
62
63
|
@dir_parts = @parts[0..-2]
|
|
63
|
-
@filename = @parts[-1]
|
|
64
|
-
@basename = File.basename(@filename, ".*")
|
|
65
|
-
|
|
64
|
+
@filename = options[:filename] || @parts[-1]
|
|
65
|
+
@basename = options[:basename] || File.basename(@filename, ".*")
|
|
66
|
+
|
|
67
|
+
##
|
|
68
|
+
# HACK: Because the HocrGenerator has `.mono.tiff` and we are not interested in carrying
|
|
69
|
+
# forward the `.mono` suffix as that makes it hard to find the preprocessed word
|
|
70
|
+
# coordinates, alto, and plain text. This ensures files derived from the .mono are findable
|
|
71
|
+
# in IIIF Print.
|
|
72
|
+
@basename = @basename.sub(/\.mono\z/, '')
|
|
73
|
+
@extension = options[:extension] || File.extname(@filename)
|
|
74
|
+
# When a generator specifies "same" we want to use the given file's extension
|
|
75
|
+
@extension = File.extname(@filename) if @extension == DerivativeRodeo::StorageLocations::SAME
|
|
76
|
+
@extension = ".#{@extension}" unless @extension.start_with?(".")
|
|
66
77
|
|
|
67
78
|
@template_without_query, @template_query = template.split("?")
|
|
68
79
|
end
|
|
80
|
+
# rubocop:enable Metrics/MethodLength
|
|
69
81
|
|
|
70
82
|
def call
|
|
71
83
|
to_uri = template_without_query.gsub(DIR_PARTS_REPLACEMENT_REGEXP) do |text|
|
|
@@ -101,10 +101,10 @@ module DerivativeRodeo
|
|
|
101
101
|
# @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
|
|
102
102
|
#
|
|
103
103
|
# @return [StorageLocations::BaseLocation]
|
|
104
|
-
def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
|
|
104
|
+
def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService, **options)
|
|
105
105
|
# HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
|
|
106
106
|
from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
|
|
107
|
-
to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
|
|
107
|
+
to_uri = service.call(from_uri: from_uri, template: template, adapter: self, **options)
|
|
108
108
|
new(to_uri)
|
|
109
109
|
end
|
|
110
110
|
|
|
@@ -203,9 +203,9 @@ module DerivativeRodeo
|
|
|
203
203
|
# @return [StorageLocations::BaseLocation]
|
|
204
204
|
#
|
|
205
205
|
# @see DerivativeRodeo::Services::ConvertUriViaTemplateService
|
|
206
|
-
def derived_file_from(template:)
|
|
206
|
+
def derived_file_from(template:, **options)
|
|
207
207
|
klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
|
|
208
|
-
klass.build(from_uri: file_path, template: template)
|
|
208
|
+
klass.build(from_uri: file_path, template: template, **options)
|
|
209
209
|
end
|
|
210
210
|
|
|
211
211
|
##
|
|
@@ -231,6 +231,7 @@ module DerivativeRodeo
|
|
|
231
231
|
def with_new_extension(extension)
|
|
232
232
|
return file_path if extension == StorageLocations::SAME
|
|
233
233
|
|
|
234
|
+
# NOTE: May need to revisit this
|
|
234
235
|
"#{file_path.split('.')[0]}.#{extension}"
|
|
235
236
|
end
|
|
236
237
|
|
|
@@ -13,6 +13,8 @@ module DerivativeRodeo
|
|
|
13
13
|
# Location to download and upload files to Sqs
|
|
14
14
|
# It uploads a file_uri to the queue, not the contents of that file
|
|
15
15
|
# reading from the queue is not currently implemented
|
|
16
|
+
#
|
|
17
|
+
# rubocop:disable Metrics/ClassLength
|
|
16
18
|
class SqsLocation < BaseLocation
|
|
17
19
|
##
|
|
18
20
|
# @!group Class Attributes
|
|
@@ -85,11 +87,14 @@ module DerivativeRodeo
|
|
|
85
87
|
batch = []
|
|
86
88
|
Dir.glob("#{File.dirname(tmp_file_path)}/**/**").each.with_index do |fp, i|
|
|
87
89
|
batch << { id: SecureRandom.uuid, message_body: output_json("file://#{fp}") }
|
|
88
|
-
if (i % batch_size).zero?
|
|
90
|
+
if (i + 1 % batch_size).zero?
|
|
89
91
|
add_batch(messages: batch)
|
|
90
92
|
batch = []
|
|
91
93
|
end
|
|
92
94
|
end
|
|
95
|
+
|
|
96
|
+
# Ensure we're flushing the batched up queue as part of completing the write.
|
|
97
|
+
add_batch(messages: batch) if batch.present?
|
|
93
98
|
file_uri
|
|
94
99
|
end
|
|
95
100
|
|
|
@@ -181,6 +186,7 @@ module DerivativeRodeo
|
|
|
181
186
|
end
|
|
182
187
|
|
|
183
188
|
def output_json(uri)
|
|
189
|
+
# TODO: Add ability to handle a pre-process-template given to an SQS, and pass that along to the generator when applicable.
|
|
184
190
|
key = DerivativeRodeo::Services::ConvertUriViaTemplateService.call(from_uri: uri, template: template, adapter: self)
|
|
185
191
|
{ key => [template] }.to_json
|
|
186
192
|
end
|
|
@@ -201,5 +207,6 @@ module DerivativeRodeo
|
|
|
201
207
|
@file_uri_parts
|
|
202
208
|
end
|
|
203
209
|
end
|
|
210
|
+
# rubocop:enable Metrics/ClassLength
|
|
204
211
|
end
|
|
205
212
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: derivative-rodeo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.4.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Rob Kaufman
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: exe
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2023-
|
|
12
|
+
date: 2023-07-10 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: activesupport
|
|
@@ -337,7 +337,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
337
337
|
- !ruby/object:Gem::Version
|
|
338
338
|
version: '0'
|
|
339
339
|
requirements: []
|
|
340
|
-
rubygems_version: 3.
|
|
340
|
+
rubygems_version: 3.3.7
|
|
341
341
|
signing_key:
|
|
342
342
|
specification_version: 4
|
|
343
343
|
summary: An ETL Ecosystem for Derivative Processing.
|