derivative-rodeo 0.4.0 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/derivative_rodeo/generators/base_generator.rb +46 -5
- data/lib/derivative_rodeo/generators/monochrome_generator.rb +2 -1
- data/lib/derivative_rodeo/services/convert_uri_via_template_service.rb +18 -6
- data/lib/derivative_rodeo/storage_locations/base_location.rb +5 -4
- data/lib/derivative_rodeo/storage_locations/sqs_location.rb +8 -1
- data/lib/derivative_rodeo/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62872d16bfd5d73940f87d5c09f61f2a88ee67414f51905ce503f411b9b2fb37
|
4
|
+
data.tar.gz: 742d63ca02418b3453824655738e25b47d3cca918f030e2fb5db4c997d52e945
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e43b94745f35474edf4b463cd11b8c7d7bb29391f443c7ef2a9e84966d969aa6cf7c205a92c32a03452ba28c490c790ccd05d6569fd9db59d8c119b7e38f1dde
|
7
|
+
data.tar.gz: '07962c3175aed6d77295e473ad8462d0ae634c931a2f3f5bc75be1195977dded3cec013448c9d2fe2653fa83b9d178dd3513aaa1eda8eeafe9da539e3dbf06b0'
|
@@ -82,6 +82,7 @@ module DerivativeRodeo
|
|
82
82
|
#
|
83
83
|
# @see #build_step
|
84
84
|
# @see #with_each_requisite_location_and_tmp_file_path
|
85
|
+
# rubocop:disable Metrics/MethodLength
|
85
86
|
def generated_files
|
86
87
|
# TODO: Examples please
|
87
88
|
return @generated_files if defined?(@generated_files)
|
@@ -101,11 +102,16 @@ module DerivativeRodeo
|
|
101
102
|
@generated_files << if generated_file.exist?
|
102
103
|
generated_file
|
103
104
|
else
|
105
|
+
log_message = "#{self.class}#generated_files :: " \
|
106
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
107
|
+
"Generating output_location file_uri #{generated_file.file_uri} via build_step."
|
108
|
+
logger.info(log_message)
|
104
109
|
build_step(input_location: input_location, output_location: generated_file, input_tmp_file_path: input_tmp_file_path)
|
105
110
|
end
|
106
111
|
end
|
107
112
|
@generated_files
|
108
113
|
end
|
114
|
+
# rubocop:enable Metrics/MethodLength
|
109
115
|
|
110
116
|
##
|
111
117
|
# @return [Array<String>]
|
@@ -167,20 +173,55 @@ module DerivativeRodeo
|
|
167
173
|
# {#output_location_template} or {#preprocessed_location_template}.
|
168
174
|
#
|
169
175
|
# @see [StorageLocations::BaseLocation#exist?]
|
176
|
+
# rubocop:disable Metrics/MethodLength
|
177
|
+
# rubocop:disable Metrics/AbcSize
|
170
178
|
def destination(input_location)
|
171
|
-
output_location = input_location.derived_file_from(template: output_location_template)
|
179
|
+
output_location = input_location.derived_file_from(template: output_location_template, extension: output_extension)
|
172
180
|
|
173
|
-
|
174
|
-
|
181
|
+
if output_location.exist?
|
182
|
+
log_message = "#{self.class}#destination :: " \
|
183
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
184
|
+
"Found output_location file_uri #{output_location.file_uri}."
|
185
|
+
logger.info(log_message)
|
175
186
|
|
176
|
-
|
187
|
+
return output_location
|
188
|
+
end
|
189
|
+
|
190
|
+
unless preprocessed_location_template
|
191
|
+
log_message = "#{self.class}#destination :: " \
|
192
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
193
|
+
"No preprocessed_location_template provided " \
|
194
|
+
"nor does a file exist at output_location file_uri #{output_location.file_uri};" \
|
195
|
+
" moving on to generation via #{self.class}#build_step."
|
196
|
+
logger.info(log_message)
|
197
|
+
|
198
|
+
return output_location
|
199
|
+
end
|
200
|
+
|
201
|
+
preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template, extension: output_extension)
|
177
202
|
# We only want the location if it exists
|
178
|
-
|
203
|
+
if preprocessed_location&.exist?
|
204
|
+
log_message = "#{self.class}#destination :: " \
|
205
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
206
|
+
"Found preprocessed_location file_uri #{output_location.file_uri}."
|
207
|
+
logger.info(log_message)
|
208
|
+
|
209
|
+
return preprocessed_location
|
210
|
+
end
|
211
|
+
|
212
|
+
log_message = "#{self.class}#destination :: " \
|
213
|
+
"input_location file_uri #{input_location.file_uri} :: " \
|
214
|
+
"No file exists at preprocessed_location file_uri #{preprocessed_location.file_uri} " \
|
215
|
+
"nor output_location file_uri #{output_location.file_uri}; " \
|
216
|
+
"moving on to generation via #{self.class}#build_step."
|
217
|
+
logger.info(log_message)
|
179
218
|
|
180
219
|
# NOTE: The file does not exist at the output_location; but we pass this information along so
|
181
220
|
# that the #build_step knows where to write the file.
|
182
221
|
output_location
|
183
222
|
end
|
223
|
+
# rubocop:enable Metrics/AbcSize
|
224
|
+
# rubocop:enable Metrics/MethodLength
|
184
225
|
|
185
226
|
##
|
186
227
|
# A bit of indirection to create a common interface for running a shell command.
|
@@ -5,7 +5,8 @@ module DerivativeRodeo
|
|
5
5
|
##
|
6
6
|
# Takes images and ensures that we have a monochrome derivative of those images.
|
7
7
|
class MonochromeGenerator < BaseGenerator
|
8
|
-
#
|
8
|
+
# @see DerivativeRodeo::Services::ConvertUriViaTemplateService for the interaction of the
|
9
|
+
# magic ".mono" suffix
|
9
10
|
self.output_extension = 'mono.tiff'
|
10
11
|
|
11
12
|
##
|
@@ -46,11 +46,12 @@ module DerivativeRodeo
|
|
46
46
|
# from_uris: ["file:///path1/A/file.pdf", "aws:///path2/B/file.pdf"],
|
47
47
|
# template: "file:///dest1/{{dir_parts[-1..-1]}}/{{ filename }}")
|
48
48
|
# => ["file:///dest1/A/file.pdf", "aws:///dest1/B/file.pdf"]
|
49
|
-
def self.call(from_uri:, template:, adapter: nil, separator: "/")
|
50
|
-
new(from_uri: from_uri, template: template, adapter: adapter, separator: separator).call
|
49
|
+
def self.call(from_uri:, template:, adapter: nil, separator: "/", **options)
|
50
|
+
new(from_uri: from_uri, template: template, adapter: adapter, separator: separator, **options).call
|
51
51
|
end
|
52
52
|
|
53
|
-
|
53
|
+
# rubocop:disable Metrics/MethodLength
|
54
|
+
def initialize(from_uri:, template:, adapter: nil, separator: "/", **options)
|
54
55
|
@from_uri = from_uri
|
55
56
|
@template = template
|
56
57
|
@adapter = adapter
|
@@ -60,12 +61,23 @@ module DerivativeRodeo
|
|
60
61
|
@from_scheme, @path = uri.split("://")
|
61
62
|
@parts = @path.split(separator)
|
62
63
|
@dir_parts = @parts[0..-2]
|
63
|
-
@filename = @parts[-1]
|
64
|
-
@basename = File.basename(@filename, ".*")
|
65
|
-
|
64
|
+
@filename = options[:filename] || @parts[-1]
|
65
|
+
@basename = options[:basename] || File.basename(@filename, ".*")
|
66
|
+
|
67
|
+
##
|
68
|
+
# HACK: Because the HocrGenerator has `.mono.tiff` and we are not interested in carrying
|
69
|
+
# forward the `.mono` suffix as that makes it hard to find the preprocessed word
|
70
|
+
# coordinates, alto, and plain text. This ensures files derived from the .mono are findable
|
71
|
+
# in IIIF Print.
|
72
|
+
@basename = @basename.sub(/\.mono\z/, '')
|
73
|
+
@extension = options[:extension] || File.extname(@filename)
|
74
|
+
# When a generator specifies "same" we want to use the given file's extension
|
75
|
+
@extension = File.extname(@filename) if @extension == DerivativeRodeo::StorageLocations::SAME
|
76
|
+
@extension = ".#{@extension}" unless @extension.start_with?(".")
|
66
77
|
|
67
78
|
@template_without_query, @template_query = template.split("?")
|
68
79
|
end
|
80
|
+
# rubocop:enable Metrics/MethodLength
|
69
81
|
|
70
82
|
def call
|
71
83
|
to_uri = template_without_query.gsub(DIR_PARTS_REPLACEMENT_REGEXP) do |text|
|
@@ -101,10 +101,10 @@ module DerivativeRodeo
|
|
101
101
|
# @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
|
102
102
|
#
|
103
103
|
# @return [StorageLocations::BaseLocation]
|
104
|
-
def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
|
104
|
+
def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService, **options)
|
105
105
|
# HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
|
106
106
|
from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
|
107
|
-
to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
|
107
|
+
to_uri = service.call(from_uri: from_uri, template: template, adapter: self, **options)
|
108
108
|
new(to_uri)
|
109
109
|
end
|
110
110
|
|
@@ -203,9 +203,9 @@ module DerivativeRodeo
|
|
203
203
|
# @return [StorageLocations::BaseLocation]
|
204
204
|
#
|
205
205
|
# @see DerivativeRodeo::Services::ConvertUriViaTemplateService
|
206
|
-
def derived_file_from(template:)
|
206
|
+
def derived_file_from(template:, **options)
|
207
207
|
klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
|
208
|
-
klass.build(from_uri: file_path, template: template)
|
208
|
+
klass.build(from_uri: file_path, template: template, **options)
|
209
209
|
end
|
210
210
|
|
211
211
|
##
|
@@ -231,6 +231,7 @@ module DerivativeRodeo
|
|
231
231
|
def with_new_extension(extension)
|
232
232
|
return file_path if extension == StorageLocations::SAME
|
233
233
|
|
234
|
+
# NOTE: May need to revisit this
|
234
235
|
"#{file_path.split('.')[0]}.#{extension}"
|
235
236
|
end
|
236
237
|
|
@@ -13,6 +13,8 @@ module DerivativeRodeo
|
|
13
13
|
# Location to download and upload files to Sqs
|
14
14
|
# It uploads a file_uri to the queue, not the contents of that file
|
15
15
|
# reading from the queue is not currently implemented
|
16
|
+
#
|
17
|
+
# rubocop:disable Metrics/ClassLength
|
16
18
|
class SqsLocation < BaseLocation
|
17
19
|
##
|
18
20
|
# @!group Class Attributes
|
@@ -85,11 +87,14 @@ module DerivativeRodeo
|
|
85
87
|
batch = []
|
86
88
|
Dir.glob("#{File.dirname(tmp_file_path)}/**/**").each.with_index do |fp, i|
|
87
89
|
batch << { id: SecureRandom.uuid, message_body: output_json("file://#{fp}") }
|
88
|
-
if (i % batch_size).zero?
|
90
|
+
if (i + 1 % batch_size).zero?
|
89
91
|
add_batch(messages: batch)
|
90
92
|
batch = []
|
91
93
|
end
|
92
94
|
end
|
95
|
+
|
96
|
+
# Ensure we're flushing the batched up queue as part of completing the write.
|
97
|
+
add_batch(messages: batch) if batch.present?
|
93
98
|
file_uri
|
94
99
|
end
|
95
100
|
|
@@ -181,6 +186,7 @@ module DerivativeRodeo
|
|
181
186
|
end
|
182
187
|
|
183
188
|
def output_json(uri)
|
189
|
+
# TODO: Add ability to handle a pre-process-template given to an SQS, and pass that along to the generator when applicable.
|
184
190
|
key = DerivativeRodeo::Services::ConvertUriViaTemplateService.call(from_uri: uri, template: template, adapter: self)
|
185
191
|
{ key => [template] }.to_json
|
186
192
|
end
|
@@ -201,5 +207,6 @@ module DerivativeRodeo
|
|
201
207
|
@file_uri_parts
|
202
208
|
end
|
203
209
|
end
|
210
|
+
# rubocop:enable Metrics/ClassLength
|
204
211
|
end
|
205
212
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: derivative-rodeo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Kaufman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-07-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -337,7 +337,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
337
337
|
- !ruby/object:Gem::Version
|
338
338
|
version: '0'
|
339
339
|
requirements: []
|
340
|
-
rubygems_version: 3.
|
340
|
+
rubygems_version: 3.3.7
|
341
341
|
signing_key:
|
342
342
|
specification_version: 4
|
343
343
|
summary: An ETL Ecosystem for Derivative Processing.
|