derivative-rodeo 0.4.0 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bee0908ac5d045db1948b062d3a4e1569fff41b4a5a0f1521f31b620c15c53a6
4
- data.tar.gz: 328de6f1cde3bcdadca31a361821b2c573425d15bfddbea5bf85d9475a8a3d0e
3
+ metadata.gz: 62872d16bfd5d73940f87d5c09f61f2a88ee67414f51905ce503f411b9b2fb37
4
+ data.tar.gz: 742d63ca02418b3453824655738e25b47d3cca918f030e2fb5db4c997d52e945
5
5
  SHA512:
6
- metadata.gz: 2d8ba019ef30666d9633b73e90fe599b53d5f6698de0400b43a25c37a06470542962db8f61d74ed6a444f1ecc5ffbd604d2c0891dffcf5fa86d9dce87ace1bb5
7
- data.tar.gz: b682377ce08e4379f1323bfdb6c6b8f46c97d45ebb392bb1dc7f5bc27f5b01701731880dc08f96cb5ef2b0da8ad39349b4534a63b4039e78616331b93bd9516c
6
+ metadata.gz: e43b94745f35474edf4b463cd11b8c7d7bb29391f443c7ef2a9e84966d969aa6cf7c205a92c32a03452ba28c490c790ccd05d6569fd9db59d8c119b7e38f1dde
7
+ data.tar.gz: '07962c3175aed6d77295e473ad8462d0ae634c931a2f3f5bc75be1195977dded3cec013448c9d2fe2653fa83b9d178dd3513aaa1eda8eeafe9da539e3dbf06b0'
@@ -82,6 +82,7 @@ module DerivativeRodeo
82
82
  #
83
83
  # @see #build_step
84
84
  # @see #with_each_requisite_location_and_tmp_file_path
85
+ # rubocop:disable Metrics/MethodLength
85
86
  def generated_files
86
87
  # TODO: Examples please
87
88
  return @generated_files if defined?(@generated_files)
@@ -101,11 +102,16 @@ module DerivativeRodeo
101
102
  @generated_files << if generated_file.exist?
102
103
  generated_file
103
104
  else
105
+ log_message = "#{self.class}#generated_files :: " \
106
+ "input_location file_uri #{input_location.file_uri} :: " \
107
+ "Generating output_location file_uri #{generated_file.file_uri} via build_step."
108
+ logger.info(log_message)
104
109
  build_step(input_location: input_location, output_location: generated_file, input_tmp_file_path: input_tmp_file_path)
105
110
  end
106
111
  end
107
112
  @generated_files
108
113
  end
114
+ # rubocop:enable Metrics/MethodLength
109
115
 
110
116
  ##
111
117
  # @return [Array<String>]
@@ -167,20 +173,55 @@ module DerivativeRodeo
167
173
  # {#output_location_template} or {#preprocessed_location_template}.
168
174
  #
169
175
  # @see [StorageLocations::BaseLocation#exist?]
176
+ # rubocop:disable Metrics/MethodLength
177
+ # rubocop:disable Metrics/AbcSize
170
178
  def destination(input_location)
171
- output_location = input_location.derived_file_from(template: output_location_template)
179
+ output_location = input_location.derived_file_from(template: output_location_template, extension: output_extension)
172
180
 
173
- return output_location if output_location.exist?
174
- return output_location unless preprocessed_location_template
181
+ if output_location.exist?
182
+ log_message = "#{self.class}#destination :: " \
183
+ "input_location file_uri #{input_location.file_uri} :: " \
184
+ "Found output_location file_uri #{output_location.file_uri}."
185
+ logger.info(log_message)
175
186
 
176
- preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template)
187
+ return output_location
188
+ end
189
+
190
+ unless preprocessed_location_template
191
+ log_message = "#{self.class}#destination :: " \
192
+ "input_location file_uri #{input_location.file_uri} :: " \
193
+ "No preprocessed_location_template provided " \
194
+ "nor does a file exist at output_location file_uri #{output_location.file_uri};" \
195
+ " moving on to generation via #{self.class}#build_step."
196
+ logger.info(log_message)
197
+
198
+ return output_location
199
+ end
200
+
201
+ preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template, extension: output_extension)
177
202
  # We only want the location if it exists
178
- return preprocessed_location if preprocessed_location&.exist?
203
+ if preprocessed_location&.exist?
204
+ log_message = "#{self.class}#destination :: " \
205
+ "input_location file_uri #{input_location.file_uri} :: " \
206
+ "Found preprocessed_location file_uri #{output_location.file_uri}."
207
+ logger.info(log_message)
208
+
209
+ return preprocessed_location
210
+ end
211
+
212
+ log_message = "#{self.class}#destination :: " \
213
+ "input_location file_uri #{input_location.file_uri} :: " \
214
+ "No file exists at preprocessed_location file_uri #{preprocessed_location.file_uri} " \
215
+ "nor output_location file_uri #{output_location.file_uri}; " \
216
+ "moving on to generation via #{self.class}#build_step."
217
+ logger.info(log_message)
179
218
 
180
219
  # NOTE: The file does not exist at the output_location; but we pass this information along so
181
220
  # that the #build_step knows where to write the file.
182
221
  output_location
183
222
  end
223
+ # rubocop:enable Metrics/AbcSize
224
+ # rubocop:enable Metrics/MethodLength
184
225
 
185
226
  ##
186
227
  # A bit of indirection to create a common interface for running a shell command.
@@ -5,7 +5,8 @@ module DerivativeRodeo
5
5
  ##
6
6
  # Takes images and ensures that we have a monochrome derivative of those images.
7
7
  class MonochromeGenerator < BaseGenerator
8
- # TODO: Can we assume a tiff?
8
+ # @see DerivativeRodeo::Services::ConvertUriViaTemplateService for the interaction of the
9
+ # magic ".mono" suffix
9
10
  self.output_extension = 'mono.tiff'
10
11
 
11
12
  ##
@@ -46,11 +46,12 @@ module DerivativeRodeo
46
46
  # from_uris: ["file:///path1/A/file.pdf", "aws:///path2/B/file.pdf"],
47
47
  # template: "file:///dest1/{{dir_parts[-1..-1]}}/{{ filename }}")
48
48
  # => ["file:///dest1/A/file.pdf", "aws:///dest1/B/file.pdf"]
49
- def self.call(from_uri:, template:, adapter: nil, separator: "/")
50
- new(from_uri: from_uri, template: template, adapter: adapter, separator: separator).call
49
+ def self.call(from_uri:, template:, adapter: nil, separator: "/", **options)
50
+ new(from_uri: from_uri, template: template, adapter: adapter, separator: separator, **options).call
51
51
  end
52
52
 
53
- def initialize(from_uri:, template:, adapter: nil, separator: "/")
53
+ # rubocop:disable Metrics/MethodLength
54
+ def initialize(from_uri:, template:, adapter: nil, separator: "/", **options)
54
55
  @from_uri = from_uri
55
56
  @template = template
56
57
  @adapter = adapter
@@ -60,12 +61,23 @@ module DerivativeRodeo
60
61
  @from_scheme, @path = uri.split("://")
61
62
  @parts = @path.split(separator)
62
63
  @dir_parts = @parts[0..-2]
63
- @filename = @parts[-1]
64
- @basename = File.basename(@filename, ".*")
65
- @extension = File.extname(@filename)
64
+ @filename = options[:filename] || @parts[-1]
65
+ @basename = options[:basename] || File.basename(@filename, ".*")
66
+
67
+ ##
68
+ # HACK: The HocrGenerator has `.mono.tiff` and we are not interested in carrying
69
+ # forward the `.mono` suffix as that makes it hard to find the preprocessed word
70
+ # coordinates, alto, and plain text. This ensures files derived from the .mono are findable
71
+ # in IIIF Print.
72
+ @basename = @basename.sub(/\.mono\z/, '')
73
+ @extension = options[:extension] || File.extname(@filename)
74
+ # When a generator specifies "same" we want to use the given file's extension
75
+ @extension = File.extname(@filename) if @extension == DerivativeRodeo::StorageLocations::SAME
76
+ @extension = ".#{@extension}" unless @extension.start_with?(".")
66
77
 
67
78
  @template_without_query, @template_query = template.split("?")
68
79
  end
80
+ # rubocop:enable Metrics/MethodLength
69
81
 
70
82
  def call
71
83
  to_uri = template_without_query.gsub(DIR_PARTS_REPLACEMENT_REGEXP) do |text|
@@ -101,10 +101,10 @@ module DerivativeRodeo
101
101
  # @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
102
102
  #
103
103
  # @return [StorageLocations::BaseLocation]
104
- def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
104
+ def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService, **options)
105
105
  # HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
106
106
  from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
107
- to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
107
+ to_uri = service.call(from_uri: from_uri, template: template, adapter: self, **options)
108
108
  new(to_uri)
109
109
  end
110
110
 
@@ -203,9 +203,9 @@ module DerivativeRodeo
203
203
  # @return [StorageLocations::BaseLocation]
204
204
  #
205
205
  # @see DerivativeRodeo::Services::ConvertUriViaTemplateService
206
- def derived_file_from(template:)
206
+ def derived_file_from(template:, **options)
207
207
  klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
208
- klass.build(from_uri: file_path, template: template)
208
+ klass.build(from_uri: file_path, template: template, **options)
209
209
  end
210
210
 
211
211
  ##
@@ -231,6 +231,7 @@ module DerivativeRodeo
231
231
  def with_new_extension(extension)
232
232
  return file_path if extension == StorageLocations::SAME
233
233
 
234
+ # NOTE: May need to revisit this
234
235
  "#{file_path.split('.')[0]}.#{extension}"
235
236
  end
236
237
 
@@ -13,6 +13,8 @@ module DerivativeRodeo
13
13
  # Location to download and upload files to Sqs
14
14
  # It uploads a file_uri to the queue, not the contents of that file
15
15
  # reading from the queue is not currently implemented
16
+ #
17
+ # rubocop:disable Metrics/ClassLength
16
18
  class SqsLocation < BaseLocation
17
19
  ##
18
20
  # @!group Class Attributes
@@ -85,11 +87,14 @@ module DerivativeRodeo
85
87
  batch = []
86
88
  Dir.glob("#{File.dirname(tmp_file_path)}/**/**").each.with_index do |fp, i|
87
89
  batch << { id: SecureRandom.uuid, message_body: output_json("file://#{fp}") }
88
- if (i % batch_size).zero?
90
+ if (i + 1 % batch_size).zero?
89
91
  add_batch(messages: batch)
90
92
  batch = []
91
93
  end
92
94
  end
95
+
96
+ # Ensure we're flushing the batched up queue as part of completing the write.
97
+ add_batch(messages: batch) if batch.present?
93
98
  file_uri
94
99
  end
95
100
 
@@ -181,6 +186,7 @@ module DerivativeRodeo
181
186
  end
182
187
 
183
188
  def output_json(uri)
189
+ # TODO: Add ability to handle a pre-process-template given to an SQS, and pass that along to the generator when applicable.
184
190
  key = DerivativeRodeo::Services::ConvertUriViaTemplateService.call(from_uri: uri, template: template, adapter: self)
185
191
  { key => [template] }.to_json
186
192
  end
@@ -201,5 +207,6 @@ module DerivativeRodeo
201
207
  @file_uri_parts
202
208
  end
203
209
  end
210
+ # rubocop:enable Metrics/ClassLength
204
211
  end
205
212
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DerivativeRodeo
4
- VERSION = '0.4.0'
4
+ VERSION = '0.4.2'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: derivative-rodeo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-06-06 00:00:00.000000000 Z
12
+ date: 2023-07-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -337,7 +337,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
337
337
  - !ruby/object:Gem::Version
338
338
  version: '0'
339
339
  requirements: []
340
- rubygems_version: 3.1.6
340
+ rubygems_version: 3.3.7
341
341
  signing_key:
342
342
  specification_version: 4
343
343
  summary: An ETL Ecosystem for Derivative Processing.