derivative-rodeo 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bee0908ac5d045db1948b062d3a4e1569fff41b4a5a0f1521f31b620c15c53a6
4
- data.tar.gz: 328de6f1cde3bcdadca31a361821b2c573425d15bfddbea5bf85d9475a8a3d0e
3
+ metadata.gz: 62872d16bfd5d73940f87d5c09f61f2a88ee67414f51905ce503f411b9b2fb37
4
+ data.tar.gz: 742d63ca02418b3453824655738e25b47d3cca918f030e2fb5db4c997d52e945
5
5
  SHA512:
6
- metadata.gz: 2d8ba019ef30666d9633b73e90fe599b53d5f6698de0400b43a25c37a06470542962db8f61d74ed6a444f1ecc5ffbd604d2c0891dffcf5fa86d9dce87ace1bb5
7
- data.tar.gz: b682377ce08e4379f1323bfdb6c6b8f46c97d45ebb392bb1dc7f5bc27f5b01701731880dc08f96cb5ef2b0da8ad39349b4534a63b4039e78616331b93bd9516c
6
+ metadata.gz: e43b94745f35474edf4b463cd11b8c7d7bb29391f443c7ef2a9e84966d969aa6cf7c205a92c32a03452ba28c490c790ccd05d6569fd9db59d8c119b7e38f1dde
7
+ data.tar.gz: '07962c3175aed6d77295e473ad8462d0ae634c931a2f3f5bc75be1195977dded3cec013448c9d2fe2653fa83b9d178dd3513aaa1eda8eeafe9da539e3dbf06b0'
@@ -82,6 +82,7 @@ module DerivativeRodeo
82
82
  #
83
83
  # @see #build_step
84
84
  # @see #with_each_requisite_location_and_tmp_file_path
85
+ # rubocop:disable Metrics/MethodLength
85
86
  def generated_files
86
87
  # TODO: Examples please
87
88
  return @generated_files if defined?(@generated_files)
@@ -101,11 +102,16 @@ module DerivativeRodeo
101
102
  @generated_files << if generated_file.exist?
102
103
  generated_file
103
104
  else
105
+ log_message = "#{self.class}#generated_files :: " \
106
+ "input_location file_uri #{input_location.file_uri} :: " \
107
+ "Generating output_location file_uri #{generated_file.file_uri} via build_step."
108
+ logger.info(log_message)
104
109
  build_step(input_location: input_location, output_location: generated_file, input_tmp_file_path: input_tmp_file_path)
105
110
  end
106
111
  end
107
112
  @generated_files
108
113
  end
114
+ # rubocop:enable Metrics/MethodLength
109
115
 
110
116
  ##
111
117
  # @return [Array<String>]
@@ -167,20 +173,55 @@ module DerivativeRodeo
167
173
  # {#output_location_template} or {#preprocessed_location_template}.
168
174
  #
169
175
  # @see [StorageLocations::BaseLocation#exist?]
176
+ # rubocop:disable Metrics/MethodLength
177
+ # rubocop:disable Metrics/AbcSize
170
178
  def destination(input_location)
171
- output_location = input_location.derived_file_from(template: output_location_template)
179
+ output_location = input_location.derived_file_from(template: output_location_template, extension: output_extension)
172
180
 
173
- return output_location if output_location.exist?
174
- return output_location unless preprocessed_location_template
181
+ if output_location.exist?
182
+ log_message = "#{self.class}#destination :: " \
183
+ "input_location file_uri #{input_location.file_uri} :: " \
184
+ "Found output_location file_uri #{output_location.file_uri}."
185
+ logger.info(log_message)
175
186
 
176
- preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template)
187
+ return output_location
188
+ end
189
+
190
+ unless preprocessed_location_template
191
+ log_message = "#{self.class}#destination :: " \
192
+ "input_location file_uri #{input_location.file_uri} :: " \
193
+ "No preprocessed_location_template provided " \
194
+ "nor does a file exist at output_location file_uri #{output_location.file_uri};" \
195
+ " moving on to generation via #{self.class}#build_step."
196
+ logger.info(log_message)
197
+
198
+ return output_location
199
+ end
200
+
201
+ preprocessed_location = input_location.derived_file_from(template: preprocessed_location_template, extension: output_extension)
177
202
  # We only want the location if it exists
178
- return preprocessed_location if preprocessed_location&.exist?
203
+ if preprocessed_location&.exist?
204
+ log_message = "#{self.class}#destination :: " \
205
+ "input_location file_uri #{input_location.file_uri} :: " \
206
+ "Found preprocessed_location file_uri #{output_location.file_uri}."
207
+ logger.info(log_message)
208
+
209
+ return preprocessed_location
210
+ end
211
+
212
+ log_message = "#{self.class}#destination :: " \
213
+ "input_location file_uri #{input_location.file_uri} :: " \
214
+ "No file exists at preprocessed_location file_uri #{preprocessed_location.file_uri} " \
215
+ "nor output_location file_uri #{output_location.file_uri}; " \
216
+ "moving on to generation via #{self.class}#build_step."
217
+ logger.info(log_message)
179
218
 
180
219
  # NOTE: The file does not exist at the output_location; but we pass this information along so
181
220
  # that the #build_step knows where to write the file.
182
221
  output_location
183
222
  end
223
+ # rubocop:enable Metrics/AbcSize
224
+ # rubocop:enable Metrics/MethodLength
184
225
 
185
226
  ##
186
227
  # A bit of indirection to create a common interface for running a shell command.
@@ -5,7 +5,8 @@ module DerivativeRodeo
5
5
  ##
6
6
  # Takes images and ensures that we have a monochrome derivative of those images.
7
7
  class MonochromeGenerator < BaseGenerator
8
- # TODO: Can we assume a tiff?
8
+ # @see DerivativeRodeo::Services::ConvertUriViaTemplateService for the interaction of the
9
+ # magic ".mono" suffix
9
10
  self.output_extension = 'mono.tiff'
10
11
 
11
12
  ##
@@ -46,11 +46,12 @@ module DerivativeRodeo
46
46
  # from_uris: ["file:///path1/A/file.pdf", "aws:///path2/B/file.pdf"],
47
47
  # template: "file:///dest1/{{dir_parts[-1..-1]}}/{{ filename }}")
48
48
  # => ["file:///dest1/A/file.pdf", "aws:///dest1/B/file.pdf"]
49
- def self.call(from_uri:, template:, adapter: nil, separator: "/")
50
- new(from_uri: from_uri, template: template, adapter: adapter, separator: separator).call
49
+ def self.call(from_uri:, template:, adapter: nil, separator: "/", **options)
50
+ new(from_uri: from_uri, template: template, adapter: adapter, separator: separator, **options).call
51
51
  end
52
52
 
53
- def initialize(from_uri:, template:, adapter: nil, separator: "/")
53
+ # rubocop:disable Metrics/MethodLength
54
+ def initialize(from_uri:, template:, adapter: nil, separator: "/", **options)
54
55
  @from_uri = from_uri
55
56
  @template = template
56
57
  @adapter = adapter
@@ -60,12 +61,23 @@ module DerivativeRodeo
60
61
  @from_scheme, @path = uri.split("://")
61
62
  @parts = @path.split(separator)
62
63
  @dir_parts = @parts[0..-2]
63
- @filename = @parts[-1]
64
- @basename = File.basename(@filename, ".*")
65
- @extension = File.extname(@filename)
64
+ @filename = options[:filename] || @parts[-1]
65
+ @basename = options[:basename] || File.basename(@filename, ".*")
66
+
67
+ ##
68
+ # HACK: The HocrGenerator has `.mono.tiff`, but we are not interested in carrying
69
+ # forward the `.mono` suffix, as that makes it hard to find the preprocessed word
70
+ # coordinates, alto, and plain text. Stripping it ensures files derived from the .mono are
71
+ # findable in IIIF Print.
72
+ @basename = @basename.sub(/\.mono\z/, '')
73
+ @extension = options[:extension] || File.extname(@filename)
74
+ # When a generator specifies "same" we want to use the given file's extension
75
+ @extension = File.extname(@filename) if @extension == DerivativeRodeo::StorageLocations::SAME
76
+ @extension = ".#{@extension}" unless @extension.start_with?(".")
66
77
 
67
78
  @template_without_query, @template_query = template.split("?")
68
79
  end
80
+ # rubocop:enable Metrics/MethodLength
69
81
 
70
82
  def call
71
83
  to_uri = template_without_query.gsub(DIR_PARTS_REPLACEMENT_REGEXP) do |text|
@@ -101,10 +101,10 @@ module DerivativeRodeo
101
101
  # @param service [#call, Module<DerivativeRodeo::Services::ConvertUriViaTemplateService>]
102
102
  #
103
103
  # @return [StorageLocations::BaseLocation]
104
- def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService)
104
+ def self.build(from_uri:, template:, service: DerivativeRodeo::Services::ConvertUriViaTemplateService, **options)
105
105
  # HACK: Ensuring that we have the correct scheme. Maybe this is a hack?
106
106
  from_uri = "#{scheme}://#{from_uri}" unless from_uri.start_with?("#{scheme}://")
107
- to_uri = service.call(from_uri: from_uri, template: template, adapter: self)
107
+ to_uri = service.call(from_uri: from_uri, template: template, adapter: self, **options)
108
108
  new(to_uri)
109
109
  end
110
110
 
@@ -203,9 +203,9 @@ module DerivativeRodeo
203
203
  # @return [StorageLocations::BaseLocation]
204
204
  #
205
205
  # @see DerivativeRodeo::Services::ConvertUriViaTemplateService
206
- def derived_file_from(template:)
206
+ def derived_file_from(template:, **options)
207
207
  klass = DerivativeRodeo::StorageLocations::BaseLocation.load_location(template)
208
- klass.build(from_uri: file_path, template: template)
208
+ klass.build(from_uri: file_path, template: template, **options)
209
209
  end
210
210
 
211
211
  ##
@@ -231,6 +231,7 @@ module DerivativeRodeo
231
231
  def with_new_extension(extension)
232
232
  return file_path if extension == StorageLocations::SAME
233
233
 
234
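+ # NOTE: May need to revisit this; it keeps only the text before the first "." in file_path, so a path with more than one "." (e.g. "file.mono.tiff") loses everything after that first ".".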
234
235
  "#{file_path.split('.')[0]}.#{extension}"
235
236
  end
236
237
 
@@ -13,6 +13,8 @@ module DerivativeRodeo
13
13
  # Location to download and upload files to Sqs
14
14
  # It uploads a file_uri to the queue, not the contents of that file
15
15
  # reading from the queue is not currently implemented
16
+ #
17
+ # rubocop:disable Metrics/ClassLength
16
18
  class SqsLocation < BaseLocation
17
19
  ##
18
20
  # @!group Class Attributes
@@ -85,11 +87,14 @@ module DerivativeRodeo
85
87
  batch = []
86
88
  Dir.glob("#{File.dirname(tmp_file_path)}/**/**").each.with_index do |fp, i|
87
89
  batch << { id: SecureRandom.uuid, message_body: output_json("file://#{fp}") }
88
- if (i % batch_size).zero?
90
+ if (i + 1 % batch_size).zero?
89
91
  add_batch(messages: batch)
90
92
  batch = []
91
93
  end
92
94
  end
95
+
96
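+ # Ensure we're flushing the batched up queue as part of completing the write. NOTE(review): `(i + 1 % batch_size)` above parses as `i + (1 % batch_size)`, so the in-loop flush likely never fires on batch boundaries and this final flush may carry every message; confirm and parenthesize as `((i + 1) % batch_size)`.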
97
+ add_batch(messages: batch) if batch.present?
93
98
  file_uri
94
99
  end
95
100
 
@@ -181,6 +186,7 @@ module DerivativeRodeo
181
186
  end
182
187
 
183
188
  def output_json(uri)
189
+ # TODO: Add ability to handle a pre-process-template given to an SQS, and pass that along to the generator when applicable.
184
190
  key = DerivativeRodeo::Services::ConvertUriViaTemplateService.call(from_uri: uri, template: template, adapter: self)
185
191
  { key => [template] }.to_json
186
192
  end
@@ -201,5 +207,6 @@ module DerivativeRodeo
201
207
  @file_uri_parts
202
208
  end
203
209
  end
210
+ # rubocop:enable Metrics/ClassLength
204
211
  end
205
212
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DerivativeRodeo
4
- VERSION = '0.4.0'
4
+ VERSION = '0.4.2'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: derivative-rodeo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Kaufman
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-06-06 00:00:00.000000000 Z
12
+ date: 2023-07-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -337,7 +337,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
337
337
  - !ruby/object:Gem::Version
338
338
  version: '0'
339
339
  requirements: []
340
- rubygems_version: 3.1.6
340
+ rubygems_version: 3.3.7
341
341
  signing_key:
342
342
  specification_version: 4
343
343
  summary: An ETL Ecosystem for Derivative Processing.