bpl-derivatives 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +7 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +60 -0
  8. data/Rakefile +7 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/bpl-derivatives.gemspec +50 -0
  12. data/lib/bpl/derivatives.rb +74 -0
  13. data/lib/bpl/derivatives/audio_encoder.rb +27 -0
  14. data/lib/bpl/derivatives/config.rb +64 -0
  15. data/lib/bpl/derivatives/datastream_decorator.rb +31 -0
  16. data/lib/bpl/derivatives/input_object_decorator.rb +11 -0
  17. data/lib/bpl/derivatives/io_decorator.rb +15 -0
  18. data/lib/bpl/derivatives/logger.rb +25 -0
  19. data/lib/bpl/derivatives/output_object_decorator.rb +12 -0
  20. data/lib/bpl/derivatives/processors.rb +18 -0
  21. data/lib/bpl/derivatives/processors/audio.rb +5 -0
  22. data/lib/bpl/derivatives/processors/document.rb +45 -0
  23. data/lib/bpl/derivatives/processors/ffmpeg.rb +21 -0
  24. data/lib/bpl/derivatives/processors/image.rb +76 -0
  25. data/lib/bpl/derivatives/processors/jpeg2k_image.rb +127 -0
  26. data/lib/bpl/derivatives/processors/processor.rb +43 -0
  27. data/lib/bpl/derivatives/processors/raw_image.rb +37 -0
  28. data/lib/bpl/derivatives/processors/shell_based_processor.rb +103 -0
  29. data/lib/bpl/derivatives/processors/video.rb +10 -0
  30. data/lib/bpl/derivatives/processors/video/config.rb +66 -0
  31. data/lib/bpl/derivatives/processors/video/processor.rb +41 -0
  32. data/lib/bpl/derivatives/runners/audio_derivatives.rb +7 -0
  33. data/lib/bpl/derivatives/runners/document_derivatives.rb +7 -0
  34. data/lib/bpl/derivatives/runners/image_derivatives.rb +15 -0
  35. data/lib/bpl/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
  36. data/lib/bpl/derivatives/runners/pdf_derivatives.rb +4 -0
  37. data/lib/bpl/derivatives/runners/runner.rb +59 -0
  38. data/lib/bpl/derivatives/runners/video_derivatives.rb +7 -0
  39. data/lib/bpl/derivatives/services/capability_service.rb +17 -0
  40. data/lib/bpl/derivatives/services/mime_type_service.rb +14 -0
  41. data/lib/bpl/derivatives/services/persist_basic_contained_output_file_service.rb +73 -0
  42. data/lib/bpl/derivatives/services/persist_datastream_output_service.rb +30 -0
  43. data/lib/bpl/derivatives/services/persist_file_system_output_service.rb +31 -0
  44. data/lib/bpl/derivatives/services/persist_output_file_service.rb +24 -0
  45. data/lib/bpl/derivatives/services/retrieve_source_file_from_datastream_service.rb +12 -0
  46. data/lib/bpl/derivatives/services/retrieve_source_file_service.rb +13 -0
  47. data/lib/bpl/derivatives/services/tempfile_service.rb +65 -0
  48. data/lib/bpl/derivatives/version.rb +5 -0
  49. data/lib/color_profiles/license.txt +7 -0
  50. data/lib/color_profiles/sRGB_IEC61966-2-1_no_black_scaling.icc +0 -0
  51. metadata +238 -0
@@ -0,0 +1,25 @@
1
module BPL::Derivatives
  # Class-level proxy for the logger configured at
  # BPL::Derivatives.config.base_logger.
  #
  # Any public message the configured logger responds to (for example
  # +debug+ or +warn+) can be sent straight to this class; anything else
  # raises the usual NoMethodError.
  class Logger
    class << self
      # Forwards messages the configured logger understands.
      #
      # Only messages the logger does not respond to fall through to +super+.
      # Errors raised by the logger itself propagate unchanged instead of
      # being rescued and re-reported as a misleading NoMethodError.
      #
      # @param method_name [Symbol] the message sent to this class
      # @param arguments [Array] arguments forwarded to the logger
      # @param block [Proc] optional block forwarded to the logger
      # @raise [NoMethodError] when the logger does not respond to +method_name+
      def method_missing(method_name, *arguments, &block)
        return super unless logger.respond_to?(method_name)

        logger.public_send(method_name, *arguments, &block)
      end

      # Lets +respond_to?+ and +method+ see the messages handled by
      # +method_missing+. The logger is asked through its public
      # +respond_to?+; its +respond_to_missing?+ hook is private and takes
      # two arguments, so it must not be called directly.
      #
      # No +respond_to?+ override is needed: the built-in implementation
      # consults this hook and still reports the class's own methods.
      #
      # @param method_name [Symbol]
      # @param include_private [Boolean]
      # @return [Boolean]
      def respond_to_missing?(method_name, include_private = false)
        logger.respond_to?(method_name) || super
      end

      private

      # @return [Object] the logger taken from the gem configuration
      def logger
        BPL::Derivatives.config.base_logger
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'delegate'
2
+
3
module BPL::Derivatives
  # Carries a derivative's output to the persistence services.
  #
  # Unknown messages are delegated to the raw +content+ passed to the
  # constructor, while +#content+ exposes that same data wrapped in a
  # StringIO and +#original_object+ keeps a reference to the source object.
  class OutputObjectDecorator < SimpleDelegator
    attr_accessor :content, :original_object

    # @param content [String] the derivative's data
    # @param original_object [Object, nil] the object the derivative was made from
    def initialize(content, original_object = nil)
      super(content)
      @content = StringIO.new(content)
      @original_object = original_object
    end
  end
end
@@ -0,0 +1,18 @@
1
module BPL::Derivatives
  # Namespace for the derivative processors. Constants are registered with
  # ActiveSupport::Autoload, so each file is only loaded when its constant is
  # first referenced.
  module Processors
    extend ActiveSupport::Autoload

    # The base class is registered inside an eager_autoload block.
    eager_autoload { autoload :Processor }

    %i[
      Audio
      Document
      Ffmpeg
      Image
      Jpeg2kImage
      RawImage
      ShellBasedProcessor
      Video
    ].each { |processor| autoload processor }
  end
end
@@ -0,0 +1,5 @@
1
module BPL::Derivatives::Processors
  # Audio derivative processor. It defines no behaviour of its own; everything
  # comes from the Processor base class and the Ffmpeg mixin.
  class Audio < BPL::Derivatives::Processors::Processor
    include BPL::Derivatives::Processors::Ffmpeg
  end
end
@@ -0,0 +1,45 @@
1
module BPL::Derivatives::Processors
  # Converts documents by shelling out to LibreOffice (the executable comes
  # from BPL::Derivatives.config.libreoffice_path).
  class Document < Processor
    include ShellBasedProcessor

    # Runs a headless LibreOffice conversion.
    #
    # Every caller-supplied value is shell-escaped, so a format, output
    # directory or path containing spaces or shell metacharacters cannot
    # break (or inject into) the command line.
    #
    # @param path [String] the file to convert
    # @param format [String] the LibreOffice target format, e.g. "pdf"
    # @param outdir [String] directory LibreOffice writes the result into
    def self.encode(path, format, outdir)
      execute "#{BPL::Derivatives.config.libreoffice_path} --invisible --headless " \
              "--convert-to #{Shellwords.escape(format.to_s)} " \
              "--outdir #{Shellwords.escape(outdir.to_s)} #{Shellwords.escape(path)}"
    end

    # Converts the document to the format specified in the directives hash.
    # TODO: file_suffix and options are passed from ShellBasedProcessor.process but are not needed.
    # A refactor could simplify this.
    def encode_file(_file_suffix, _options = {})
      convert_to_format
    ensure
      # Use the memoized path directly: calling #converted_file here would
      # launch a second conversion whenever the first one failed before the
      # path was stored.
      FileUtils.rm_f(@converted_file) if @converted_file
    end

    private

    # For jpeg files, a pdf is created from the original source and then passed to the Image processor class
    # so we can get a better conversion with resizing options. Otherwise, the ::encode method is used.
    def convert_to_format
      if directives.fetch(:format) == "jpg"
        render_jpg_from_pdf
      else
        # Binary read: the converted file is arbitrary document data, not text.
        finalize_derivative_output(File.binread(converted_file))
      end
    end

    # Points the shared input object at the intermediate PDF while the Image
    # processor runs, then restores the original path so the object is not
    # left referring to a temp file that #encode_file deletes.
    def render_jpg_from_pdf
      original_source_path = object.source_path
      object.source_path = converted_file
      BPL::Derivatives::Processors::Image.new(object, directives).process
    ensure
      object.source_path = original_source_path
    end

    # @return [String] path of the converted file; the conversion runs once
    #   and the path is memoized
    def converted_file
      @converted_file ||= begin
        requested = directives.fetch(:format)
        convert_to(requested == "jpg" ? "pdf" : requested)
      end
    end

    # Converts the source into +format+ inside the configured temp directory.
    #
    # @return [String] the path built from the temp directory, the source's
    #   base name and +format+
    def convert_to(format)
      self.class.encode(source_path, format, BPL::Derivatives.config.temp_file_base)
      File.join(BPL::Derivatives.config.temp_file_base, [File.basename(source_path, ".*"), format].join('.'))
    end
  end
end
@@ -0,0 +1,21 @@
1
# Mixin for processors that transcode files by shelling out to FFmpeg.
module BPL::Derivatives::Processors
  module Ffmpeg
    extend ActiveSupport::Concern

    # Keys recognised in the options hash passed to .encode.
    INPUT_OPTIONS = :input_options
    OUTPUT_OPTIONS = :output_options

    included do
      include ShellBasedProcessor
    end

    module ClassMethods
      # Builds and runs the ffmpeg command line.
      #
      # The options hash is only read, never modified, so a shared or frozen
      # hash can be passed safely. Input and output paths are shell-escaped;
      # the option strings are inserted verbatim because they hold
      # several ffmpeg flags.
      #
      # @param path [String] the source file
      # @param options [Hash] may contain INPUT_OPTIONS (defaults to "-y")
      #   and OUTPUT_OPTIONS (defaults to "")
      # @param output_file [String] where ffmpeg writes its result
      def encode(path, options, output_file)
        inopts = options[INPUT_OPTIONS] || "-y"
        outopts = options[OUTPUT_OPTIONS] || ""
        execute "#{BPL::Derivatives.ffmpeg_path} #{inopts} -i #{Shellwords.escape(path)} #{outopts} #{Shellwords.escape(output_file)}"
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'mini_magick'
2
+
3
module BPL::Derivatives::Processors
  # Creates image derivatives with MiniMagick, driven by the :format, :size,
  # :quality, :density and :layer directives.
  class Image < Processor
    cattr_accessor :timeout

    # Builds the derivative, enforcing the class-level timeout when one is set.
    def process
      return create_resized_image unless timeout

      process_with_timeout
    end

    # @raise [BPL::Derivatives::TimeoutError] when the work exceeds +timeout+ seconds
    def process_with_timeout
      Timeout.timeout(timeout) { create_resized_image }
    rescue Timeout::Error
      raise BPL::Derivatives::TimeoutError, "Unable to process image derivative\nThe command took longer than #{timeout} seconds to execute"
    end

    protected

    # When resizing images, it is necessary to flatten any layers, otherwise the background
    # may be completely black. This happens especially with PDFs. See #110
    def create_resized_image
      create_image do |xfrm|
        next unless size

        xfrm.flatten
        xfrm.resize(size)
      end
    end

    # Loads the source, lets the caller's block adjust it, applies the
    # format / quality / density directives and hands the result on.
    def create_image
      xfrm = selected_layers(load_image_transformer)
      yield xfrm if block_given?

      xfrm.format(directives.fetch(:format))
      xfrm.quality(quality.to_s) if quality
      xfrm.density(density.to_s) if density

      write_image(xfrm)
    end

    # Writes the image into an in-memory buffer and passes its contents to
    # the output file service.
    def write_image(xfrm)
      buffer = StringIO.new
      xfrm.write(buffer)
      buffer.rewind
      finalize_derivative_output(buffer.read)
    end

    # Override this method if you want a different transformer, or need to load the
    # raw image from a different source (e.g. external file)
    def load_image_transformer
      MiniMagick::Image.open(source_path)
    end

    private

    # @return [Object, nil] the :size directive, if any
    def size
      directives.fetch(:size, nil)
    end

    # @return [Object, nil] the :quality directive, if any
    def quality
      directives.fetch(:quality, nil)
    end

    # @return [Object, nil] the :density directive, if any
    def density
      directives.fetch(:density, nil)
    end

    # PDFs always use a single layer (the :layer directive, defaulting to
    # layer 0); other formats only when :layer is given.
    def selected_layers(image)
      return image.layers[directives.fetch(:layer, 0)] if image.type =~ /pdf/i
      return image.layers[directives.fetch(:layer)] if directives.fetch(:layer, false)

      image
    end
  end
end
@@ -0,0 +1,127 @@
1
+ require 'mini_magick'
2
+ require 'nokogiri'
3
+
4
+ module BPL::Derivatives::Processors
5
+ class Jpeg2kImage < Processor
6
+ include ShellBasedProcessor
7
+
8
+ class << self
9
+ def srgb_profile_path
10
+ File.join [
11
+ File.expand_path('../../../../', __FILE__),
12
+ 'color_profiles',
13
+ 'sRGB_IEC61966-2-1_no_black_scaling.icc'
14
+ ]
15
+ end
16
+
17
+ def kdu_compress_recipe(args, quality, long_dim)
18
+ if args[:recipe].is_a? Symbol
19
+ recipe = [args[:recipe].to_s, quality].join('_').to_sym
20
+ return BPL::Derivatives.config.kdu_compress_recipes[recipe] if BPL::Derivatives.config.kdu_compress_recipes.key? recipe
21
+ BPL::Derivatives.config.base_logger.warn "No JP2 recipe for :#{args[:recipe]} ('#{recipe}') found in configuration. Using best guess."
22
+ calculate_recipe(args, quality, long_dim)
23
+ elsif args[:recipe].is_a? String
24
+ args[:recipe]
25
+ else
26
+ calculate_recipe(args, quality, long_dim)
27
+ end
28
+ end
29
+
30
+ def calculate_recipe(args, quality, long_dim)
31
+ levels_arg = args.fetch(:levels, level_count_for_size(long_dim))
32
+ rates_arg = layer_rates(args.fetch(:layers, 8), args.fetch(:compression, 10))
33
+ tile_size = args.fetch(:tile_size, 1024)
34
+ tiles_arg = "#{tile_size},#{tile_size}"
35
+ jp2_space_arg = quality == 'gray' ? 'sLUM' : 'sRGB'
36
+
37
+ %(-rate #{rates_arg}
38
+ -jp2_space #{jp2_space_arg}
39
+ -double_buffering 10
40
+ -num_threads 4
41
+ -no_weights
42
+ Clevels=#{levels_arg}
43
+ "Stiles={#{tiles_arg}}"
44
+ "Cblk={64,64}"
45
+ Cuse_sop=yes
46
+ Cuse_eph=yes
47
+ Corder=RPCL
48
+ ORGgen_plt=yes
49
+ ORGtparts=R ).gsub(/\s+/, " ").strip
50
+ end
51
+
52
+ def level_count_for_size(long_dim)
53
+ levels = 0
54
+ level_size = long_dim
55
+ while level_size >= 96
56
+ level_size /= 2
57
+ levels += 1
58
+ end
59
+ levels - 1
60
+ end
61
+
62
+ def layer_rates(layer_count, compression_numerator)
63
+ # e.g. if compression_numerator = 10 then compression is 10:1
64
+ rates = []
65
+ cmp = 24.0 / compression_numerator
66
+ layer_count.times do
67
+ rates << cmp
68
+ cmp = (cmp / 1.618).round(8)
69
+ end
70
+ rates.map(&:to_s).join(',')
71
+ end
72
+
73
+ def encode(path, recipe, output_file)
74
+ kdu_compress = BPL::Derivatives.config.kdu_compress_path
75
+ execute "#{kdu_compress} -quiet -i #{Shellwords.escape(path)} -o #{output_file} #{recipe}"
76
+ end
77
+
78
+ def tmp_file(ext)
79
+ Dir::Tmpname.create(['bpl-derivative', ext], BPL::Derivatives.config.temp_file_base) {}
80
+ end
81
+
82
+ def long_dim(image)
83
+ [image[:width], image[:height]].max
84
+ end
85
+ end
86
+
87
+ def process
88
+ image = MiniMagick::Image.open(source_path)
89
+ quality = image['%[channels]'] == 'gray' ? 'gray' : 'color'
90
+ long_dim = self.class.long_dim(image)
91
+ file_path = self.class.tmp_file('.tif')
92
+ to_srgb = directives.fetch(:to_srgb, true)
93
+ if directives[:resize] || to_srgb
94
+ preprocess(image, resize: directives[:resize], to_srgb: to_srgb, src_quality: quality)
95
+ end
96
+ image.write file_path
97
+ recipe = self.class.kdu_compress_recipe(directives, quality, long_dim)
98
+ encode_file(recipe, file_path: file_path)
99
+ File.unlink(file_path) unless file_path.nil?
100
+ end
101
+
102
+ def encode_file(recipe, opts = {})
103
+ output_file = self.class.tmp_file('.jp2')
104
+ if opts[:file_path]
105
+ self.class.encode(opts[:file_path], recipe, output_file)
106
+ else
107
+ BPL::Derivatives::TempfileService.create(source_file) do |f|
108
+ self.class.encode(f.path, recipe, output_file)
109
+ end
110
+ end
111
+ finalize_derivative_output(File.open(output_file, "rb", &:read))
112
+ File.unlink(output_file)
113
+ end
114
+
115
+
116
+ protected
117
+
118
+ def preprocess(image, opts = {})
119
+ # resize: <geometry>, to_srgb: <bool>, src_quality: 'color'|'gray'
120
+ image.combine_options do |c|
121
+ c.resize(opts[:resize]) if opts[:resize]
122
+ c.profile self.class.srgb_profile_path if opts[:src_quality] == 'color' && opts[:to_srgb]
123
+ end
124
+ image
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,43 @@
1
module BPL::Derivatives::Processors
  # Base class for processors. A processor takes a single input and produces
  # a single output; subclasses implement #process.
  class Processor
    attr_accessor :object, :source_path, :directives, :output_file_service

    # @param [BPL::Derivatives::InputObjectDecorator, BPL::Derivatives::OutputObjectDecorator] object
    #   the input; must respond to #source_path
    # @param [Hash] directives directions for creating the output
    # @option [String] :format the format of the output
    # @option [String] :url the location to put the output
    # @param [Hash] opts
    # @option [#call] :output_file_service An output file service to call
    #   (defaults to the configured one)
    def initialize(object, directives, opts = {})
      @object = object
      @source_path = object.source_path
      @directives = directives
      @output_file_service = opts.fetch(:output_file_service, BPL::Derivatives.config.output_file_service)
    end

    # Subclasses must override this.
    #
    # @raise [RuntimeError] always, in this base class
    def process
      raise "Processor is an abstract class. Implement `process' on #{self.class.name}"
    end

    # This governs the output key sent to the persist file service
    # while this is adequate for storing in Fedora, it's not a great name for saving
    # to the file system.
    #
    # NOTE(review): out_prefix is not defined in this class — presumably a
    # subclass or mixin supplies it; confirm.
    def output_file_id(name)
      [out_prefix, name].join('_')
    end

    # @return [String] the base name of the source file (the argument is ignored)
    def output_filename_for(_name)
      File.basename(source_path)
    end

    # Wraps the output together with the original object and passes both,
    # plus the directives, to the output file service.
    def finalize_derivative_output(output_io)
      output_file_service.call(
        BPL::Derivatives::OutputObjectDecorator.new(output_io, object),
        directives
      )
    end

    # @deprecated Please use a PersistOutputFileService class to save an object
    def output_file
      raise NotImplementedError, "Processor is an abstract class. Utilize an implementation of a PersistOutputFileService class in #{self.class.name}"
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'mini_magick'
2
+
3
module BPL::Derivatives::Processors
  # Image processor variant for RAW files.
  class RawImage < Image
    class_attribute :timeout

    protected

    # Builds the derivative for a RAW source.
    #
    # All parameters are optional so the method works when the inherited
    # Image#create_resized_image calls +create_image+ without arguments; the
    # defaults come from the directives. The previous signature required two
    # arguments, so that inherited call raised ArgumentError, and it passed
    # three arguments to Image#write_image, which accepts one.
    #
    # @param _destination_name [Object] ignored; kept for existing callers
    # @param format [String] output format (default: the :format directive)
    # @param quality [Object, nil] output quality (default: the :quality directive)
    # @yield [xfrm] lets the caller apply further transformations
    # @return [Object] whatever #write_image returns
    def create_image(_destination_name = nil, format = directives.fetch(:format), quality = directives.fetch(:quality, nil))
      xfrm = load_image_transformer
      # Transpose format and scaling due to the fact that ImageMagick can
      # read but not write RAW files and this will otherwise cause many
      # cryptic segmentation faults
      xfrm.format(format)
      yield(xfrm) if block_given?
      xfrm.quality(quality.to_s) if quality
      write_image(xfrm)
    ensure
      # Clean up even when a transformation or the write fails.
      remove_temp_files(xfrm) if xfrm
    end

    # Delete any temp files that might clutter up the disk if
    # you are doing a batch or don't touch your temporary storage
    # for a long time
    def remove_temp_files(xfrm)
      xfrm.destroy!
    end

    # Override this method if you want a different transformer, or
    # need to load the raw image from a different source (e.g.
    # external file).
    #
    # NOTE(review): an earlier comment said an extension is always added to
    # help MiniMagick with RAW files, but this implementation opens
    # source_path unchanged — confirm whether that step is still needed.
    def load_image_transformer
      MiniMagick::Image.open(source_path)
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # A mixin for processors that create derivatives by running an external shell command
2
+
3
+ require 'tmpdir'
4
+ require 'open3'
5
+
6
+ module BPL::Derivatives::Processors
7
+ module ShellBasedProcessor
8
+ extend ActiveSupport::Concern
9
+
10
+ BLOCK_SIZE = 1024
11
+
12
+ included do
13
+ cattr_accessor :timeout
14
+ extend Open3
15
+ end
16
+
17
# Encodes the source into the format named by the :format directive.
#
# @raise [ArgumentError] when the directives contain no :format
def process
  target_format = directives[:format]
  unless target_format
    raise ArgumentError, "You must provide the :format you want to transcode into. You provided #{directives}"
  end

  # TODO: if the source is in the correct format, we could just copy it and skip transcoding.
  encode_file(target_format, options_for(target_format))
end
23
+
24
# Hook for subclasses that need format-specific encoder options.
#
# @param _format [Object] the requested output format (unused here)
# @return [Hash] a new, empty options hash
def options_for(_format)
  Hash.new
end
29
+
30
# Encodes the source into a temporary file, passes its bytes to the output
# file service and removes the temporary file.
#
# The file is removed in an +ensure+, so a failing encoder or persistence
# step no longer leaves it behind, and it is read in binary mode because
# derivative output is not text.
#
# @param file_suffix [String] extension for the temporary file
# @param options [Hash] encoder options, passed to the class-level .encode
# @return [Object] the result of #finalize_derivative_output
def encode_file(file_suffix, options)
  temp_file_name = output_file(file_suffix)
  self.class.encode(source_path, options, temp_file_name)
  finalize_derivative_output(File.binread(temp_file_name))
ensure
  File.unlink(temp_file_name) if temp_file_name && File.exist?(temp_file_name)
end
36
+
37
# Generates a unique temp-file path (no file is created) whose name starts
# with "sufia" and ends with ".<file_suffix>".
#
# @param file_suffix [String] extension without the leading dot
# @return [String]
def output_file(file_suffix)
  name_parts = ['sufia', ".#{file_suffix}"]
  temp_dir = BPL::Derivatives.temp_file_base
  Dir::Tmpname.create(name_parts, temp_dir) {}
end
40
+
41
+ module ClassMethods
42
# Runs a shell command, applying the class-level timeout when one is set.
#
# @param command [String] the command line to run
def execute(command)
  context = {}
  return execute_without_timeout(command, context) unless timeout

  execute_with_timeout(timeout, command, context)
end
50
+
51
# Runs the command and kills it if it exceeds +timeout+ seconds.
#
# The kill is skipped when no pid was recorded (the timeout fired before the
# child process was spawned) and tolerates a child that has already exited,
# so callers always see TimeoutError rather than a TypeError or
# Errno::ESRCH raised by the cleanup itself.
#
# @param timeout [Numeric] seconds to wait
# @param command [String] the command line to run
# @param context [Hash] receives :pid once the child is spawned
# @raise [BPL::Derivatives::TimeoutError] when the command takes too long
def execute_with_timeout(timeout, command, context)
  Timeout.timeout(timeout) do
    execute_without_timeout(command, context)
  end
rescue Timeout::Error
  pid = context[:pid]
  if pid
    begin
      Process.kill("KILL", pid)
    rescue Errno::ESRCH
      # The process finished on its own in the meantime; nothing to kill.
    end
  end
  raise BPL::Derivatives::TimeoutError, "Unable to execute command \"#{command}\"\nThe command took longer than #{timeout} seconds to execute"
end
60
+
61
# Runs the command through popen3, draining stdout and stderr until both
# reach end-of-file, then checks the exit status.
#
# stdout is read and discarded (so the child cannot block on a full pipe);
# stderr is collected for the error message. All three pipes are closed in an
# +ensure+, so they are released even when an exception — such as the
# timeout raised by execute_with_timeout — interrupts the read loop.
#
# @param command [String] the command line to run
# @param context [Hash] receives the child's :pid
# @raise [RuntimeError] when the command exits unsuccessfully
# @return [nil]
def execute_without_timeout(command, context)
  err_str = ''
  stdin, stdout, stderr, wait_thr = popen3(command)
  context[:pid] = wait_thr[:pid]
  files = [stderr, stdout]
  stdin.close

  until all_eof?(files)
    ready = IO.select(files, nil, nil, 60)
    next unless ready

    ready[0].each do |f|
      begin
        data = f.read_nonblock(BLOCK_SIZE)
        err_str << data if f.fileno == stderr.fileno
      rescue IO::WaitReadable
        # Reported readable but nothing available yet; the next pass retries.
      rescue EOFError => e
        BPL::Derivatives::Logger.debug "Caught an eof error in ShellBasedProcessor"
        BPL::Derivatives::Logger.debug e.message
        # No big deal.
      end
    end
  end

  stdout.close
  stderr.close
  exit_status = wait_thr.value

  raise "Unable to execute command \"#{command}\". Exit code: #{exit_status}\nError message: #{err_str}" unless exit_status.success?
ensure
  [stdin, stdout, stderr].each { |io| io.close if io && !io.closed? }
end
97
+
98
# @param files [Array<#eof>] the streams to check
# @return [Boolean] true when every stream has reached end-of-file (also
#   true for an empty list)
def all_eof?(files)
  files.all?(&:eof)
end
101
+ end
102
+ end
103
+ end