bpl-derivatives 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +7 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +60 -0
  8. data/Rakefile +7 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/bpl-derivatives.gemspec +50 -0
  12. data/lib/bpl/derivatives.rb +74 -0
  13. data/lib/bpl/derivatives/audio_encoder.rb +27 -0
  14. data/lib/bpl/derivatives/config.rb +64 -0
  15. data/lib/bpl/derivatives/datastream_decorator.rb +31 -0
  16. data/lib/bpl/derivatives/input_object_decorator.rb +11 -0
  17. data/lib/bpl/derivatives/io_decorator.rb +15 -0
  18. data/lib/bpl/derivatives/logger.rb +25 -0
  19. data/lib/bpl/derivatives/output_object_decorator.rb +12 -0
  20. data/lib/bpl/derivatives/processors.rb +18 -0
  21. data/lib/bpl/derivatives/processors/audio.rb +5 -0
  22. data/lib/bpl/derivatives/processors/document.rb +45 -0
  23. data/lib/bpl/derivatives/processors/ffmpeg.rb +21 -0
  24. data/lib/bpl/derivatives/processors/image.rb +76 -0
  25. data/lib/bpl/derivatives/processors/jpeg2k_image.rb +127 -0
  26. data/lib/bpl/derivatives/processors/processor.rb +43 -0
  27. data/lib/bpl/derivatives/processors/raw_image.rb +37 -0
  28. data/lib/bpl/derivatives/processors/shell_based_processor.rb +103 -0
  29. data/lib/bpl/derivatives/processors/video.rb +10 -0
  30. data/lib/bpl/derivatives/processors/video/config.rb +66 -0
  31. data/lib/bpl/derivatives/processors/video/processor.rb +41 -0
  32. data/lib/bpl/derivatives/runners/audio_derivatives.rb +7 -0
  33. data/lib/bpl/derivatives/runners/document_derivatives.rb +7 -0
  34. data/lib/bpl/derivatives/runners/image_derivatives.rb +15 -0
  35. data/lib/bpl/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
  36. data/lib/bpl/derivatives/runners/pdf_derivatives.rb +4 -0
  37. data/lib/bpl/derivatives/runners/runner.rb +59 -0
  38. data/lib/bpl/derivatives/runners/video_derivatives.rb +7 -0
  39. data/lib/bpl/derivatives/services/capability_service.rb +17 -0
  40. data/lib/bpl/derivatives/services/mime_type_service.rb +14 -0
  41. data/lib/bpl/derivatives/services/persist_basic_contained_output_file_service.rb +73 -0
  42. data/lib/bpl/derivatives/services/persist_datastream_output_service.rb +30 -0
  43. data/lib/bpl/derivatives/services/persist_file_system_output_service.rb +31 -0
  44. data/lib/bpl/derivatives/services/persist_output_file_service.rb +24 -0
  45. data/lib/bpl/derivatives/services/retrieve_source_file_from_datastream_service.rb +12 -0
  46. data/lib/bpl/derivatives/services/retrieve_source_file_service.rb +13 -0
  47. data/lib/bpl/derivatives/services/tempfile_service.rb +65 -0
  48. data/lib/bpl/derivatives/version.rb +5 -0
  49. data/lib/color_profiles/license.txt +7 -0
  50. data/lib/color_profiles/sRGB_IEC61966-2-1_no_black_scaling.icc +0 -0
  51. metadata +238 -0
@@ -0,0 +1,25 @@
1
module BPL::Derivatives
  # Class-level facade that forwards logging calls (for example +debug+ or
  # +warn+) to the logger returned by +BPL::Derivatives.config.base_logger+.
  class Logger
    class << self
      # Forwards any message the configured logger responds to.
      #
      # Exceptions raised by the underlying logger are allowed to propagate
      # unchanged; previously every StandardError was rescued and replaced by
      # a NoMethodError, which hid the real failure.
      #
      # @param method_name [Symbol] the message that was sent to this class
      # @param arguments [Array] positional arguments to forward
      # @param block [Proc] optional block to forward
      # @raise [NoMethodError] when the configured logger does not respond to +method_name+
      def method_missing(method_name, *arguments, &block)
        return super unless logger.respond_to?(method_name)

        logger.public_send(method_name, *arguments, &block)
      end

      # Reports the configured logger's public methods as our own, so that
      # +respond_to?+ and +method+ agree with what +method_missing+ forwards.
      # No separate +respond_to?+ override is needed: the default
      # implementation consults this hook after checking our own methods.
      #
      # @param method_name [Symbol]
      # @param include_private [Boolean]
      # @return [Boolean]
      def respond_to_missing?(method_name, include_private = false)
        logger.respond_to?(method_name) || super
      end

      private

      # @return [Object] the logger configured on BPL::Derivatives
      def logger
        BPL::Derivatives.config.base_logger
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'delegate'
2
+
3
module BPL::Derivatives
  # Wraps the raw output of a derivative run. Unknown messages are delegated
  # to the raw +content+ passed to the constructor, while the +content+
  # accessor exposes that same data wrapped in a StringIO.
  class OutputObjectDecorator < SimpleDelegator
    attr_accessor :content, :original_object

    # @param content [String] the derivative data
    # @param original_object [Object, nil] the object the derivative was created from
    def initialize(content, original_object = nil)
      super(content)
      @content = StringIO.new(content)
      @original_object = original_object
    end
  end
end
@@ -0,0 +1,18 @@
1
module BPL::Derivatives
  # Namespace for the derivative processors. Constants are registered with
  # ActiveSupport::Autoload; the Processor base class is registered inside an
  # +eager_autoload+ block, the remaining processors are plain autoloads.
  module Processors
    extend ActiveSupport::Autoload

    eager_autoload { autoload :Processor }

    %i[
      Audio
      Document
      Ffmpeg
      Image
      Jpeg2kImage
      RawImage
      ShellBasedProcessor
      Video
    ].each { |processor_name| autoload processor_name }
  end
end
@@ -0,0 +1,5 @@
1
module BPL::Derivatives::Processors
  # Audio processor. It adds no behavior of its own; everything comes from
  # the Processor base class and the Ffmpeg mixin.
  class Audio < Processor
    include Ffmpeg
  end
end
@@ -0,0 +1,45 @@
1
module BPL::Derivatives::Processors
  # Converts a source document into another format by shelling out to
  # LibreOffice. For "jpg" output a PDF is produced first and then handed to
  # the Image processor.
  class Document < Processor
    include ShellBasedProcessor

    # Runs the LibreOffice conversion.
    #
    # @param path [String] path of the document to convert
    # @param format [String] target format passed to --convert-to
    # @param outdir [String] directory LibreOffice writes the result into
    def self.encode(path, format, outdir)
      execute "#{BPL::Derivatives.config.libreoffice_path} --invisible --headless --convert-to #{format} --outdir #{Shellwords.escape(outdir)} #{Shellwords.escape(path)}"
    end

    # Converts the document to the format specified in the directives hash.
    # TODO: file_suffix and options are passed from ShellBasedProcessor.process but are not needed.
    # A refactor could simplify this.
    def encode_file(_file_suffix, _options = {})
      convert_to_format
    ensure
      # Use the memoized path directly: calling #converted_file here would run
      # the conversion a second time whenever the first attempt raised.
      FileUtils.rm_f(@converted_file) if @converted_file
    end

    private

    # For jpeg files, a pdf is created from the original source and then passed to the Image processor class
    # so we can get a better conversion with resizing options. Otherwise, the ::encode method is used.
    def convert_to_format
      if directives.fetch(:format) == "jpg"
        render_jpg_from_pdf
      else
        # Converted documents are binary data, so read them without newline
        # or encoding translation.
        finalize_derivative_output(File.binread(converted_file))
      end
    end

    # Temporarily points the input object at the intermediate PDF, runs the
    # Image processor on it, and restores the original source path afterwards
    # so the object does not keep referencing a file that #encode_file deletes.
    def render_jpg_from_pdf
      original_source_path = object.source_path
      begin
        object.source_path = converted_file
        BPL::Derivatives::Processors::Image.new(object, directives).process
      ensure
        object.source_path = original_source_path
      end
    end

    # @return [String] path of the converted file (memoized); a PDF when the
    #   requested format is "jpg"
    def converted_file
      @converted_file ||= if directives.fetch(:format) == "jpg"
                            convert_to("pdf")
                          else
                            convert_to(directives.fetch(:format))
                          end
    end

    # Converts the source into +format+ inside the configured temp directory.
    #
    # @return [String] the path built from the temp directory, the source's
    #   base name and +format+
    def convert_to(format)
      self.class.encode(source_path, format, BPL::Derivatives.config.temp_file_base)
      File.join(BPL::Derivatives.config.temp_file_base, [File.basename(source_path, ".*"), format].join('.'))
    end
  end
end
@@ -0,0 +1,21 @@
1
# Mixin for processors that transcode files by running FFmpeg on the command line
module BPL::Derivatives::Processors
  module Ffmpeg
    extend ActiveSupport::Concern

    INPUT_OPTIONS = :input_options
    OUTPUT_OPTIONS = :output_options

    included do
      include ShellBasedProcessor
    end

    module ClassMethods
      # Builds and executes the ffmpeg command line.
      #
      # @param path [String] path of the source file (shell-escaped here)
      # @param options [Hash] may carry INPUT_OPTIONS and OUTPUT_OPTIONS strings,
      #   which are inserted into the command verbatim; the hash is not modified
      # @param output_file [String] path ffmpeg writes to (shell-escaped here)
      def encode(path, options, output_file)
        # Read defaults without writing them back into the caller's hash.
        inopts = options[INPUT_OPTIONS] || "-y"
        outopts = options[OUTPUT_OPTIONS] || ""
        execute "#{BPL::Derivatives.ffmpeg_path} #{inopts} -i #{Shellwords.escape(path)} #{outopts} #{Shellwords.escape(output_file)}"
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'mini_magick'
2
+
3
module BPL::Derivatives::Processors
  # Creates image derivatives with MiniMagick, optionally bounded by a
  # class-level timeout.
  class Image < Processor
    cattr_accessor :timeout

    # Runs the conversion, under the timeout when one is configured.
    def process
      return process_with_timeout if timeout

      create_resized_image
    end

    # @raise [BPL::Derivatives::TimeoutError] when the conversion exceeds +timeout+ seconds
    def process_with_timeout
      Timeout.timeout(timeout) { create_resized_image }
    rescue Timeout::Error
      raise BPL::Derivatives::TimeoutError, "Unable to process image derivative\nThe command took longer than #{timeout} seconds to execute"
    end

    protected

    # When resizing images, it is necessary to flatten any layers, otherwise the background
    # may be completely black. This happens especially with PDFs. See #110
    def create_resized_image
      create_image do |transformer|
        next unless size

        transformer.flatten
        transformer.resize(size)
      end
    end

    # Loads the image, yields it for extra transformations, applies the
    # format/quality/density directives and hands the result to #write_image.
    def create_image
      transformer = selected_layers(load_image_transformer)
      yield(transformer) if block_given?
      transformer.format(directives.fetch(:format))
      transformer.quality(quality.to_s) if quality
      transformer.density(density.to_s) if density
      write_image(transformer)
    end

    # Writes the image into an in-memory buffer and passes the bytes on to
    # the output file service.
    def write_image(xfrm)
      buffer = StringIO.new
      xfrm.write(buffer)
      buffer.rewind
      finalize_derivative_output(buffer.read)
    end

    # Override this method if you want a different transformer, or need to load the
    # raw image from a different source (e.g. external file)
    def load_image_transformer
      MiniMagick::Image.open(source_path)
    end

    private

    # @return [Object, nil] the :size directive, if given
    def size
      directives.fetch(:size, nil)
    end

    # @return [Object, nil] the :quality directive, if given
    def quality
      directives.fetch(:quality, nil)
    end

    # @return [Object, nil] the :density directive, if given
    def density
      directives.fetch(:density, nil)
    end

    # Picks the layer to convert: PDFs always use a single layer (the :layer
    # directive, defaulting to 0); other images use a layer only when the
    # :layer directive is set.
    def selected_layers(image)
      return image.layers[directives.fetch(:layer, 0)] if image.type =~ /pdf/i
      return image.layers[directives.fetch(:layer)] if directives.fetch(:layer, false)

      image
    end
  end
end
@@ -0,0 +1,127 @@
1
+ require 'mini_magick'
2
+ require 'nokogiri'
3
+
4
module BPL::Derivatives::Processors
  # Creates JPEG 2000 derivatives: the source is written to a temporary TIFF
  # with MiniMagick and then compressed with the configured kdu_compress binary.
  class Jpeg2kImage < Processor
    include ShellBasedProcessor

    class << self
      # @return [String] path of the bundled sRGB colour profile
      def srgb_profile_path
        File.join [
          File.expand_path('../../../../', __FILE__),
          'color_profiles',
          'sRGB_IEC61966-2-1_no_black_scaling.icc'
        ]
      end

      # Resolves the kdu_compress argument string.
      #
      # A Symbol :recipe is looked up in the configured recipes as
      # "<recipe>_<quality>"; a String :recipe is used verbatim; anything else
      # (or a missing configured recipe) falls back to #calculate_recipe.
      #
      # @param args [Hash] the directives
      # @param quality [String] 'gray' or 'color'
      # @param long_dim [Integer] the longer image dimension in pixels
      # @return [String]
      def kdu_compress_recipe(args, quality, long_dim)
        if args[:recipe].is_a? Symbol
          recipe = [args[:recipe].to_s, quality].join('_').to_sym
          return BPL::Derivatives.config.kdu_compress_recipes[recipe] if BPL::Derivatives.config.kdu_compress_recipes.key? recipe
          BPL::Derivatives.config.base_logger.warn "No JP2 recipe for :#{args[:recipe]} ('#{recipe}') found in configuration. Using best guess."
          calculate_recipe(args, quality, long_dim)
        elsif args[:recipe].is_a? String
          args[:recipe]
        else
          calculate_recipe(args, quality, long_dim)
        end
      end

      # Builds a recipe from the :levels, :layers, :compression and :tile_size
      # directives, using defaults for any that are missing.
      #
      # @return [String] the arguments collapsed onto a single line
      def calculate_recipe(args, quality, long_dim)
        levels_arg = args.fetch(:levels, level_count_for_size(long_dim))
        rates_arg = layer_rates(args.fetch(:layers, 8), args.fetch(:compression, 10))
        tile_size = args.fetch(:tile_size, 1024)
        tiles_arg = "#{tile_size},#{tile_size}"
        jp2_space_arg = quality == 'gray' ? 'sLUM' : 'sRGB'

        %(-rate #{rates_arg}
          -jp2_space #{jp2_space_arg}
          -double_buffering 10
          -num_threads 4
          -no_weights
          Clevels=#{levels_arg}
          "Stiles={#{tiles_arg}}"
          "Cblk={64,64}"
          Cuse_sop=yes
          Cuse_eph=yes
          Corder=RPCL
          ORGgen_plt=yes
          ORGtparts=R ).gsub(/\s+/, " ").strip
      end

      # Counts how many times +long_dim+ can be halved while staying at or
      # above 96 pixels, minus one.
      #
      # @param long_dim [Integer]
      # @return [Integer] never negative; images smaller than 96 pixels yield 0
      #   instead of the -1 the plain subtraction would produce
      def level_count_for_size(long_dim)
        levels = 0
        level_size = long_dim
        while level_size >= 96
          level_size /= 2
          levels += 1
        end
        [levels - 1, 0].max
      end

      # @param layer_count [Integer] number of rates to generate
      # @param compression_numerator [Numeric] e.g. 10 means 10:1 compression
      # @return [String] comma-separated rates, each one the previous divided by 1.618
      def layer_rates(layer_count, compression_numerator)
        # e.g. if compression_numerator = 10 then compression is 10:1
        rates = []
        cmp = 24.0 / compression_numerator
        layer_count.times do
          rates << cmp
          cmp = (cmp / 1.618).round(8)
        end
        rates.map(&:to_s).join(',')
      end

      # Runs kdu_compress. +recipe+ is inserted verbatim because it carries
      # several already-quoted arguments; both paths are shell-escaped.
      def encode(path, recipe, output_file)
        kdu_compress = BPL::Derivatives.config.kdu_compress_path
        execute "#{kdu_compress} -quiet -i #{Shellwords.escape(path)} -o #{Shellwords.escape(output_file)} #{recipe}"
      end

      # @param ext [String] file extension including the leading dot
      # @return [String] an unused file name inside the configured temp directory
      def tmp_file(ext)
        Dir::Tmpname.create(['bpl-derivative', ext], BPL::Derivatives.config.temp_file_base) {}
      end

      # @return [Integer] the larger of the image's width and height
      def long_dim(image)
        [image[:width], image[:height]].max
      end
    end

    # Writes the (optionally resized / colour-converted) source to a temporary
    # TIFF and encodes it as JPEG 2000. The temporary TIFF is removed even
    # when a step raises.
    def process
      image = MiniMagick::Image.open(source_path)
      quality = image['%[channels]'] == 'gray' ? 'gray' : 'color'
      # NOTE(review): long_dim is measured before any :resize is applied, so
      # the computed level count reflects the source size - confirm intended.
      long_dim = self.class.long_dim(image)
      file_path = self.class.tmp_file('.tif')
      to_srgb = directives.fetch(:to_srgb, true)
      if directives[:resize] || to_srgb
        preprocess(image, resize: directives[:resize], to_srgb: to_srgb, src_quality: quality)
      end
      image.write file_path
      recipe = self.class.kdu_compress_recipe(directives, quality, long_dim)
      encode_file(recipe, file_path: file_path)
    ensure
      File.unlink(file_path) if file_path && File.exist?(file_path)
    end

    # Encodes to a temporary .jp2, passes its bytes to the output file service
    # and removes the temporary file, also when encoding or persisting raises.
    #
    # @param recipe [String] kdu_compress arguments
    # @param opts [Hash]
    # @option opts [String] :file_path file to encode instead of the source
    def encode_file(recipe, opts = {})
      output_file = self.class.tmp_file('.jp2')
      if opts[:file_path]
        self.class.encode(opts[:file_path], recipe, output_file)
      else
        # NOTE(review): `source_file` is not among the accessors the Processor
        # base class declares - verify this branch works before relying on it.
        BPL::Derivatives::TempfileService.create(source_file) do |f|
          self.class.encode(f.path, recipe, output_file)
        end
      end
      finalize_derivative_output(File.open(output_file, "rb", &:read))
    ensure
      File.unlink(output_file) if output_file && File.exist?(output_file)
    end

    protected

    # Applies the optional resize and sRGB profile conversion in one
    # MiniMagick invocation.
    #
    # @param opts [Hash] resize: <geometry>, to_srgb: <bool>, src_quality: 'color'|'gray'
    # @return [MiniMagick::Image] the same image object
    def preprocess(image, opts = {})
      image.combine_options do |c|
        c.resize(opts[:resize]) if opts[:resize]
        c.profile self.class.srgb_profile_path if opts[:src_quality] == 'color' && opts[:to_srgb]
      end
      image
    end
  end
end
@@ -0,0 +1,43 @@
1
module BPL::Derivatives::Processors
  # Processors take a single input and produce a single output
  class Processor
    attr_accessor :object, :source_path, :directives, :output_file_service

    # @param object [BPL::Derivatives::InputObjectDecorator, BPL::Derivatives::OutputObjectDecorator]
    #   the object to process; its +source_path+ is copied onto the processor
    # @param directives [Hash] directions for creating the output
    # @option directives [String] :format the format of the output
    # @option directives [String] :url the location to put the output
    # @param opts [Hash]
    # @option opts [#call] :output_file_service An output file service to call;
    #   defaults to the one configured on BPL::Derivatives
    def initialize(object, directives, opts = {})
      self.object = object
      self.source_path = object.source_path
      self.directives = directives
      self.output_file_service = opts.fetch(:output_file_service, BPL::Derivatives.config.output_file_service)
    end

    # Subclasses must implement this.
    #
    # @raise [RuntimeError] always, in this base class
    def process
      raise "Processor is an abstract class. Implement `process' on #{self.class.name}"
    end

    # This governs the output key sent to the persist file service
    # while this is adequate for storing in Fedora, it's not a great name for saving
    # to the file system.
    def output_file_id(name)
      [out_prefix, name].join('_')
    end

    # @return [String] the base name of the source file; the argument is ignored
    def output_filename_for(_name)
      File.basename(source_path)
    end

    # Wraps the output together with the processed object and hands both,
    # along with the directives, to the output file service.
    def finalize_derivative_output(output_io)
      output_file_service.call(
        BPL::Derivatives::OutputObjectDecorator.new(output_io, object),
        directives
      )
    end

    # @deprecated Please use a PersistOutputFileService class to save an object
    def output_file
      raise NotImplementedError, "Processor is an abstract class. Utilize an implementation of a PersistOutputFileService class in #{self.class.name}"
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'mini_magick'
2
+
3
module BPL::Derivatives::Processors
  # Image processor variant for RAW source files.
  class RawImage < Image
    class_attribute :timeout

    protected

    # Converts the RAW source and passes the result to #write_image.
    #
    # All parameters are optional so the method can be driven by the
    # inherited #create_resized_image, which calls it without arguments.
    #
    # @param _destination_name [Object] unused; accepted for backward compatibility
    # @param format [String, nil] output format; defaults to the :format directive
    # @param quality [Object, nil] output quality; defaults to the :quality directive
    def create_image(_destination_name = nil, format = nil, quality = nil)
      format ||= directives.fetch(:format)
      quality ||= directives.fetch(:quality, nil)
      xfrm = load_image_transformer
      begin
        # Transpose format and scaling due to the fact that ImageMagick can
        # read but not write RAW files and this will otherwise cause many
        # cryptic segmentation faults
        xfrm.format(format)
        yield(xfrm) if block_given?
        xfrm.quality(quality.to_s) if quality
        # The inherited #write_image takes only the transformer.
        write_image(xfrm)
      ensure
        remove_temp_files(xfrm)
      end
    end

    # Delete any temp files that might clutter up the disk if
    # you are doing a batch or don't touch your temporary storage
    # for a long time
    def remove_temp_files(xfrm)
      xfrm.destroy!
    end

    # Override this method if you want a different transformer, or
    # need to load the raw image from a different source (e.g.
    # external file).
    #
    # In this case always add an extension to help out MiniMagick
    # with RAW files
    def load_image_transformer
      MiniMagick::Image.open(source_path)
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # Shared behavior for processors that create derivatives by running an external command-line tool
2
+
3
+ require 'tmpdir'
4
+ require 'open3'
5
+
6
+ module BPL::Derivatives::Processors
7
+ module ShellBasedProcessor
8
+ extend ActiveSupport::Concern
9
+
10
+ BLOCK_SIZE = 1024
11
+
12
+ included do
13
+ cattr_accessor :timeout
14
+ extend Open3
15
+ end
16
+
17
+ def process
18
+ format = directives[:format]
19
+ raise ArgumentError, "You must provide the :format you want to transcode into. You provided #{directives}" unless format
20
+ # TODO: if the source is in the correct format, we could just copy it and skip transcoding.
21
+ encode_file(format, options_for(format))
22
+ end
23
+
24
+ # override this method in subclass if you want to provide specific options.
25
+ # returns a hash of options that the specific processors use
26
+ def options_for(_format)
27
+ {}
28
+ end
29
+
30
+ def encode_file(file_suffix, options)
31
+ temp_file_name = output_file(file_suffix)
32
+ self.class.encode(source_path, options, temp_file_name)
33
+ finalize_derivative_output(File.read(temp_file_name))
34
+ File.unlink(temp_file_name)
35
+ end
36
+
37
+ def output_file(file_suffix)
38
+ Dir::Tmpname.create(['sufia', ".#{file_suffix}"], BPL::Derivatives.temp_file_base) {}
39
+ end
40
+
41
+ module ClassMethods
42
+ def execute(command)
43
+ context = {}
44
+ if timeout
45
+ execute_with_timeout(timeout, command, context)
46
+ else
47
+ execute_without_timeout(command, context)
48
+ end
49
+ end
50
+
51
+ def execute_with_timeout(timeout, command, context)
52
+ Timeout.timeout(timeout) do
53
+ execute_without_timeout(command, context)
54
+ end
55
+ rescue Timeout::Error
56
+ pid = context[:pid]
57
+ Process.kill("KILL", pid)
58
+ raise BPL::Derivatives::TimeoutError, "Unable to execute command \"#{command}\"\nThe command took longer than #{timeout} seconds to execute"
59
+ end
60
+
61
+ def execute_without_timeout(command, context)
62
+ err_str = ''
63
+ stdin, stdout, stderr, wait_thr = popen3(command)
64
+ context[:pid] = wait_thr[:pid]
65
+ files = [stderr, stdout]
66
+ stdin.close
67
+
68
+ until all_eof?(files)
69
+ ready = IO.select(files, nil, nil, 60)
70
+
71
+ next unless ready
72
+ readable = ready[0]
73
+ readable.each do |f|
74
+ fileno = f.fileno
75
+
76
+ begin
77
+ data = f.read_nonblock(BLOCK_SIZE)
78
+
79
+ case fileno
80
+ when stderr.fileno
81
+ err_str << data
82
+ end
83
+ rescue EOFError => e
84
+ BPL::Derivatives::Logger.debug "Caught an eof error in ShellBasedProcessor"
85
+ BPL::Derivatives::Logger.debug "#{e.message}"
86
+ # No big deal.
87
+ end
88
+ end
89
+ end
90
+
91
+ stdout.close
92
+ stderr.close
93
+ exit_status = wait_thr.value
94
+
95
+ raise "Unable to execute command \"#{command}\". Exit code: #{exit_status}\nError message: #{err_str}" unless exit_status.success?
96
+ end
97
+
98
+ def all_eof?(files)
99
+ files.find { |f| !f.eof }.nil?
100
+ end
101
+ end
102
+ end
103
+ end