hydra-derivatives 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/History.md +43 -0
  4. data/README.md +23 -38
  5. data/VERSION +1 -1
  6. data/hydra-derivatives.gemspec +0 -1
  7. data/lib/hydra/derivatives.rb +23 -123
  8. data/lib/hydra/derivatives/io_decorator.rb +7 -1
  9. data/lib/hydra/derivatives/processors.rb +19 -0
  10. data/lib/hydra/derivatives/processors/audio.rb +6 -0
  11. data/lib/hydra/derivatives/processors/document.rb +28 -0
  12. data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
  13. data/lib/hydra/derivatives/processors/full_text.rb +60 -0
  14. data/lib/hydra/derivatives/processors/image.rb +58 -0
  15. data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
  16. data/lib/hydra/derivatives/processors/processor.rb +38 -0
  17. data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
  18. data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
  19. data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
  20. data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
  21. data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
  22. data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
  23. data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
  24. data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
  25. data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
  26. data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
  27. data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
  28. data/lib/hydra/derivatives/runners/runner.rb +52 -0
  29. data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
  30. data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
  31. data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
  32. data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
  33. data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
  34. data/spec/processors/full_text.rb +61 -0
  35. data/spec/{units → processors}/image_spec.rb +7 -17
  36. data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
  37. data/spec/processors/processor_spec.rb +36 -0
  38. data/spec/processors/shell_based_processor_spec.rb +19 -0
  39. data/spec/processors/video_spec.rb +40 -0
  40. data/spec/services/audio_derivatives_spec.rb +76 -0
  41. data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
  42. data/spec/services/retrieve_source_file_service_spec.rb +16 -12
  43. data/spec/units/derivatives_spec.rb +18 -26
  44. data/spec/units/io_decorator_spec.rb +33 -0
  45. data/spec/units/transcoding_spec.rb +109 -86
  46. metadata +42 -44
  47. data/lib/hydra/derivatives/audio.rb +0 -19
  48. data/lib/hydra/derivatives/document.rb +0 -56
  49. data/lib/hydra/derivatives/extract_metadata.rb +0 -27
  50. data/lib/hydra/derivatives/ffmpeg.rb +0 -31
  51. data/lib/hydra/derivatives/image.rb +0 -73
  52. data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
  53. data/lib/hydra/derivatives/processor.rb +0 -33
  54. data/lib/hydra/derivatives/railtie.rb +0 -9
  55. data/lib/hydra/derivatives/raw_image.rb +0 -45
  56. data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
  57. data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
  58. data/spec/units/extract_spec.rb +0 -22
  59. data/spec/units/processor_spec.rb +0 -61
  60. data/spec/units/shell_based_processor_spec.rb +0 -22
  61. data/spec/units/video_spec.rb +0 -50
@@ -1,33 +0,0 @@
1
- module Hydra
2
- module Derivatives
3
- class Processor
4
- attr_accessor :object, :source_name, :directives, :source_file_service, :output_file_service
5
-
6
- def initialize(obj, source_name, directives, opts={})
7
- self.object = obj
8
- self.source_name = source_name
9
- self.directives = directives
10
- self.source_file_service = opts.fetch(:source_file_service, Hydra::Derivatives.source_file_service)
11
- self.output_file_service = opts.fetch(:output_file_service, Hydra::Derivatives.output_file_service)
12
- end
13
-
14
- def process
15
- raise "Processor is an abstract class. Implement `process' on #{self.class.name}"
16
- end
17
-
18
- def output_file_id(name)
19
- [source_name, name].join('_')
20
- end
21
-
22
- # @deprecated Please use a PersistOutputFileService class to save an object
23
- def output_file
24
- raise NotImplementedError, "Processor is an abstract class. Utilize an implementation of a PersistOutputFileService class in #{self.class.name}"
25
- end
26
-
27
- def source_file
28
- @source_file ||= source_file_service.call(object, source_name)
29
- end
30
-
31
- end
32
- end
33
- end
@@ -1,9 +0,0 @@
1
- module Hydra
2
- module Derivative
3
- class Railtie < Rails::Railtie
4
- initializer 'hydra-derivative' do
5
- require 'hydra-file_characterization'
6
- end
7
- end
8
- end
9
- end
@@ -1,45 +0,0 @@
1
- require 'mini_magick'
2
-
3
- module Hydra
4
- module Derivatives
5
- class RawImage < Image
6
- class_attribute :timeout
7
-
8
- protected
9
-
10
- def create_image(destination_name, format, quality=nil)
11
- xfrm = load_image_transformer
12
- # Transpose format and scaling due to the fact that ImageMagick can
13
- # read but not write RAW files and this will otherwise cause many
14
- # cryptic segmentation faults
15
- xfrm.format(format)
16
- yield(xfrm) if block_given?
17
- xfrm.quality(quality.to_s) if quality
18
- write_image(destination_name, format, xfrm)
19
- remove_temp_files(xfrm)
20
- end
21
-
22
- # Delete any temp files that might clutter up the disk if
23
- # you are doing a batch or don't touch your temporary storage
24
- # for a long time
25
- def remove_temp_files(xfrm)
26
- xfrm.destroy!
27
- end
28
-
29
- # Override this method if you want a different transformer, or need to load the raw image from a different source (e.g.
30
- # external file).
31
- #
32
- # In this case always add an extension to help out MiniMagick
33
- # with RAW files
34
- def load_image_transformer
35
- extension = MIME::Types[source_file.mime_type].first.extensions.first
36
-
37
- if extension.present?
38
- MiniMagick::Image.read(source_file.content, ".#{extension}")
39
- else
40
- MiniMagick::Image.read(source_file.content)
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,81 +0,0 @@
1
- # An abstract class for asynchronous jobs that transcode files using FFMpeg
2
-
3
- require 'tmpdir'
4
- require 'open3'
5
-
6
- module Hydra
7
- module Derivatives
8
- module ShellBasedProcessor
9
- extend ActiveSupport::Concern
10
-
11
- included do
12
- class_attribute :timeout
13
- extend Open3
14
- end
15
-
16
- def process
17
- directives.each do |name, args|
18
- format = args[:format]
19
- raise ArgumentError, "You must provide the :format you want to transcode into. You provided #{args}" unless format
20
- # TODO if the source is in the correct format, we could just copy it and skip transcoding.
21
- output_file_name = args[:datastream] || output_file_id(name)
22
- encode_file(output_file_name, format, new_mime_type(format), options_for(format))
23
- end
24
- end
25
-
26
- # override this method in subclass if you want to provide specific options.
27
- # returns a hash of options that the specific processors use
28
- def options_for(format)
29
- {}
30
- end
31
-
32
- def encode_file(destination_name, file_suffix, mime_type, options)
33
- out_file = nil
34
- output_file = Dir::Tmpname.create(['sufia', ".#{file_suffix}"], Hydra::Derivatives.temp_file_base){}
35
- Hydra::Derivatives::TempfileService.create(source_file) do |f|
36
- self.class.encode(f.path, options, output_file)
37
- end
38
- out_file = Hydra::Derivatives::IoDecorator.new(File.open(output_file, "rb"))
39
- out_file.mime_type = mime_type
40
- output_file_service.call(object, out_file, destination_name)
41
- File.unlink(output_file)
42
- end
43
-
44
- module ClassMethods
45
-
46
- def execute(command)
47
- context = {}
48
- if timeout
49
- execute_with_timeout(timeout, command, context)
50
- else
51
- execute_without_timeout(command, context)
52
- end
53
- end
54
-
55
- def execute_with_timeout(timeout, command, context)
56
- begin
57
- status = Timeout::timeout(timeout) do
58
- execute_without_timeout(command, context)
59
- end
60
- rescue Timeout::Error => ex
61
- pid = context[:pid]
62
- Process.kill("KILL", pid)
63
- raise Hydra::Derivatives::TimeoutError, "Unable to execute command \"#{command}\"\nThe command took longer than #{timeout} seconds to execute"
64
- end
65
-
66
- end
67
-
68
- def execute_without_timeout(command, context)
69
- stdin, stdout, stderr, wait_thr = popen3(command)
70
- context[:pid] = wait_thr[:pid]
71
- stdin.close
72
- out = stdout.read
73
- stdout.close
74
- err = stderr.read
75
- stderr.close
76
- raise "Unable to execute command \"#{command}\"\n#{err}" unless wait_thr.value.success?
77
- end
78
- end
79
- end
80
- end
81
- end
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- module Hydra::Derivatives
4
- describe ExtractMetadata do
5
- let(:class_with_metadata_extraction) do
6
- Class.new do
7
- attr_reader :content, :mime_type, :uri
8
-
9
- def initialize(options = {})
10
- @content = options.fetch(:content, '')
11
- @mime_uype = options.fetch(:mime_type, nil)
12
- @uri = 'http://example.com/pid/123'
13
- end
14
-
15
- include Hydra::Derivatives::ExtractMetadata
16
- def has_content?; content.present?; end
17
- end
18
- end
19
-
20
- let(:initialization_options) { {content: 'abc', mime_type: 'text/plain'} }
21
- subject { class_with_metadata_extraction.new(initialization_options) }
22
-
23
- context '#extract_metadata' do
24
- context 'without content' do
25
- let(:initialization_options) { {content: '', mime_type: 'text/plain'} }
26
- it 'should be nil' do
27
- expect(subject.extract_metadata).to be_nil
28
- end
29
- end
30
-
31
- context 'with content', unless: ENV['TRAVIS'] == 'true' do
32
- let(:mime_type) { 'image/jpeg' }
33
- it 'should get some XML' do
34
- expect(subject.extract_metadata).to match "<identity format=\"Plain text\" mimetype=\"text/plain\""
35
- end
36
- end
37
- end
38
- end
39
- end
@@ -1,22 +0,0 @@
1
- require "spec_helper"
2
-
3
- class ExtractThing < ActiveFedora::File
4
- include Hydra::Derivatives::ExtractMetadata
5
- attr_accessor :pid
6
- end
7
-
8
- describe Hydra::Derivatives::ExtractMetadata, :unless => $in_travis do
9
- let(:subject) { ExtractThing.new('http://example.com/foo') }
10
- let(:attachment) { File.open(File.expand_path('../../fixtures/world.png', __FILE__))}
11
-
12
- describe "Image Content" do
13
- it "should get a mime type" do
14
- subject.content = attachment
15
- subject.pid = "abc"
16
- xml = subject.extract_metadata
17
- doc = Nokogiri::HTML(xml)
18
- identity = doc.xpath('//identity').first
19
- expect(identity.attr('mimetype')).to eq('image/png')
20
- end
21
- end
22
- end
@@ -1,61 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Derivatives::Processor do
4
-
5
- let(:object) { "Fake Object" }
6
- let(:source_name) { 'content' }
7
- let(:directives) { { thumb: "100x100>" } }
8
-
9
- subject { Hydra::Derivatives::Processor.new(object, source_name, directives)}
10
-
11
- describe "source_file" do
12
- it "relies on the source_file_service" do
13
- expect(subject.source_file_service).to receive(:call).with(object, source_name)
14
- subject.source_file
15
- end
16
- end
17
-
18
- describe "output_file_service" do
19
- let(:custom_output_file_service) { "fake service" }
20
- let(:another_custom_output_file_service) { "another fake service" }
21
-
22
- context "as a global configuration setting" do
23
- before do
24
- allow(Hydra::Derivatives).to receive(:output_file_service).and_return(custom_output_file_service)
25
- end
26
- it "utilizes the default output file service" do
27
- expect(subject.output_file_service).to eq(custom_output_file_service)
28
- end
29
- end
30
-
31
- context "as an instance level configuration setting" do
32
- subject { Hydra::Derivatives::Processor.new(object, source_name, directives, output_file_service: another_custom_output_file_service)}
33
- it "accepts a custom output file service as an option" do
34
- expect(subject.output_file_service).to eq(another_custom_output_file_service)
35
- end
36
- end
37
- end
38
-
39
- describe "source_file_service" do
40
-
41
- let(:custom_source_file_service) { "fake service" }
42
- let(:another_custom_source_file_service) { "another fake service" }
43
-
44
- context "as a global configuration setting" do
45
- before do
46
- allow(Hydra::Derivatives).to receive(:source_file_service).and_return(custom_source_file_service)
47
- end
48
- it "utilizes the default source file service" do
49
- expect(subject.source_file_service).to eq(custom_source_file_service)
50
- end
51
- end
52
-
53
- context "as an instance level configuration setting" do
54
- subject { Hydra::Derivatives::Processor.new(object, source_name, directives, source_file_service: another_custom_source_file_service)}
55
- it "accepts a custom source file service as an option" do
56
- expect(subject.source_file_service).to eq(another_custom_source_file_service)
57
- end
58
- end
59
- end
60
-
61
- end
@@ -1,22 +0,0 @@
1
- require 'spec_helper'
2
-
3
-
4
- describe Hydra::Derivatives::ShellBasedProcessor do
5
- class TestProcessor <
6
- include Hydra::Derivatives::ShellBasedProcessor
7
- end
8
-
9
- let (:processor) {TestProcessor.new}
10
-
11
- describe "has expected interface" do
12
-
13
- describe "options_for" do
14
- it "returns a hash" do
15
- expect(processor.options_for("a")).to be_a Hash
16
- end
17
- end
18
- end
19
- end
20
-
21
-
22
-
@@ -1,50 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Derivatives::Video::Processor do
4
- subject { described_class.new(double(:obj), 'content', directives)}
5
-
6
- describe ".config" do
7
- before do
8
- @original_config = described_class.config.dup
9
- described_class.config.mpeg4.codec = "-vcodec mpeg4 -acodec aac -strict -2"
10
- end
11
-
12
- after { described_class.config = @original_config }
13
- let(:directives) {{ thumb: { format: "mp4", datastream: 'thumbnail' } }}
14
-
15
- it "should be configurable" do
16
- expect(subject).to receive(:encode_file).with("thumbnail", "mp4", 'video/mp4', {Hydra::Derivatives::Ffmpeg::OUTPUT_OPTIONS =>"-s 320x240 -vcodec mpeg4 -acodec aac -strict -2 -g 30 -b:v 345k -ac 2 -ab 96k -ar 44100", Hydra::Derivatives::Ffmpeg::INPUT_OPTIONS=>""})
17
- subject.process
18
- end
19
- end
20
-
21
- context "when arguments are passed as a hash" do
22
- context "and datastream is provided as an argument" do
23
- let(:directives) {{ thumb: { format: "webm", datastream: 'thumbnail' } }}
24
- it "should create a datastream with the specified name" do
25
- expect(subject).to receive(:encode_file).with("thumbnail", "webm", 'video/webm', {Hydra::Derivatives::Ffmpeg::OUTPUT_OPTIONS =>"-s 320x240 -vcodec libvpx -acodec libvorbis -g 30 -b:v 345k -ac 2 -ab 96k -ar 44100", Hydra::Derivatives::Ffmpeg::INPUT_OPTIONS=>""})
26
- subject.process
27
-
28
- end
29
- end
30
-
31
- context "and datastream is not provided as an argument" do
32
- let(:directives) {{ thumb: { format: "webm" } }}
33
- it "should create a datastream and infer the name" do
34
- expect(subject).to receive(:encode_file).with("content_thumb", "webm", 'video/webm', {Hydra::Derivatives::Ffmpeg::OUTPUT_OPTIONS =>"-s 320x240 -vcodec libvpx -acodec libvorbis -g 30 -b:v 345k -ac 2 -ab 96k -ar 44100", Hydra::Derivatives::Ffmpeg::INPUT_OPTIONS=>""})
35
- subject.process
36
-
37
- end
38
- end
39
-
40
- context "and jpg is requested" do
41
- let(:directives) {{ thumb: { format: 'jpg' , datastream: 'thumbnail'} }}
42
- it "should create a datastream and infer the name" do
43
- expect(subject).to receive(:encode_file).with("thumbnail", "jpg", "image/jpeg", {:output_options=>"-s 320x240 -vcodec mjpeg -vframes 1 -an -f rawvideo", :input_options=>" -itsoffset -2"})
44
- subject.process
45
-
46
- end
47
- end
48
- end
49
- end
50
-