hydra-derivatives 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/History.md +43 -0
- data/README.md +23 -38
- data/VERSION +1 -1
- data/hydra-derivatives.gemspec +0 -1
- data/lib/hydra/derivatives.rb +23 -123
- data/lib/hydra/derivatives/io_decorator.rb +7 -1
- data/lib/hydra/derivatives/processors.rb +19 -0
- data/lib/hydra/derivatives/processors/audio.rb +6 -0
- data/lib/hydra/derivatives/processors/document.rb +28 -0
- data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
- data/lib/hydra/derivatives/processors/full_text.rb +60 -0
- data/lib/hydra/derivatives/processors/image.rb +58 -0
- data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
- data/lib/hydra/derivatives/processors/processor.rb +38 -0
- data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
- data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
- data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
- data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
- data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
- data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
- data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
- data/lib/hydra/derivatives/runners/runner.rb +52 -0
- data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
- data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
- data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
- data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
- data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
- data/spec/processors/full_text.rb +61 -0
- data/spec/{units → processors}/image_spec.rb +7 -17
- data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
- data/spec/processors/processor_spec.rb +36 -0
- data/spec/processors/shell_based_processor_spec.rb +19 -0
- data/spec/processors/video_spec.rb +40 -0
- data/spec/services/audio_derivatives_spec.rb +76 -0
- data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
- data/spec/services/retrieve_source_file_service_spec.rb +16 -12
- data/spec/units/derivatives_spec.rb +18 -26
- data/spec/units/io_decorator_spec.rb +33 -0
- data/spec/units/transcoding_spec.rb +109 -86
- metadata +42 -44
- data/lib/hydra/derivatives/audio.rb +0 -19
- data/lib/hydra/derivatives/document.rb +0 -56
- data/lib/hydra/derivatives/extract_metadata.rb +0 -27
- data/lib/hydra/derivatives/ffmpeg.rb +0 -31
- data/lib/hydra/derivatives/image.rb +0 -73
- data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
- data/lib/hydra/derivatives/processor.rb +0 -33
- data/lib/hydra/derivatives/railtie.rb +0 -9
- data/lib/hydra/derivatives/raw_image.rb +0 -45
- data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
- data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
- data/spec/units/extract_spec.rb +0 -22
- data/spec/units/processor_spec.rb +0 -61
- data/spec/units/shell_based_processor_spec.rb +0 -22
- data/spec/units/video_spec.rb +0 -50
@@ -1,33 +0,0 @@
|
|
1
|
-
module Hydra
  module Derivatives
    # Abstract base class for derivative processors.
    #
    # Holds the object being processed, the name of its source file, the
    # directives describing the derivatives to create, and the two services
    # used to read the source file and persist the output. Subclasses are
    # expected to implement #process.
    class Processor
      attr_accessor :object, :source_name, :directives, :source_file_service, :output_file_service

      # @param obj the object the source file is retrieved from
      # @param source_name name handed to the source file service; also used
      #   as the prefix of generated output file ids
      # @param directives the derivative directives (stored as given)
      # @param opts [Hash] optional overrides
      # @option opts :source_file_service overrides Hydra::Derivatives.source_file_service
      # @option opts :output_file_service overrides Hydra::Derivatives.output_file_service
      def initialize(obj, source_name, directives, opts = {})
        self.object = obj
        self.source_name = source_name
        self.directives = directives
        # Block form of fetch: the global default is only looked up when the
        # caller did not pass its own service.
        self.source_file_service = opts.fetch(:source_file_service) { Hydra::Derivatives.source_file_service }
        self.output_file_service = opts.fetch(:output_file_service) { Hydra::Derivatives.output_file_service }
      end

      # Abstract hook; always raises here.
      #
      # @raise [RuntimeError] naming the class that failed to implement it
      def process
        raise "Processor is an abstract class. Implement `process' on #{self.class.name}"
      end

      # Builds an output file id by joining the source name and the given
      # derivative name with an underscore.
      #
      # @param name the derivative name
      # @return [String]
      def output_file_id(name)
        [source_name, name].join('_')
      end

      # @deprecated Please use a PersistOutputFileService class to save an object
      # @raise [NotImplementedError] always
      def output_file
        raise NotImplementedError, "Processor is an abstract class. Utilize an implementation of a PersistOutputFileService class in #{self.class.name}"
      end

      # The source file, obtained once from the source file service and then
      # memoized.
      def source_file
        @source_file ||= source_file_service.call(object, source_name)
      end
    end
  end
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'mini_magick'
|
2
|
-
|
3
|
-
module Hydra
  module Derivatives
    # Image processor variant for RAW source files.
    #
    # Relies on `write_image` and `source_file`, which are not defined in this
    # class (presumably inherited from Image -- confirm against that class).
    class RawImage < Image
      class_attribute :timeout

      protected

      # Converts the source image and writes it out under destination_name.
      #
      # @param destination_name name passed through to write_image
      # @param format target format, applied before any caller-supplied changes
      # @param quality optional quality setting; applied only when given
      # @yield [xfrm] the image transformer, for additional operations
      def create_image(destination_name, format, quality = nil)
        xfrm = load_image_transformer
        begin
          # Transpose format and scaling due to the fact that ImageMagick can
          # read but not write RAW files and this will otherwise cause many
          # cryptic segmentation faults
          xfrm.format(format)
          yield(xfrm) if block_given?
          xfrm.quality(quality.to_s) if quality
          write_image(destination_name, format, xfrm)
        ensure
          # Clean up even when conversion or persistence fails, so failed
          # runs do not leave temp files behind.
          remove_temp_files(xfrm)
        end
      end

      # Delete any temp files that might clutter up the disk if
      # you are doing a batch or don't touch your temporary storage
      # for a long time
      def remove_temp_files(xfrm)
        xfrm.destroy!
      end

      # Override this method if you want a different transformer, or
      # need to load the raw image from a different source (e.g.
      # external file).
      #
      # In this case always add an extension to help out MiniMagick
      # with RAW files
      def load_image_transformer
        # The lookup yields nothing for an unregistered mime type; fall back
        # to reading without an extension hint instead of raising.
        mime = MIME::Types[source_file.mime_type].first
        extension = mime && mime.extensions.first

        if extension.present?
          MiniMagick::Image.read(source_file.content, ".#{extension}")
        else
          MiniMagick::Image.read(source_file.content)
        end
      end
    end
  end
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# An abstract class for asynchronous jobs that transcode files using FFMpeg
|
2
|
-
|
3
|
-
require 'tmpdir'
|
4
|
-
require 'open3'
|
5
|
-
|
6
|
-
module Hydra
  module Derivatives
    # Mixin for processors that create derivatives by running an external
    # command.
    #
    # The including class is expected to provide `directives`, `object`,
    # `source_file`, `output_file_service`, `output_file_id` and
    # `new_mime_type`, plus a class-level `encode(path, options, output_file)`;
    # none of these are defined in this module.
    module ShellBasedProcessor
      extend ActiveSupport::Concern

      included do
        class_attribute :timeout
        extend Open3
      end

      # Encodes one output file per directive.
      #
      # @raise [ArgumentError] when a directive has no :format
      def process
        directives.each do |name, args|
          format = args[:format]
          raise ArgumentError, "You must provide the :format you want to transcode into. You provided #{args}" unless format
          # TODO if the source is in the correct format, we could just copy it and skip transcoding.
          output_file_name = args[:datastream] || output_file_id(name)
          encode_file(output_file_name, format, new_mime_type(format), options_for(format))
        end
      end

      # override this method in subclass if you want to provide specific options.
      # returns a hash of options that the specific processors use
      def options_for(format)
        {}
      end

      # Encodes the source file into a temporary file, hands the result to the
      # output file service, and removes the temporary file afterwards.
      #
      # @param destination_name name passed to the output file service
      # @param file_suffix extension given to the temporary output file
      # @param mime_type mime type assigned to the decorated output IO
      # @param options options forwarded to the class-level encode
      def encode_file(destination_name, file_suffix, mime_type, options)
        # Tmpname.create with an empty block only reserves a unique path.
        output_file = Dir::Tmpname.create(['sufia', ".#{file_suffix}"], Hydra::Derivatives.temp_file_base) {}
        begin
          Hydra::Derivatives::TempfileService.create(source_file) do |f|
            self.class.encode(f.path, options, output_file)
          end
          # Block form closes the handle once the output has been persisted.
          File.open(output_file, "rb") do |io|
            out_file = Hydra::Derivatives::IoDecorator.new(io)
            out_file.mime_type = mime_type
            output_file_service.call(object, out_file, destination_name)
          end
        ensure
          # Remove the temp output even when encoding or persisting fails.
          File.unlink(output_file) if File.exist?(output_file)
        end
      end

      module ClassMethods
        # Runs the command, enforcing the class-level timeout when one is set.
        #
        # @param command the command line handed to popen3
        def execute(command)
          # Carries the child pid out of execute_without_timeout so it can be
          # killed when the timeout fires.
          context = {}
          if timeout
            execute_with_timeout(timeout, command, context)
          else
            execute_without_timeout(command, context)
          end
        end

        # Runs the command and kills it if it exceeds the given timeout.
        #
        # @raise [Hydra::Derivatives::TimeoutError] when the timeout elapses
        def execute_with_timeout(timeout, command, context)
          Timeout.timeout(timeout) do
            execute_without_timeout(command, context)
          end
        rescue Timeout::Error
          pid = context[:pid]
          # The pid is missing if the timeout fired before the child was spawned.
          if pid
            begin
              Process.kill("KILL", pid)
            rescue Errno::ESRCH
              # The child exited on its own in the meantime; nothing to kill.
            end
          end
          raise Hydra::Derivatives::TimeoutError, "Unable to execute command \"#{command}\"\nThe command took longer than #{timeout} seconds to execute"
        end

        # Runs the command to completion and records its pid in context[:pid].
        #
        # @raise [RuntimeError] with the captured stderr when the command
        #   exits unsuccessfully
        def execute_without_timeout(command, context)
          stdin, stdout, stderr, wait_thr = popen3(command)
          context[:pid] = wait_thr[:pid]
          stdin.close
          # Drain stderr concurrently: reading stdout to EOF first can
          # deadlock once the child fills the stderr pipe.
          err_reader = Thread.new { stderr.read }
          stdout.read
          err = err_reader.value
          raise "Unable to execute command \"#{command}\"\n#{err}" unless wait_thr.value.success?
        ensure
          err_reader.kill if err_reader && err_reader.alive?
          [stdin, stdout, stderr].compact.each { |io| io.close unless io.closed? }
        end
      end
    end
  end
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
module Hydra::Derivatives
  # Exercises ExtractMetadata through a minimal anonymous host class that
  # supplies the content, mime_type and uri readers plus has_content?.
  describe ExtractMetadata do
    let(:class_with_metadata_extraction) do
      Class.new do
        attr_reader :content, :mime_type, :uri

        def initialize(options = {})
          @content = options.fetch(:content, '')
          # Must match the attr_reader name so the mime_type reader returns
          # the supplied value.
          @mime_type = options.fetch(:mime_type, nil)
          @uri = 'http://example.com/pid/123'
        end

        include Hydra::Derivatives::ExtractMetadata
        def has_content?; content.present?; end
      end
    end

    let(:initialization_options) { {content: 'abc', mime_type: 'text/plain'} }
    subject { class_with_metadata_extraction.new(initialization_options) }

    context '#extract_metadata' do
      context 'without content' do
        let(:initialization_options) { {content: '', mime_type: 'text/plain'} }
        it 'should be nil' do
          expect(subject.extract_metadata).to be_nil
        end
      end

      # Skipped when the TRAVIS environment variable is 'true'.
      context 'with content', unless: ENV['TRAVIS'] == 'true' do
        it 'should get some XML' do
          expect(subject.extract_metadata).to match "<identity format=\"Plain text\" mimetype=\"text/plain\""
        end
      end
    end
  end
end
|
data/spec/units/extract_spec.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
# Minimal ActiveFedora::File subclass that mixes in ExtractMetadata for the
# spec below.
class ExtractThing < ActiveFedora::File
  include Hydra::Derivatives::ExtractMetadata
  attr_accessor :pid
end

# Skipped when $in_travis is set (it is not assigned in this file; presumably
# spec_helper does so).
describe Hydra::Derivatives::ExtractMetadata, :unless => $in_travis do
  subject { ExtractThing.new('http://example.com/foo') }
  let(:attachment) { File.open(File.expand_path('../../fixtures/world.png', __FILE__)) }

  # Release the fixture file handle once the example is done.
  after { attachment.close unless attachment.closed? }

  describe "Image Content" do
    it "should get a mime type" do
      subject.content = attachment
      subject.pid = "abc"
      xml = subject.extract_metadata
      # NOTE(review): the output is parsed with the HTML parser, not the XML
      # one; left unchanged because the bare '//identity' lookup may depend
      # on it -- confirm before switching parsers.
      doc = Nokogiri::HTML(xml)
      identity = doc.xpath('//identity').first
      expect(identity.attr('mimetype')).to eq('image/png')
    end
  end
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
# Covers how Processor resolves its source and output file services: from the
# global Hydra::Derivatives setting by default, or from a per-instance option.
describe Hydra::Derivatives::Processor do
  let(:object)      { "Fake Object" }
  let(:source_name) { 'content' }
  let(:directives)  { { thumb: "100x100>" } }
  let(:options)     { {} }

  subject { described_class.new(object, source_name, directives, options) }

  describe "source_file" do
    it "relies on the source_file_service" do
      service = subject.source_file_service
      expect(service).to receive(:call).with(object, source_name)
      subject.source_file
    end
  end

  describe "output_file_service" do
    let(:custom_output_file_service) { "fake service" }
    let(:another_custom_output_file_service) { "another fake service" }

    context "as a global configuration setting" do
      before do
        allow(Hydra::Derivatives).to receive(:output_file_service) { custom_output_file_service }
      end

      it "utilizes the default output file service" do
        expect(subject.output_file_service).to eq(custom_output_file_service)
      end
    end

    context "as an instance level configuration setting" do
      let(:options) { { output_file_service: another_custom_output_file_service } }

      it "accepts a custom output file service as an option" do
        expect(subject.output_file_service).to eq(another_custom_output_file_service)
      end
    end
  end

  describe "source_file_service" do
    let(:custom_source_file_service) { "fake service" }
    let(:another_custom_source_file_service) { "another fake service" }

    context "as a global configuration setting" do
      before do
        allow(Hydra::Derivatives).to receive(:source_file_service) { custom_source_file_service }
      end

      it "utilizes the default source file service" do
        expect(subject.source_file_service).to eq(custom_source_file_service)
      end
    end

    context "as an instance level configuration setting" do
      let(:options) { { source_file_service: another_custom_source_file_service } }

      it "accepts a custom source file service as an option" do
        expect(subject.source_file_service).to eq(another_custom_source_file_service)
      end
    end
  end
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
|
4
|
-
# Checks the interface that ShellBasedProcessor adds to an including class.
describe Hydra::Derivatives::ShellBasedProcessor do
  # Anonymous host class: it has no superclass clause (the previous
  # `class TestProcessor <` turned the include call into the superclass
  # expression) and defines no global constant that could leak into other specs.
  let(:test_processor_class) do
    Class.new do
      include Hydra::Derivatives::ShellBasedProcessor
    end
  end

  let(:processor) { test_processor_class.new }

  describe "has expected interface" do
    describe "options_for" do
      it "returns a hash" do
        expect(processor.options_for("a")).to be_a Hash
      end
    end
  end
end
|
20
|
-
|
21
|
-
|
22
|
-
|
data/spec/units/video_spec.rb
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
# Verifies the arguments Video::Processor#process passes to encode_file for
# each requested format.
describe Hydra::Derivatives::Video::Processor do
  subject { described_class.new(double(:obj), 'content', directives) }

  # Options expected for webm output; shared by both webm examples.
  let(:webm_options) do
    {
      Hydra::Derivatives::Ffmpeg::OUTPUT_OPTIONS => "-s 320x240 -vcodec libvpx -acodec libvorbis -g 30 -b:v 345k -ac 2 -ab 96k -ar 44100",
      Hydra::Derivatives::Ffmpeg::INPUT_OPTIONS => ""
    }
  end

  describe ".config" do
    before do
      # NOTE(review): dup is shallow; if config.mpeg4 is a nested object shared
      # with the copy, the codec change below may survive the restore -- confirm.
      @original_config = described_class.config.dup
      described_class.config.mpeg4.codec = "-vcodec mpeg4 -acodec aac -strict -2"
    end

    after { described_class.config = @original_config }
    let(:directives) { { thumb: { format: "mp4", datastream: 'thumbnail' } } }

    it "should be configurable" do
      expect(subject).to receive(:encode_file).with("thumbnail", "mp4", 'video/mp4', {Hydra::Derivatives::Ffmpeg::OUTPUT_OPTIONS => "-s 320x240 -vcodec mpeg4 -acodec aac -strict -2 -g 30 -b:v 345k -ac 2 -ab 96k -ar 44100", Hydra::Derivatives::Ffmpeg::INPUT_OPTIONS => ""})
      subject.process
    end
  end

  context "when arguments are passed as a hash" do
    context "and datastream is provided as an argument" do
      let(:directives) { { thumb: { format: "webm", datastream: 'thumbnail' } } }

      it "should create a datastream with the specified name" do
        expect(subject).to receive(:encode_file).with("thumbnail", "webm", 'video/webm', webm_options)
        subject.process
      end
    end

    context "and datastream is not provided as an argument" do
      let(:directives) { { thumb: { format: "webm" } } }

      it "should create a datastream and infer the name" do
        expect(subject).to receive(:encode_file).with("content_thumb", "webm", 'video/webm', webm_options)
        subject.process
      end
    end

    context "and jpg is requested" do
      let(:directives) { { thumb: { format: 'jpg', datastream: 'thumbnail' } } }

      # The datastream name is given explicitly here, so nothing is inferred.
      it "should create a datastream with the specified name" do
        expect(subject).to receive(:encode_file).with("thumbnail", "jpg", "image/jpeg", {:output_options => "-s 320x240 -vcodec mjpeg -vframes 1 -an -f rawvideo", :input_options => " -itsoffset -2"})
        subject.process
      end
    end
  end
end
|
50
|
-
|