hydra-derivatives 2.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/History.md +43 -0
- data/README.md +23 -38
- data/VERSION +1 -1
- data/hydra-derivatives.gemspec +0 -1
- data/lib/hydra/derivatives.rb +23 -123
- data/lib/hydra/derivatives/io_decorator.rb +7 -1
- data/lib/hydra/derivatives/processors.rb +19 -0
- data/lib/hydra/derivatives/processors/audio.rb +6 -0
- data/lib/hydra/derivatives/processors/document.rb +28 -0
- data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
- data/lib/hydra/derivatives/processors/full_text.rb +60 -0
- data/lib/hydra/derivatives/processors/image.rb +58 -0
- data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
- data/lib/hydra/derivatives/processors/processor.rb +38 -0
- data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
- data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
- data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
- data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
- data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
- data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
- data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
- data/lib/hydra/derivatives/runners/runner.rb +52 -0
- data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
- data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
- data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
- data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
- data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
- data/spec/processors/full_text.rb +61 -0
- data/spec/{units → processors}/image_spec.rb +7 -17
- data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
- data/spec/processors/processor_spec.rb +36 -0
- data/spec/processors/shell_based_processor_spec.rb +19 -0
- data/spec/processors/video_spec.rb +40 -0
- data/spec/services/audio_derivatives_spec.rb +76 -0
- data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
- data/spec/services/retrieve_source_file_service_spec.rb +16 -12
- data/spec/units/derivatives_spec.rb +18 -26
- data/spec/units/io_decorator_spec.rb +33 -0
- data/spec/units/transcoding_spec.rb +109 -86
- metadata +42 -44
- data/lib/hydra/derivatives/audio.rb +0 -19
- data/lib/hydra/derivatives/document.rb +0 -56
- data/lib/hydra/derivatives/extract_metadata.rb +0 -27
- data/lib/hydra/derivatives/ffmpeg.rb +0 -31
- data/lib/hydra/derivatives/image.rb +0 -73
- data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
- data/lib/hydra/derivatives/processor.rb +0 -33
- data/lib/hydra/derivatives/railtie.rb +0 -9
- data/lib/hydra/derivatives/raw_image.rb +0 -45
- data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
- data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
- data/spec/units/extract_spec.rb +0 -22
- data/spec/units/processor_spec.rb +0 -61
- data/spec/units/shell_based_processor_spec.rb +0 -22
- data/spec/units/video_spec.rb +0 -50
@@ -1,6 +1,6 @@
|
|
1
|
-
module Hydra::Derivatives
|
1
|
+
module Hydra::Derivatives::Processors
|
2
2
|
module Video
|
3
|
-
class Processor < Hydra::Derivatives::Processor
|
3
|
+
class Processor < Hydra::Derivatives::Processors::Processor
|
4
4
|
include Ffmpeg
|
5
5
|
|
6
6
|
class_attribute :config
|
@@ -36,12 +36,6 @@ module Hydra::Derivatives
|
|
36
36
|
raise ArgumentError, "Unknown format `#{format}'"
|
37
37
|
end
|
38
38
|
end
|
39
|
-
|
40
|
-
def new_mime_type(format)
|
41
|
-
format == "jpg" ? "image/jpeg" : "video/#{format}"
|
42
|
-
end
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
46
|
-
|
47
|
-
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class FullTextExtract < ImageDerivatives
|
3
|
+
# Adds format: 'txt' as the default to each of the directives
|
4
|
+
def self.transform_directives(options)
|
5
|
+
options.each do |directive|
|
6
|
+
directive.reverse_merge!(format: 'txt')
|
7
|
+
end
|
8
|
+
options
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.processor_class
|
12
|
+
Processors::FullText
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class ImageDerivatives < Runner
|
3
|
+
|
4
|
+
# Adds format: 'png' as the default to each of the directives
|
5
|
+
def self.transform_directives(options)
|
6
|
+
options.each do |directive|
|
7
|
+
directive.reverse_merge!(format: 'png')
|
8
|
+
end
|
9
|
+
options
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.processor_class
|
13
|
+
Processors::Image
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class Jpeg2kImageDerivatives < Runner
|
3
|
+
# # Adds format: 'png' as the default to each of the directives
|
4
|
+
# def self.transform_directives(options)
|
5
|
+
# options.each do |directive|
|
6
|
+
# directive.reverse_merge!(format: 'png')
|
7
|
+
# end
|
8
|
+
# options
|
9
|
+
# end
|
10
|
+
|
11
|
+
def self.processor_class
|
12
|
+
Processors::Jpeg2kImage
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Hydra
|
2
|
+
module Derivatives
|
3
|
+
class Runner
|
4
|
+
|
5
|
+
def self.output_file_service= val
|
6
|
+
@output_file_service = val
|
7
|
+
end
|
8
|
+
|
9
|
+
# Use the output service configured for this class or default to the global setting
|
10
|
+
def self.output_file_service
|
11
|
+
@output_file_service || Hydra::Derivatives.output_file_service
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.source_file_service= val
|
15
|
+
@source_file_service = val
|
16
|
+
end
|
17
|
+
|
18
|
+
# Use the source service configured for this class or default to the global setting
|
19
|
+
def self.source_file_service
|
20
|
+
@source_file_service || Hydra::Derivatives.source_file_service
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
# @param [String, ActiveFedora::Base] object_or_filename path to the source file, or an object
|
25
|
+
# @param [Hash] options options to pass to the encoder
|
26
|
+
# @option options [Array] :outputs a list of desired outputs, each entry is a hash that has :label (optional), :format and :url
|
27
|
+
def self.create(object_or_filename, options)
|
28
|
+
source_file(object_or_filename, options) do |f|
|
29
|
+
transform_directives(options.delete(:outputs)).each do |instructions|
|
30
|
+
processor_class.new(f.path,
|
31
|
+
instructions.merge(source_file_service: source_file_service),
|
32
|
+
output_file_service: output_file_service).process
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Override this method if you need to add any defaults
|
38
|
+
def self.transform_directives(options)
|
39
|
+
options
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.source_file(object_or_filename, options, &block)
|
43
|
+
source_file_service.call(object_or_filename, options, &block)
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.processor_class
|
47
|
+
raise "Overide the processor_class method in a sub class"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
@@ -2,7 +2,7 @@ module Hydra::Derivatives
|
|
2
2
|
# This Service is an implementation of the Hydra::Derivatives::PersistOutputFileService
|
3
3
|
# It supports basic contained files, which is the behavior associated with Fedora 3 file datastreams that were migrated to Fedora 4
|
4
4
|
# and, at the time that this class was authored, corresponds to the behavior of ActiveFedora::Base.attach_file and ActiveFedora::Base.attached_files
|
5
|
-
### Rename this
|
5
|
+
### Rename this
|
6
6
|
class PersistBasicContainedOutputFileService < PersistOutputFileService
|
7
7
|
|
8
8
|
# This method conforms to the signature of the .call method on Hydra::Derivatives::PersistOutputFileService
|
@@ -10,16 +10,31 @@ module Hydra::Derivatives
|
|
10
10
|
#
|
11
11
|
# NOTE: Uses basic containment. If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
|
12
12
|
#
|
13
|
-
# @param [
|
14
|
-
# @param [
|
15
|
-
#
|
16
|
-
|
17
|
-
|
13
|
+
# @param [#read] stream the data to be persisted
|
14
|
+
# @param [Hash] directives directions which can be used to determine where to persist to.
|
15
|
+
# @option directives [String] url This can determine the path of the object.
|
16
|
+
def self.call(stream, directives)
|
17
|
+
file = Hydra::Derivatives::IoDecorator.new(stream, new_mime_type(directives.fetch(:format)))
|
18
18
|
o_name = determine_original_name(file)
|
19
19
|
m_type = determine_mime_type(file)
|
20
|
-
|
21
|
-
|
20
|
+
uri = URI(directives.fetch(:url))
|
21
|
+
raise ArgumentError, "#{uri} is not an http uri" unless uri.scheme == 'http'
|
22
|
+
remote_file = ActiveFedora::File.new(uri.to_s)
|
23
|
+
remote_file.content = file
|
24
|
+
remote_file.mime_type = m_type
|
25
|
+
remote_file.original_name = o_name
|
26
|
+
remote_file.save
|
22
27
|
end
|
23
28
|
|
29
|
+
def self.new_mime_type(format)
|
30
|
+
case format
|
31
|
+
when 'mp4'
|
32
|
+
'video/mp4' # default is application/mp4
|
33
|
+
when 'webm'
|
34
|
+
'video/webm' # default is audio/webm
|
35
|
+
else
|
36
|
+
MIME::Types.type_for(format).first.to_s
|
37
|
+
end
|
38
|
+
end
|
24
39
|
end
|
25
40
|
end
|
@@ -3,11 +3,10 @@ module Hydra::Derivatives
|
|
3
3
|
|
4
4
|
# Persists the file within the object at destination_name. Uses basic containment.
|
5
5
|
# If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
|
6
|
-
# @param [
|
7
|
-
# @param [
|
8
|
-
# @
|
9
|
-
|
10
|
-
def self.call(object, file, destination_path)
|
6
|
+
# @param [String] file_path the path to the file to be added
|
7
|
+
# @param [Hash] directives directions which can be used to determine where to persist to.
|
8
|
+
# @option directives [String] url This can determine the path of the object.
|
9
|
+
def self.call(file_path, directives)
|
11
10
|
raise NotImplementedError, "PersistOutputFileService is an abstract class. Implement `call' on #{self.class.name}"
|
12
11
|
end
|
13
12
|
|
@@ -2,11 +2,13 @@ module Hydra::Derivatives
|
|
2
2
|
class RetrieveSourceFileService
|
3
3
|
|
4
4
|
# Retrieves the source
|
5
|
-
# @param [
|
6
|
-
# @param [
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
# @param [ActiveFedora::Base] object the source file is attached to
|
6
|
+
# @param [Hash] options
|
7
|
+
# @option options [Symbol] :source a method that can be called on the object to retrieve the source file
|
8
|
+
# @yield [Tempfile] a temporary source file that has a lifetime of the block
|
9
|
+
def self.call(object, options, &block)
|
10
|
+
source_name = options.fetch(:source)
|
11
|
+
Hydra::Derivatives::TempfileService.create(object.send(source_name), &block)
|
10
12
|
end
|
11
13
|
end
|
12
|
-
end
|
14
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hydra::Derivatives::Processors::FullText do
|
4
|
+
let(:file_path) { File.join(fixture_path, 'test.docx') }
|
5
|
+
let(:directives) { { format: 'txt', url: 'http://localhost:8983/fedora/rest/dev/1234/ogg' } }
|
6
|
+
let(:processor) { described_class.new(file_path, directives) }
|
7
|
+
|
8
|
+
describe "process" do
|
9
|
+
subject { processor.process }
|
10
|
+
|
11
|
+
context "when it is successful" do
|
12
|
+
before do
|
13
|
+
allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
|
14
|
+
end
|
15
|
+
it { is_expected.to be true }
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'extracts fulltext and stores the results' do
|
19
|
+
expect(processor.output_file_service).to receive(:call).with(/Project Charter for E-Content Delivery Platform Review/, directives)
|
20
|
+
processor.process
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
describe "fetch" do
|
25
|
+
subject { processor.send(:fetch) }
|
26
|
+
let(:request) { double }
|
27
|
+
let(:response_body) { 'returned by Solr' }
|
28
|
+
let(:resp) { double(code: '200', body: response_body) }
|
29
|
+
let(:uri) { URI('http://example.com:99/solr/update') }
|
30
|
+
|
31
|
+
before do
|
32
|
+
allow(processor).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
|
33
|
+
allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
|
34
|
+
end
|
35
|
+
|
36
|
+
context "that is successful" do
|
37
|
+
let(:resp) { double(code: '200', body: response_body) }
|
38
|
+
it "calls the extraction service" do
|
39
|
+
expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
|
40
|
+
expect(subject).to eq response_body
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context "that fails" do
|
45
|
+
let(:resp) { double(code: '500', body: response_body) }
|
46
|
+
it "raises an error" do
|
47
|
+
expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
|
48
|
+
expect { subject }.to raise_error RuntimeError, /Solr Extract service was unsuccessful. 'http:\/\/example\.com:99\/solr\/update' returned code 500 for .*spec\/fixtures\/test.docx\nreturned by Solr/
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "uri" do
|
54
|
+
subject { processor.send(:uri) }
|
55
|
+
|
56
|
+
it "points at the extraction service" do
|
57
|
+
expect(subject).to be_kind_of URI
|
58
|
+
expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -1,29 +1,19 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Hydra::Derivatives::Image do
|
4
|
-
let(:object) { ActiveFedora::Base.new }
|
3
|
+
describe Hydra::Derivatives::Processors::Image do
|
5
4
|
let(:output_file) { double }
|
5
|
+
let(:file_name) { double }
|
6
6
|
|
7
|
-
subject {
|
7
|
+
subject { described_class.new(file_name, directives) }
|
8
8
|
|
9
9
|
before { allow(subject).to receive(:output_file).with(file_name).and_return(output_file) }
|
10
10
|
|
11
|
-
describe "when arguments are passed as a string" do
|
12
|
-
let(:directives) { { thumb: "100x100>" } }
|
13
|
-
let(:file_name) { 'content_thumb' }
|
14
|
-
|
15
|
-
it "should use the string as the size and the name is autogenerated" do
|
16
|
-
expect(subject).to receive(:create_resized_image).with(file_name, "100x100>", 'png')
|
17
|
-
subject.process
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
11
|
describe "when arguments are passed as a hash" do
|
22
|
-
let(:directives) { {
|
12
|
+
let(:directives) { { label: :thumb, size: "200x300>", format: 'png', quality: 75 } }
|
23
13
|
let(:file_name) { 'thumbnail' }
|
24
14
|
|
25
|
-
it "
|
26
|
-
expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png')
|
15
|
+
it "uses the specified size and name and quality" do
|
16
|
+
expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png', 75)
|
27
17
|
subject.process
|
28
18
|
end
|
29
19
|
end
|
@@ -39,7 +29,7 @@ describe Hydra::Derivatives::Image do
|
|
39
29
|
context 'when set' do
|
40
30
|
before do
|
41
31
|
subject.timeout = 0.1
|
42
|
-
allow_any_instance_of(
|
32
|
+
allow_any_instance_of(described_class).to receive(:process_without_timeout) { sleep 0.2 }
|
43
33
|
end
|
44
34
|
it 'raises a timeout exception' do
|
45
35
|
expect { subject.process }.to raise_error Hydra::Derivatives::TimeoutError
|
@@ -1,19 +1,19 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'yaml'
|
3
3
|
|
4
|
-
describe Hydra::Derivatives::Jpeg2kImage do
|
4
|
+
describe Hydra::Derivatives::Processors::Jpeg2kImage do
|
5
5
|
let(:object) { ActiveFedora::Base.new }
|
6
6
|
|
7
7
|
describe "#calculate_recipe" do
|
8
8
|
it "calculates the number of levels from a size" do
|
9
9
|
dim = 7200
|
10
|
-
expect(
|
10
|
+
expect(described_class.level_count_for_size(dim)).to eq(6)
|
11
11
|
end
|
12
12
|
|
13
13
|
it "calculates the compression rates for each quality layer" do
|
14
14
|
compression_num = 10
|
15
15
|
layers = 8
|
16
|
-
calc =
|
16
|
+
calc = described_class.layer_rates(layers, compression_num)
|
17
17
|
expect(calc).to eq("2.4,1.48331273,0.91675694,0.56659885,0.3501847,0.21643059,0.13376427,0.0826726")
|
18
18
|
end
|
19
19
|
|
@@ -27,28 +27,26 @@ describe Hydra::Derivatives::Jpeg2kImage do
|
|
27
27
|
|
28
28
|
it "can get the recipe from a config file" do
|
29
29
|
args = { recipe: :myrecipe }
|
30
|
-
r =
|
30
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
31
31
|
expect(r).to eq(@sample_cfg['jp2_recipes'][:myrecipe_grey])
|
32
32
|
end
|
33
33
|
|
34
34
|
it "can take a recipe as a string" do
|
35
35
|
args = { recipe: '-my -excellent -recipe' }
|
36
|
-
r =
|
36
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
37
37
|
expect(r).to eq(args[:recipe])
|
38
38
|
end
|
39
39
|
|
40
40
|
it "will fall back to a #calculate_recipe if a symbol is passed but no recipe is found" do
|
41
41
|
args = { recipe: :x }
|
42
|
-
r =
|
43
|
-
expect(r).to eq(
|
42
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
43
|
+
expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
|
44
44
|
end
|
45
45
|
|
46
46
|
it "will fall back to a #calculate_recipe if there is no attempt to provide one" do
|
47
47
|
args = {}
|
48
|
-
r =
|
49
|
-
expect(r).to eq(
|
48
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
49
|
+
expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
|
50
50
|
end
|
51
|
-
|
52
51
|
end
|
53
|
-
|
54
52
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hydra::Derivatives::Processors::Processor do
|
4
|
+
|
5
|
+
let(:object) { "Fake Object" }
|
6
|
+
let(:source_name) { 'content' }
|
7
|
+
let(:directives) { { thumb: "100x100>" } }
|
8
|
+
let(:file_path) { double }
|
9
|
+
|
10
|
+
subject { described_class.new(file_path, directives)}
|
11
|
+
|
12
|
+
describe "output_file_service" do
|
13
|
+
let(:custom_output_file_service) { "fake service" }
|
14
|
+
let(:another_custom_output_file_service) { "another fake service" }
|
15
|
+
|
16
|
+
context "as a global configuration setting" do
|
17
|
+
before do
|
18
|
+
allow(Hydra::Derivatives).to receive(:output_file_service).and_return(custom_output_file_service)
|
19
|
+
end
|
20
|
+
it "utilizes the default output file service" do
|
21
|
+
expect(subject.output_file_service).to eq(custom_output_file_service)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context "as an instance level configuration setting" do
|
26
|
+
subject do
|
27
|
+
described_class.new('/opt/derivatives/foo.mp4', directives,
|
28
|
+
output_file_service: another_custom_output_file_service)
|
29
|
+
end
|
30
|
+
|
31
|
+
it "accepts a custom output file service as an option" do
|
32
|
+
expect(subject.output_file_service).to eq(another_custom_output_file_service)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|