hydra-derivatives 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/History.md +43 -0
- data/README.md +23 -38
- data/VERSION +1 -1
- data/hydra-derivatives.gemspec +0 -1
- data/lib/hydra/derivatives.rb +23 -123
- data/lib/hydra/derivatives/io_decorator.rb +7 -1
- data/lib/hydra/derivatives/processors.rb +19 -0
- data/lib/hydra/derivatives/processors/audio.rb +6 -0
- data/lib/hydra/derivatives/processors/document.rb +28 -0
- data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
- data/lib/hydra/derivatives/processors/full_text.rb +60 -0
- data/lib/hydra/derivatives/processors/image.rb +58 -0
- data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
- data/lib/hydra/derivatives/processors/processor.rb +38 -0
- data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
- data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
- data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
- data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
- data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
- data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
- data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
- data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
- data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
- data/lib/hydra/derivatives/runners/runner.rb +52 -0
- data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
- data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
- data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
- data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
- data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
- data/spec/processors/full_text.rb +61 -0
- data/spec/{units → processors}/image_spec.rb +7 -17
- data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
- data/spec/processors/processor_spec.rb +36 -0
- data/spec/processors/shell_based_processor_spec.rb +19 -0
- data/spec/processors/video_spec.rb +40 -0
- data/spec/services/audio_derivatives_spec.rb +76 -0
- data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
- data/spec/services/retrieve_source_file_service_spec.rb +16 -12
- data/spec/units/derivatives_spec.rb +18 -26
- data/spec/units/io_decorator_spec.rb +33 -0
- data/spec/units/transcoding_spec.rb +109 -86
- metadata +42 -44
- data/lib/hydra/derivatives/audio.rb +0 -19
- data/lib/hydra/derivatives/document.rb +0 -56
- data/lib/hydra/derivatives/extract_metadata.rb +0 -27
- data/lib/hydra/derivatives/ffmpeg.rb +0 -31
- data/lib/hydra/derivatives/image.rb +0 -73
- data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
- data/lib/hydra/derivatives/processor.rb +0 -33
- data/lib/hydra/derivatives/railtie.rb +0 -9
- data/lib/hydra/derivatives/raw_image.rb +0 -45
- data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
- data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
- data/spec/units/extract_spec.rb +0 -22
- data/spec/units/processor_spec.rb +0 -61
- data/spec/units/shell_based_processor_spec.rb +0 -22
- data/spec/units/video_spec.rb +0 -50
@@ -1,6 +1,6 @@
|
|
1
|
-
module Hydra::Derivatives
|
1
|
+
module Hydra::Derivatives::Processors
|
2
2
|
module Video
|
3
|
-
class Processor < Hydra::Derivatives::Processor
|
3
|
+
class Processor < Hydra::Derivatives::Processors::Processor
|
4
4
|
include Ffmpeg
|
5
5
|
|
6
6
|
class_attribute :config
|
@@ -36,12 +36,6 @@ module Hydra::Derivatives
|
|
36
36
|
raise ArgumentError, "Unknown format `#{format}'"
|
37
37
|
end
|
38
38
|
end
|
39
|
-
|
40
|
-
def new_mime_type(format)
|
41
|
-
format == "jpg" ? "image/jpeg" : "video/#{format}"
|
42
|
-
end
|
43
39
|
end
|
44
40
|
end
|
45
41
|
end
|
46
|
-
|
47
|
-
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class FullTextExtract < ImageDerivatives
|
3
|
+
# Adds format: 'txt' as the default to each of the directives
|
4
|
+
def self.transform_directives(options)
|
5
|
+
options.each do |directive|
|
6
|
+
directive.reverse_merge!(format: 'txt')
|
7
|
+
end
|
8
|
+
options
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.processor_class
|
12
|
+
Processors::FullText
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class ImageDerivatives < Runner
|
3
|
+
|
4
|
+
# Adds format: 'png' as the default to each of the directives
|
5
|
+
def self.transform_directives(options)
|
6
|
+
options.each do |directive|
|
7
|
+
directive.reverse_merge!(format: 'png')
|
8
|
+
end
|
9
|
+
options
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.processor_class
|
13
|
+
Processors::Image
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Hydra::Derivatives
|
2
|
+
class Jpeg2kImageDerivatives < Runner
|
3
|
+
# # Adds format: 'png' as the default to each of the directives
|
4
|
+
# def self.transform_directives(options)
|
5
|
+
# options.each do |directive|
|
6
|
+
# directive.reverse_merge!(format: 'png')
|
7
|
+
# end
|
8
|
+
# options
|
9
|
+
# end
|
10
|
+
|
11
|
+
def self.processor_class
|
12
|
+
Processors::Jpeg2kImage
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Hydra
|
2
|
+
module Derivatives
|
3
|
+
class Runner
|
4
|
+
|
5
|
+
def self.output_file_service= val
|
6
|
+
@output_file_service = val
|
7
|
+
end
|
8
|
+
|
9
|
+
# Use the output service configured for this class or default to the global setting
|
10
|
+
def self.output_file_service
|
11
|
+
@output_file_service || Hydra::Derivatives.output_file_service
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.source_file_service= val
|
15
|
+
@source_file_service = val
|
16
|
+
end
|
17
|
+
|
18
|
+
# Use the source service configured for this class or default to the global setting
|
19
|
+
def self.source_file_service
|
20
|
+
@source_file_service || Hydra::Derivatives.source_file_service
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
# @param [String, ActiveFedora::Base] object_or_filename path to the source file, or an object
|
25
|
+
# @param [Hash] options options to pass to the encoder
|
26
|
+
# @option options [Array] :outputs a list of desired outputs, each entry is a hash that has :label (optional), :format and :url
|
27
|
+
def self.create(object_or_filename, options)
|
28
|
+
source_file(object_or_filename, options) do |f|
|
29
|
+
transform_directives(options.delete(:outputs)).each do |instructions|
|
30
|
+
processor_class.new(f.path,
|
31
|
+
instructions.merge(source_file_service: source_file_service),
|
32
|
+
output_file_service: output_file_service).process
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Override this method if you need to add any defaults
|
38
|
+
def self.transform_directives(options)
|
39
|
+
options
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.source_file(object_or_filename, options, &block)
|
43
|
+
source_file_service.call(object_or_filename, options, &block)
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.processor_class
|
47
|
+
raise "Overide the processor_class method in a sub class"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
@@ -2,7 +2,7 @@ module Hydra::Derivatives
|
|
2
2
|
# This Service is an implementation of the Hydra::Derivatives::PersistOutputFileService
|
3
3
|
# It supports basic contained files, which is the behavior associated with Fedora 3 file datastreams that were migrated to Fedora 4
|
4
4
|
# and, at the time that this class was authored, corresponds to the behavior of ActiveFedora::Base.attach_file and ActiveFedora::Base.attached_files
|
5
|
-
### Rename this
|
5
|
+
### Rename this
|
6
6
|
class PersistBasicContainedOutputFileService < PersistOutputFileService
|
7
7
|
|
8
8
|
# This method conforms to the signature of the .call method on Hydra::Derivatives::PersistOutputFileService
|
@@ -10,16 +10,31 @@ module Hydra::Derivatives
|
|
10
10
|
#
|
11
11
|
# NOTE: Uses basic containment. If you want to use direct containment (e.g. with PCDM) you must use a different service (e.g. Hydra::Works::AddFileToGenericFile Service)
|
12
12
|
#
|
13
|
-
# @param [
|
14
|
-
# @param [
|
15
|
-
#
|
16
|
-
|
17
|
-
|
13
|
+
# @param [#read] stream the data to be persisted
|
14
|
+
# @param [Hash] directives directions which can be used to determine where to persist to.
|
15
|
+
# @option directives [String] url This can determine the path of the object.
|
16
|
+
def self.call(stream, directives)
|
17
|
+
file = Hydra::Derivatives::IoDecorator.new(stream, new_mime_type(directives.fetch(:format)))
|
18
18
|
o_name = determine_original_name(file)
|
19
19
|
m_type = determine_mime_type(file)
|
20
|
-
|
21
|
-
|
20
|
+
uri = URI(directives.fetch(:url))
|
21
|
+
raise ArgumentError, "#{uri} is not an http uri" unless uri.scheme == 'http'
|
22
|
+
remote_file = ActiveFedora::File.new(uri.to_s)
|
23
|
+
remote_file.content = file
|
24
|
+
remote_file.mime_type = m_type
|
25
|
+
remote_file.original_name = o_name
|
26
|
+
remote_file.save
|
22
27
|
end
|
23
28
|
|
29
|
+
def self.new_mime_type(format)
|
30
|
+
case format
|
31
|
+
when 'mp4'
|
32
|
+
'video/mp4' # default is application/mp4
|
33
|
+
when 'webm'
|
34
|
+
'video/webm' # default is audio/webm
|
35
|
+
else
|
36
|
+
MIME::Types.type_for(format).first.to_s
|
37
|
+
end
|
38
|
+
end
|
24
39
|
end
|
25
40
|
end
|
@@ -3,11 +3,10 @@ module Hydra::Derivatives
|
|
3
3
|
|
4
4
|
# Persists the file within the object at destination_name. Uses basic containment.
|
5
5
|
# If you want to use direct containment (e.g. with PCDM) you must use a different service (e.g. Hydra::Works::AddFileToGenericFile Service)
|
6
|
-
# @param [
|
7
|
-
# @param [
|
8
|
-
# @
|
9
|
-
|
10
|
-
def self.call(object, file, destination_path)
|
6
|
+
# @param [String] file_path the path to the file to be added
|
7
|
+
# @param [Hash] directives directions which can be used to determine where to persist to.
|
8
|
+
# @option directives [String] url This can determine the path of the object.
|
9
|
+
def self.call(file_path, directives)
|
11
10
|
raise NotImplementedError, "PersistOutputFileService is an abstract class. Implement `call' on #{self.class.name}"
|
12
11
|
end
|
13
12
|
|
@@ -2,11 +2,13 @@ module Hydra::Derivatives
|
|
2
2
|
class RetrieveSourceFileService
|
3
3
|
|
4
4
|
# Retrieves the source
|
5
|
-
# @param [
|
6
|
-
# @param [
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
# @param [ActiveFedora::Base] object the source file is attached to
|
6
|
+
# @param [Hash] options
|
7
|
+
# @option options [Symbol] :source a method that can be called on the object to retrieve the source file
|
8
|
+
# @yield [Tempfile] a temporary source file that has a lifetime of the block
|
9
|
+
def self.call(object, options, &block)
|
10
|
+
source_name = options.fetch(:source)
|
11
|
+
Hydra::Derivatives::TempfileService.create(object.send(source_name), &block)
|
10
12
|
end
|
11
13
|
end
|
12
|
-
end
|
14
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hydra::Derivatives::Processors::FullText do
|
4
|
+
let(:file_path) { File.join(fixture_path, 'test.docx') }
|
5
|
+
let(:directives) { { format: 'txt', url: 'http://localhost:8983/fedora/rest/dev/1234/ogg' } }
|
6
|
+
let(:processor) { described_class.new(file_path, directives) }
|
7
|
+
|
8
|
+
describe "process" do
|
9
|
+
subject { processor.process }
|
10
|
+
|
11
|
+
context "when it is successful" do
|
12
|
+
before do
|
13
|
+
allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
|
14
|
+
end
|
15
|
+
it { is_expected.to be true }
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'extracts fulltext and stores the results' do
|
19
|
+
expect(processor.output_file_service).to receive(:call).with(/Project Charter for E-Content Delivery Platform Review/, directives)
|
20
|
+
processor.process
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
describe "fetch" do
|
25
|
+
subject { processor.send(:fetch) }
|
26
|
+
let(:request) { double }
|
27
|
+
let(:response_body) { 'returned by Solr' }
|
28
|
+
let(:resp) { double(code: '200', body: response_body) }
|
29
|
+
let(:uri) { URI('http://example.com:99/solr/update') }
|
30
|
+
|
31
|
+
before do
|
32
|
+
allow(processor).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
|
33
|
+
allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
|
34
|
+
end
|
35
|
+
|
36
|
+
context "that is successful" do
|
37
|
+
let(:resp) { double(code: '200', body: response_body) }
|
38
|
+
it "calls the extraction service" do
|
39
|
+
expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
|
40
|
+
expect(subject).to eq response_body
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
context "that fails" do
|
45
|
+
let(:resp) { double(code: '500', body: response_body) }
|
46
|
+
it "raises an error" do
|
47
|
+
expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
|
48
|
+
expect { subject }.to raise_error RuntimeError, /Solr Extract service was unsuccessful. 'http:\/\/example\.com:99\/solr\/update' returned code 500 for .*spec\/fixtures\/test.docx\nreturned by Solr/
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "uri" do
|
54
|
+
subject { processor.send(:uri) }
|
55
|
+
|
56
|
+
it "points at the extraction service" do
|
57
|
+
expect(subject).to be_kind_of URI
|
58
|
+
expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -1,29 +1,19 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Hydra::Derivatives::Image do
|
4
|
-
let(:object) { ActiveFedora::Base.new }
|
3
|
+
describe Hydra::Derivatives::Processors::Image do
|
5
4
|
let(:output_file) { double }
|
5
|
+
let(:file_name) { double }
|
6
6
|
|
7
|
-
subject {
|
7
|
+
subject { described_class.new(file_name, directives) }
|
8
8
|
|
9
9
|
before { allow(subject).to receive(:output_file).with(file_name).and_return(output_file) }
|
10
10
|
|
11
|
-
describe "when arguments are passed as a string" do
|
12
|
-
let(:directives) { { thumb: "100x100>" } }
|
13
|
-
let(:file_name) { 'content_thumb' }
|
14
|
-
|
15
|
-
it "should use the string as the size and the name is autogenerated" do
|
16
|
-
expect(subject).to receive(:create_resized_image).with(file_name, "100x100>", 'png')
|
17
|
-
subject.process
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
11
|
describe "when arguments are passed as a hash" do
|
22
|
-
let(:directives) { {
|
12
|
+
let(:directives) { { label: :thumb, size: "200x300>", format: 'png', quality: 75 } }
|
23
13
|
let(:file_name) { 'thumbnail' }
|
24
14
|
|
25
|
-
it "
|
26
|
-
expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png')
|
15
|
+
it "uses the specified size and name and quality" do
|
16
|
+
expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png', 75)
|
27
17
|
subject.process
|
28
18
|
end
|
29
19
|
end
|
@@ -39,7 +29,7 @@ describe Hydra::Derivatives::Image do
|
|
39
29
|
context 'when set' do
|
40
30
|
before do
|
41
31
|
subject.timeout = 0.1
|
42
|
-
allow_any_instance_of(
|
32
|
+
allow_any_instance_of(described_class).to receive(:process_without_timeout) { sleep 0.2 }
|
43
33
|
end
|
44
34
|
it 'raises a timeout exception' do
|
45
35
|
expect { subject.process }.to raise_error Hydra::Derivatives::TimeoutError
|
@@ -1,19 +1,19 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'yaml'
|
3
3
|
|
4
|
-
describe Hydra::Derivatives::Jpeg2kImage do
|
4
|
+
describe Hydra::Derivatives::Processors::Jpeg2kImage do
|
5
5
|
let(:object) { ActiveFedora::Base.new }
|
6
6
|
|
7
7
|
describe "#calculate_recipe" do
|
8
8
|
it "calculates the number of levels from a size" do
|
9
9
|
dim = 7200
|
10
|
-
expect(
|
10
|
+
expect(described_class.level_count_for_size(dim)).to eq(6)
|
11
11
|
end
|
12
12
|
|
13
13
|
it "calculates the compression rates for each quality layer" do
|
14
14
|
compression_num = 10
|
15
15
|
layers = 8
|
16
|
-
calc =
|
16
|
+
calc = described_class.layer_rates(layers, compression_num)
|
17
17
|
expect(calc).to eq("2.4,1.48331273,0.91675694,0.56659885,0.3501847,0.21643059,0.13376427,0.0826726")
|
18
18
|
end
|
19
19
|
|
@@ -27,28 +27,26 @@ describe Hydra::Derivatives::Jpeg2kImage do
|
|
27
27
|
|
28
28
|
it "can get the recipe from a config file" do
|
29
29
|
args = { recipe: :myrecipe }
|
30
|
-
r =
|
30
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
31
31
|
expect(r).to eq(@sample_cfg['jp2_recipes'][:myrecipe_grey])
|
32
32
|
end
|
33
33
|
|
34
34
|
it "can take a recipe as a string" do
|
35
35
|
args = { recipe: '-my -excellent -recipe' }
|
36
|
-
r =
|
36
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
37
37
|
expect(r).to eq(args[:recipe])
|
38
38
|
end
|
39
39
|
|
40
40
|
it "will fall back to a #calculate_recipe if a symbol is passed but no recipe is found" do
|
41
41
|
args = { recipe: :x }
|
42
|
-
r =
|
43
|
-
expect(r).to eq(
|
42
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
43
|
+
expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
|
44
44
|
end
|
45
45
|
|
46
46
|
it "will fall back to a #calculate_recipe if there is no attempt to provide one" do
|
47
47
|
args = {}
|
48
|
-
r =
|
49
|
-
expect(r).to eq(
|
48
|
+
r = described_class.kdu_compress_recipe(args, 'grey', 7200)
|
49
|
+
expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
|
50
50
|
end
|
51
|
-
|
52
51
|
end
|
53
|
-
|
54
52
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hydra::Derivatives::Processors::Processor do
|
4
|
+
|
5
|
+
let(:object) { "Fake Object" }
|
6
|
+
let(:source_name) { 'content' }
|
7
|
+
let(:directives) { { thumb: "100x100>" } }
|
8
|
+
let(:file_path) { double }
|
9
|
+
|
10
|
+
subject { described_class.new(file_path, directives)}
|
11
|
+
|
12
|
+
describe "output_file_service" do
|
13
|
+
let(:custom_output_file_service) { "fake service" }
|
14
|
+
let(:another_custom_output_file_service) { "another fake service" }
|
15
|
+
|
16
|
+
context "as a global configuration setting" do
|
17
|
+
before do
|
18
|
+
allow(Hydra::Derivatives).to receive(:output_file_service).and_return(custom_output_file_service)
|
19
|
+
end
|
20
|
+
it "utilizes the default output file service" do
|
21
|
+
expect(subject.output_file_service).to eq(custom_output_file_service)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context "as an instance level configuration setting" do
|
26
|
+
subject do
|
27
|
+
described_class.new('/opt/derivatives/foo.mp4', directives,
|
28
|
+
output_file_service: another_custom_output_file_service)
|
29
|
+
end
|
30
|
+
|
31
|
+
it "accepts a custom output file service as an option" do
|
32
|
+
expect(subject.output_file_service).to eq(another_custom_output_file_service)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|