hydra-derivatives 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/History.md +43 -0
  4. data/README.md +23 -38
  5. data/VERSION +1 -1
  6. data/hydra-derivatives.gemspec +0 -1
  7. data/lib/hydra/derivatives.rb +23 -123
  8. data/lib/hydra/derivatives/io_decorator.rb +7 -1
  9. data/lib/hydra/derivatives/processors.rb +19 -0
  10. data/lib/hydra/derivatives/processors/audio.rb +6 -0
  11. data/lib/hydra/derivatives/processors/document.rb +28 -0
  12. data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
  13. data/lib/hydra/derivatives/processors/full_text.rb +60 -0
  14. data/lib/hydra/derivatives/processors/image.rb +58 -0
  15. data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
  16. data/lib/hydra/derivatives/processors/processor.rb +38 -0
  17. data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
  18. data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
  19. data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
  20. data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
  21. data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
  22. data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
  23. data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
  24. data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
  25. data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
  26. data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
  27. data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
  28. data/lib/hydra/derivatives/runners/runner.rb +52 -0
  29. data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
  30. data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
  31. data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
  32. data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
  33. data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
  34. data/spec/processors/full_text.rb +61 -0
  35. data/spec/{units → processors}/image_spec.rb +7 -17
  36. data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
  37. data/spec/processors/processor_spec.rb +36 -0
  38. data/spec/processors/shell_based_processor_spec.rb +19 -0
  39. data/spec/processors/video_spec.rb +40 -0
  40. data/spec/services/audio_derivatives_spec.rb +76 -0
  41. data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
  42. data/spec/services/retrieve_source_file_service_spec.rb +16 -12
  43. data/spec/units/derivatives_spec.rb +18 -26
  44. data/spec/units/io_decorator_spec.rb +33 -0
  45. data/spec/units/transcoding_spec.rb +109 -86
  46. metadata +42 -44
  47. data/lib/hydra/derivatives/audio.rb +0 -19
  48. data/lib/hydra/derivatives/document.rb +0 -56
  49. data/lib/hydra/derivatives/extract_metadata.rb +0 -27
  50. data/lib/hydra/derivatives/ffmpeg.rb +0 -31
  51. data/lib/hydra/derivatives/image.rb +0 -73
  52. data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
  53. data/lib/hydra/derivatives/processor.rb +0 -33
  54. data/lib/hydra/derivatives/railtie.rb +0 -9
  55. data/lib/hydra/derivatives/raw_image.rb +0 -45
  56. data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
  57. data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
  58. data/spec/units/extract_spec.rb +0 -22
  59. data/spec/units/processor_spec.rb +0 -61
  60. data/spec/units/shell_based_processor_spec.rb +0 -22
  61. data/spec/units/video_spec.rb +0 -50
@@ -1,6 +1,6 @@
1
- module Hydra::Derivatives
1
+ module Hydra::Derivatives::Processors
2
2
  module Video
3
- class Processor < Hydra::Derivatives::Processor
3
+ class Processor < Hydra::Derivatives::Processors::Processor
4
4
  include Ffmpeg
5
5
 
6
6
  class_attribute :config
@@ -36,12 +36,6 @@ module Hydra::Derivatives
36
36
  raise ArgumentError, "Unknown format `#{format}'"
37
37
  end
38
38
  end
39
-
40
- def new_mime_type(format)
41
- format == "jpg" ? "image/jpeg" : "video/#{format}"
42
- end
43
39
  end
44
40
  end
45
41
  end
46
-
47
-
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class AudioDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Audio
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class DocumentDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Document
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,16 @@
1
+ module Hydra::Derivatives
2
+ class FullTextExtract < ImageDerivatives
3
+ # Adds format: 'txt' as the default to each of the directives
4
+ def self.transform_directives(options)
5
+ options.each do |directive|
6
+ directive.reverse_merge!(format: 'txt')
7
+ end
8
+ options
9
+ end
10
+
11
+ def self.processor_class
12
+ Processors::FullText
13
+ end
14
+ end
15
+ end
16
+
@@ -0,0 +1,16 @@
1
+ module Hydra::Derivatives
2
+ class ImageDerivatives < Runner
3
+
4
+ # Adds format: 'png' as the default to each of the directives
5
+ def self.transform_directives(options)
6
+ options.each do |directive|
7
+ directive.reverse_merge!(format: 'png')
8
+ end
9
+ options
10
+ end
11
+
12
+ def self.processor_class
13
+ Processors::Image
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ module Hydra::Derivatives
2
+ class Jpeg2kImageDerivatives < Runner
3
+ # # Adds format: 'png' as the default to each of the directives
4
+ # def self.transform_directives(options)
5
+ # options.each do |directive|
6
+ # directive.reverse_merge!(format: 'png')
7
+ # end
8
+ # options
9
+ # end
10
+
11
+ def self.processor_class
12
+ Processors::Jpeg2kImage
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,6 @@
1
+ module Hydra
2
+ module Derivatives
3
+ class PdfDerivatives < ImageDerivatives
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,52 @@
1
+ module Hydra
2
+ module Derivatives
3
+ class Runner
4
+
5
+ def self.output_file_service= val
6
+ @output_file_service = val
7
+ end
8
+
9
+ # Use the output service configured for this class or default to the global setting
10
+ def self.output_file_service
11
+ @output_file_service || Hydra::Derivatives.output_file_service
12
+ end
13
+
14
+ def self.source_file_service= val
15
+ @source_file_service = val
16
+ end
17
+
18
+ # Use the source service configured for this class or default to the global setting
19
+ def self.source_file_service
20
+ @source_file_service || Hydra::Derivatives.source_file_service
21
+ end
22
+
23
+
24
+ # @param [String, ActiveFedora::Base] object_or_filename path to the source file, or an object
25
+ # @param [Hash] options options to pass to the encoder
26
+ # @option options [Array] :outputs a list of desired outputs; each entry is a hash that has :label (optional), :format and :url
27
+ def self.create(object_or_filename, options)
28
+ source_file(object_or_filename, options) do |f|
29
+ transform_directives(options.delete(:outputs)).each do |instructions|
30
+ processor_class.new(f.path,
31
+ instructions.merge(source_file_service: source_file_service),
32
+ output_file_service: output_file_service).process
33
+ end
34
+ end
35
+ end
36
+
37
+ # Override this method if you need to add any defaults
38
+ def self.transform_directives(options)
39
+ options
40
+ end
41
+
42
+ def self.source_file(object_or_filename, options, &block)
43
+ source_file_service.call(object_or_filename, options, &block)
44
+ end
45
+
46
+ def self.processor_class
47
+ raise "Overide the processor_class method in a sub class"
48
+ end
49
+ end
50
+ end
51
+ end
52
+
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class VideoDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Video::Processor
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ require 'mime/types'
2
+
3
+ module Hydra::Derivatives
4
+ module MimeTypeService
5
+ # @param [String] file_path path to a file
6
+ def self.mime_type(file_path)
7
+ MIME::Types.type_for(file_path).first.to_s
8
+ end
9
+ end
10
+ end
@@ -2,7 +2,7 @@ module Hydra::Derivatives
2
2
  # This Service is an implementation of the Hydra::Derivatives::PersistOutputFileService
3
3
  # It supports basic contained files, which is the behavior associated with Fedora 3 file datastreams that were migrated to Fedora 4
4
4
  # and, at the time that this class was authored, corresponds to the behavior of ActiveFedora::Base.attach_file and ActiveFedora::Base.attached_files
5
- ### Rename this
5
+ ### Rename this
6
6
  class PersistBasicContainedOutputFileService < PersistOutputFileService
7
7
 
8
8
  # This method conforms to the signature of the .call method on Hydra::Derivatives::PersistOutputFileService
@@ -10,16 +10,31 @@ module Hydra::Derivatives
10
10
  #
11
11
  # NOTE: Uses basic containment. If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
12
12
  #
13
- # @param [ActiveFedora::Base] object file is be persisted to
14
- # @param [File] filestream to be added, should respond to :mime_type and :original_name
15
- # original_name will get used as the path for a chile resource in fedora, it is not a path to a file on disk.
16
-
17
- def self.call(object, file, destination_path)
13
+ # @param [#read] stream the data to be persisted
14
+ # @param [Hash] directives directions which can be used to determine where to persist to.
15
+ # @option directives [String] url This can determine the path of the object.
16
+ def self.call(stream, directives)
17
+ file = Hydra::Derivatives::IoDecorator.new(stream, new_mime_type(directives.fetch(:format)))
18
18
  o_name = determine_original_name(file)
19
19
  m_type = determine_mime_type(file)
20
- object.add_file(file, path: destination_path, mime_type: m_type, original_name: o_name)
21
- object.save
20
+ uri = URI(directives.fetch(:url))
21
+ raise ArgumentError, "#{uri} is not an http uri" unless uri.scheme == 'http'
22
+ remote_file = ActiveFedora::File.new(uri.to_s)
23
+ remote_file.content = file
24
+ remote_file.mime_type = m_type
25
+ remote_file.original_name = o_name
26
+ remote_file.save
22
27
  end
23
28
 
29
+ def self.new_mime_type(format)
30
+ case format
31
+ when 'mp4'
32
+ 'video/mp4' # default is application/mp4
33
+ when 'webm'
34
+ 'video/webm' # default is audio/webm
35
+ else
36
+ MIME::Types.type_for(format).first.to_s
37
+ end
38
+ end
24
39
  end
25
40
  end
@@ -3,11 +3,10 @@ module Hydra::Derivatives
3
3
 
4
4
  # Persists the file within the object at destination_name. Uses basic containment.
5
5
  # If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
6
- # @param [Object] object the source file is attached to
7
- # @param [File] filestream to be added, should respond to :mime_type, :original_name
8
- # @param [String] destination_name is the fedora path at which the child resource will be found or created beneath the object.
9
-
10
- def self.call(object, file, destination_path)
6
+ # @param [String] file_path the path to the file to be added
7
+ # @param [Hash] directives directions which can be used to determine where to persist to.
8
+ # @option directives [String] url This can determine the path of the object.
9
+ def self.call(file_path, directives)
11
10
  raise NotImplementedError, "PersistOutputFileService is an abstract class. Implement `call' on #{self.class.name}"
12
11
  end
13
12
 
@@ -2,11 +2,13 @@ module Hydra::Derivatives
2
2
  class RetrieveSourceFileService
3
3
 
4
4
  # Retrieves the source
5
- # @param [Object] object the source file is attached to
6
- # @param [String] method name that can be called on object to retrieve the source file
7
-
8
- def self.call(object, source_name)
9
- object.send(source_name)
5
+ # @param [ActiveFedora::Base] object the source file is attached to
6
+ # @param [Hash] options
7
+ # @option options [Symbol] :source a method that can be called on the object to retrieve the source file
8
+ # @yield [Tempfile] a temporary source file that has a lifetime of the block
9
+ def self.call(object, options, &block)
10
+ source_name = options.fetch(:source)
11
+ Hydra::Derivatives::TempfileService.create(object.send(source_name), &block)
10
12
  end
11
13
  end
12
- end
14
+ end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hydra::Derivatives::Processors::FullText do
4
+ let(:file_path) { File.join(fixture_path, 'test.docx') }
5
+ let(:directives) { { format: 'txt', url: 'http://localhost:8983/fedora/rest/dev/1234/ogg' } }
6
+ let(:processor) { described_class.new(file_path, directives) }
7
+
8
+ describe "process" do
9
+ subject { processor.process }
10
+
11
+ context "when it is successful" do
12
+ before do
13
+ allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
14
+ end
15
+ it { is_expected.to be true }
16
+ end
17
+
18
+ it 'extracts fulltext and stores the results' do
19
+ expect(processor.output_file_service).to receive(:call).with(/Project Charter for E-Content Delivery Platform Review/, directives)
20
+ processor.process
21
+ end
22
+ end
23
+
24
+ describe "fetch" do
25
+ subject { processor.send(:fetch) }
26
+ let(:request) { double }
27
+ let(:response_body) { 'returned by Solr' }
28
+ let(:resp) { double(code: '200', body: response_body) }
29
+ let(:uri) { URI('http://example.com:99/solr/update') }
30
+
31
+ before do
32
+ allow(processor).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
33
+ allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
34
+ end
35
+
36
+ context "that is successful" do
37
+ let(:resp) { double(code: '200', body: response_body) }
38
+ it "calls the extraction service" do
39
+ expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
40
+ expect(subject).to eq response_body
41
+ end
42
+ end
43
+
44
+ context "that fails" do
45
+ let(:resp) { double(code: '500', body: response_body) }
46
+ it "raises an error" do
47
+ expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
48
+ expect { subject }.to raise_error RuntimeError, /Solr Extract service was unsuccessful. 'http:\/\/example\.com:99\/solr\/update' returned code 500 for .*spec\/fixtures\/test.docx\nreturned by Solr/
49
+ end
50
+ end
51
+ end
52
+
53
+ describe "uri" do
54
+ subject { processor.send(:uri) }
55
+
56
+ it "points at the extraction service" do
57
+ expect(subject).to be_kind_of URI
58
+ expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
59
+ end
60
+ end
61
+ end
@@ -1,29 +1,19 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Hydra::Derivatives::Image do
4
- let(:object) { ActiveFedora::Base.new }
3
+ describe Hydra::Derivatives::Processors::Image do
5
4
  let(:output_file) { double }
5
+ let(:file_name) { double }
6
6
 
7
- subject { Hydra::Derivatives::Image.new(object, 'content', directives)}
7
+ subject { described_class.new(file_name, directives) }
8
8
 
9
9
  before { allow(subject).to receive(:output_file).with(file_name).and_return(output_file) }
10
10
 
11
- describe "when arguments are passed as a string" do
12
- let(:directives) { { thumb: "100x100>" } }
13
- let(:file_name) { 'content_thumb' }
14
-
15
- it "should use the string as the size and the name is autogenerated" do
16
- expect(subject).to receive(:create_resized_image).with(file_name, "100x100>", 'png')
17
- subject.process
18
- end
19
- end
20
-
21
11
  describe "when arguments are passed as a hash" do
22
- let(:directives) { { thumb: { size: "200x300>", datastream: file_name } } }
12
+ let(:directives) { { label: :thumb, size: "200x300>", format: 'png', quality: 75 } }
23
13
  let(:file_name) { 'thumbnail' }
24
14
 
25
- it "should use the specified size and name" do
26
- expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png')
15
+ it "uses the specified size and name and quality" do
16
+ expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png', 75)
27
17
  subject.process
28
18
  end
29
19
  end
@@ -39,7 +29,7 @@ describe Hydra::Derivatives::Image do
39
29
  context 'when set' do
40
30
  before do
41
31
  subject.timeout = 0.1
42
- allow_any_instance_of(Hydra::Derivatives::Image).to receive(:process_without_timeout) { sleep 0.2 }
32
+ allow_any_instance_of(described_class).to receive(:process_without_timeout) { sleep 0.2 }
43
33
  end
44
34
  it 'raises a timeout exception' do
45
35
  expect { subject.process }.to raise_error Hydra::Derivatives::TimeoutError
@@ -1,19 +1,19 @@
1
1
  require 'spec_helper'
2
2
  require 'yaml'
3
3
 
4
- describe Hydra::Derivatives::Jpeg2kImage do
4
+ describe Hydra::Derivatives::Processors::Jpeg2kImage do
5
5
  let(:object) { ActiveFedora::Base.new }
6
6
 
7
7
  describe "#calculate_recipe" do
8
8
  it "calculates the number of levels from a size" do
9
9
  dim = 7200
10
- expect(Hydra::Derivatives::Jpeg2kImage.level_count_for_size(dim)).to eq(6)
10
+ expect(described_class.level_count_for_size(dim)).to eq(6)
11
11
  end
12
12
 
13
13
  it "calculates the compression rates for each quality layer" do
14
14
  compression_num = 10
15
15
  layers = 8
16
- calc = Hydra::Derivatives::Jpeg2kImage.layer_rates(layers, compression_num)
16
+ calc = described_class.layer_rates(layers, compression_num)
17
17
  expect(calc).to eq("2.4,1.48331273,0.91675694,0.56659885,0.3501847,0.21643059,0.13376427,0.0826726")
18
18
  end
19
19
 
@@ -27,28 +27,26 @@ describe Hydra::Derivatives::Jpeg2kImage do
27
27
 
28
28
  it "can get the recipe from a config file" do
29
29
  args = { recipe: :myrecipe }
30
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
30
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
31
31
  expect(r).to eq(@sample_cfg['jp2_recipes'][:myrecipe_grey])
32
32
  end
33
33
 
34
34
  it "can take a recipe as a string" do
35
35
  args = { recipe: '-my -excellent -recipe' }
36
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
36
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
37
37
  expect(r).to eq(args[:recipe])
38
38
  end
39
39
 
40
40
  it "will fall back to a #calculate_recipe if a symbol is passed but no recipe is found" do
41
41
  args = { recipe: :x }
42
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
43
- expect(r).to eq(Hydra::Derivatives::Jpeg2kImage.calculate_recipe(args, 'grey', 7200))
42
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
43
+ expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
44
44
  end
45
45
 
46
46
  it "will fall back to a #calculate_recipe if there is no attempt to provide one" do
47
47
  args = {}
48
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
49
- expect(r).to eq(Hydra::Derivatives::Jpeg2kImage.calculate_recipe(args, 'grey', 7200))
48
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
49
+ expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
50
50
  end
51
-
52
51
  end
53
-
54
52
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hydra::Derivatives::Processors::Processor do
4
+
5
+ let(:object) { "Fake Object" }
6
+ let(:source_name) { 'content' }
7
+ let(:directives) { { thumb: "100x100>" } }
8
+ let(:file_path) { double }
9
+
10
+ subject { described_class.new(file_path, directives)}
11
+
12
+ describe "output_file_service" do
13
+ let(:custom_output_file_service) { "fake service" }
14
+ let(:another_custom_output_file_service) { "another fake service" }
15
+
16
+ context "as a global configuration setting" do
17
+ before do
18
+ allow(Hydra::Derivatives).to receive(:output_file_service).and_return(custom_output_file_service)
19
+ end
20
+ it "utilizes the default output file service" do
21
+ expect(subject.output_file_service).to eq(custom_output_file_service)
22
+ end
23
+ end
24
+
25
+ context "as an instance level configuration setting" do
26
+ subject do
27
+ described_class.new('/opt/derivatives/foo.mp4', directives,
28
+ output_file_service: another_custom_output_file_service)
29
+ end
30
+
31
+ it "accepts a custom output file service as an option" do
32
+ expect(subject.output_file_service).to eq(another_custom_output_file_service)
33
+ end
34
+ end
35
+ end
36
+ end