hydra-derivatives 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/History.md +43 -0
  4. data/README.md +23 -38
  5. data/VERSION +1 -1
  6. data/hydra-derivatives.gemspec +0 -1
  7. data/lib/hydra/derivatives.rb +23 -123
  8. data/lib/hydra/derivatives/io_decorator.rb +7 -1
  9. data/lib/hydra/derivatives/processors.rb +19 -0
  10. data/lib/hydra/derivatives/processors/audio.rb +6 -0
  11. data/lib/hydra/derivatives/processors/document.rb +28 -0
  12. data/lib/hydra/derivatives/processors/ffmpeg.rb +22 -0
  13. data/lib/hydra/derivatives/processors/full_text.rb +60 -0
  14. data/lib/hydra/derivatives/processors/image.rb +58 -0
  15. data/lib/hydra/derivatives/processors/jpeg2k_image.rb +129 -0
  16. data/lib/hydra/derivatives/processors/processor.rb +38 -0
  17. data/lib/hydra/derivatives/processors/raw_image.rb +37 -0
  18. data/lib/hydra/derivatives/processors/shell_based_processor.rb +108 -0
  19. data/lib/hydra/derivatives/{video.rb → processors/video.rb} +1 -1
  20. data/lib/hydra/derivatives/{video → processors/video}/config.rb +1 -1
  21. data/lib/hydra/derivatives/{video → processors/video}/processor.rb +2 -8
  22. data/lib/hydra/derivatives/runners/audio_derivatives.rb +7 -0
  23. data/lib/hydra/derivatives/runners/document_derivatives.rb +7 -0
  24. data/lib/hydra/derivatives/runners/full_text_extract.rb +16 -0
  25. data/lib/hydra/derivatives/runners/image_derivatives.rb +16 -0
  26. data/lib/hydra/derivatives/runners/jpeg2k_image_derivatives.rb +15 -0
  27. data/lib/hydra/derivatives/runners/pdf_derivatives.rb +6 -0
  28. data/lib/hydra/derivatives/runners/runner.rb +52 -0
  29. data/lib/hydra/derivatives/runners/video_derivatives.rb +7 -0
  30. data/lib/hydra/derivatives/services/mime_type_service.rb +10 -0
  31. data/lib/hydra/derivatives/services/persist_basic_contained_output_file_service.rb +23 -8
  32. data/lib/hydra/derivatives/services/persist_output_file_service.rb +4 -5
  33. data/lib/hydra/derivatives/services/retrieve_source_file_service.rb +8 -6
  34. data/spec/processors/full_text.rb +61 -0
  35. data/spec/{units → processors}/image_spec.rb +7 -17
  36. data/spec/{units → processors}/jpeg2k_spec.rb +9 -11
  37. data/spec/processors/processor_spec.rb +36 -0
  38. data/spec/processors/shell_based_processor_spec.rb +19 -0
  39. data/spec/processors/video_spec.rb +40 -0
  40. data/spec/services/audio_derivatives_spec.rb +76 -0
  41. data/spec/services/persist_basic_contained_output_file_service_spec.rb +4 -3
  42. data/spec/services/retrieve_source_file_service_spec.rb +16 -12
  43. data/spec/units/derivatives_spec.rb +18 -26
  44. data/spec/units/io_decorator_spec.rb +33 -0
  45. data/spec/units/transcoding_spec.rb +109 -86
  46. metadata +42 -44
  47. data/lib/hydra/derivatives/audio.rb +0 -19
  48. data/lib/hydra/derivatives/document.rb +0 -56
  49. data/lib/hydra/derivatives/extract_metadata.rb +0 -27
  50. data/lib/hydra/derivatives/ffmpeg.rb +0 -31
  51. data/lib/hydra/derivatives/image.rb +0 -73
  52. data/lib/hydra/derivatives/jpeg2k_image.rb +0 -136
  53. data/lib/hydra/derivatives/processor.rb +0 -33
  54. data/lib/hydra/derivatives/railtie.rb +0 -9
  55. data/lib/hydra/derivatives/raw_image.rb +0 -45
  56. data/lib/hydra/derivatives/shell_based_processor.rb +0 -81
  57. data/spec/lib/hydra/derivatives/extract_metadata_spec.rb +0 -39
  58. data/spec/units/extract_spec.rb +0 -22
  59. data/spec/units/processor_spec.rb +0 -61
  60. data/spec/units/shell_based_processor_spec.rb +0 -22
  61. data/spec/units/video_spec.rb +0 -50
@@ -1,6 +1,6 @@
1
- module Hydra::Derivatives
1
+ module Hydra::Derivatives::Processors
2
2
  module Video
3
- class Processor < Hydra::Derivatives::Processor
3
+ class Processor < Hydra::Derivatives::Processors::Processor
4
4
  include Ffmpeg
5
5
 
6
6
  class_attribute :config
@@ -36,12 +36,6 @@ module Hydra::Derivatives
36
36
  raise ArgumentError, "Unknown format `#{format}'"
37
37
  end
38
38
  end
39
-
40
- def new_mime_type(format)
41
- format == "jpg" ? "image/jpeg" : "video/#{format}"
42
- end
43
39
  end
44
40
  end
45
41
  end
46
-
47
-
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class AudioDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Audio
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class DocumentDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Document
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,16 @@
1
+ module Hydra::Derivatives
2
+ class FullTextExtract < ImageDerivatives
3
+ # Adds format: 'txt' as the default to each of the directives
4
+ def self.transform_directives(options)
5
+ options.each do |directive|
6
+ directive.reverse_merge!(format: 'txt')
7
+ end
8
+ options
9
+ end
10
+
11
+ def self.processor_class
12
+ Processors::FullText
13
+ end
14
+ end
15
+ end
16
+
@@ -0,0 +1,16 @@
1
+ module Hydra::Derivatives
2
+ class ImageDerivatives < Runner
3
+
4
+ # Adds format: 'png' as the default to each of the directives
5
+ def self.transform_directives(options)
6
+ options.each do |directive|
7
+ directive.reverse_merge!(format: 'png')
8
+ end
9
+ options
10
+ end
11
+
12
+ def self.processor_class
13
+ Processors::Image
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ module Hydra::Derivatives
2
+ class Jpeg2kImageDerivatives < Runner
3
+ # # Adds format: 'png' as the default to each of the directives
4
+ # def self.transform_directives(options)
5
+ # options.each do |directive|
6
+ # directive.reverse_merge!(format: 'png')
7
+ # end
8
+ # options
9
+ # end
10
+
11
+ def self.processor_class
12
+ Processors::Jpeg2kImage
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,6 @@
1
+ module Hydra
2
+ module Derivatives
3
+ class PdfDerivatives < ImageDerivatives
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,52 @@
1
+ module Hydra
2
+ module Derivatives
3
+ class Runner
4
+
5
+ def self.output_file_service= val
6
+ @output_file_service = val
7
+ end
8
+
9
+ # Use the output service configured for this class or default to the global setting
10
+ def self.output_file_service
11
+ @output_file_service || Hydra::Derivatives.output_file_service
12
+ end
13
+
14
+ def self.source_file_service= val
15
+ @source_file_service = val
16
+ end
17
+
18
+ # Use the source service configured for this class or default to the global setting
19
+ def self.source_file_service
20
+ @source_file_service || Hydra::Derivatives.source_file_service
21
+ end
22
+
23
+
24
+ # @param [String, ActiveFedora::Base] object_or_filename path to the source file, or an object
25
+ # @param [Hash] options options to pass to the encoder
26
+ # @option options [Array] :outputs a list of desired outputs, each entry is a hash that has :label (optional), :format and :url
27
+ def self.create(object_or_filename, options)
28
+ source_file(object_or_filename, options) do |f|
29
+ transform_directives(options.delete(:outputs)).each do |instructions|
30
+ processor_class.new(f.path,
31
+ instructions.merge(source_file_service: source_file_service),
32
+ output_file_service: output_file_service).process
33
+ end
34
+ end
35
+ end
36
+
37
+ # Override this method if you need to add any defaults
38
+ def self.transform_directives(options)
39
+ options
40
+ end
41
+
42
+ def self.source_file(object_or_filename, options, &block)
43
+ source_file_service.call(object_or_filename, options, &block)
44
+ end
45
+
46
+ def self.processor_class
47
+ raise "Overide the processor_class method in a sub class"
48
+ end
49
+ end
50
+ end
51
+ end
52
+
@@ -0,0 +1,7 @@
1
+ module Hydra::Derivatives
2
+ class VideoDerivatives < Runner
3
+ def self.processor_class
4
+ Processors::Video::Processor
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ require 'mime/types'
2
+
3
+ module Hydra::Derivatives
4
+ module MimeTypeService
5
+ # @param [String] file_path path to a file
6
+ def self.mime_type(file_path)
7
+ MIME::Types.type_for(file_path).first.to_s
8
+ end
9
+ end
10
+ end
@@ -2,7 +2,7 @@ module Hydra::Derivatives
2
2
  # This Service is an implementation of the Hydra::Derivatives::PersistOutputFileService
3
3
  # It supports basic contained files, which is the behavior associated with Fedora 3 file datastreams that were migrated to Fedora 4
4
4
  # and, at the time that this class was authored, corresponds to the behavior of ActiveFedora::Base.attach_file and ActiveFedora::Base.attached_files
5
- ### Rename this
5
+ ### Rename this
6
6
  class PersistBasicContainedOutputFileService < PersistOutputFileService
7
7
 
8
8
  # This method conforms to the signature of the .call method on Hydra::Derivatives::PersistOutputFileService
@@ -10,16 +10,31 @@ module Hydra::Derivatives
10
10
  #
11
11
  # NOTE: Uses basic containment. If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
12
12
  #
13
- # @param [ActiveFedora::Base] object file is be persisted to
14
- # @param [File] filestream to be added, should respond to :mime_type and :original_name
15
- # original_name will get used as the path for a chile resource in fedora, it is not a path to a file on disk.
16
-
17
- def self.call(object, file, destination_path)
13
+ # @param [#read] stream the data to be persisted
14
+ # @param [Hash] directives directions which can be used to determine where to persist to.
15
+ # @option directives [String] url This can determine the path of the object.
16
+ def self.call(stream, directives)
17
+ file = Hydra::Derivatives::IoDecorator.new(stream, new_mime_type(directives.fetch(:format)))
18
18
  o_name = determine_original_name(file)
19
19
  m_type = determine_mime_type(file)
20
- object.add_file(file, path: destination_path, mime_type: m_type, original_name: o_name)
21
- object.save
20
+ uri = URI(directives.fetch(:url))
21
+ raise ArgumentError, "#{uri} is not an http uri" unless uri.scheme == 'http'
22
+ remote_file = ActiveFedora::File.new(uri.to_s)
23
+ remote_file.content = file
24
+ remote_file.mime_type = m_type
25
+ remote_file.original_name = o_name
26
+ remote_file.save
22
27
  end
23
28
 
29
+ def self.new_mime_type(format)
30
+ case format
31
+ when 'mp4'
32
+ 'video/mp4' # default is application/mp4
33
+ when 'webm'
34
+ 'video/webm' # default is audio/webm
35
+ else
36
+ MIME::Types.type_for(format).first.to_s
37
+ end
38
+ end
24
39
  end
25
40
  end
@@ -3,11 +3,10 @@ module Hydra::Derivatives
3
3
 
4
4
  # Persists the file within the object at destination_name. Uses basic containment.
5
5
  # If you want to use direct containment (ie. with PCDM) you must use a different service (ie. Hydra::Works::AddFileToGenericFile Service)
6
- # @param [Object] object the source file is attached to
7
- # @param [File] filestream to be added, should respond to :mime_type, :original_name
8
- # @param [String] destination_name is the fedora path at which the child resource will be found or created beneath the object.
9
-
10
- def self.call(object, file, destination_path)
6
+ # @param [String] file_path the path to the file to be added
7
+ # @param [Hash] directives directions which can be used to determine where to persist to.
8
+ # @option directives [String] url This can determine the path of the object.
9
+ def self.call(file_path, directives)
11
10
  raise NotImplementedError, "PersistOutputFileService is an abstract class. Implement `call' on #{self.class.name}"
12
11
  end
13
12
 
@@ -2,11 +2,13 @@ module Hydra::Derivatives
2
2
  class RetrieveSourceFileService
3
3
 
4
4
  # Retrieves the source
5
- # @param [Object] object the source file is attached to
6
- # @param [String] method name that can be called on object to retrieve the source file
7
-
8
- def self.call(object, source_name)
9
- object.send(source_name)
5
+ # @param [ActiveFedora::Base] object the source file is attached to
6
+ # @param [Hash] options
7
+ # @option options [Symbol] :source a method that can be called on the object to retrieve the source file
8
+ # @yield [Tempfile] a temporary source file that has a lifetime of the block
9
+ def self.call(object, options, &block)
10
+ source_name = options.fetch(:source)
11
+ Hydra::Derivatives::TempfileService.create(object.send(source_name), &block)
10
12
  end
11
13
  end
12
- end
14
+ end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hydra::Derivatives::Processors::FullText do
4
+ let(:file_path) { File.join(fixture_path, 'test.docx') }
5
+ let(:directives) { { format: 'txt', url: 'http://localhost:8983/fedora/rest/dev/1234/ogg' } }
6
+ let(:processor) { described_class.new(file_path, directives) }
7
+
8
+ describe "process" do
9
+ subject { processor.process }
10
+
11
+ context "when it is successful" do
12
+ before do
13
+ allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
14
+ end
15
+ it { is_expected.to be true }
16
+ end
17
+
18
+ it 'extracts fulltext and stores the results' do
19
+ expect(processor.output_file_service).to receive(:call).with(/Project Charter for E-Content Delivery Platform Review/, directives)
20
+ processor.process
21
+ end
22
+ end
23
+
24
+ describe "fetch" do
25
+ subject { processor.send(:fetch) }
26
+ let(:request) { double }
27
+ let(:response_body) { 'returned by Solr' }
28
+ let(:resp) { double(code: '200', body: response_body) }
29
+ let(:uri) { URI('http://example.com:99/solr/update') }
30
+
31
+ before do
32
+ allow(processor).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
33
+ allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
34
+ end
35
+
36
+ context "that is successful" do
37
+ let(:resp) { double(code: '200', body: response_body) }
38
+ it "calls the extraction service" do
39
+ expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
40
+ expect(subject).to eq response_body
41
+ end
42
+ end
43
+
44
+ context "that fails" do
45
+ let(:resp) { double(code: '500', body: response_body) }
46
+ it "raises an error" do
47
+ expect(request).to receive(:post).with('http://example.com:99/solr/update', String, "Content-Type" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "Content-Length" => "24244").and_return(resp)
48
+ expect { subject }.to raise_error RuntimeError, /Solr Extract service was unsuccessful. 'http:\/\/example\.com:99\/solr\/update' returned code 500 for .*spec\/fixtures\/test.docx\nreturned by Solr/
49
+ end
50
+ end
51
+ end
52
+
53
+ describe "uri" do
54
+ subject { processor.send(:uri) }
55
+
56
+ it "points at the extraction service" do
57
+ expect(subject).to be_kind_of URI
58
+ expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
59
+ end
60
+ end
61
+ end
@@ -1,29 +1,19 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Hydra::Derivatives::Image do
4
- let(:object) { ActiveFedora::Base.new }
3
+ describe Hydra::Derivatives::Processors::Image do
5
4
  let(:output_file) { double }
5
+ let(:file_name) { double }
6
6
 
7
- subject { Hydra::Derivatives::Image.new(object, 'content', directives)}
7
+ subject { described_class.new(file_name, directives) }
8
8
 
9
9
  before { allow(subject).to receive(:output_file).with(file_name).and_return(output_file) }
10
10
 
11
- describe "when arguments are passed as a string" do
12
- let(:directives) { { thumb: "100x100>" } }
13
- let(:file_name) { 'content_thumb' }
14
-
15
- it "should use the string as the size and the name is autogenerated" do
16
- expect(subject).to receive(:create_resized_image).with(file_name, "100x100>", 'png')
17
- subject.process
18
- end
19
- end
20
-
21
11
  describe "when arguments are passed as a hash" do
22
- let(:directives) { { thumb: { size: "200x300>", datastream: file_name } } }
12
+ let(:directives) { { label: :thumb, size: "200x300>", format: 'png', quality: 75 } }
23
13
  let(:file_name) { 'thumbnail' }
24
14
 
25
- it "should use the specified size and name" do
26
- expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png')
15
+ it "uses the specified size and name and quality" do
16
+ expect(subject).to receive(:create_resized_image).with(file_name, "200x300>", 'png', 75)
27
17
  subject.process
28
18
  end
29
19
  end
@@ -39,7 +29,7 @@ describe Hydra::Derivatives::Image do
39
29
  context 'when set' do
40
30
  before do
41
31
  subject.timeout = 0.1
42
- allow_any_instance_of(Hydra::Derivatives::Image).to receive(:process_without_timeout) { sleep 0.2 }
32
+ allow_any_instance_of(described_class).to receive(:process_without_timeout) { sleep 0.2 }
43
33
  end
44
34
  it 'raises a timeout exception' do
45
35
  expect { subject.process }.to raise_error Hydra::Derivatives::TimeoutError
@@ -1,19 +1,19 @@
1
1
  require 'spec_helper'
2
2
  require 'yaml'
3
3
 
4
- describe Hydra::Derivatives::Jpeg2kImage do
4
+ describe Hydra::Derivatives::Processors::Jpeg2kImage do
5
5
  let(:object) { ActiveFedora::Base.new }
6
6
 
7
7
  describe "#calculate_recipe" do
8
8
  it "calculates the number of levels from a size" do
9
9
  dim = 7200
10
- expect(Hydra::Derivatives::Jpeg2kImage.level_count_for_size(dim)).to eq(6)
10
+ expect(described_class.level_count_for_size(dim)).to eq(6)
11
11
  end
12
12
 
13
13
  it "calculates the compression rates for each quality layer" do
14
14
  compression_num = 10
15
15
  layers = 8
16
- calc = Hydra::Derivatives::Jpeg2kImage.layer_rates(layers, compression_num)
16
+ calc = described_class.layer_rates(layers, compression_num)
17
17
  expect(calc).to eq("2.4,1.48331273,0.91675694,0.56659885,0.3501847,0.21643059,0.13376427,0.0826726")
18
18
  end
19
19
 
@@ -27,28 +27,26 @@ describe Hydra::Derivatives::Jpeg2kImage do
27
27
 
28
28
  it "can get the recipe from a config file" do
29
29
  args = { recipe: :myrecipe }
30
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
30
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
31
31
  expect(r).to eq(@sample_cfg['jp2_recipes'][:myrecipe_grey])
32
32
  end
33
33
 
34
34
  it "can take a recipe as a string" do
35
35
  args = { recipe: '-my -excellent -recipe' }
36
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
36
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
37
37
  expect(r).to eq(args[:recipe])
38
38
  end
39
39
 
40
40
  it "will fall back to a #calculate_recipe if a symbol is passed but no recipe is found" do
41
41
  args = { recipe: :x }
42
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
43
- expect(r).to eq(Hydra::Derivatives::Jpeg2kImage.calculate_recipe(args, 'grey', 7200))
42
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
43
+ expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
44
44
  end
45
45
 
46
46
  it "will fall back to a #calculate_recipe if there is no attempt to provide one" do
47
47
  args = {}
48
- r = Hydra::Derivatives::Jpeg2kImage.kdu_compress_recipe(args, 'grey', 7200)
49
- expect(r).to eq(Hydra::Derivatives::Jpeg2kImage.calculate_recipe(args, 'grey', 7200))
48
+ r = described_class.kdu_compress_recipe(args, 'grey', 7200)
49
+ expect(r).to eq(described_class.calculate_recipe(args, 'grey', 7200))
50
50
  end
51
-
52
51
  end
53
-
54
52
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe Hydra::Derivatives::Processors::Processor do
4
+
5
+ let(:object) { "Fake Object" }
6
+ let(:source_name) { 'content' }
7
+ let(:directives) { { thumb: "100x100>" } }
8
+ let(:file_path) { double }
9
+
10
+ subject { described_class.new(file_path, directives)}
11
+
12
+ describe "output_file_service" do
13
+ let(:custom_output_file_service) { "fake service" }
14
+ let(:another_custom_output_file_service) { "another fake service" }
15
+
16
+ context "as a global configuration setting" do
17
+ before do
18
+ allow(Hydra::Derivatives).to receive(:output_file_service).and_return(custom_output_file_service)
19
+ end
20
+ it "utilizes the default output file service" do
21
+ expect(subject.output_file_service).to eq(custom_output_file_service)
22
+ end
23
+ end
24
+
25
+ context "as an instance level configuration setting" do
26
+ subject do
27
+ described_class.new('/opt/derivatives/foo.mp4', directives,
28
+ output_file_service: another_custom_output_file_service)
29
+ end
30
+
31
+ it "accepts a custom output file service as an option" do
32
+ expect(subject.output_file_service).to eq(another_custom_output_file_service)
33
+ end
34
+ end
35
+ end
36
+ end