hydra-works 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +17 -8
  3. data/README.md +9 -9
  4. data/hydra-works.gemspec +3 -4
  5. data/lib/hydra/works.rb +22 -13
  6. data/lib/hydra/works/characterization.rb +31 -0
  7. data/lib/hydra/works/characterization/schema/audio_schema.rb +12 -0
  8. data/lib/hydra/works/characterization/schema/base_schema.rb +17 -0
  9. data/lib/hydra/works/characterization/schema/document_schema.rb +18 -0
  10. data/lib/hydra/works/characterization/schema/image_schema.rb +22 -0
  11. data/lib/hydra/works/characterization/schema/video_schema.rb +9 -0
  12. data/lib/hydra/works/models/characterization/already_there_strategy.rb +12 -0
  13. data/lib/hydra/works/models/characterization/fits_datastream.rb +162 -0
  14. data/lib/hydra/works/models/concerns/collection_behavior.rb +42 -34
  15. data/lib/hydra/works/models/concerns/file_set/characterization/audio.rb +14 -0
  16. data/lib/hydra/works/models/concerns/file_set/characterization/base.rb +23 -0
  17. data/lib/hydra/works/models/concerns/file_set/characterization/document.rb +10 -0
  18. data/lib/hydra/works/models/concerns/file_set/characterization/image.rb +10 -0
  19. data/lib/hydra/works/models/concerns/file_set/characterization/video.rb +17 -0
  20. data/lib/hydra/works/models/concerns/{generic_file → file_set}/contained_files.rb +1 -1
  21. data/lib/hydra/works/models/concerns/file_set/derivatives.rb +27 -0
  22. data/lib/hydra/works/models/concerns/{generic_file → file_set}/mime_types.rb +1 -1
  23. data/lib/hydra/works/models/concerns/{generic_file → file_set}/versioned_content.rb +2 -2
  24. data/lib/hydra/works/models/concerns/{generic_file → file_set}/virus_check.rb +1 -1
  25. data/lib/hydra/works/models/concerns/file_set_behavior.rb +67 -0
  26. data/lib/hydra/works/models/concerns/generic_file_behavior.rb +4 -50
  27. data/lib/hydra/works/models/concerns/generic_work_behavior.rb +5 -77
  28. data/lib/hydra/works/models/concerns/work_behavior.rb +118 -0
  29. data/lib/hydra/works/models/file_set.rb +7 -0
  30. data/lib/hydra/works/models/generic_file.rb +8 -8
  31. data/lib/hydra/works/models/generic_work.rb +15 -4
  32. data/lib/hydra/works/models/work.rb +6 -0
  33. data/lib/hydra/works/services/{generic_file/add_file_to_generic_file.rb → add_file_to_file_set.rb} +20 -20
  34. data/lib/hydra/works/services/add_file_to_generic_file.rb +8 -0
  35. data/lib/hydra/works/services/characterization_service.rb +118 -0
  36. data/lib/hydra/works/services/persist_derivative.rb +35 -0
  37. data/lib/hydra/works/services/{generic_file/upload_file_to_generic_file.rb → upload_file_to_file_set.rb} +3 -3
  38. data/lib/hydra/works/services/upload_file_to_generic_file.rb +8 -0
  39. data/lib/hydra/works/version.rb +1 -1
  40. data/lib/hydra/works/vocab/works_terms.rb +2 -2
  41. data/spec/fixtures/fits_0.6.2_avi.xml +29 -0
  42. data/spec/fixtures/fits_0.6.2_jp2.xml +36 -0
  43. data/spec/fixtures/fits_0.6.2_jpg.xml +43 -0
  44. data/spec/fixtures/fits_0.6.2_pdf.xml +42 -0
  45. data/spec/fixtures/fits_0.8.5_avi.xml +50 -0
  46. data/spec/fixtures/fits_0.8.5_docx.xml +41 -0
  47. data/spec/fixtures/fits_0.8.5_jp2.xml +51 -0
  48. data/spec/fixtures/fits_0.8.5_mp3.xml +47 -0
  49. data/spec/fixtures/fits_0.8.5_mp4.xml +47 -0
  50. data/spec/fixtures/fits_0.8.5_pdf.xml +54 -0
  51. data/spec/fixtures/pdf_fits.xml +54 -0
  52. data/spec/hydra/works/models/collection_spec.rb +58 -326
  53. data/spec/hydra/works/models/concerns/{generic_file → file_set}/contained_files_spec.rb +16 -16
  54. data/spec/hydra/works/models/concerns/{generic_file → file_set}/mime_types_spec.rb +2 -2
  55. data/spec/hydra/works/models/concerns/file_set/versioned_content_spec.rb +32 -0
  56. data/spec/hydra/works/models/concerns/{generic_file → file_set}/virus_check_spec.rb +3 -3
  57. data/spec/hydra/works/models/concerns/file_set_behavior_spec.rb +12 -0
  58. data/spec/hydra/works/models/generic_file_spec.rb +16 -13
  59. data/spec/hydra/works/models/generic_work_spec.rb +148 -318
  60. data/spec/hydra/works/services/{generic_file/add_file_to_generic_file_spec.rb → add_file_to_file_set_spec.rb} +4 -4
  61. data/spec/hydra/works/services/characterization_service_spec.rb +199 -0
  62. data/spec/hydra/works/services/persist_derivatives_spec.rb +57 -29
  63. data/spec/hydra/works/services/{generic_file/upload_file_spec.rb → upload_file_spec.rb} +7 -17
  64. data/spec/hydra/works_spec.rb +23 -59
  65. data/spec/spec_helper.rb +4 -2
  66. data/spec/support/file_set_helper.rb +14 -0
  67. metadata +84 -55
  68. data/lib/hydra/works/errors/full_text_extraction_error.rb +0 -5
  69. data/lib/hydra/works/models/concerns/block_child_objects.rb +0 -22
  70. data/lib/hydra/works/models/concerns/generic_file/derivatives.rb +0 -26
  71. data/lib/hydra/works/services/generic_file/full_text_extraction_service.rb +0 -57
  72. data/lib/hydra/works/services/generic_file/generate_thumbnail.rb +0 -13
  73. data/lib/hydra/works/services/generic_file/persist_derivative.rb +0 -20
  74. data/spec/hydra/works/models/concerns/block_child_objects_spec.rb +0 -17
  75. data/spec/hydra/works/models/concerns/generic_file/versioned_content_spec.rb +0 -32
  76. data/spec/hydra/works/models/concerns/generic_file_behavior_spec.rb +0 -12
  77. data/spec/hydra/works/services/full_text_extraction_service_spec.rb +0 -89
  78. data/spec/hydra/works/services/generic_file/generate/thumbnail_spec.rb +0 -19
@@ -1,57 +0,0 @@
1
- module Hydra::Works
2
- # Extract the full text from the content using Solr's extract handler
3
- class FullTextExtractionService
4
- def self.run(generic_file)
5
- new(generic_file).extract
6
- end
7
-
8
- delegate :original_file, :id, to: :@generic_file
9
-
10
- def initialize(generic_file)
11
- @generic_file = generic_file
12
- end
13
-
14
- ##
15
- # Extract full text from the content using Solr's extract handler.
16
- # This will extract text from the file uploaded to generic_file.
17
- # The file uploaded to @generic_file can be accessed via :original_file.
18
- #
19
- # @return [String] The extracted text
20
- def extract
21
- JSON.parse(fetch)[''].rstrip
22
- rescue Hydra::Works::FullTextExtractionError => e
23
- raise e
24
- rescue => e
25
- raise Hydra::Works::FullTextExtractionError.new, "Error extracting content from #{id}: #{e.inspect}"
26
- end
27
-
28
- # send the request to the extract service and return the response if it was successful.
29
- # TODO: this pulls the whole file into memory. We should stream it from Fedora instead
30
- # @return [String] the result of calling the extract service
31
- def fetch
32
- req = Net::HTTP.new(uri.host, uri.port)
33
- resp = req.post(uri.to_s, original_file.content, request_headers)
34
- raise Hydra::Works::FullTextExtractionError.new, "Solr Extract service was unsuccessful. '#{uri}' returned code #{resp.code} for #{id}\n#{resp.body}" unless resp.code == '200'
35
- original_file.content.rewind if original_file.content.respond_to?(:rewind)
36
-
37
- resp.body
38
- end
39
-
40
- # @return [Hash] the request headers to send to the Solr extract service
41
- def request_headers
42
- { Faraday::Request::UrlEncoded::CONTENT_TYPE => "#{original_file.mime_type};charset=utf-8",
43
- Faraday::Adapter::CONTENT_LENGTH => original_file.size.to_s }
44
- end
45
-
46
- # @returns [URI] path to the extract service
47
- def uri
48
- @uri ||= URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
49
- end
50
-
51
- private
52
-
53
- def connection_url
54
- ActiveFedora.solr_config[:url]
55
- end
56
- end
57
- end
@@ -1,13 +0,0 @@
1
- module Hydra::Works
2
- class GenerateThumbnail
3
- def self.call(object, content: :original_file)
4
- fail ArgumentError, "object has no content at #{content} from which to generate a thumbnail" if object.send(content).nil?
5
-
6
- # Always replace the thumbnail with whatever is from the original file
7
- object.build_thumbnail if object.thumbnail.nil?
8
-
9
- object.create_derivatives
10
- object
11
- end
12
- end
13
- end
@@ -1,20 +0,0 @@
1
- require 'hydra/derivatives'
2
-
3
- module Hydra::Works
4
- class PersistDerivative < Hydra::Derivatives::PersistOutputFileService
5
- ##
6
- # Persists a derivative to a GenericFile
7
- # This Service conforms to the signature of `Hydra::Derivatives::PersistOutputFileService`.
8
- # The purpose of this Service is for use as an alternative to the default Hydra::Derivatives::PersistOutputFileService. It's necessary because the default behavior in Hydra::Derivatives assumes that you're using LDP Basic Containment. Hydra::Works::GenericFiles use IndirectContainment. This Service handles that case.
9
- # This service will always update existing and does not do versioning of persisted files.
10
- #
11
- # @param [Hydra::Works::GenericFile::Base] object the file will be added to
12
- # @param [Hydra::Derivatives::IoDecorator] file the derivative filestream
13
- # @param [String] extract file type symbol (e.g. :thumbnail) from Hydra::Derivatives created destination_name
14
-
15
- def self.call(object, file, destination_name)
16
- type = destination_name.gsub(/^original_file_/, '').to_sym
17
- Hydra::Works::AddFileToGenericFile.call(object, file, type, update_existing: true, versioning: false)
18
- end
19
- end
20
- end
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Works::BlockChildObjects do
4
- subject { Hydra::Works::GenericFile::Base.new }
5
-
6
- describe '#objects=?' do
7
- it 'raises an error' do
8
- expect { subject.objects = [] }.to raise_error(StandardError, /method `objects=' not allowed for #<Hydra::Works::GenericFile::Base.*/)
9
- end
10
- end
11
-
12
- describe '#objects' do
13
- it 'raises an error' do
14
- expect { subject.objects }.to raise_error(StandardError, /method `objects' not allowed for #<Hydra::Works::GenericFile::Base.*/)
15
- end
16
- end
17
- end
@@ -1,32 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Works::GenericFile::VersionedContent do
4
- let(:generic_file) { Hydra::Works::GenericFile::Base.new }
5
- before do
6
- Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'sample-file.pdf')))
7
- Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'updated-file.txt')))
8
- end
9
-
10
- describe 'content_versions' do
11
- subject { generic_file.content_versions }
12
- it 'lists all of the versions of original_file' do
13
- expect(subject.count).to eq(2)
14
- expect(subject.map(&:uri)).to eq(generic_file.original_file.versions.all.map(&:uri))
15
- end
16
- end
17
-
18
- describe 'latest_content_version' do
19
- subject { generic_file.latest_content_version }
20
- it 'returns the most recent version entry for original_file' do
21
- # Can't use a simple equivalence because they are actually different ResourceVersion objects
22
- expect(subject.uri).to eq(generic_file.original_file.versions.last.uri)
23
- expect(subject.label).to eq(generic_file.original_file.versions.last.label)
24
- end
25
- end
26
-
27
- describe 'current_content_version_uri' do
28
- it 'returns the URI of the most recent version of original_file' do
29
- expect(generic_file.current_content_version_uri).to eq(generic_file.original_file.versions.last.uri)
30
- end
31
- end
32
- end
@@ -1,12 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Works::GenericFileBehavior do
4
- class IncludesGenericFileBehavior < ActiveFedora::Base
5
- include Hydra::Works::GenericFileBehavior
6
- end
7
- subject { IncludesGenericFileBehavior.new }
8
-
9
- it 'ensures that objects will be recognized as generic_files' do
10
- expect(subject).to be_works_generic_file
11
- end
12
- end
@@ -1,89 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Works::FullTextExtractionService do
4
- let(:generic_file) { Hydra::Works::GenericFile::Base.new }
5
-
6
- describe 'integration test' do
7
- before do
8
- Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'sample-file.pdf')))
9
- end
10
- subject { described_class.run(generic_file) }
11
- it 'extracts fulltext and stores the results' do
12
- expect(subject).to include('This is some original content')
13
- end
14
- end
15
-
16
- describe "run" do
17
- let(:generic_file) { double(id: '123') }
18
- subject { described_class.run(generic_file) }
19
-
20
- context "when it is successful" do
21
- before do
22
- allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
23
- end
24
- it { is_expected.to eq 'one two three' }
25
- end
26
-
27
- context "when solr raises an error" do
28
- before do
29
- allow_any_instance_of(described_class).to receive(:fetch).and_raise(Hydra::Works::FullTextExtractionError.new, "Solr failed")
30
- end
31
- it "raises an error" do
32
- expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, 'Solr failed'
33
- end
34
- end
35
-
36
- context "network error" do
37
- before do
38
- allow_any_instance_of(described_class).to receive(:fetch).and_raise(Errno::ECONNRESET)
39
- end
40
- it "raises an error" do
41
- expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, 'Error extracting content from 123: #<Errno::ECONNRESET: Connection reset by peer>'
42
- end
43
- end
44
- end
45
-
46
- describe "fetch" do
47
- let(:generic_file) { double('generic file', id: '123', original_file: original) }
48
- let(:original) { double(content: content, size: 13, mime_type: 'text/plain') }
49
- let(:service) { described_class.new(generic_file) }
50
- subject { service.fetch }
51
- let(:request) { double }
52
- let(:response_body) { 'returned by Solr' }
53
- let(:resp) { double(code: '200', body: response_body) }
54
- let(:uri) { URI('http://example.com:99/solr/update') }
55
- let(:content) { 'file contents' }
56
-
57
- before do
58
- allow(service).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
59
- allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
60
- end
61
-
62
- context "that is successful" do
63
- let(:resp) { double(code: '200', body: response_body) }
64
- it "calls the extraction service" do
65
- expect(request).to receive(:post).with('http://example.com:99/solr/update', content, "Content-Type" => "text/plain;charset=utf-8", "Content-Length" => "13").and_return(resp)
66
- expect(subject).to eq response_body
67
- end
68
- end
69
-
70
- context "that fails" do
71
- let(:resp) { double(code: '500', body: response_body) }
72
- it "raises an error" do
73
- expect(request).to receive(:post).with('http://example.com:99/solr/update', content, "Content-Type" => "text/plain;charset=utf-8", "Content-Length" => "13").and_return(resp)
74
- expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, "Solr Extract service was unsuccessful. 'http://example.com:99/solr/update' returned code 500 for 123\nreturned by Solr"
75
- end
76
- end
77
- end
78
-
79
- describe "uri" do
80
- let(:generic_file) { double }
81
- let(:service) { described_class.new(generic_file) }
82
- subject { service.uri }
83
-
84
- it "points at the extraction service" do
85
- expect(subject).to be_kind_of URI
86
- expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
87
- end
88
- end
89
- end
@@ -1,19 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Hydra::Works::GenerateThumbnail do
4
- context 'when the object has no original file' do
5
- let(:error_message) { 'object has no content at original_file from which to generate a thumbnail' }
6
- let(:object) { double('object', original_file: nil) }
7
- it 'raises an error' do
8
- expect(-> { described_class.call(object) }).to raise_error(ArgumentError, error_message)
9
- end
10
- end
11
-
12
- context 'when the object has no content at specified location' do
13
- let(:error_message) { 'object has no content at my_location from which to generate a thumbnail' }
14
- let(:object) { double('object', my_location: nil) }
15
- it 'raises an error' do
16
- expect(-> { described_class.call(object, content: :my_location) }).to raise_error(ArgumentError, error_message)
17
- end
18
- end
19
- end