hydra-works 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +17 -8
- data/README.md +9 -9
- data/hydra-works.gemspec +3 -4
- data/lib/hydra/works.rb +22 -13
- data/lib/hydra/works/characterization.rb +31 -0
- data/lib/hydra/works/characterization/schema/audio_schema.rb +12 -0
- data/lib/hydra/works/characterization/schema/base_schema.rb +17 -0
- data/lib/hydra/works/characterization/schema/document_schema.rb +18 -0
- data/lib/hydra/works/characterization/schema/image_schema.rb +22 -0
- data/lib/hydra/works/characterization/schema/video_schema.rb +9 -0
- data/lib/hydra/works/models/characterization/already_there_strategy.rb +12 -0
- data/lib/hydra/works/models/characterization/fits_datastream.rb +162 -0
- data/lib/hydra/works/models/concerns/collection_behavior.rb +42 -34
- data/lib/hydra/works/models/concerns/file_set/characterization/audio.rb +14 -0
- data/lib/hydra/works/models/concerns/file_set/characterization/base.rb +23 -0
- data/lib/hydra/works/models/concerns/file_set/characterization/document.rb +10 -0
- data/lib/hydra/works/models/concerns/file_set/characterization/image.rb +10 -0
- data/lib/hydra/works/models/concerns/file_set/characterization/video.rb +17 -0
- data/lib/hydra/works/models/concerns/{generic_file → file_set}/contained_files.rb +1 -1
- data/lib/hydra/works/models/concerns/file_set/derivatives.rb +27 -0
- data/lib/hydra/works/models/concerns/{generic_file → file_set}/mime_types.rb +1 -1
- data/lib/hydra/works/models/concerns/{generic_file → file_set}/versioned_content.rb +2 -2
- data/lib/hydra/works/models/concerns/{generic_file → file_set}/virus_check.rb +1 -1
- data/lib/hydra/works/models/concerns/file_set_behavior.rb +67 -0
- data/lib/hydra/works/models/concerns/generic_file_behavior.rb +4 -50
- data/lib/hydra/works/models/concerns/generic_work_behavior.rb +5 -77
- data/lib/hydra/works/models/concerns/work_behavior.rb +118 -0
- data/lib/hydra/works/models/file_set.rb +7 -0
- data/lib/hydra/works/models/generic_file.rb +8 -8
- data/lib/hydra/works/models/generic_work.rb +15 -4
- data/lib/hydra/works/models/work.rb +6 -0
- data/lib/hydra/works/services/{generic_file/add_file_to_generic_file.rb → add_file_to_file_set.rb} +20 -20
- data/lib/hydra/works/services/add_file_to_generic_file.rb +8 -0
- data/lib/hydra/works/services/characterization_service.rb +118 -0
- data/lib/hydra/works/services/persist_derivative.rb +35 -0
- data/lib/hydra/works/services/{generic_file/upload_file_to_generic_file.rb → upload_file_to_file_set.rb} +3 -3
- data/lib/hydra/works/services/upload_file_to_generic_file.rb +8 -0
- data/lib/hydra/works/version.rb +1 -1
- data/lib/hydra/works/vocab/works_terms.rb +2 -2
- data/spec/fixtures/fits_0.6.2_avi.xml +29 -0
- data/spec/fixtures/fits_0.6.2_jp2.xml +36 -0
- data/spec/fixtures/fits_0.6.2_jpg.xml +43 -0
- data/spec/fixtures/fits_0.6.2_pdf.xml +42 -0
- data/spec/fixtures/fits_0.8.5_avi.xml +50 -0
- data/spec/fixtures/fits_0.8.5_docx.xml +41 -0
- data/spec/fixtures/fits_0.8.5_jp2.xml +51 -0
- data/spec/fixtures/fits_0.8.5_mp3.xml +47 -0
- data/spec/fixtures/fits_0.8.5_mp4.xml +47 -0
- data/spec/fixtures/fits_0.8.5_pdf.xml +54 -0
- data/spec/fixtures/pdf_fits.xml +54 -0
- data/spec/hydra/works/models/collection_spec.rb +58 -326
- data/spec/hydra/works/models/concerns/{generic_file → file_set}/contained_files_spec.rb +16 -16
- data/spec/hydra/works/models/concerns/{generic_file → file_set}/mime_types_spec.rb +2 -2
- data/spec/hydra/works/models/concerns/file_set/versioned_content_spec.rb +32 -0
- data/spec/hydra/works/models/concerns/{generic_file → file_set}/virus_check_spec.rb +3 -3
- data/spec/hydra/works/models/concerns/file_set_behavior_spec.rb +12 -0
- data/spec/hydra/works/models/generic_file_spec.rb +16 -13
- data/spec/hydra/works/models/generic_work_spec.rb +148 -318
- data/spec/hydra/works/services/{generic_file/add_file_to_generic_file_spec.rb → add_file_to_file_set_spec.rb} +4 -4
- data/spec/hydra/works/services/characterization_service_spec.rb +199 -0
- data/spec/hydra/works/services/persist_derivatives_spec.rb +57 -29
- data/spec/hydra/works/services/{generic_file/upload_file_spec.rb → upload_file_spec.rb} +7 -17
- data/spec/hydra/works_spec.rb +23 -59
- data/spec/spec_helper.rb +4 -2
- data/spec/support/file_set_helper.rb +14 -0
- metadata +84 -55
- data/lib/hydra/works/errors/full_text_extraction_error.rb +0 -5
- data/lib/hydra/works/models/concerns/block_child_objects.rb +0 -22
- data/lib/hydra/works/models/concerns/generic_file/derivatives.rb +0 -26
- data/lib/hydra/works/services/generic_file/full_text_extraction_service.rb +0 -57
- data/lib/hydra/works/services/generic_file/generate_thumbnail.rb +0 -13
- data/lib/hydra/works/services/generic_file/persist_derivative.rb +0 -20
- data/spec/hydra/works/models/concerns/block_child_objects_spec.rb +0 -17
- data/spec/hydra/works/models/concerns/generic_file/versioned_content_spec.rb +0 -32
- data/spec/hydra/works/models/concerns/generic_file_behavior_spec.rb +0 -12
- data/spec/hydra/works/services/full_text_extraction_service_spec.rb +0 -89
- data/spec/hydra/works/services/generic_file/generate/thumbnail_spec.rb +0 -19
@@ -1,57 +0,0 @@
|
|
1
|
-
module Hydra::Works
|
2
|
-
# Extract the full text from the content using Solr's extract handler
|
3
|
-
class FullTextExtractionService
|
4
|
-
def self.run(generic_file)
|
5
|
-
new(generic_file).extract
|
6
|
-
end
|
7
|
-
|
8
|
-
delegate :original_file, :id, to: :@generic_file
|
9
|
-
|
10
|
-
def initialize(generic_file)
|
11
|
-
@generic_file = generic_file
|
12
|
-
end
|
13
|
-
|
14
|
-
##
|
15
|
-
# Extract full text from the content using Solr's extract handler.
|
16
|
-
# This will extract text from the file uploaded to generic_file.
|
17
|
-
# The file uploaded to @generic_file can be accessed via :original_file.
|
18
|
-
#
|
19
|
-
# @return [String] The extracted text
|
20
|
-
def extract
|
21
|
-
JSON.parse(fetch)[''].rstrip
|
22
|
-
rescue Hydra::Works::FullTextExtractionError => e
|
23
|
-
raise e
|
24
|
-
rescue => e
|
25
|
-
raise Hydra::Works::FullTextExtractionError.new, "Error extracting content from #{id}: #{e.inspect}"
|
26
|
-
end
|
27
|
-
|
28
|
-
# send the request to the extract service and return the response if it was successful.
|
29
|
-
# TODO: this pulls the whole file into memory. We should stream it from Fedora instead
|
30
|
-
# @return [String] the result of calling the extract service
|
31
|
-
def fetch
|
32
|
-
req = Net::HTTP.new(uri.host, uri.port)
|
33
|
-
resp = req.post(uri.to_s, original_file.content, request_headers)
|
34
|
-
raise Hydra::Works::FullTextExtractionError.new, "Solr Extract service was unsuccessful. '#{uri}' returned code #{resp.code} for #{id}\n#{resp.body}" unless resp.code == '200'
|
35
|
-
original_file.content.rewind if original_file.content.respond_to?(:rewind)
|
36
|
-
|
37
|
-
resp.body
|
38
|
-
end
|
39
|
-
|
40
|
-
# @return [Hash] the request headers to send to the Solr extract service
|
41
|
-
def request_headers
|
42
|
-
{ Faraday::Request::UrlEncoded::CONTENT_TYPE => "#{original_file.mime_type};charset=utf-8",
|
43
|
-
Faraday::Adapter::CONTENT_LENGTH => original_file.size.to_s }
|
44
|
-
end
|
45
|
-
|
46
|
-
# @returns [URI] path to the extract service
|
47
|
-
def uri
|
48
|
-
@uri ||= URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def connection_url
|
54
|
-
ActiveFedora.solr_config[:url]
|
55
|
-
end
|
56
|
-
end
|
57
|
-
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
module Hydra::Works
|
2
|
-
class GenerateThumbnail
|
3
|
-
def self.call(object, content: :original_file)
|
4
|
-
fail ArgumentError, "object has no content at #{content} from which to generate a thumbnail" if object.send(content).nil?
|
5
|
-
|
6
|
-
# Always replace the thumbnail with whatever is from the original file
|
7
|
-
object.build_thumbnail if object.thumbnail.nil?
|
8
|
-
|
9
|
-
object.create_derivatives
|
10
|
-
object
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
@@ -1,20 +0,0 @@
|
|
1
|
-
require 'hydra/derivatives'
|
2
|
-
|
3
|
-
module Hydra::Works
|
4
|
-
class PersistDerivative < Hydra::Derivatives::PersistOutputFileService
|
5
|
-
##
|
6
|
-
# Persists a derivative to a GenericFile
|
7
|
-
# This Service conforms to the signature of `Hydra::Derivatives::PersistOutputFileService`.
|
8
|
-
# The purpose of this Service is for use as an alternative to the default Hydra::Derivatives::PersistOutputFileService. It's necessary because the default behavior in Hydra::Derivatives assumes that you're using LDP Basic Containment. Hydra::Works::GenericFiles use IndirectContainment. This Service handles that case.
|
9
|
-
# This service will always update existing and does not do versioning of persisted files.
|
10
|
-
#
|
11
|
-
# @param [Hydra::Works::GenericFile::Base] object the file will be added to
|
12
|
-
# @param [Hydra::Derivatives::IoDecorator] file the derivative filestream
|
13
|
-
# @param [String] extract file type symbol (e.g. :thumbnail) from Hydra::Derivatives created destination_name
|
14
|
-
|
15
|
-
def self.call(object, file, destination_name)
|
16
|
-
type = destination_name.gsub(/^original_file_/, '').to_sym
|
17
|
-
Hydra::Works::AddFileToGenericFile.call(object, file, type, update_existing: true, versioning: false)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Hydra::Works::BlockChildObjects do
|
4
|
-
subject { Hydra::Works::GenericFile::Base.new }
|
5
|
-
|
6
|
-
describe '#objects=?' do
|
7
|
-
it 'raises an error' do
|
8
|
-
expect { subject.objects = [] }.to raise_error(StandardError, /method `objects=' not allowed for #<Hydra::Works::GenericFile::Base.*/)
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
describe '#objects' do
|
13
|
-
it 'raises an error' do
|
14
|
-
expect { subject.objects }.to raise_error(StandardError, /method `objects' not allowed for #<Hydra::Works::GenericFile::Base.*/)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
@@ -1,32 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Hydra::Works::GenericFile::VersionedContent do
|
4
|
-
let(:generic_file) { Hydra::Works::GenericFile::Base.new }
|
5
|
-
before do
|
6
|
-
Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'sample-file.pdf')))
|
7
|
-
Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'updated-file.txt')))
|
8
|
-
end
|
9
|
-
|
10
|
-
describe 'content_versions' do
|
11
|
-
subject { generic_file.content_versions }
|
12
|
-
it 'lists all of the versions of original_file' do
|
13
|
-
expect(subject.count).to eq(2)
|
14
|
-
expect(subject.map(&:uri)).to eq(generic_file.original_file.versions.all.map(&:uri))
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
describe 'latest_content_version' do
|
19
|
-
subject { generic_file.latest_content_version }
|
20
|
-
it 'returns the most recent version entry for original_file' do
|
21
|
-
# Can't use a simple equivalence because they are actually different ResourceVersion objects
|
22
|
-
expect(subject.uri).to eq(generic_file.original_file.versions.last.uri)
|
23
|
-
expect(subject.label).to eq(generic_file.original_file.versions.last.label)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
describe 'current_content_version_uri' do
|
28
|
-
it 'returns the URI of the most recent version of original_file' do
|
29
|
-
expect(generic_file.current_content_version_uri).to eq(generic_file.original_file.versions.last.uri)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
@@ -1,12 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Hydra::Works::GenericFileBehavior do
|
4
|
-
class IncludesGenericFileBehavior < ActiveFedora::Base
|
5
|
-
include Hydra::Works::GenericFileBehavior
|
6
|
-
end
|
7
|
-
subject { IncludesGenericFileBehavior.new }
|
8
|
-
|
9
|
-
it 'ensures that objects will be recognized as generic_files' do
|
10
|
-
expect(subject).to be_works_generic_file
|
11
|
-
end
|
12
|
-
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Hydra::Works::FullTextExtractionService do
|
4
|
-
let(:generic_file) { Hydra::Works::GenericFile::Base.new }
|
5
|
-
|
6
|
-
describe 'integration test' do
|
7
|
-
before do
|
8
|
-
Hydra::Works::UploadFileToGenericFile.call(generic_file, File.open(File.join(fixture_path, 'sample-file.pdf')))
|
9
|
-
end
|
10
|
-
subject { described_class.run(generic_file) }
|
11
|
-
it 'extracts fulltext and stores the results' do
|
12
|
-
expect(subject).to include('This is some original content')
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
describe "run" do
|
17
|
-
let(:generic_file) { double(id: '123') }
|
18
|
-
subject { described_class.run(generic_file) }
|
19
|
-
|
20
|
-
context "when it is successful" do
|
21
|
-
before do
|
22
|
-
allow_any_instance_of(described_class).to receive(:fetch).and_return('{"":"one two three"}')
|
23
|
-
end
|
24
|
-
it { is_expected.to eq 'one two three' }
|
25
|
-
end
|
26
|
-
|
27
|
-
context "when solr raises an error" do
|
28
|
-
before do
|
29
|
-
allow_any_instance_of(described_class).to receive(:fetch).and_raise(Hydra::Works::FullTextExtractionError.new, "Solr failed")
|
30
|
-
end
|
31
|
-
it "raises an error" do
|
32
|
-
expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, 'Solr failed'
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
context "network error" do
|
37
|
-
before do
|
38
|
-
allow_any_instance_of(described_class).to receive(:fetch).and_raise(Errno::ECONNRESET)
|
39
|
-
end
|
40
|
-
it "raises an error" do
|
41
|
-
expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, 'Error extracting content from 123: #<Errno::ECONNRESET: Connection reset by peer>'
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
describe "fetch" do
|
47
|
-
let(:generic_file) { double('generic file', id: '123', original_file: original) }
|
48
|
-
let(:original) { double(content: content, size: 13, mime_type: 'text/plain') }
|
49
|
-
let(:service) { described_class.new(generic_file) }
|
50
|
-
subject { service.fetch }
|
51
|
-
let(:request) { double }
|
52
|
-
let(:response_body) { 'returned by Solr' }
|
53
|
-
let(:resp) { double(code: '200', body: response_body) }
|
54
|
-
let(:uri) { URI('http://example.com:99/solr/update') }
|
55
|
-
let(:content) { 'file contents' }
|
56
|
-
|
57
|
-
before do
|
58
|
-
allow(service).to receive(:uri).and_return(URI('http://example.com:99/solr/update'))
|
59
|
-
allow(Net::HTTP).to receive(:new).with('example.com', 99).and_return(request)
|
60
|
-
end
|
61
|
-
|
62
|
-
context "that is successful" do
|
63
|
-
let(:resp) { double(code: '200', body: response_body) }
|
64
|
-
it "calls the extraction service" do
|
65
|
-
expect(request).to receive(:post).with('http://example.com:99/solr/update', content, "Content-Type" => "text/plain;charset=utf-8", "Content-Length" => "13").and_return(resp)
|
66
|
-
expect(subject).to eq response_body
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
context "that fails" do
|
71
|
-
let(:resp) { double(code: '500', body: response_body) }
|
72
|
-
it "raises an error" do
|
73
|
-
expect(request).to receive(:post).with('http://example.com:99/solr/update', content, "Content-Type" => "text/plain;charset=utf-8", "Content-Length" => "13").and_return(resp)
|
74
|
-
expect { subject }.to raise_error Hydra::Works::FullTextExtractionError, "Solr Extract service was unsuccessful. 'http://example.com:99/solr/update' returned code 500 for 123\nreturned by Solr"
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
describe "uri" do
|
80
|
-
let(:generic_file) { double }
|
81
|
-
let(:service) { described_class.new(generic_file) }
|
82
|
-
subject { service.uri }
|
83
|
-
|
84
|
-
it "points at the extraction service" do
|
85
|
-
expect(subject).to be_kind_of URI
|
86
|
-
expect(subject.to_s).to end_with '/update/extract?extractOnly=true&wt=json&extractFormat=text'
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Hydra::Works::GenerateThumbnail do
|
4
|
-
context 'when the object has no original file' do
|
5
|
-
let(:error_message) { 'object has no content at original_file from which to generate a thumbnail' }
|
6
|
-
let(:object) { double('object', original_file: nil) }
|
7
|
-
it 'raises an error' do
|
8
|
-
expect(-> { described_class.call(object) }).to raise_error(ArgumentError, error_message)
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
context 'when the object has no content at specified location' do
|
13
|
-
let(:error_message) { 'object has no content at my_location from which to generate a thumbnail' }
|
14
|
-
let(:object) { double('object', my_location: nil) }
|
15
|
-
it 'raises an error' do
|
16
|
-
expect(-> { described_class.call(object, content: :my_location) }).to raise_error(ArgumentError, error_message)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|