hydra-file_characterization 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hydra/file_characterization/characterizer.rb +36 -24
- data/lib/hydra/file_characterization/characterizers/fits.rb +9 -3
- data/lib/hydra/file_characterization/version.rb +1 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +8 -5
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +32 -22
- data/spec/lib/hydra/file_characterization_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -1
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34fca572cbda0f223170f1a23ae8cd43dcd1b6f0
|
4
|
+
data.tar.gz: a13e96336fa3a2327d92f1e77938c1bf1e3510a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d65d9d4e65cc7275859bcf6fe1798f9e854bd40c31a966be4846de5717cb1f0b19de1cbe2bfbdfb8d4e0cae134adf5e7f4a9d7c698f2f6e64b49175f355c359e
|
7
|
+
data.tar.gz: 5beff9b3009b44f25dd362137befa286673b92dfdc36fdb9f63cd69c482773ee8f3cb998772ac66a0f50a0873bc51decfc147aa7eff8f889354a3bc5e51a974b
|
@@ -19,11 +19,7 @@ module Hydra::FileCharacterization
|
|
19
19
|
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
20
|
end
|
21
21
|
|
22
|
-
|
23
|
-
tool_path.call(filename)
|
24
|
-
else
|
25
|
-
internal_call
|
26
|
-
end
|
22
|
+
post_process(output)
|
27
23
|
end
|
28
24
|
|
29
25
|
def tool_path
|
@@ -32,27 +28,43 @@ module Hydra::FileCharacterization
|
|
32
28
|
|
33
29
|
protected
|
34
30
|
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
# Override this method if you want your processor to mutate the
|
32
|
+
# raw output
|
33
|
+
def post_process(raw_output)
|
34
|
+
raw_output
|
35
|
+
end
|
38
36
|
|
39
|
-
|
40
|
-
|
41
|
-
begin
|
42
|
-
out = stdout.read
|
43
|
-
err = stderr.read
|
44
|
-
exit_status = wait_thr.value
|
45
|
-
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
46
|
-
out
|
47
|
-
ensure
|
48
|
-
stdin.close
|
49
|
-
stdout.close
|
50
|
-
stderr.close
|
37
|
+
def convention_based_tool_name
|
38
|
+
self.class.name.split("::").last.downcase
|
51
39
|
end
|
52
|
-
end
|
53
40
|
|
54
|
-
|
55
|
-
|
56
|
-
|
41
|
+
def internal_call
|
42
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
43
|
+
begin
|
44
|
+
out = stdout.read
|
45
|
+
err = stderr.read
|
46
|
+
exit_status = wait_thr.value
|
47
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
48
|
+
out
|
49
|
+
ensure
|
50
|
+
stdin.close
|
51
|
+
stdout.close
|
52
|
+
stderr.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def command
|
57
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
58
|
+
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
def output
|
63
|
+
if tool_path.respond_to?(:call)
|
64
|
+
tool_path.call(filename)
|
65
|
+
else
|
66
|
+
internal_call
|
67
|
+
end
|
68
|
+
end
|
57
69
|
end
|
58
70
|
end
|
@@ -4,9 +4,15 @@ module Hydra::FileCharacterization::Characterizers
|
|
4
4
|
class Fits < Hydra::FileCharacterization::Characterizer
|
5
5
|
|
6
6
|
protected
|
7
|
-
def command
|
8
|
-
"#{tool_path} -i \"#{filename}\""
|
9
|
-
end
|
10
7
|
|
8
|
+
def command
|
9
|
+
"#{tool_path} -i \"#{filename}\""
|
10
|
+
end
|
11
|
+
|
12
|
+
# Remove any residual non-XML from JHOVE
|
13
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
14
|
+
def post_process(raw_output)
|
15
|
+
raw_output.sub(/^READBOX seen=true\n/, '')
|
16
|
+
end
|
11
17
|
end
|
12
18
|
end
|
@@ -6,7 +6,8 @@ module Hydra::FileCharacterization
|
|
6
6
|
let(:instance_tool_path) { nil }
|
7
7
|
let(:class_tool_path) { nil }
|
8
8
|
|
9
|
-
|
9
|
+
let(:characterizer) { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
|
10
|
+
subject { characterizer }
|
10
11
|
around(:each) do |example|
|
11
12
|
Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
|
12
13
|
example.run
|
@@ -30,23 +31,25 @@ module Hydra::FileCharacterization
|
|
30
31
|
end
|
31
32
|
|
32
33
|
context 'tool_path' do
|
34
|
+
subject { characterizer.tool_path }
|
35
|
+
|
33
36
|
context 'with custom instance tool_path' do
|
34
37
|
let(:instance_tool_path) { '/arbitrary/path' }
|
35
38
|
let(:class_tool_path) { '/a_different/path' }
|
36
39
|
|
37
|
-
|
40
|
+
it { is_expected.to eq instance_tool_path }
|
38
41
|
end
|
39
42
|
|
40
43
|
context 'with custom class tool_path' do
|
41
44
|
let(:instance_tool_path) { nil }
|
42
45
|
let(:class_tool_path) { '/a_different/path' }
|
43
46
|
|
44
|
-
|
47
|
+
it { is_expected.to eq class_tool_path }
|
45
48
|
end
|
46
49
|
|
47
50
|
context 'without a specified tool_path' do
|
48
|
-
|
51
|
+
it { is_expected.to eq 'characterizer' }
|
49
52
|
end
|
50
53
|
end
|
51
54
|
end
|
52
|
-
end
|
55
|
+
end
|
@@ -2,37 +2,47 @@ require 'spec_helper'
|
|
2
2
|
require 'hydra/file_characterization/characterizers/fits'
|
3
3
|
|
4
4
|
module Hydra::FileCharacterization::Characterizers
|
5
|
-
|
6
5
|
describe Fits do
|
6
|
+
let(:fits) { Fits.new(filename) }
|
7
7
|
|
8
|
-
|
8
|
+
describe "#call" do
|
9
|
+
subject { fits.call }
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
|
11
|
+
context 'validfile' do
|
12
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
13
|
+
it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
|
14
14
|
end
|
15
|
-
end
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
context 'invalidFile' do
|
17
|
+
let(:filename) { fixture_file('nofile.pdf') }
|
18
|
+
it "raises an error" do
|
19
|
+
expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
20
|
+
end
|
21
21
|
end
|
22
|
-
end
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
|
23
|
+
context 'corruptFile' do
|
24
|
+
let(:filename) { fixture_file('brendan_broken.dxxd') }
|
25
|
+
it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
|
28
26
|
end
|
29
|
-
end
|
30
27
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
context 'zip file should be characterized not its contents' do
|
29
|
+
let(:filename) { fixture_file('archive.zip') }
|
30
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
end
|
35
32
|
|
33
|
+
context 'when JHOVE adds non-xml' do
|
34
|
+
# https://github.com/harvard-lts/fits/issues/20
|
35
|
+
before do
|
36
|
+
allow(fits).to receive(:internal_call).and_return(
|
37
|
+
'READBOX seen=true
|
38
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
39
|
+
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
40
|
+
<identification/></fits>')
|
41
|
+
end
|
42
|
+
|
43
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
44
|
+
it { is_expected.not_to include('READBOX') }
|
45
|
+
end
|
46
|
+
end
|
36
47
|
end
|
37
|
-
|
38
48
|
end
|
@@ -14,7 +14,7 @@ module Hydra
|
|
14
14
|
|
15
15
|
describe 'for fits' do
|
16
16
|
let(:tool_names) { [:fits] }
|
17
|
-
it {
|
17
|
+
it { is_expected.to match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
|
18
18
|
end
|
19
19
|
|
20
20
|
describe 'with configured path' do
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
@@ -11,62 +11,62 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-08-31 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 3.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 3.0.0
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: rspec
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
|
-
- -
|
34
|
+
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
36
|
version: '0'
|
37
37
|
type: :development
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
|
-
- -
|
41
|
+
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
43
|
version: '0'
|
44
44
|
- !ruby/object:Gem::Dependency
|
45
45
|
name: guard
|
46
46
|
requirement: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
|
-
- -
|
48
|
+
- - ">="
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
51
|
type: :development
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
|
-
- -
|
55
|
+
- - ">="
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: guard-rspec
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|
61
61
|
requirements:
|
62
|
-
- -
|
62
|
+
- - ">="
|
63
63
|
- !ruby/object:Gem::Version
|
64
64
|
version: '0'
|
65
65
|
type: :development
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
|
-
- -
|
69
|
+
- - ">="
|
70
70
|
- !ruby/object:Gem::Version
|
71
71
|
version: '0'
|
72
72
|
description: To provide a wrapper for file characterization
|
@@ -79,8 +79,8 @@ executables: []
|
|
79
79
|
extensions: []
|
80
80
|
extra_rdoc_files: []
|
81
81
|
files:
|
82
|
-
- .gitignore
|
83
|
-
- .rspec
|
82
|
+
- ".gitignore"
|
83
|
+
- ".rspec"
|
84
84
|
- CONTRIBUTING.md
|
85
85
|
- Gemfile
|
86
86
|
- Guardfile
|
@@ -120,17 +120,17 @@ require_paths:
|
|
120
120
|
- lib
|
121
121
|
required_ruby_version: !ruby/object:Gem::Requirement
|
122
122
|
requirements:
|
123
|
-
- -
|
123
|
+
- - ">="
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0'
|
126
126
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
127
|
requirements:
|
128
|
-
- -
|
128
|
+
- - ">="
|
129
129
|
- !ruby/object:Gem::Version
|
130
130
|
version: '0'
|
131
131
|
requirements: []
|
132
132
|
rubyforge_project:
|
133
|
-
rubygems_version: 2.
|
133
|
+
rubygems_version: 2.4.5
|
134
134
|
signing_key:
|
135
135
|
specification_version: 4
|
136
136
|
summary: To provide a wrapper for file characterization
|