hydra-file_characterization 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/hydra/file_characterization/characterizer.rb +36 -24
- data/lib/hydra/file_characterization/characterizers/fits.rb +9 -3
- data/lib/hydra/file_characterization/version.rb +1 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +8 -5
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +32 -22
- data/spec/lib/hydra/file_characterization_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -1
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34fca572cbda0f223170f1a23ae8cd43dcd1b6f0
|
4
|
+
data.tar.gz: a13e96336fa3a2327d92f1e77938c1bf1e3510a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d65d9d4e65cc7275859bcf6fe1798f9e854bd40c31a966be4846de5717cb1f0b19de1cbe2bfbdfb8d4e0cae134adf5e7f4a9d7c698f2f6e64b49175f355c359e
|
7
|
+
data.tar.gz: 5beff9b3009b44f25dd362137befa286673b92dfdc36fdb9f63cd69c482773ee8f3cb998772ac66a0f50a0873bc51decfc147aa7eff8f889354a3bc5e51a974b
|
@@ -19,11 +19,7 @@ module Hydra::FileCharacterization
|
|
19
19
|
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
20
|
end
|
21
21
|
|
22
|
-
|
23
|
-
tool_path.call(filename)
|
24
|
-
else
|
25
|
-
internal_call
|
26
|
-
end
|
22
|
+
post_process(output)
|
27
23
|
end
|
28
24
|
|
29
25
|
def tool_path
|
@@ -32,27 +28,43 @@ module Hydra::FileCharacterization
|
|
32
28
|
|
33
29
|
protected
|
34
30
|
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
# Override this method if you want your processor to mutate the
|
32
|
+
# raw output
|
33
|
+
def post_process(raw_output)
|
34
|
+
raw_output
|
35
|
+
end
|
38
36
|
|
39
|
-
|
40
|
-
|
41
|
-
begin
|
42
|
-
out = stdout.read
|
43
|
-
err = stderr.read
|
44
|
-
exit_status = wait_thr.value
|
45
|
-
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
46
|
-
out
|
47
|
-
ensure
|
48
|
-
stdin.close
|
49
|
-
stdout.close
|
50
|
-
stderr.close
|
37
|
+
def convention_based_tool_name
|
38
|
+
self.class.name.split("::").last.downcase
|
51
39
|
end
|
52
|
-
end
|
53
40
|
|
54
|
-
|
55
|
-
|
56
|
-
|
41
|
+
def internal_call
|
42
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
43
|
+
begin
|
44
|
+
out = stdout.read
|
45
|
+
err = stderr.read
|
46
|
+
exit_status = wait_thr.value
|
47
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
48
|
+
out
|
49
|
+
ensure
|
50
|
+
stdin.close
|
51
|
+
stdout.close
|
52
|
+
stderr.close
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def command
|
57
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
58
|
+
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
def output
|
63
|
+
if tool_path.respond_to?(:call)
|
64
|
+
tool_path.call(filename)
|
65
|
+
else
|
66
|
+
internal_call
|
67
|
+
end
|
68
|
+
end
|
57
69
|
end
|
58
70
|
end
|
@@ -4,9 +4,15 @@ module Hydra::FileCharacterization::Characterizers
|
|
4
4
|
class Fits < Hydra::FileCharacterization::Characterizer
|
5
5
|
|
6
6
|
protected
|
7
|
-
def command
|
8
|
-
"#{tool_path} -i \"#{filename}\""
|
9
|
-
end
|
10
7
|
|
8
|
+
def command
|
9
|
+
"#{tool_path} -i \"#{filename}\""
|
10
|
+
end
|
11
|
+
|
12
|
+
# Remove any residual non-XML from JHOVE
|
13
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
14
|
+
def post_process(raw_output)
|
15
|
+
raw_output.sub(/^READBOX seen=true\n/, '')
|
16
|
+
end
|
11
17
|
end
|
12
18
|
end
|
@@ -6,7 +6,8 @@ module Hydra::FileCharacterization
|
|
6
6
|
let(:instance_tool_path) { nil }
|
7
7
|
let(:class_tool_path) { nil }
|
8
8
|
|
9
|
-
|
9
|
+
let(:characterizer) { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
|
10
|
+
subject { characterizer }
|
10
11
|
around(:each) do |example|
|
11
12
|
Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
|
12
13
|
example.run
|
@@ -30,23 +31,25 @@ module Hydra::FileCharacterization
|
|
30
31
|
end
|
31
32
|
|
32
33
|
context 'tool_path' do
|
34
|
+
subject { characterizer.tool_path }
|
35
|
+
|
33
36
|
context 'with custom instance tool_path' do
|
34
37
|
let(:instance_tool_path) { '/arbitrary/path' }
|
35
38
|
let(:class_tool_path) { '/a_different/path' }
|
36
39
|
|
37
|
-
|
40
|
+
it { is_expected.to eq instance_tool_path }
|
38
41
|
end
|
39
42
|
|
40
43
|
context 'with custom class tool_path' do
|
41
44
|
let(:instance_tool_path) { nil }
|
42
45
|
let(:class_tool_path) { '/a_different/path' }
|
43
46
|
|
44
|
-
|
47
|
+
it { is_expected.to eq class_tool_path }
|
45
48
|
end
|
46
49
|
|
47
50
|
context 'without a specified tool_path' do
|
48
|
-
|
51
|
+
it { is_expected.to eq 'characterizer' }
|
49
52
|
end
|
50
53
|
end
|
51
54
|
end
|
52
|
-
end
|
55
|
+
end
|
@@ -2,37 +2,47 @@ require 'spec_helper'
|
|
2
2
|
require 'hydra/file_characterization/characterizers/fits'
|
3
3
|
|
4
4
|
module Hydra::FileCharacterization::Characterizers
|
5
|
-
|
6
5
|
describe Fits do
|
6
|
+
let(:fits) { Fits.new(filename) }
|
7
7
|
|
8
|
-
|
8
|
+
describe "#call" do
|
9
|
+
subject { fits.call }
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
|
11
|
+
context 'validfile' do
|
12
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
13
|
+
it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
|
14
14
|
end
|
15
|
-
end
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
context 'invalidFile' do
|
17
|
+
let(:filename) { fixture_file('nofile.pdf') }
|
18
|
+
it "raises an error" do
|
19
|
+
expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
20
|
+
end
|
21
21
|
end
|
22
|
-
end
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
|
23
|
+
context 'corruptFile' do
|
24
|
+
let(:filename) { fixture_file('brendan_broken.dxxd') }
|
25
|
+
it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
|
28
26
|
end
|
29
|
-
end
|
30
27
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
context 'zip file should be characterized not its contents' do
|
29
|
+
let(:filename) { fixture_file('archive.zip') }
|
30
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
end
|
35
32
|
|
33
|
+
context 'when JHOVE adds non-xml' do
|
34
|
+
# https://github.com/harvard-lts/fits/issues/20
|
35
|
+
before do
|
36
|
+
allow(fits).to receive(:internal_call).and_return(
|
37
|
+
'READBOX seen=true
|
38
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
39
|
+
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
40
|
+
<identification/></fits>')
|
41
|
+
end
|
42
|
+
|
43
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
44
|
+
it { is_expected.not_to include('READBOX') }
|
45
|
+
end
|
46
|
+
end
|
36
47
|
end
|
37
|
-
|
38
48
|
end
|
@@ -14,7 +14,7 @@ module Hydra
|
|
14
14
|
|
15
15
|
describe 'for fits' do
|
16
16
|
let(:tool_names) { [:fits] }
|
17
|
-
it {
|
17
|
+
it { is_expected.to match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
|
18
18
|
end
|
19
19
|
|
20
20
|
describe 'with configured path' do
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
@@ -11,62 +11,62 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-08-31 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
requirements:
|
20
|
-
- -
|
20
|
+
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 3.0.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 3.0.0
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: rspec
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
|
-
- -
|
34
|
+
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
36
|
version: '0'
|
37
37
|
type: :development
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
|
-
- -
|
41
|
+
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
43
|
version: '0'
|
44
44
|
- !ruby/object:Gem::Dependency
|
45
45
|
name: guard
|
46
46
|
requirement: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
|
-
- -
|
48
|
+
- - ">="
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
51
|
type: :development
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
|
-
- -
|
55
|
+
- - ">="
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: guard-rspec
|
60
60
|
requirement: !ruby/object:Gem::Requirement
|
61
61
|
requirements:
|
62
|
-
- -
|
62
|
+
- - ">="
|
63
63
|
- !ruby/object:Gem::Version
|
64
64
|
version: '0'
|
65
65
|
type: :development
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
|
-
- -
|
69
|
+
- - ">="
|
70
70
|
- !ruby/object:Gem::Version
|
71
71
|
version: '0'
|
72
72
|
description: To provide a wrapper for file characterization
|
@@ -79,8 +79,8 @@ executables: []
|
|
79
79
|
extensions: []
|
80
80
|
extra_rdoc_files: []
|
81
81
|
files:
|
82
|
-
- .gitignore
|
83
|
-
- .rspec
|
82
|
+
- ".gitignore"
|
83
|
+
- ".rspec"
|
84
84
|
- CONTRIBUTING.md
|
85
85
|
- Gemfile
|
86
86
|
- Guardfile
|
@@ -120,17 +120,17 @@ require_paths:
|
|
120
120
|
- lib
|
121
121
|
required_ruby_version: !ruby/object:Gem::Requirement
|
122
122
|
requirements:
|
123
|
-
- -
|
123
|
+
- - ">="
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0'
|
126
126
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
127
|
requirements:
|
128
|
-
- -
|
128
|
+
- - ">="
|
129
129
|
- !ruby/object:Gem::Version
|
130
130
|
version: '0'
|
131
131
|
requirements: []
|
132
132
|
rubyforge_project:
|
133
|
-
rubygems_version: 2.
|
133
|
+
rubygems_version: 2.4.5
|
134
134
|
signing_key:
|
135
135
|
specification_version: 4
|
136
136
|
summary: To provide a wrapper for file characterization
|