hydra-file_characterization 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 998550a6f8a8090fa8e735b3d863f8b8da45fa26
4
- data.tar.gz: aeedc6fcecc36f7ce17e07dcc4479709971ca30e
3
+ metadata.gz: 34fca572cbda0f223170f1a23ae8cd43dcd1b6f0
4
+ data.tar.gz: a13e96336fa3a2327d92f1e77938c1bf1e3510a4
5
5
  SHA512:
6
- metadata.gz: 42f80b2cdde8ff05833dee837dadba37770fd27a91ddefcdd6c1b90563b05c9d737f7bd0274a4dfe7a3ef05b8c0ab057db626ea72cadc3a27d4d8036dabbc217
7
- data.tar.gz: e9bc81ce59caf0702fb8d762d4881341635793dadc6ad46e4dae08464bc8f0ea89ec1bb0c6bd1cf3c5ea52aca6f820415a18ca8f97a741d98014733702d3e5fc
6
+ metadata.gz: d65d9d4e65cc7275859bcf6fe1798f9e854bd40c31a966be4846de5717cb1f0b19de1cbe2bfbdfb8d4e0cae134adf5e7f4a9d7c698f2f6e64b49175f355c359e
7
+ data.tar.gz: 5beff9b3009b44f25dd362137befa286673b92dfdc36fdb9f63cd69c482773ee8f3cb998772ac66a0f50a0873bc51decfc147aa7eff8f889354a3bc5e51a974b
@@ -19,11 +19,7 @@ module Hydra::FileCharacterization
19
19
  raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
20
20
  end
21
21
 
22
- if tool_path.respond_to?(:call)
23
- tool_path.call(filename)
24
- else
25
- internal_call
26
- end
22
+ post_process(output)
27
23
  end
28
24
 
29
25
  def tool_path
@@ -32,27 +28,43 @@ module Hydra::FileCharacterization
32
28
 
33
29
  protected
34
30
 
35
- def convention_based_tool_name
36
- self.class.name.split("::").last.downcase
37
- end
31
+ # Override this method if you want your processor to mutate the
32
+ # raw output
33
+ def post_process(raw_output)
34
+ raw_output
35
+ end
38
36
 
39
- def internal_call
40
- stdin, stdout, stderr, wait_thr = popen3(command)
41
- begin
42
- out = stdout.read
43
- err = stderr.read
44
- exit_status = wait_thr.value
45
- raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
46
- out
47
- ensure
48
- stdin.close
49
- stdout.close
50
- stderr.close
37
+ def convention_based_tool_name
38
+ self.class.name.split("::").last.downcase
51
39
  end
52
- end
53
40
 
54
- def command
55
- raise NotImplementedError, "Method #command should be overriden in child classes"
56
- end
41
+ def internal_call
42
+ stdin, stdout, stderr, wait_thr = popen3(command)
43
+ begin
44
+ out = stdout.read
45
+ err = stderr.read
46
+ exit_status = wait_thr.value
47
+ raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
48
+ out
49
+ ensure
50
+ stdin.close
51
+ stdout.close
52
+ stderr.close
53
+ end
54
+ end
55
+
56
+ def command
57
+ raise NotImplementedError, "Method #command should be overriden in child classes"
58
+ end
59
+
60
+ private
61
+
62
+ def output
63
+ if tool_path.respond_to?(:call)
64
+ tool_path.call(filename)
65
+ else
66
+ internal_call
67
+ end
68
+ end
57
69
  end
58
70
  end
@@ -4,9 +4,15 @@ module Hydra::FileCharacterization::Characterizers
4
4
  class Fits < Hydra::FileCharacterization::Characterizer
5
5
 
6
6
  protected
7
- def command
8
- "#{tool_path} -i \"#{filename}\""
9
- end
10
7
 
8
+ def command
9
+ "#{tool_path} -i \"#{filename}\""
10
+ end
11
+
12
+ # Remove any residual non-XML from JHOVE
13
+ # See: https://github.com/harvard-lts/fits/issues/20
14
+ def post_process(raw_output)
15
+ raw_output.sub(/^READBOX seen=true\n/, '')
16
+ end
11
17
  end
12
18
  end
@@ -1,5 +1,5 @@
1
1
  module Hydra
2
2
  module FileCharacterization
3
- VERSION = "0.3.1"
3
+ VERSION = "0.3.2"
4
4
  end
5
5
  end
@@ -6,7 +6,8 @@ module Hydra::FileCharacterization
6
6
  let(:instance_tool_path) { nil }
7
7
  let(:class_tool_path) { nil }
8
8
 
9
- subject { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
9
+ let(:characterizer) { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
10
+ subject { characterizer }
10
11
  around(:each) do |example|
11
12
  Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
12
13
  example.run
@@ -30,23 +31,25 @@ module Hydra::FileCharacterization
30
31
  end
31
32
 
32
33
  context 'tool_path' do
34
+ subject { characterizer.tool_path }
35
+
33
36
  context 'with custom instance tool_path' do
34
37
  let(:instance_tool_path) { '/arbitrary/path' }
35
38
  let(:class_tool_path) { '/a_different/path' }
36
39
 
37
- its(:tool_path) { should eq instance_tool_path}
40
+ it { is_expected.to eq instance_tool_path }
38
41
  end
39
42
 
40
43
  context 'with custom class tool_path' do
41
44
  let(:instance_tool_path) { nil }
42
45
  let(:class_tool_path) { '/a_different/path' }
43
46
 
44
- its(:tool_path) { should eq class_tool_path}
47
+ it { is_expected.to eq class_tool_path }
45
48
  end
46
49
 
47
50
  context 'without a specified tool_path' do
48
- its(:tool_path) { should eq 'characterizer' }
51
+ it { is_expected.to eq 'characterizer' }
49
52
  end
50
53
  end
51
54
  end
52
- end
55
+ end
@@ -2,37 +2,47 @@ require 'spec_helper'
2
2
  require 'hydra/file_characterization/characterizers/fits'
3
3
 
4
4
  module Hydra::FileCharacterization::Characterizers
5
-
6
5
  describe Fits do
6
+ let(:fits) { Fits.new(filename) }
7
7
 
8
- subject { Fits.new(filename) }
8
+ describe "#call" do
9
+ subject { fits.call }
9
10
 
10
- describe 'validfile' do
11
- let(:filename) { fixture_file('brendan_behan.jpeg') }
12
- it '#call' do
13
- expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
11
+ context 'validfile' do
12
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
13
+ it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
14
14
  end
15
- end
16
15
 
17
- describe 'invalidFile' do
18
- let(:filename) { fixture_file('nofile.pdf') }
19
- it "should raise an error if the path does not contain the file" do
20
- expect {subject.call}.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
16
+ context 'invalidFile' do
17
+ let(:filename) { fixture_file('nofile.pdf') }
18
+ it "raises an error" do
19
+ expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
20
+ end
21
21
  end
22
- end
23
22
 
24
- describe 'corruptFile' do
25
- let(:filename) { fixture_file('brendan_broken.dxxd') }
26
- it "should return xml showing Unknown Binary and application/octet-stream mimetype" do
27
- expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
23
+ context 'corruptFile' do
24
+ let(:filename) { fixture_file('brendan_broken.dxxd') }
25
+ it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
28
26
  end
29
- end
30
27
 
31
- describe 'zip file should be characterized not its contents' do
32
- let(:filename) { fixture_file('archive.zip') }
33
- its(:call) { should include(%(<identity format="ZIP Format" mimetype="application/zip"))}
34
- end
28
+ context 'zip file should be characterized not its contents' do
29
+ let(:filename) { fixture_file('archive.zip') }
30
+ it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
31
+ end
35
32
 
33
+ context 'when JHOVE adds non-xml' do
34
+ # https://github.com/harvard-lts/fits/issues/20
35
+ before do
36
+ allow(fits).to receive(:internal_call).and_return(
37
+ 'READBOX seen=true
38
+ <?xml version="1.0" encoding="UTF-8"?>
39
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
40
+ <identification/></fits>')
41
+ end
42
+
43
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
44
+ it { is_expected.not_to include('READBOX') }
45
+ end
46
+ end
36
47
  end
37
-
38
48
  end
@@ -14,7 +14,7 @@ module Hydra
14
14
 
15
15
  describe 'for fits' do
16
16
  let(:tool_names) { [:fits] }
17
- it { should match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
17
+ it { is_expected.to match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
18
18
  end
19
19
 
20
20
  describe 'with configured path' do
data/spec/spec_helper.rb CHANGED
@@ -18,7 +18,6 @@ end
18
18
 
19
19
  RSpec.configure do |config|
20
20
  config.include SpecSupport
21
- config.treat_symbols_as_metadata_keys_with_true_values = true
22
21
  config.run_all_when_everything_filtered = true
23
22
  config.filter_run :focus
24
23
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hydra-file_characterization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Treacy
@@ -11,62 +11,62 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-10-25 00:00:00.000000000 Z
14
+ date: 2015-08-31 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activesupport
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  requirements:
20
- - - '>='
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 3.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - '>='
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: 3.0.0
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  requirements:
34
- - - '>='
34
+ - - ">="
35
35
  - !ruby/object:Gem::Version
36
36
  version: '0'
37
37
  type: :development
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
- - - '>='
41
+ - - ">="
42
42
  - !ruby/object:Gem::Version
43
43
  version: '0'
44
44
  - !ruby/object:Gem::Dependency
45
45
  name: guard
46
46
  requirement: !ruby/object:Gem::Requirement
47
47
  requirements:
48
- - - '>='
48
+ - - ">="
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  type: :development
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
- - - '>='
55
+ - - ">="
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
60
  requirement: !ruby/object:Gem::Requirement
61
61
  requirements:
62
- - - '>='
62
+ - - ">="
63
63
  - !ruby/object:Gem::Version
64
64
  version: '0'
65
65
  type: :development
66
66
  prerelease: false
67
67
  version_requirements: !ruby/object:Gem::Requirement
68
68
  requirements:
69
- - - '>='
69
+ - - ">="
70
70
  - !ruby/object:Gem::Version
71
71
  version: '0'
72
72
  description: To provide a wrapper for file characterization
@@ -79,8 +79,8 @@ executables: []
79
79
  extensions: []
80
80
  extra_rdoc_files: []
81
81
  files:
82
- - .gitignore
83
- - .rspec
82
+ - ".gitignore"
83
+ - ".rspec"
84
84
  - CONTRIBUTING.md
85
85
  - Gemfile
86
86
  - Guardfile
@@ -120,17 +120,17 @@ require_paths:
120
120
  - lib
121
121
  required_ruby_version: !ruby/object:Gem::Requirement
122
122
  requirements:
123
- - - '>='
123
+ - - ">="
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
126
  required_rubygems_version: !ruby/object:Gem::Requirement
127
127
  requirements:
128
- - - '>='
128
+ - - ">="
129
129
  - !ruby/object:Gem::Version
130
130
  version: '0'
131
131
  requirements: []
132
132
  rubyforge_project:
133
- rubygems_version: 2.0.3
133
+ rubygems_version: 2.4.5
134
134
  signing_key:
135
135
  specification_version: 4
136
136
  summary: To provide a wrapper for file characterization