hydra-file_characterization 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 998550a6f8a8090fa8e735b3d863f8b8da45fa26
4
- data.tar.gz: aeedc6fcecc36f7ce17e07dcc4479709971ca30e
3
+ metadata.gz: 34fca572cbda0f223170f1a23ae8cd43dcd1b6f0
4
+ data.tar.gz: a13e96336fa3a2327d92f1e77938c1bf1e3510a4
5
5
  SHA512:
6
- metadata.gz: 42f80b2cdde8ff05833dee837dadba37770fd27a91ddefcdd6c1b90563b05c9d737f7bd0274a4dfe7a3ef05b8c0ab057db626ea72cadc3a27d4d8036dabbc217
7
- data.tar.gz: e9bc81ce59caf0702fb8d762d4881341635793dadc6ad46e4dae08464bc8f0ea89ec1bb0c6bd1cf3c5ea52aca6f820415a18ca8f97a741d98014733702d3e5fc
6
+ metadata.gz: d65d9d4e65cc7275859bcf6fe1798f9e854bd40c31a966be4846de5717cb1f0b19de1cbe2bfbdfb8d4e0cae134adf5e7f4a9d7c698f2f6e64b49175f355c359e
7
+ data.tar.gz: 5beff9b3009b44f25dd362137befa286673b92dfdc36fdb9f63cd69c482773ee8f3cb998772ac66a0f50a0873bc51decfc147aa7eff8f889354a3bc5e51a974b
@@ -19,11 +19,7 @@ module Hydra::FileCharacterization
19
19
  raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
20
20
  end
21
21
 
22
- if tool_path.respond_to?(:call)
23
- tool_path.call(filename)
24
- else
25
- internal_call
26
- end
22
+ post_process(output)
27
23
  end
28
24
 
29
25
  def tool_path
@@ -32,27 +28,43 @@ module Hydra::FileCharacterization
32
28
 
33
29
  protected
34
30
 
35
- def convention_based_tool_name
36
- self.class.name.split("::").last.downcase
37
- end
31
+ # Override this method if you want your processor to mutate the
32
+ # raw output
33
+ def post_process(raw_output)
34
+ raw_output
35
+ end
38
36
 
39
- def internal_call
40
- stdin, stdout, stderr, wait_thr = popen3(command)
41
- begin
42
- out = stdout.read
43
- err = stderr.read
44
- exit_status = wait_thr.value
45
- raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
46
- out
47
- ensure
48
- stdin.close
49
- stdout.close
50
- stderr.close
37
+ def convention_based_tool_name
38
+ self.class.name.split("::").last.downcase
51
39
  end
52
- end
53
40
 
54
- def command
55
- raise NotImplementedError, "Method #command should be overriden in child classes"
56
- end
41
+ def internal_call
42
+ stdin, stdout, stderr, wait_thr = popen3(command)
43
+ begin
44
+ out = stdout.read
45
+ err = stderr.read
46
+ exit_status = wait_thr.value
47
+ raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
48
+ out
49
+ ensure
50
+ stdin.close
51
+ stdout.close
52
+ stderr.close
53
+ end
54
+ end
55
+
56
+ def command
57
+ raise NotImplementedError, "Method #command should be overriden in child classes"
58
+ end
59
+
60
+ private
61
+
62
+ def output
63
+ if tool_path.respond_to?(:call)
64
+ tool_path.call(filename)
65
+ else
66
+ internal_call
67
+ end
68
+ end
57
69
  end
58
70
  end
@@ -4,9 +4,15 @@ module Hydra::FileCharacterization::Characterizers
4
4
  class Fits < Hydra::FileCharacterization::Characterizer
5
5
 
6
6
  protected
7
- def command
8
- "#{tool_path} -i \"#{filename}\""
9
- end
10
7
 
8
+ def command
9
+ "#{tool_path} -i \"#{filename}\""
10
+ end
11
+
12
+ # Remove any residual non-XML from JHOVE
13
+ # See: https://github.com/harvard-lts/fits/issues/20
14
+ def post_process(raw_output)
15
+ raw_output.sub(/^READBOX seen=true\n/, '')
16
+ end
11
17
  end
12
18
  end
@@ -1,5 +1,5 @@
1
1
  module Hydra
2
2
  module FileCharacterization
3
- VERSION = "0.3.1"
3
+ VERSION = "0.3.2"
4
4
  end
5
5
  end
@@ -6,7 +6,8 @@ module Hydra::FileCharacterization
6
6
  let(:instance_tool_path) { nil }
7
7
  let(:class_tool_path) { nil }
8
8
 
9
- subject { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
9
+ let(:characterizer) { Hydra::FileCharacterization::Characterizer.new(filename, instance_tool_path) }
10
+ subject { characterizer }
10
11
  around(:each) do |example|
11
12
  Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
12
13
  example.run
@@ -30,23 +31,25 @@ module Hydra::FileCharacterization
30
31
  end
31
32
 
32
33
  context 'tool_path' do
34
+ subject { characterizer.tool_path }
35
+
33
36
  context 'with custom instance tool_path' do
34
37
  let(:instance_tool_path) { '/arbitrary/path' }
35
38
  let(:class_tool_path) { '/a_different/path' }
36
39
 
37
- its(:tool_path) { should eq instance_tool_path}
40
+ it { is_expected.to eq instance_tool_path }
38
41
  end
39
42
 
40
43
  context 'with custom class tool_path' do
41
44
  let(:instance_tool_path) { nil }
42
45
  let(:class_tool_path) { '/a_different/path' }
43
46
 
44
- its(:tool_path) { should eq class_tool_path}
47
+ it { is_expected.to eq class_tool_path }
45
48
  end
46
49
 
47
50
  context 'without a specified tool_path' do
48
- its(:tool_path) { should eq 'characterizer' }
51
+ it { is_expected.to eq 'characterizer' }
49
52
  end
50
53
  end
51
54
  end
52
- end
55
+ end
@@ -2,37 +2,47 @@ require 'spec_helper'
2
2
  require 'hydra/file_characterization/characterizers/fits'
3
3
 
4
4
  module Hydra::FileCharacterization::Characterizers
5
-
6
5
  describe Fits do
6
+ let(:fits) { Fits.new(filename) }
7
7
 
8
- subject { Fits.new(filename) }
8
+ describe "#call" do
9
+ subject { fits.call }
9
10
 
10
- describe 'validfile' do
11
- let(:filename) { fixture_file('brendan_behan.jpeg') }
12
- it '#call' do
13
- expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
11
+ context 'validfile' do
12
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
13
+ it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
14
14
  end
15
- end
16
15
 
17
- describe 'invalidFile' do
18
- let(:filename) { fixture_file('nofile.pdf') }
19
- it "should raise an error if the path does not contain the file" do
20
- expect {subject.call}.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
16
+ context 'invalidFile' do
17
+ let(:filename) { fixture_file('nofile.pdf') }
18
+ it "raises an error" do
19
+ expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
20
+ end
21
21
  end
22
- end
23
22
 
24
- describe 'corruptFile' do
25
- let(:filename) { fixture_file('brendan_broken.dxxd') }
26
- it "should return xml showing Unknown Binary and application/octet-stream mimetype" do
27
- expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
23
+ context 'corruptFile' do
24
+ let(:filename) { fixture_file('brendan_broken.dxxd') }
25
+ it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
28
26
  end
29
- end
30
27
 
31
- describe 'zip file should be characterized not its contents' do
32
- let(:filename) { fixture_file('archive.zip') }
33
- its(:call) { should include(%(<identity format="ZIP Format" mimetype="application/zip"))}
34
- end
28
+ context 'zip file should be characterized not its contents' do
29
+ let(:filename) { fixture_file('archive.zip') }
30
+ it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
31
+ end
35
32
 
33
+ context 'when JHOVE adds non-xml' do
34
+ # https://github.com/harvard-lts/fits/issues/20
35
+ before do
36
+ allow(fits).to receive(:internal_call).and_return(
37
+ 'READBOX seen=true
38
+ <?xml version="1.0" encoding="UTF-8"?>
39
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
40
+ <identification/></fits>')
41
+ end
42
+
43
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
44
+ it { is_expected.not_to include('READBOX') }
45
+ end
46
+ end
36
47
  end
37
-
38
48
  end
@@ -14,7 +14,7 @@ module Hydra
14
14
 
15
15
  describe 'for fits' do
16
16
  let(:tool_names) { [:fits] }
17
- it { should match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
17
+ it { is_expected.to match(/#{'<identity format="Plain text" mimetype="text/plain"'}/) }
18
18
  end
19
19
 
20
20
  describe 'with configured path' do
data/spec/spec_helper.rb CHANGED
@@ -18,7 +18,6 @@ end
18
18
 
19
19
  RSpec.configure do |config|
20
20
  config.include SpecSupport
21
- config.treat_symbols_as_metadata_keys_with_true_values = true
22
21
  config.run_all_when_everything_filtered = true
23
22
  config.filter_run :focus
24
23
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hydra-file_characterization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Treacy
@@ -11,62 +11,62 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-10-25 00:00:00.000000000 Z
14
+ date: 2015-08-31 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activesupport
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  requirements:
20
- - - '>='
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 3.0.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - '>='
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: 3.0.0
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  requirements:
34
- - - '>='
34
+ - - ">="
35
35
  - !ruby/object:Gem::Version
36
36
  version: '0'
37
37
  type: :development
38
38
  prerelease: false
39
39
  version_requirements: !ruby/object:Gem::Requirement
40
40
  requirements:
41
- - - '>='
41
+ - - ">="
42
42
  - !ruby/object:Gem::Version
43
43
  version: '0'
44
44
  - !ruby/object:Gem::Dependency
45
45
  name: guard
46
46
  requirement: !ruby/object:Gem::Requirement
47
47
  requirements:
48
- - - '>='
48
+ - - ">="
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  type: :development
52
52
  prerelease: false
53
53
  version_requirements: !ruby/object:Gem::Requirement
54
54
  requirements:
55
- - - '>='
55
+ - - ">="
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: guard-rspec
60
60
  requirement: !ruby/object:Gem::Requirement
61
61
  requirements:
62
- - - '>='
62
+ - - ">="
63
63
  - !ruby/object:Gem::Version
64
64
  version: '0'
65
65
  type: :development
66
66
  prerelease: false
67
67
  version_requirements: !ruby/object:Gem::Requirement
68
68
  requirements:
69
- - - '>='
69
+ - - ">="
70
70
  - !ruby/object:Gem::Version
71
71
  version: '0'
72
72
  description: To provide a wrapper for file characterization
@@ -79,8 +79,8 @@ executables: []
79
79
  extensions: []
80
80
  extra_rdoc_files: []
81
81
  files:
82
- - .gitignore
83
- - .rspec
82
+ - ".gitignore"
83
+ - ".rspec"
84
84
  - CONTRIBUTING.md
85
85
  - Gemfile
86
86
  - Guardfile
@@ -120,17 +120,17 @@ require_paths:
120
120
  - lib
121
121
  required_ruby_version: !ruby/object:Gem::Requirement
122
122
  requirements:
123
- - - '>='
123
+ - - ">="
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0'
126
126
  required_rubygems_version: !ruby/object:Gem::Requirement
127
127
  requirements:
128
- - - '>='
128
+ - - ">="
129
129
  - !ruby/object:Gem::Version
130
130
  version: '0'
131
131
  requirements: []
132
132
  rubyforge_project:
133
- rubygems_version: 2.0.3
133
+ rubygems_version: 2.4.5
134
134
  signing_key:
135
135
  specification_version: 4
136
136
  summary: To provide a wrapper for file characterization