hydra-file_characterization 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -0
  3. data/.rspec +0 -0
  4. data/.travis.yml +0 -0
  5. data/CODE_OF_CONDUCT.md +0 -0
  6. data/CONTRIBUTING.md +0 -0
  7. data/Gemfile +0 -0
  8. data/Guardfile +0 -0
  9. data/LICENSE +0 -0
  10. data/README.md +0 -0
  11. data/Rakefile +0 -0
  12. data/SUPPORT.md +0 -0
  13. data/hydra-file_characterization.gemspec +0 -0
  14. data/lib/hydra-file_characterization.rb +0 -0
  15. data/lib/hydra/file_characterization.rb +0 -0
  16. data/lib/hydra/file_characterization/characterizer.rb +0 -0
  17. data/lib/hydra/file_characterization/characterizers.rb +1 -0
  18. data/lib/hydra/file_characterization/characterizers/ffprobe.rb +0 -0
  19. data/lib/hydra/file_characterization/characterizers/fits.rb +0 -0
  20. data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
  21. data/lib/hydra/file_characterization/exceptions.rb +0 -0
  22. data/lib/hydra/file_characterization/to_temp_file.rb +0 -0
  23. data/lib/hydra/file_characterization/version.rb +1 -1
  24. data/spec/fixtures/archive.zip +0 -0
  25. data/spec/fixtures/archive.zip.fits.xml +0 -0
  26. data/spec/fixtures/brendan_behan.jpeg +0 -0
  27. data/spec/fixtures/brendan_behan.jpeg.fits.xml +0 -0
  28. data/spec/fixtures/brendan_broken.dxxd +0 -0
  29. data/spec/fixtures/brendan_broken.dxxd.fits.xml +0 -0
  30. data/spec/lib/hydra/file_characterization/characterizer_spec.rb +0 -0
  31. data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +0 -0
  32. data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +68 -0
  33. data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +0 -0
  34. data/spec/lib/hydra/file_characterization/characterizers_spec.rb +0 -0
  35. data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +0 -0
  36. data/spec/lib/hydra/file_characterization_spec.rb +0 -0
  37. data/spec/spec_helper.rb +0 -0
  38. metadata +6 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc80d9f0fda674639f974b581b57939e1b7cd9b32a992b7119f90ea017c883ad
4
- data.tar.gz: a4e3af95b921b20d395b3405f29b3e2bfcc3a15882265f8989338f1fcf9594ae
3
+ metadata.gz: 97c20a68f011d01afddf0a42c3fa2802ab17b972c1ca0044910dfce652750f75
4
+ data.tar.gz: 147fd5b69e6af4de7abf5f768996c2f9442624a78a620b6d9a110b3d0b68b2d1
5
5
  SHA512:
6
- metadata.gz: a6fd8b4b84d9a5fa5e1a9d28c6aba85611690656a7aaee7205ee0cdf30e3f797ebdfaa8952d4a226efdd15ff608f2685bf12e6133d05b3a7f6ed491f1cedad5e
7
- data.tar.gz: 243e3a2d57e41f3e2cb9a71004c4a2ccc8f136115d2008390d3a66570a4c1f0e1dec4ebfb8c490f702cb99ee3b8d25d0dd7ec8febad60463d57b5783152a0f51
6
+ metadata.gz: 5ff697b557ec3abdbfef62f3f45fb646da5c2b17b9aa32f26be6584798d1bf02e8f8401602a5c00a312b9a4da40fc6bffc674379eada7d82284d838963a2d815
7
+ data.tar.gz: 5396ae39437c6fc528cccbcc3dacff23c5759fee17740b9c3382ca16d769ecd0ef1c4bf8b1ee1b45e28d49ccac93c8ea06c77ebfc11d32390d9a4a45524b009d
data/.gitignore CHANGED
File without changes
data/.rspec CHANGED
File without changes
File without changes
File without changes
File without changes
data/Gemfile CHANGED
File without changes
data/Guardfile CHANGED
File without changes
data/LICENSE CHANGED
File without changes
data/README.md CHANGED
File without changes
data/Rakefile CHANGED
File without changes
data/SUPPORT.md CHANGED
File without changes
File without changes
File without changes
File without changes
@@ -29,3 +29,4 @@ end
29
29
 
30
30
  require 'hydra/file_characterization/characterizers/fits'
31
31
  require 'hydra/file_characterization/characterizers/ffprobe'
32
+ require 'hydra/file_characterization/characterizers/fits_servlet'
@@ -0,0 +1,23 @@
1
+ require 'hydra/file_characterization/exceptions'
2
+ require 'hydra/file_characterization/characterizer'
3
+ require 'logger'
4
+ module Hydra::FileCharacterization::Characterizers
5
+ class FitsServlet < Hydra::FileCharacterization::Characterizer
6
+
7
+ protected
8
+
9
+ def command
10
+ "curl -k -F datafile=@#{filename} #{ENV['FITS_SERVLET_URL']}/examine"
11
+ end
12
+
13
+ # Remove any non-XML output that precedes the <?xml> tag
14
+ # See: https://github.com/harvard-lts/fits/issues/20
15
+ # https://github.com/harvard-lts/fits/issues/40
16
+ # https://github.com/harvard-lts/fits/issues/46
17
+ def post_process(raw_output)
18
+ md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
19
+ logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
20
+ md[2]
21
+ end
22
+ end
23
+ end
@@ -1,5 +1,5 @@
1
1
  module Hydra
2
2
  module FileCharacterization
3
- VERSION = "1.0.0"
3
+ VERSION = "1.1.0"
4
4
  end
5
5
  end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+ require 'hydra/file_characterization/characterizers/fits_servlet'
3
+
4
+ module Hydra::FileCharacterization::Characterizers
5
+ describe FitsServlet do
6
+ let(:fits) { Fits.new(filename) }
7
+
8
+ describe "#call", unless: ENV['TRAVIS'] do
9
+ subject { fits.call }
10
+
11
+ context 'validfile' do
12
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
13
+ it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
14
+ end
15
+
16
+ context 'invalidFile' do
17
+ let(:filename) { fixture_file('nofile.pdf') }
18
+ it "raises an error" do
19
+ expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
20
+ end
21
+ end
22
+
23
+ context 'corruptFile' do
24
+ let(:filename) { fixture_file('brendan_broken.dxxd') }
25
+ it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
26
+ end
27
+
28
+ context 'zip file should be characterized not its contents' do
29
+ let(:filename) { fixture_file('archive.zip') }
30
+ it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
31
+ end
32
+ end
33
+
34
+ context 'when JHOVE adds non-xml' do
35
+ # https://github.com/harvard-lts/fits/issues/20
36
+ subject { fits.call }
37
+
38
+ before do
39
+ expect(fits.logger).to receive(:warn)
40
+ allow(fits).to receive(:internal_call).and_return(
41
+ 'READBOX seen=true
42
+ <?xml version="1.0" encoding="UTF-8"?>
43
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
44
+ <identification/></fits>')
45
+ end
46
+
47
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
48
+ it { is_expected.not_to include('READBOX') }
49
+ end
50
+
51
+ context "when FITS itself adds non-xml" do
52
+ # https://github.com/harvard-lts/fits/issues/46
53
+ subject { fits.call }
54
+
55
+ before do
56
+ expect(fits.logger).to receive(:warn)
57
+ allow(fits).to receive(:internal_call).and_return(
58
+ '2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
59
+ <?xml version="1.0" encoding="UTF-8"?>
60
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
61
+ <identification/></fits>')
62
+ end
63
+
64
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
65
+ it { is_expected.not_to include('FitsToolException') }
66
+ end
67
+ end
68
+ end
File without changes
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hydra-file_characterization
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Treacy
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2018-08-28 00:00:00.000000000 Z
14
+ date: 2019-05-01 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activesupport
@@ -125,6 +125,7 @@ files:
125
125
  - lib/hydra/file_characterization/characterizers.rb
126
126
  - lib/hydra/file_characterization/characterizers/ffprobe.rb
127
127
  - lib/hydra/file_characterization/characterizers/fits.rb
128
+ - lib/hydra/file_characterization/characterizers/fits_servlet.rb
128
129
  - lib/hydra/file_characterization/exceptions.rb
129
130
  - lib/hydra/file_characterization/to_temp_file.rb
130
131
  - lib/hydra/file_characterization/version.rb
@@ -136,6 +137,7 @@ files:
136
137
  - spec/fixtures/brendan_broken.dxxd.fits.xml
137
138
  - spec/lib/hydra/file_characterization/characterizer_spec.rb
138
139
  - spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
140
+ - spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
139
141
  - spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
140
142
  - spec/lib/hydra/file_characterization/characterizers_spec.rb
141
143
  - spec/lib/hydra/file_characterization/to_temp_file_spec.rb
@@ -161,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
161
163
  version: '0'
162
164
  requirements: []
163
165
  rubyforge_project:
164
- rubygems_version: 2.7.7
166
+ rubygems_version: 2.7.9
165
167
  signing_key:
166
168
  specification_version: 4
167
169
  summary: To provide a wrapper for file characterization
@@ -174,6 +176,7 @@ test_files:
174
176
  - spec/fixtures/brendan_broken.dxxd.fits.xml
175
177
  - spec/lib/hydra/file_characterization/characterizer_spec.rb
176
178
  - spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
179
+ - spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
177
180
  - spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
178
181
  - spec/lib/hydra/file_characterization/characterizers_spec.rb
179
182
  - spec/lib/hydra/file_characterization/to_temp_file_spec.rb