hydra-file_characterization 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/.rspec +0 -0
- data/.travis.yml +0 -0
- data/CODE_OF_CONDUCT.md +0 -0
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -0
- data/Guardfile +0 -0
- data/LICENSE +0 -0
- data/README.md +0 -0
- data/Rakefile +0 -0
- data/SUPPORT.md +0 -0
- data/hydra-file_characterization.gemspec +0 -0
- data/lib/hydra-file_characterization.rb +0 -0
- data/lib/hydra/file_characterization.rb +0 -0
- data/lib/hydra/file_characterization/characterizer.rb +0 -0
- data/lib/hydra/file_characterization/characterizers.rb +1 -0
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +0 -0
- data/lib/hydra/file_characterization/characterizers/fits.rb +0 -0
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
- data/lib/hydra/file_characterization/exceptions.rb +0 -0
- data/lib/hydra/file_characterization/to_temp_file.rb +0 -0
- data/lib/hydra/file_characterization/version.rb +1 -1
- data/spec/fixtures/archive.zip +0 -0
- data/spec/fixtures/archive.zip.fits.xml +0 -0
- data/spec/fixtures/brendan_behan.jpeg +0 -0
- data/spec/fixtures/brendan_behan.jpeg.fits.xml +0 -0
- data/spec/fixtures/brendan_broken.dxxd +0 -0
- data/spec/fixtures/brendan_broken.dxxd.fits.xml +0 -0
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +68 -0
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization_spec.rb +0 -0
- data/spec/spec_helper.rb +0 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97c20a68f011d01afddf0a42c3fa2802ab17b972c1ca0044910dfce652750f75
|
4
|
+
data.tar.gz: 147fd5b69e6af4de7abf5f768996c2f9442624a78a620b6d9a110b3d0b68b2d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ff697b557ec3abdbfef62f3f45fb646da5c2b17b9aa32f26be6584798d1bf02e8f8401602a5c00a312b9a4da40fc6bffc674379eada7d82284d838963a2d815
|
7
|
+
data.tar.gz: 5396ae39437c6fc528cccbcc3dacff23c5759fee17740b9c3382ca16d769ecd0ef1c4bf8b1ee1b45e28d49ccac93c8ea06c77ebfc11d32390d9a4a45524b009d
|
data/.gitignore
CHANGED
File without changes
|
data/.rspec
CHANGED
File without changes
|
data/.travis.yml
CHANGED
File without changes
|
data/CODE_OF_CONDUCT.md
CHANGED
File without changes
|
data/CONTRIBUTING.md
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/Guardfile
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/SUPPORT.md
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'hydra/file_characterization/exceptions'
|
2
|
+
require 'hydra/file_characterization/characterizer'
|
3
|
+
require 'logger'
require 'shellwords'
|
4
|
+
module Hydra::FileCharacterization::Characterizers
  # Characterizer that uploads the file with curl to the `/examine` endpoint
  # found under the URL given by the FITS_SERVLET_URL environment variable,
  # then strips any non-XML noise that precedes the XML document in the reply.
  class FitsServlet < Hydra::FileCharacterization::Characterizer

    protected

    # Builds the curl command line used to submit the file.
    #
    # Every argument is shell-escaped so that file names containing spaces,
    # quotes or shell metacharacters can neither break the command nor inject
    # extra commands.
    # NOTE(review): assumes the parent class hands this string to a shell
    # (e.g. popen3 with a single string) — confirm against Characterizer.
    # NOTE(review): `-k` disables TLS certificate verification; kept as in the
    # original, but worth revisiting if the servlet is reached over HTTPS.
    # NOTE(review): curl itself treats `;` and `,` inside a -F file name
    # specially; such names are not handled here.
    #
    # @return [String] the command line to execute
    def command
      Shellwords.join(
        ['curl', '-k', '-F', "datafile=@#{filename}", "#{ENV['FITS_SERVLET_URL']}/examine"]
      )
    end

    # Remove any non-XML output that precedes the <?xml> tag
    # See: https://github.com/harvard-lts/fits/issues/20
    #      https://github.com/harvard-lts/fits/issues/40
    #      https://github.com/harvard-lts/fits/issues/46
    #
    # @param raw_output [String, nil] everything the command printed
    # @return [String, nil] the output starting at the `<?xml` declaration; when
    #   no declaration is present the output is returned unchanged (after a
    #   warning) instead of failing with NoMethodError on a nil match
    def post_process(raw_output)
      md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
      if md.nil?
        # Nothing XML-like came back (e.g. curl error text or an empty reply).
        logger.warn "FITS produced no xml output: \"#{raw_output.to_s.chomp}\""
        return raw_output
      end
      logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
      md[2]
    end
  end
end
|
File without changes
|
File without changes
|
data/spec/fixtures/archive.zip
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'hydra/file_characterization/characterizers/fits_servlet'
|
3
|
+
|
4
|
+
module Hydra::FileCharacterization::Characterizers
  describe FitsServlet do
    # Build the class under test. The previous `Fits.new` exercised the
    # command-line Fits characterizer, so FitsServlet itself was never tested.
    let(:fits) { described_class.new(filename) }

    # These examples perform a real characterization of the fixture files and
    # are skipped when ENV['TRAVIS'] is set.
    describe "#call", unless: ENV['TRAVIS'] do
      subject { fits.call }

      context 'validfile' do
        let(:filename) { fixture_file('brendan_behan.jpeg') }
        it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
      end

      context 'invalidFile' do
        let(:filename) { fixture_file('nofile.pdf') }
        it "raises an error" do
          expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
        end
      end

      context 'corruptFile' do
        let(:filename) { fixture_file('brendan_broken.dxxd') }
        it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
      end

      context 'zip file should be characterized not its contents' do
        let(:filename) { fixture_file('archive.zip') }
        it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip")) }
      end
    end

    # The remaining examples stub `internal_call`, so no external tool or
    # service is contacted; they only check the non-XML prefix is stripped
    # and that a warning is logged.
    context 'when JHOVE adds non-xml' do
      # https://github.com/harvard-lts/fits/issues/20
      subject { fits.call }

      before do
        expect(fits.logger).to receive(:warn)
        allow(fits).to receive(:internal_call).and_return(
          'READBOX seen=true
<?xml version="1.0" encoding="UTF-8"?>
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
<identification/></fits>')
      end

      let(:filename) { fixture_file('brendan_behan.jpeg') }
      it { is_expected.not_to include('READBOX') }
    end

    context "when FITS itself adds non-xml" do
      # https://github.com/harvard-lts/fits/issues/46
      subject { fits.call }

      before do
        expect(fits.logger).to receive(:warn)
        allow(fits).to receive(:internal_call).and_return(
          '2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
<?xml version="1.0" encoding="UTF-8"?>
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
<identification/></fits>')
      end

      let(:filename) { fixture_file('brendan_behan.jpeg') }
      it { is_expected.not_to include('FitsToolException') }
    end
  end
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/spec/spec_helper.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2019-05-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- lib/hydra/file_characterization/characterizers.rb
|
126
126
|
- lib/hydra/file_characterization/characterizers/ffprobe.rb
|
127
127
|
- lib/hydra/file_characterization/characterizers/fits.rb
|
128
|
+
- lib/hydra/file_characterization/characterizers/fits_servlet.rb
|
128
129
|
- lib/hydra/file_characterization/exceptions.rb
|
129
130
|
- lib/hydra/file_characterization/to_temp_file.rb
|
130
131
|
- lib/hydra/file_characterization/version.rb
|
@@ -136,6 +137,7 @@ files:
|
|
136
137
|
- spec/fixtures/brendan_broken.dxxd.fits.xml
|
137
138
|
- spec/lib/hydra/file_characterization/characterizer_spec.rb
|
138
139
|
- spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
|
140
|
+
- spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
|
139
141
|
- spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
|
140
142
|
- spec/lib/hydra/file_characterization/characterizers_spec.rb
|
141
143
|
- spec/lib/hydra/file_characterization/to_temp_file_spec.rb
|
@@ -161,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
163
|
version: '0'
|
162
164
|
requirements: []
|
163
165
|
rubyforge_project:
|
164
|
-
rubygems_version: 2.7.
|
166
|
+
rubygems_version: 2.7.9
|
165
167
|
signing_key:
|
166
168
|
specification_version: 4
|
167
169
|
summary: To provide a wrapper for file characterization
|
@@ -174,6 +176,7 @@ test_files:
|
|
174
176
|
- spec/fixtures/brendan_broken.dxxd.fits.xml
|
175
177
|
- spec/lib/hydra/file_characterization/characterizer_spec.rb
|
176
178
|
- spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
|
179
|
+
- spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
|
177
180
|
- spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
|
178
181
|
- spec/lib/hydra/file_characterization/characterizers_spec.rb
|
179
182
|
- spec/lib/hydra/file_characterization/to_temp_file_spec.rb
|