hydra-file_characterization 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/.rspec +0 -0
- data/.travis.yml +0 -0
- data/CODE_OF_CONDUCT.md +0 -0
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -0
- data/Guardfile +0 -0
- data/LICENSE +0 -0
- data/README.md +0 -0
- data/Rakefile +0 -0
- data/SUPPORT.md +0 -0
- data/hydra-file_characterization.gemspec +0 -0
- data/lib/hydra-file_characterization.rb +0 -0
- data/lib/hydra/file_characterization.rb +0 -0
- data/lib/hydra/file_characterization/characterizer.rb +0 -0
- data/lib/hydra/file_characterization/characterizers.rb +1 -0
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +0 -0
- data/lib/hydra/file_characterization/characterizers/fits.rb +0 -0
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
- data/lib/hydra/file_characterization/exceptions.rb +0 -0
- data/lib/hydra/file_characterization/to_temp_file.rb +0 -0
- data/lib/hydra/file_characterization/version.rb +1 -1
- data/spec/fixtures/archive.zip +0 -0
- data/spec/fixtures/archive.zip.fits.xml +0 -0
- data/spec/fixtures/brendan_behan.jpeg +0 -0
- data/spec/fixtures/brendan_behan.jpeg.fits.xml +0 -0
- data/spec/fixtures/brendan_broken.dxxd +0 -0
- data/spec/fixtures/brendan_broken.dxxd.fits.xml +0 -0
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +68 -0
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +0 -0
- data/spec/lib/hydra/file_characterization_spec.rb +0 -0
- data/spec/spec_helper.rb +0 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97c20a68f011d01afddf0a42c3fa2802ab17b972c1ca0044910dfce652750f75
|
4
|
+
data.tar.gz: 147fd5b69e6af4de7abf5f768996c2f9442624a78a620b6d9a110b3d0b68b2d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ff697b557ec3abdbfef62f3f45fb646da5c2b17b9aa32f26be6584798d1bf02e8f8401602a5c00a312b9a4da40fc6bffc674379eada7d82284d838963a2d815
|
7
|
+
data.tar.gz: 5396ae39437c6fc528cccbcc3dacff23c5759fee17740b9c3382ca16d769ecd0ef1c4bf8b1ee1b45e28d49ccac93c8ea06c77ebfc11d32390d9a4a45524b009d
|
data/.gitignore
CHANGED
File without changes
|
data/.rspec
CHANGED
File without changes
|
data/.travis.yml
CHANGED
File without changes
|
data/CODE_OF_CONDUCT.md
CHANGED
File without changes
|
data/CONTRIBUTING.md
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/Guardfile
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/SUPPORT.md
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'hydra/file_characterization/exceptions'
|
2
|
+
require 'hydra/file_characterization/characterizer'
|
3
|
+
require 'logger'
require 'shellwords'
|
4
|
+
module Hydra::FileCharacterization::Characterizers
|
5
|
+
class FitsServlet < Hydra::FileCharacterization::Characterizer
|
6
|
+
|
7
|
+
protected
|
8
|
+
|
9
|
+
# Builds the curl command line that uploads the file to the "/examine"
# endpoint under FITS_SERVLET_URL.
#
# The filename is shell-escaped so that a path containing spaces or shell
# metacharacters stays a single curl argument. For paths made only of
# characters that need no escaping the command is identical to before.
#
# NOTE(review): presumably the parent Characterizer runs this string through
# a shell - that code is not visible here; confirm.
# NOTE(review): -k disables TLS certificate verification; confirm that is
# intended for non-local servlet URLs.
# NOTE(review): when FITS_SERVLET_URL is unset the target becomes "/examine".
#
# @return [String] the command line to execute
def command
  "curl -k -F datafile=@#{Shellwords.escape(filename)} #{ENV['FITS_SERVLET_URL']}/examine"
end
|
12
|
+
|
13
|
+
# Remove any non-XML output that precedes the <?xml> tag
|
14
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
15
|
+
# https://github.com/harvard-lts/fits/issues/40
|
16
|
+
# https://github.com/harvard-lts/fits/issues/46
|
17
|
+
# Drops any text that precedes the <?xml declaration in the raw output and
# logs that text as a warning.
#
# Because the first capture group is greedy, the split happens at the last
# "<?xml" found in the output.
#
# @param raw_output [String] output captured from the FITS call
# @return [String] the output from the XML declaration onward, or the input
#   unchanged when it contains no XML declaration at all
def post_process(raw_output)
  md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)

  # No declaration found: indexing into the nil match used to raise
  # NoMethodError. Log what was received and hand it back untouched.
  unless md
    logger.warn "FITS produced no xml output: \"#{raw_output.to_s.chomp}\""
    return raw_output
  end

  preamble, xml = md.captures
  logger.warn "FITS produced non-xml output: \"#{preamble.chomp}\"" unless preamble.empty?
  xml
end
|
22
|
+
end
|
23
|
+
end
|
File without changes
|
File without changes
|
data/spec/fixtures/archive.zip
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'hydra/file_characterization/characterizers/fits_servlet'
|
3
|
+
|
4
|
+
module Hydra::FileCharacterization::Characterizers
|
5
|
+
describe FitsServlet do
|
6
|
+
# Instantiate the class named in the enclosing describe (FitsServlet) so the
# examples exercise it rather than the Fits characterizer.
let(:fits) { described_class.new(filename) }
|
7
|
+
|
8
|
+
describe "#call", unless: ENV['TRAVIS'] do
|
9
|
+
subject { fits.call }
|
10
|
+
|
11
|
+
context 'validfile' do
|
12
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
13
|
+
it { is_expected.to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg")) }
|
14
|
+
end
|
15
|
+
|
16
|
+
context 'invalidFile' do
|
17
|
+
let(:filename) { fixture_file('nofile.pdf') }
|
18
|
+
it "raises an error" do
|
19
|
+
expect { subject }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context 'corruptFile' do
|
24
|
+
let(:filename) { fixture_file('brendan_broken.dxxd') }
|
25
|
+
it { is_expected.to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream")) }
|
26
|
+
end
|
27
|
+
|
28
|
+
context 'zip file should be characterized not its contents' do
|
29
|
+
let(:filename) { fixture_file('archive.zip') }
|
30
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'when JHOVE adds non-xml' do
|
35
|
+
# https://github.com/harvard-lts/fits/issues/20
|
36
|
+
subject { fits.call }
|
37
|
+
|
38
|
+
before do
|
39
|
+
expect(fits.logger).to receive(:warn)
|
40
|
+
allow(fits).to receive(:internal_call).and_return(
|
41
|
+
'READBOX seen=true
|
42
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
43
|
+
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
44
|
+
<identification/></fits>')
|
45
|
+
end
|
46
|
+
|
47
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
48
|
+
it { is_expected.not_to include('READBOX') }
|
49
|
+
end
|
50
|
+
|
51
|
+
context "when FITS itself adds non-xml" do
|
52
|
+
# https://github.com/harvard-lts/fits/issues/46
|
53
|
+
subject { fits.call }
|
54
|
+
|
55
|
+
before do
|
56
|
+
expect(fits.logger).to receive(:warn)
|
57
|
+
allow(fits).to receive(:internal_call).and_return(
|
58
|
+
'2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
|
59
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
60
|
+
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
61
|
+
<identification/></fits>')
|
62
|
+
end
|
63
|
+
|
64
|
+
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
65
|
+
it { is_expected.not_to include('FitsToolException') }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/spec/spec_helper.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2019-05-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activesupport
|
@@ -125,6 +125,7 @@ files:
|
|
125
125
|
- lib/hydra/file_characterization/characterizers.rb
|
126
126
|
- lib/hydra/file_characterization/characterizers/ffprobe.rb
|
127
127
|
- lib/hydra/file_characterization/characterizers/fits.rb
|
128
|
+
- lib/hydra/file_characterization/characterizers/fits_servlet.rb
|
128
129
|
- lib/hydra/file_characterization/exceptions.rb
|
129
130
|
- lib/hydra/file_characterization/to_temp_file.rb
|
130
131
|
- lib/hydra/file_characterization/version.rb
|
@@ -136,6 +137,7 @@ files:
|
|
136
137
|
- spec/fixtures/brendan_broken.dxxd.fits.xml
|
137
138
|
- spec/lib/hydra/file_characterization/characterizer_spec.rb
|
138
139
|
- spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
|
140
|
+
- spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
|
139
141
|
- spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
|
140
142
|
- spec/lib/hydra/file_characterization/characterizers_spec.rb
|
141
143
|
- spec/lib/hydra/file_characterization/to_temp_file_spec.rb
|
@@ -161,7 +163,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
163
|
version: '0'
|
162
164
|
requirements: []
|
163
165
|
rubyforge_project:
|
164
|
-
rubygems_version: 2.7.
|
166
|
+
rubygems_version: 2.7.9
|
165
167
|
signing_key:
|
166
168
|
specification_version: 4
|
167
169
|
summary: To provide a wrapper for file characterization
|
@@ -174,6 +176,7 @@ test_files:
|
|
174
176
|
- spec/fixtures/brendan_broken.dxxd.fits.xml
|
175
177
|
- spec/lib/hydra/file_characterization/characterizer_spec.rb
|
176
178
|
- spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb
|
179
|
+
- spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb
|
177
180
|
- spec/lib/hydra/file_characterization/characterizers/fits_spec.rb
|
178
181
|
- spec/lib/hydra/file_characterization/characterizers_spec.rb
|
179
182
|
- spec/lib/hydra/file_characterization/to_temp_file_spec.rb
|