hydra-file_characterization 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -0
- data/.github_changelog_generator +1 -1
- data/.rubocop.yml +10 -0
- data/.rubocop_todo.yml +92 -0
- data/CHANGELOG.md +11 -2
- data/Gemfile +1 -0
- data/Guardfile +1 -0
- data/Rakefile +1 -0
- data/hydra-file_characterization.gemspec +7 -5
- data/lib/hydra-file_characterization.rb +1 -0
- data/lib/hydra/file_characterization.rb +32 -36
- data/lib/hydra/file_characterization/characterizer.rb +35 -36
- data/lib/hydra/file_characterization/characterizers.rb +4 -3
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +2 -2
- data/lib/hydra/file_characterization/characterizers/fits.rb +13 -13
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +13 -13
- data/lib/hydra/file_characterization/exceptions.rb +1 -2
- data/lib/hydra/file_characterization/to_temp_file.rb +3 -3
- data/lib/hydra/file_characterization/version.rb +2 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +9 -8
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +4 -7
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +6 -3
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +7 -4
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +10 -10
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +2 -4
- data/spec/spec_helper.rb +2 -1
- metadata +21 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d7bfac9adb8dec899a9e598255637841aaeba51754c56ac52f4e6eda93f5d1
|
4
|
+
data.tar.gz: f5cd50a069ada9dede312853101cdb463cbd722db331d418ae18cb7fe2641e6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d478b249740cbf248f0ea4e053541d9169104920040bb2e563018bee9c0b58ac3419e07b2c937f4e4ae6015a6c4be11a712a9a6fd3a1825d7ba0d0528f16a30
|
7
|
+
data.tar.gz: 97d13c1f85ca53511a707d5e5338fcbc72c6405bc85bd99b3beaacd34e4faad735a04362f90a948c03cb1700de9145487b81c96d9fc57488a32647f84378d834
|
data/.circleci/config.yml
CHANGED
data/.github_changelog_generator
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
unreleased=true
|
2
|
-
future-release=1.1.1
|
2
|
+
future-release=1.1.2
|
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2020-06-10 16:20:03 -0400 using RuboCop version 0.85.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: Include.
|
11
|
+
# Include: **/*.gemfile, **/Gemfile, **/gems.rb
|
12
|
+
Bundler/DuplicatedGem:
|
13
|
+
Exclude:
|
14
|
+
- 'Gemfile'
|
15
|
+
|
16
|
+
# Offense count: 1
|
17
|
+
# Cop supports --auto-correct.
|
18
|
+
# Configuration parameters: EnforcedStyleAlignWith, AutoCorrect, Severity.
|
19
|
+
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
20
|
+
Layout/EndAlignment:
|
21
|
+
Exclude:
|
22
|
+
- 'lib/hydra/file_characterization.rb'
|
23
|
+
|
24
|
+
# Offense count: 6
|
25
|
+
# Cop supports --auto-correct.
|
26
|
+
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
27
|
+
# URISchemes: http, https
|
28
|
+
Layout/LineLength:
|
29
|
+
Max: 293
|
30
|
+
|
31
|
+
# Offense count: 5
|
32
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
33
|
+
# ExcludedMethods: refine
|
34
|
+
Metrics/BlockLength:
|
35
|
+
Max: 50
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
39
|
+
Metrics/MethodLength:
|
40
|
+
Max: 15
|
41
|
+
|
42
|
+
# Offense count: 1
|
43
|
+
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
44
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
45
|
+
Naming/FileName:
|
46
|
+
Exclude:
|
47
|
+
- '**/Gemfile'
|
48
|
+
- '**/*.rake'
|
49
|
+
- 'Capfile'
|
50
|
+
- 'config/deploy/*'
|
51
|
+
- 'lib/hydra-file_characterization.rb'
|
52
|
+
|
53
|
+
# Offense count: 2
|
54
|
+
# Configuration parameters: Max.
|
55
|
+
RSpec/ExampleLength:
|
56
|
+
Exclude:
|
57
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
58
|
+
|
59
|
+
# Offense count: 6
|
60
|
+
# Configuration parameters: AssignmentOnly.
|
61
|
+
RSpec/InstanceVariable:
|
62
|
+
Exclude:
|
63
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
64
|
+
|
65
|
+
# Offense count: 4
|
66
|
+
# Configuration parameters: .
|
67
|
+
# SupportedStyles: have_received, receive
|
68
|
+
RSpec/MessageSpies:
|
69
|
+
EnforcedStyle: receive
|
70
|
+
|
71
|
+
# Offense count: 9
|
72
|
+
# Configuration parameters: IgnoreSharedExamples.
|
73
|
+
RSpec/NamedSubject:
|
74
|
+
Exclude:
|
75
|
+
- 'spec/lib/hydra/file_characterization/characterizer_spec.rb'
|
76
|
+
- 'spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb'
|
77
|
+
- 'spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb'
|
78
|
+
- 'spec/lib/hydra/file_characterization/characterizers/fits_spec.rb'
|
79
|
+
- 'spec/lib/hydra/file_characterization/characterizers_spec.rb'
|
80
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
81
|
+
|
82
|
+
# Offense count: 1
|
83
|
+
# Configuration parameters: AllowedChars.
|
84
|
+
Style/AsciiComments:
|
85
|
+
Exclude:
|
86
|
+
- 'lib/hydra/file_characterization.rb'
|
87
|
+
|
88
|
+
# Offense count: 1
|
89
|
+
# Configuration parameters: MinBodyLength.
|
90
|
+
Style/GuardClause:
|
91
|
+
Exclude:
|
92
|
+
- 'lib/hydra/file_characterization/characterizers.rb'
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,16 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
## [1.1.
|
3
|
+
## [1.1.2](https://github.com/samvera/hydra-file_characterization/tree/1.1.2) (2020-06-10)
|
4
4
|
|
5
|
-
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.
|
5
|
+
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.1...1.1.2)
|
6
|
+
|
7
|
+
**Merged pull requests:**
|
8
|
+
|
9
|
+
- Enforce samvera style rules [\#49](https://github.com/samvera/hydra-file_characterization/pull/49) ([bess](https://github.com/bess))
|
10
|
+
|
11
|
+
## [v1.1.1](https://github.com/samvera/hydra-file_characterization/tree/v1.1.1) (2020-06-10)
|
12
|
+
|
13
|
+
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.0...v1.1.1)
|
6
14
|
|
7
15
|
**Closed issues:**
|
8
16
|
|
@@ -12,6 +20,7 @@
|
|
12
20
|
|
13
21
|
**Merged pull requests:**
|
14
22
|
|
23
|
+
- Prep for 1.1.1 release [\#48](https://github.com/samvera/hydra-file_characterization/pull/48) ([bess](https://github.com/bess))
|
15
24
|
- Adding Ruby 2.7.z and Rails 6.y.z releases to the CircleCI build configuration [\#46](https://github.com/samvera/hydra-file_characterization/pull/46) ([jrgriffiniii](https://github.com/jrgriffiniii))
|
16
25
|
- Update CircleCI Ruby and Rails versions [\#43](https://github.com/samvera/hydra-file_characterization/pull/43) ([botimer](https://github.com/botimer))
|
17
26
|
- Updates the CircleCI configuration to test against Rails release 5.1.7 and Ruby releases 2.6.3, 2.5.5, and 2.4.6 [\#42](https://github.com/samvera/hydra-file_characterization/pull/42) ([jrgriffiniii](https://github.com/jrgriffiniii))
|
data/Gemfile
CHANGED
data/Guardfile
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'hydra/file_characterization/version'
|
@@ -12,23 +13,24 @@ Gem::Specification.new do |gem|
|
|
12
13
|
"Sue Richeson",
|
13
14
|
"Rajesh Balekai"
|
14
15
|
]
|
15
|
-
gem.email
|
16
|
+
gem.email = [
|
16
17
|
"jatr@kb.dk",
|
17
18
|
"jeremy.n.friesen@gmail.com",
|
18
19
|
"spr7b@virginia.edu",
|
19
20
|
"rbalekai@gmail.com"
|
20
21
|
]
|
21
|
-
gem.description =
|
22
|
-
gem.summary =
|
22
|
+
gem.description = 'To provide a wrapper for file characterization'
|
23
|
+
gem.summary = 'To provide a wrapper for file characterization'
|
23
24
|
gem.homepage = "https://github.com/projecthydra/hydra-file_characterization"
|
24
25
|
gem.license = "APACHE2"
|
25
26
|
|
26
|
-
gem.files = `git ls-files`.split(
|
27
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
27
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
28
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
28
29
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
29
30
|
gem.require_paths = ["lib"]
|
30
31
|
|
31
32
|
gem.add_dependency "activesupport", ">= 3.0.0"
|
33
|
+
gem.add_development_dependency 'bixby', '~> 3.0.0'
|
32
34
|
gem.add_development_dependency 'coveralls'
|
33
35
|
gem.add_development_dependency 'github_changelog_generator'
|
34
36
|
gem.add_development_dependency "guard"
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require "hydra/file_characterization/version"
|
2
3
|
require "hydra/file_characterization/exceptions"
|
3
4
|
require "hydra/file_characterization/to_temp_file"
|
@@ -6,7 +7,6 @@ require "hydra/file_characterization/characterizers"
|
|
6
7
|
require "active_support/configurable"
|
7
8
|
|
8
9
|
module Hydra
|
9
|
-
|
10
10
|
module_function
|
11
11
|
|
12
12
|
# A convenience method
|
@@ -15,7 +15,6 @@ module Hydra
|
|
15
15
|
end
|
16
16
|
|
17
17
|
module FileCharacterization
|
18
|
-
|
19
18
|
class << self
|
20
19
|
attr_accessor :configuration
|
21
20
|
end
|
@@ -66,7 +65,7 @@ module Hydra
|
|
66
65
|
tool_names = Array(tool_names).flatten.compact
|
67
66
|
custom_paths = {}
|
68
67
|
yield(custom_paths) if block_given?
|
69
|
-
|
68
|
+
|
70
69
|
tool_outputs = run_characterizers(content, filename, tool_names, custom_paths)
|
71
70
|
tool_names.size == 1 ? tool_outputs.first : tool_outputs
|
72
71
|
end
|
@@ -76,50 +75,47 @@ module Hydra
|
|
76
75
|
yield(configuration)
|
77
76
|
end
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
args.shift
|
92
|
-
end
|
93
|
-
tool_names = args
|
94
|
-
return content, filename, tool_names
|
78
|
+
# Break up a list of arguments into two possible lists:
|
79
|
+
# option1: [String] content, [String] filename, [Array] tool_names
|
80
|
+
# option2: [File] content, [Array] tool_names
|
81
|
+
# In the case of option2, derive the filename from the file's path
|
82
|
+
# @return [String, File], [String], [Array]
|
83
|
+
def self.extract_arguments(args)
|
84
|
+
content = args.shift
|
85
|
+
filename = if content.is_a?(File) && !args[0].is_a?(String)
|
86
|
+
File.basename(content.path)
|
87
|
+
else
|
88
|
+
args.shift
|
95
89
|
end
|
90
|
+
tool_names = args
|
91
|
+
[content, filename, tool_names]
|
92
|
+
end
|
96
93
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
end
|
94
|
+
# @param [File, String] content Either an open file or a string. If a string is passed
|
95
|
+
# a temp file will be created
|
96
|
+
# @param [String] filename Used in creating a temp file name
|
97
|
+
# @param [Array<Symbol>] tool_names A list of symbols referencing the characterization tools to run
|
98
|
+
# @param [Hash] custom_paths The paths to the executables of the tool.
|
99
|
+
def self.run_characterizers(content, filename, tool_names, custom_paths)
|
100
|
+
if content.is_a? File
|
101
|
+
run_characterizers_on_file(content, tool_names, custom_paths)
|
102
|
+
else
|
103
|
+
FileCharacterization::ToTempFile.open(filename, content) do |f|
|
104
|
+
run_characterizers_on_file(f, tool_names, custom_paths)
|
109
105
|
end
|
110
106
|
end
|
107
|
+
end
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
109
|
+
def self.run_characterizers_on_file(f, tool_names, custom_paths)
|
110
|
+
tool_names.map do |tool_name|
|
111
|
+
FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
|
116
112
|
end
|
113
|
+
end
|
117
114
|
|
118
115
|
class Configuration
|
119
116
|
def tool_path(tool_name, tool_path)
|
120
117
|
Hydra::FileCharacterization.characterizer(tool_name).tool_path = tool_path
|
121
118
|
end
|
122
119
|
end
|
123
|
-
|
124
120
|
end
|
125
121
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'open3'
|
3
4
|
require 'active_support/core_ext/class/attribute'
|
@@ -15,9 +16,7 @@ module Hydra::FileCharacterization
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def call
|
18
|
-
unless File.
|
19
|
-
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
|
-
end
|
19
|
+
raise Hydra::FileCharacterization::FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)
|
21
20
|
|
22
21
|
post_process(output)
|
23
22
|
end
|
@@ -32,47 +31,47 @@ module Hydra::FileCharacterization
|
|
32
31
|
|
33
32
|
protected
|
34
33
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
# Override this method if you want your processor to mutate the
|
35
|
+
# raw output
|
36
|
+
def post_process(raw_output)
|
37
|
+
raw_output
|
38
|
+
end
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
def convention_based_tool_name
|
41
|
+
self.class.name.split("::").last.downcase
|
42
|
+
end
|
44
43
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
44
|
+
def internal_call
|
45
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
46
|
+
begin
|
47
|
+
out = stdout.read
|
48
|
+
err = stderr.read
|
49
|
+
exit_status = wait_thr.value
|
50
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
51
|
+
out
|
52
|
+
ensure
|
53
|
+
stdin.close
|
54
|
+
stdout.close
|
55
|
+
stderr.close
|
58
56
|
end
|
57
|
+
end
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
59
|
+
def command
|
60
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
61
|
+
end
|
63
62
|
|
64
63
|
private
|
65
64
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
end
|
65
|
+
def output
|
66
|
+
if tool_path.respond_to?(:call)
|
67
|
+
tool_path.call(filename)
|
68
|
+
else
|
69
|
+
internal_call
|
72
70
|
end
|
71
|
+
end
|
73
72
|
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
def activefedora_logger
|
74
|
+
ActiveFedora::Base.logger if defined? ActiveFedora
|
75
|
+
end
|
77
76
|
end
|
78
77
|
end
|
@@ -1,19 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
3
|
module Characterizers
|
3
4
|
end
|
4
5
|
|
5
6
|
module_function
|
7
|
+
|
6
8
|
def characterizer(tool_name)
|
7
9
|
characterizer_name = characterizer_name_from(tool_name)
|
8
10
|
if Characterizers.const_defined?(characterizer_name)
|
9
11
|
Characterizers.const_get(characterizer_name)
|
10
12
|
else
|
11
|
-
raise ToolNotFoundError
|
13
|
+
raise ToolNotFoundError, tool_name
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
15
17
|
def characterizer_name_from(tool_name)
|
16
|
-
tool_name.to_s.gsub(/(?:^|_)([a-z])/) {
|
18
|
+
tool_name.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
|
17
19
|
end
|
18
20
|
|
19
21
|
def characterize_with(tool_name, path_to_file, path_to_tool)
|
@@ -24,7 +26,6 @@ module Hydra::FileCharacterization
|
|
24
26
|
tool_obj.call
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
28
29
|
end
|
29
30
|
|
30
31
|
require 'hydra/file_characterization/characterizers/fits'
|
@@ -1,13 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class Ffprobe < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
|
+
|
8
9
|
def command
|
9
10
|
"#{tool_path} -i \"#{filename}\" -print_format xml -show_streams -v quiet"
|
10
11
|
end
|
11
|
-
|
12
12
|
end
|
13
13
|
end
|
@@ -1,23 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
require 'logger'
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class Fits < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def command
|
10
|
+
"#{tool_path} -i \"#{filename}\""
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# Remove any non-XML output that precedes the <?xml> tag
|
14
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
15
|
+
# https://github.com/harvard-lts/fits/issues/40
|
16
|
+
# https://github.com/harvard-lts/fits/issues/46
|
17
|
+
def post_process(raw_output)
|
18
|
+
md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
|
19
|
+
logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
|
20
|
+
md[2]
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -1,23 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
require 'logger'
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class FitsServlet < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def command
|
10
|
+
"curl -k -F datafile=@#{filename} #{ENV['FITS_SERVLET_URL']}/examine"
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# Remove any non-XML output that precedes the <?xml> tag
|
14
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
15
|
+
# https://github.com/harvard-lts/fits/issues/40
|
16
|
+
# https://github.com/harvard-lts/fits/issues/46
|
17
|
+
def post_process(raw_output)
|
18
|
+
md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
|
19
|
+
logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
|
20
|
+
md[2]
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
|
-
|
3
3
|
class FileNotFoundError < RuntimeError
|
4
4
|
end
|
5
5
|
|
@@ -8,5 +8,4 @@ module Hydra::FileCharacterization
|
|
8
8
|
super("Unable to find Hydra::FileCharacterization tool with name :#{tool_name}")
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
12
11
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'open3'
|
2
3
|
require 'tempfile'
|
3
4
|
|
@@ -16,7 +17,7 @@ module Hydra::FileCharacterization
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def call(data)
|
19
|
-
f = Tempfile.new([File.basename(filename),File.extname(filename)])
|
20
|
+
f = Tempfile.new([File.basename(filename), File.extname(filename)])
|
20
21
|
begin
|
21
22
|
f.binmode
|
22
23
|
if data.respond_to? :read
|
@@ -32,6 +33,5 @@ module Hydra::FileCharacterization
|
|
32
33
|
f.unlink
|
33
34
|
end
|
34
35
|
end
|
35
|
-
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
@@ -1,30 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
module Hydra::FileCharacterization
|
4
5
|
describe Characterizer do
|
6
|
+
subject { characterizer }
|
5
7
|
let(:filename) { __FILE__ }
|
6
8
|
let(:instance_tool_path) { nil }
|
7
9
|
let(:class_tool_path) { nil }
|
8
10
|
|
9
|
-
let(:characterizer) {
|
10
|
-
|
11
|
-
|
12
|
-
Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
|
11
|
+
let(:characterizer) { described_class.new(filename, instance_tool_path) }
|
12
|
+
around do |example|
|
13
|
+
described_class.tool_path = class_tool_path
|
13
14
|
example.run
|
14
|
-
|
15
|
+
described_class.tool_path = nil
|
15
16
|
end
|
16
17
|
|
17
18
|
context 'call' do
|
18
19
|
context 'with missing file' do
|
19
20
|
let(:filename) { '/dev/path/to/bogus/file' }
|
20
|
-
it '
|
21
|
+
it 'raises FileNotFoundError' do
|
21
22
|
expect { subject.call }.to raise_error(FileNotFoundError)
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
25
26
|
context 'with a callable tool path' do
|
26
|
-
let(:class_tool_path) {
|
27
|
-
it '
|
27
|
+
let(:class_tool_path) { ->(filename) { [filename, :output] } }
|
28
|
+
it 'raises FileNotFoundError' do
|
28
29
|
expect(subject.call).to eq [filename, :output]
|
29
30
|
end
|
30
31
|
end
|
@@ -1,19 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/ffprobe'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
|
-
|
6
6
|
describe Ffprobe do
|
7
|
-
|
8
|
-
subject { Ffprobe.new(filename) }
|
7
|
+
subject { described_class.new(filename) }
|
9
8
|
|
10
9
|
describe 'invalidFile' do
|
11
10
|
let(:filename) { fixture_file('nofile.pdf') }
|
12
|
-
it "
|
13
|
-
expect {subject.call}.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
11
|
+
it "raises an error if the path does not contain the file" do
|
12
|
+
expect { subject.call }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
18
|
-
|
19
16
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/fits_servlet'
|
3
4
|
|
@@ -27,7 +28,7 @@ module Hydra::FileCharacterization::Characterizers
|
|
27
28
|
|
28
29
|
context 'zip file should be characterized not its contents' do
|
29
30
|
let(:filename) { fixture_file('archive.zip') }
|
30
|
-
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip")) }
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -41,7 +42,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
41
42
|
'READBOX seen=true
|
42
43
|
<?xml version="1.0" encoding="UTF-8"?>
|
43
44
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
44
|
-
<identification/></fits>'
|
45
|
+
<identification/></fits>'
|
46
|
+
)
|
45
47
|
end
|
46
48
|
|
47
49
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -58,7 +60,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
58
60
|
'2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
|
59
61
|
<?xml version="1.0" encoding="UTF-8"?>
|
60
62
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
61
|
-
<identification/></fits>'
|
63
|
+
<identification/></fits>'
|
64
|
+
)
|
62
65
|
end
|
63
66
|
|
64
67
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/fits'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
describe Fits do
|
6
|
-
let(:fits) {
|
7
|
+
let(:fits) { described_class.new(filename) }
|
7
8
|
|
8
9
|
describe "#call", unless: ENV['TRAVIS'] do
|
9
10
|
subject { fits.call }
|
@@ -27,7 +28,7 @@ module Hydra::FileCharacterization::Characterizers
|
|
27
28
|
|
28
29
|
context 'zip file should be characterized not its contents' do
|
29
30
|
let(:filename) { fixture_file('archive.zip') }
|
30
|
-
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip")) }
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -41,7 +42,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
41
42
|
'READBOX seen=true
|
42
43
|
<?xml version="1.0" encoding="UTF-8"?>
|
43
44
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
44
|
-
<identification/></fits>'
|
45
|
+
<identification/></fits>'
|
46
|
+
)
|
45
47
|
end
|
46
48
|
|
47
49
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -58,7 +60,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
58
60
|
'2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
|
59
61
|
<?xml version="1.0" encoding="UTF-8"?>
|
60
62
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
61
|
-
<identification/></fits>'
|
63
|
+
<identification/></fits>'
|
64
|
+
)
|
62
65
|
end
|
63
66
|
|
64
67
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers'
|
3
4
|
|
@@ -7,36 +8,35 @@ module Hydra::FileCharacterization
|
|
7
8
|
|
8
9
|
describe 'with :fits tool_name' do
|
9
10
|
let(:tool_name) { :fits }
|
10
|
-
it {
|
11
|
+
it { is_expected.to eq(Characterizers::Fits) }
|
11
12
|
end
|
12
13
|
|
13
14
|
describe 'with :ffprobe tool_name' do
|
14
15
|
let(:tool_name) { :ffprobe }
|
15
|
-
it {
|
16
|
+
it { is_expected.to eq(Characterizers::Ffprobe) }
|
16
17
|
end
|
17
18
|
|
18
19
|
context '.characterize_with' do
|
20
|
+
subject { Hydra::FileCharacterization.characterize_with(tool_name, filename, tool_path) }
|
19
21
|
let(:tool_name) { :fits }
|
20
22
|
let(:filename) { __FILE__ }
|
21
23
|
let(:tool_path) { nil }
|
22
|
-
subject { Hydra::FileCharacterization.characterize_with(tool_name, filename, tool_path) }
|
23
24
|
|
24
25
|
context 'with callable tool_path and missing tool name' do
|
25
|
-
let(:tool_path) {
|
26
|
+
let(:tool_path) { ->(filename) { [filename, :tool_path] } }
|
26
27
|
let(:tool_name) { :chunky_salsa }
|
27
|
-
it {
|
28
|
+
it { is_expected.to eq [filename, :tool_path] }
|
28
29
|
end
|
29
30
|
|
30
31
|
context 'with missing tool name and non-callable tool_path' do
|
31
32
|
let(:tool_name) { :chunky_salsa }
|
32
33
|
let(:tool_path) { '/path' }
|
33
|
-
it '
|
34
|
-
expect
|
34
|
+
it 'raises exception' do
|
35
|
+
expect do
|
35
36
|
subject
|
36
|
-
|
37
|
+
end.to raise_error(ToolNotFoundError)
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
40
|
-
|
41
41
|
end
|
42
|
-
end
|
42
|
+
end
|
@@ -1,10 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/to_temp_file'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization
|
5
|
-
|
6
6
|
describe 'ToTempFile' do
|
7
|
-
|
8
7
|
let(:content) { "This is the content of the file." }
|
9
8
|
let(:filename) { "hello.rb" }
|
10
9
|
|
@@ -44,6 +43,5 @@ module Hydra::FileCharacterization
|
|
44
43
|
end
|
45
44
|
end
|
46
45
|
end
|
47
|
-
|
48
46
|
end
|
49
|
-
end
|
47
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
3
|
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
4
|
# Require this file using `require "spec_helper"` to ensure that it is only
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
8
|
|
8
9
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
9
|
-
|
10
|
+
$LOAD_PATH.unshift File.join(GEM_ROOT, "lib")
|
10
11
|
|
11
12
|
require 'coveralls'
|
12
13
|
Coveralls.wear!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
8
8
|
- Jeremy Friesen
|
9
9
|
- Sue Richeson
|
10
10
|
- Rajesh Balekai
|
11
|
-
autorequire:
|
11
|
+
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
14
|
date: 2020-06-10 00:00:00.000000000 Z
|
@@ -27,6 +27,20 @@ dependencies:
|
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 3.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bixby
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - "~>"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 3.0.0
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 3.0.0
|
30
44
|
- !ruby/object:Gem::Dependency
|
31
45
|
name: coveralls
|
32
46
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,7 +153,8 @@ files:
|
|
139
153
|
- ".github_changelog_generator"
|
140
154
|
- ".gitignore"
|
141
155
|
- ".rspec"
|
142
|
-
- ".
|
156
|
+
- ".rubocop.yml"
|
157
|
+
- ".rubocop_todo.yml"
|
143
158
|
- CHANGELOG.md
|
144
159
|
- CODE_OF_CONDUCT.md
|
145
160
|
- CONTRIBUTING.md
|
@@ -178,7 +193,7 @@ homepage: https://github.com/projecthydra/hydra-file_characterization
|
|
178
193
|
licenses:
|
179
194
|
- APACHE2
|
180
195
|
metadata: {}
|
181
|
-
post_install_message:
|
196
|
+
post_install_message:
|
182
197
|
rdoc_options: []
|
183
198
|
require_paths:
|
184
199
|
- lib
|
@@ -193,8 +208,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
208
|
- !ruby/object:Gem::Version
|
194
209
|
version: '0'
|
195
210
|
requirements: []
|
196
|
-
rubygems_version: 3.1.
|
197
|
-
signing_key:
|
211
|
+
rubygems_version: 3.1.4
|
212
|
+
signing_key:
|
198
213
|
specification_version: 4
|
199
214
|
summary: To provide a wrapper for file characterization
|
200
215
|
test_files:
|