hydra-file_characterization 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -0
- data/.github_changelog_generator +1 -1
- data/.rubocop.yml +10 -0
- data/.rubocop_todo.yml +92 -0
- data/CHANGELOG.md +11 -2
- data/Gemfile +1 -0
- data/Guardfile +1 -0
- data/Rakefile +1 -0
- data/hydra-file_characterization.gemspec +7 -5
- data/lib/hydra-file_characterization.rb +1 -0
- data/lib/hydra/file_characterization.rb +32 -36
- data/lib/hydra/file_characterization/characterizer.rb +35 -36
- data/lib/hydra/file_characterization/characterizers.rb +4 -3
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +2 -2
- data/lib/hydra/file_characterization/characterizers/fits.rb +13 -13
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +13 -13
- data/lib/hydra/file_characterization/exceptions.rb +1 -2
- data/lib/hydra/file_characterization/to_temp_file.rb +3 -3
- data/lib/hydra/file_characterization/version.rb +2 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +9 -8
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +4 -7
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +6 -3
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +7 -4
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +10 -10
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +2 -4
- data/spec/spec_helper.rb +2 -1
- metadata +21 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5d7bfac9adb8dec899a9e598255637841aaeba51754c56ac52f4e6eda93f5d1
|
4
|
+
data.tar.gz: f5cd50a069ada9dede312853101cdb463cbd722db331d418ae18cb7fe2641e6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d478b249740cbf248f0ea4e053541d9169104920040bb2e563018bee9c0b58ac3419e07b2c937f4e4ae6015a6c4be11a712a9a6fd3a1825d7ba0d0528f16a30
|
7
|
+
data.tar.gz: 97d13c1f85ca53511a707d5e5338fcbc72c6405bc85bd99b3beaacd34e4faad735a04362f90a948c03cb1700de9145487b81c96d9fc57488a32647f84378d834
|
data/.circleci/config.yml
CHANGED
data/.github_changelog_generator
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
unreleased=true
|
2
|
-
future-release=1.1.
|
2
|
+
future-release=1.1.2
|
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2020-06-10 16:20:03 -0400 using RuboCop version 0.85.1.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: Include.
|
11
|
+
# Include: **/*.gemfile, **/Gemfile, **/gems.rb
|
12
|
+
Bundler/DuplicatedGem:
|
13
|
+
Exclude:
|
14
|
+
- 'Gemfile'
|
15
|
+
|
16
|
+
# Offense count: 1
|
17
|
+
# Cop supports --auto-correct.
|
18
|
+
# Configuration parameters: EnforcedStyleAlignWith, AutoCorrect, Severity.
|
19
|
+
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
20
|
+
Layout/EndAlignment:
|
21
|
+
Exclude:
|
22
|
+
- 'lib/hydra/file_characterization.rb'
|
23
|
+
|
24
|
+
# Offense count: 6
|
25
|
+
# Cop supports --auto-correct.
|
26
|
+
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
27
|
+
# URISchemes: http, https
|
28
|
+
Layout/LineLength:
|
29
|
+
Max: 293
|
30
|
+
|
31
|
+
# Offense count: 5
|
32
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
33
|
+
# ExcludedMethods: refine
|
34
|
+
Metrics/BlockLength:
|
35
|
+
Max: 50
|
36
|
+
|
37
|
+
# Offense count: 1
|
38
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
39
|
+
Metrics/MethodLength:
|
40
|
+
Max: 15
|
41
|
+
|
42
|
+
# Offense count: 1
|
43
|
+
# Configuration parameters: ExpectMatchingDefinition, CheckDefinitionPathHierarchy, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
44
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
45
|
+
Naming/FileName:
|
46
|
+
Exclude:
|
47
|
+
- '**/Gemfile'
|
48
|
+
- '**/*.rake'
|
49
|
+
- 'Capfile'
|
50
|
+
- 'config/deploy/*'
|
51
|
+
- 'lib/hydra-file_characterization.rb'
|
52
|
+
|
53
|
+
# Offense count: 2
|
54
|
+
# Configuration parameters: Max.
|
55
|
+
RSpec/ExampleLength:
|
56
|
+
Exclude:
|
57
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
58
|
+
|
59
|
+
# Offense count: 6
|
60
|
+
# Configuration parameters: AssignmentOnly.
|
61
|
+
RSpec/InstanceVariable:
|
62
|
+
Exclude:
|
63
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
64
|
+
|
65
|
+
# Offense count: 4
|
66
|
+
# Configuration parameters: .
|
67
|
+
# SupportedStyles: have_received, receive
|
68
|
+
RSpec/MessageSpies:
|
69
|
+
EnforcedStyle: receive
|
70
|
+
|
71
|
+
# Offense count: 9
|
72
|
+
# Configuration parameters: IgnoreSharedExamples.
|
73
|
+
RSpec/NamedSubject:
|
74
|
+
Exclude:
|
75
|
+
- 'spec/lib/hydra/file_characterization/characterizer_spec.rb'
|
76
|
+
- 'spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb'
|
77
|
+
- 'spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb'
|
78
|
+
- 'spec/lib/hydra/file_characterization/characterizers/fits_spec.rb'
|
79
|
+
- 'spec/lib/hydra/file_characterization/characterizers_spec.rb'
|
80
|
+
- 'spec/lib/hydra/file_characterization/to_temp_file_spec.rb'
|
81
|
+
|
82
|
+
# Offense count: 1
|
83
|
+
# Configuration parameters: AllowedChars.
|
84
|
+
Style/AsciiComments:
|
85
|
+
Exclude:
|
86
|
+
- 'lib/hydra/file_characterization.rb'
|
87
|
+
|
88
|
+
# Offense count: 1
|
89
|
+
# Configuration parameters: MinBodyLength.
|
90
|
+
Style/GuardClause:
|
91
|
+
Exclude:
|
92
|
+
- 'lib/hydra/file_characterization/characterizers.rb'
|
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,16 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
## [1.1.
|
3
|
+
## [1.1.2](https://github.com/samvera/hydra-file_characterization/tree/1.1.2) (2020-06-10)
|
4
4
|
|
5
|
-
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.
|
5
|
+
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.1...1.1.2)
|
6
|
+
|
7
|
+
**Merged pull requests:**
|
8
|
+
|
9
|
+
- Enforce samvera style rules [\#49](https://github.com/samvera/hydra-file_characterization/pull/49) ([bess](https://github.com/bess))
|
10
|
+
|
11
|
+
## [v1.1.1](https://github.com/samvera/hydra-file_characterization/tree/v1.1.1) (2020-06-10)
|
12
|
+
|
13
|
+
[Full Changelog](https://github.com/samvera/hydra-file_characterization/compare/v1.1.0...v1.1.1)
|
6
14
|
|
7
15
|
**Closed issues:**
|
8
16
|
|
@@ -12,6 +20,7 @@
|
|
12
20
|
|
13
21
|
**Merged pull requests:**
|
14
22
|
|
23
|
+
- Prep for 1.1.1 release [\#48](https://github.com/samvera/hydra-file_characterization/pull/48) ([bess](https://github.com/bess))
|
15
24
|
- Adding Ruby 2.7.z and Rails 6.y.z releases to the CircleCI build configuration [\#46](https://github.com/samvera/hydra-file_characterization/pull/46) ([jrgriffiniii](https://github.com/jrgriffiniii))
|
16
25
|
- Update CircleCI Ruby and Rails versions [\#43](https://github.com/samvera/hydra-file_characterization/pull/43) ([botimer](https://github.com/botimer))
|
17
26
|
- Updates the CircleCI configuration to test against Rails release 5.1.7 and Ruby releases 2.6.3, 2.5.5, and 2.4.6 [\#42](https://github.com/samvera/hydra-file_characterization/pull/42) ([jrgriffiniii](https://github.com/jrgriffiniii))
|
data/Gemfile
CHANGED
data/Guardfile
CHANGED
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'hydra/file_characterization/version'
|
@@ -12,23 +13,24 @@ Gem::Specification.new do |gem|
|
|
12
13
|
"Sue Richeson",
|
13
14
|
"Rajesh Balekai"
|
14
15
|
]
|
15
|
-
gem.email
|
16
|
+
gem.email = [
|
16
17
|
"jatr@kb.dk",
|
17
18
|
"jeremy.n.friesen@gmail.com",
|
18
19
|
"spr7b@virginia.edu",
|
19
20
|
"rbalekai@gmail.com"
|
20
21
|
]
|
21
|
-
gem.description =
|
22
|
-
gem.summary =
|
22
|
+
gem.description = 'To provide a wrapper for file characterization'
|
23
|
+
gem.summary = 'To provide a wrapper for file characterization'
|
23
24
|
gem.homepage = "https://github.com/projecthydra/hydra-file_characterization"
|
24
25
|
gem.license = "APACHE2"
|
25
26
|
|
26
|
-
gem.files = `git ls-files`.split(
|
27
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
27
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
28
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
28
29
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
29
30
|
gem.require_paths = ["lib"]
|
30
31
|
|
31
32
|
gem.add_dependency "activesupport", ">= 3.0.0"
|
33
|
+
gem.add_development_dependency 'bixby', '~> 3.0.0'
|
32
34
|
gem.add_development_dependency 'coveralls'
|
33
35
|
gem.add_development_dependency 'github_changelog_generator'
|
34
36
|
gem.add_development_dependency "guard"
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require "hydra/file_characterization/version"
|
2
3
|
require "hydra/file_characterization/exceptions"
|
3
4
|
require "hydra/file_characterization/to_temp_file"
|
@@ -6,7 +7,6 @@ require "hydra/file_characterization/characterizers"
|
|
6
7
|
require "active_support/configurable"
|
7
8
|
|
8
9
|
module Hydra
|
9
|
-
|
10
10
|
module_function
|
11
11
|
|
12
12
|
# A convenience method
|
@@ -15,7 +15,6 @@ module Hydra
|
|
15
15
|
end
|
16
16
|
|
17
17
|
module FileCharacterization
|
18
|
-
|
19
18
|
class << self
|
20
19
|
attr_accessor :configuration
|
21
20
|
end
|
@@ -66,7 +65,7 @@ module Hydra
|
|
66
65
|
tool_names = Array(tool_names).flatten.compact
|
67
66
|
custom_paths = {}
|
68
67
|
yield(custom_paths) if block_given?
|
69
|
-
|
68
|
+
|
70
69
|
tool_outputs = run_characterizers(content, filename, tool_names, custom_paths)
|
71
70
|
tool_names.size == 1 ? tool_outputs.first : tool_outputs
|
72
71
|
end
|
@@ -76,50 +75,47 @@ module Hydra
|
|
76
75
|
yield(configuration)
|
77
76
|
end
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
args.shift
|
92
|
-
end
|
93
|
-
tool_names = args
|
94
|
-
return content, filename, tool_names
|
78
|
+
# Break up a list of arguments into two possible lists:
|
79
|
+
# option1: [String] content, [String] filename, [Array] tool_names
|
80
|
+
# option2: [File] content, [Array] tool_names
|
81
|
+
# In the case of option2, derive the filename from the file's path
|
82
|
+
# @return [String, File], [String], [Array]
|
83
|
+
def self.extract_arguments(args)
|
84
|
+
content = args.shift
|
85
|
+
filename = if content.is_a?(File) && !args[0].is_a?(String)
|
86
|
+
File.basename(content.path)
|
87
|
+
else
|
88
|
+
args.shift
|
95
89
|
end
|
90
|
+
tool_names = args
|
91
|
+
[content, filename, tool_names]
|
92
|
+
end
|
96
93
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
end
|
94
|
+
# @param [File, String] content Either an open file or a string. If a string is passed
|
95
|
+
# a temp file will be created
|
96
|
+
# @param [String] filename Used in creating a temp file name
|
97
|
+
# @param [Array<Symbol>] tool_names A list of symbols referencing the characerization tools to run
|
98
|
+
# @param [Hash] custom_paths The paths to the executables of the tool.
|
99
|
+
def self.run_characterizers(content, filename, tool_names, custom_paths)
|
100
|
+
if content.is_a? File
|
101
|
+
run_characterizers_on_file(content, tool_names, custom_paths)
|
102
|
+
else
|
103
|
+
FileCharacterization::ToTempFile.open(filename, content) do |f|
|
104
|
+
run_characterizers_on_file(f, tool_names, custom_paths)
|
109
105
|
end
|
110
106
|
end
|
107
|
+
end
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
109
|
+
def self.run_characterizers_on_file(f, tool_names, custom_paths)
|
110
|
+
tool_names.map do |tool_name|
|
111
|
+
FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
|
116
112
|
end
|
113
|
+
end
|
117
114
|
|
118
115
|
class Configuration
|
119
116
|
def tool_path(tool_name, tool_path)
|
120
117
|
Hydra::FileCharacterization.characterizer(tool_name).tool_path = tool_path
|
121
118
|
end
|
122
119
|
end
|
123
|
-
|
124
120
|
end
|
125
121
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'open3'
|
3
4
|
require 'active_support/core_ext/class/attribute'
|
@@ -15,9 +16,7 @@ module Hydra::FileCharacterization
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def call
|
18
|
-
unless File.
|
19
|
-
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
|
-
end
|
19
|
+
raise Hydra::FileCharacterization::FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)
|
21
20
|
|
22
21
|
post_process(output)
|
23
22
|
end
|
@@ -32,47 +31,47 @@ module Hydra::FileCharacterization
|
|
32
31
|
|
33
32
|
protected
|
34
33
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
# Override this method if you want your processor to mutate the
|
35
|
+
# raw output
|
36
|
+
def post_process(raw_output)
|
37
|
+
raw_output
|
38
|
+
end
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
def convention_based_tool_name
|
41
|
+
self.class.name.split("::").last.downcase
|
42
|
+
end
|
44
43
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
44
|
+
def internal_call
|
45
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
46
|
+
begin
|
47
|
+
out = stdout.read
|
48
|
+
err = stderr.read
|
49
|
+
exit_status = wait_thr.value
|
50
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
51
|
+
out
|
52
|
+
ensure
|
53
|
+
stdin.close
|
54
|
+
stdout.close
|
55
|
+
stderr.close
|
58
56
|
end
|
57
|
+
end
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
59
|
+
def command
|
60
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
61
|
+
end
|
63
62
|
|
64
63
|
private
|
65
64
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
end
|
65
|
+
def output
|
66
|
+
if tool_path.respond_to?(:call)
|
67
|
+
tool_path.call(filename)
|
68
|
+
else
|
69
|
+
internal_call
|
72
70
|
end
|
71
|
+
end
|
73
72
|
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
def activefedora_logger
|
74
|
+
ActiveFedora::Base.logger if defined? ActiveFedora
|
75
|
+
end
|
77
76
|
end
|
78
77
|
end
|
@@ -1,19 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
3
|
module Characterizers
|
3
4
|
end
|
4
5
|
|
5
6
|
module_function
|
7
|
+
|
6
8
|
def characterizer(tool_name)
|
7
9
|
characterizer_name = characterizer_name_from(tool_name)
|
8
10
|
if Characterizers.const_defined?(characterizer_name)
|
9
11
|
Characterizers.const_get(characterizer_name)
|
10
12
|
else
|
11
|
-
raise ToolNotFoundError
|
13
|
+
raise ToolNotFoundError, tool_name
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
15
17
|
def characterizer_name_from(tool_name)
|
16
|
-
tool_name.to_s.gsub(/(?:^|_)([a-z])/) {
|
18
|
+
tool_name.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
|
17
19
|
end
|
18
20
|
|
19
21
|
def characterize_with(tool_name, path_to_file, path_to_tool)
|
@@ -24,7 +26,6 @@ module Hydra::FileCharacterization
|
|
24
26
|
tool_obj.call
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
28
29
|
end
|
29
30
|
|
30
31
|
require 'hydra/file_characterization/characterizers/fits'
|
@@ -1,13 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class Ffprobe < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
|
+
|
8
9
|
def command
|
9
10
|
"#{tool_path} -i \"#{filename}\" -print_format xml -show_streams -v quiet"
|
10
11
|
end
|
11
|
-
|
12
12
|
end
|
13
13
|
end
|
@@ -1,23 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
require 'logger'
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class Fits < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def command
|
10
|
+
"#{tool_path} -i \"#{filename}\""
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# Remove any non-XML output that precedes the <?xml> tag
|
14
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
15
|
+
# https://github.com/harvard-lts/fits/issues/40
|
16
|
+
# https://github.com/harvard-lts/fits/issues/46
|
17
|
+
def post_process(raw_output)
|
18
|
+
md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
|
19
|
+
logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
|
20
|
+
md[2]
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -1,23 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'hydra/file_characterization/characterizer'
|
3
4
|
require 'logger'
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
class FitsServlet < Hydra::FileCharacterization::Characterizer
|
6
|
-
|
7
7
|
protected
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
def command
|
10
|
+
"curl -k -F datafile=@#{filename} #{ENV['FITS_SERVLET_URL']}/examine"
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
13
|
+
# Remove any non-XML output that precedes the <?xml> tag
|
14
|
+
# See: https://github.com/harvard-lts/fits/issues/20
|
15
|
+
# https://github.com/harvard-lts/fits/issues/40
|
16
|
+
# https://github.com/harvard-lts/fits/issues/46
|
17
|
+
def post_process(raw_output)
|
18
|
+
md = /\A(.*)(<\?xml.*)\Z/m.match(raw_output)
|
19
|
+
logger.warn "FITS produced non-xml output: \"#{md[1].chomp}\"" unless md[1].empty?
|
20
|
+
md[2]
|
21
|
+
end
|
22
22
|
end
|
23
23
|
end
|
@@ -1,5 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
|
-
|
3
3
|
class FileNotFoundError < RuntimeError
|
4
4
|
end
|
5
5
|
|
@@ -8,5 +8,4 @@ module Hydra::FileCharacterization
|
|
8
8
|
super("Unable to find Hydra::FileCharacterization tool with name :#{tool_name}")
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
12
11
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'open3'
|
2
3
|
require 'tempfile'
|
3
4
|
|
@@ -16,7 +17,7 @@ module Hydra::FileCharacterization
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def call(data)
|
19
|
-
f = Tempfile.new([File.basename(filename),File.extname(filename)])
|
20
|
+
f = Tempfile.new([File.basename(filename), File.extname(filename)])
|
20
21
|
begin
|
21
22
|
f.binmode
|
22
23
|
if data.respond_to? :read
|
@@ -32,6 +33,5 @@ module Hydra::FileCharacterization
|
|
32
33
|
f.unlink
|
33
34
|
end
|
34
35
|
end
|
35
|
-
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
@@ -1,30 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
module Hydra::FileCharacterization
|
4
5
|
describe Characterizer do
|
6
|
+
subject { characterizer }
|
5
7
|
let(:filename) { __FILE__ }
|
6
8
|
let(:instance_tool_path) { nil }
|
7
9
|
let(:class_tool_path) { nil }
|
8
10
|
|
9
|
-
let(:characterizer) {
|
10
|
-
|
11
|
-
|
12
|
-
Hydra::FileCharacterization::Characterizer.tool_path = class_tool_path
|
11
|
+
let(:characterizer) { described_class.new(filename, instance_tool_path) }
|
12
|
+
around do |example|
|
13
|
+
described_class.tool_path = class_tool_path
|
13
14
|
example.run
|
14
|
-
|
15
|
+
described_class.tool_path = nil
|
15
16
|
end
|
16
17
|
|
17
18
|
context 'call' do
|
18
19
|
context 'with missing file' do
|
19
20
|
let(:filename) { '/dev/path/to/bogus/file' }
|
20
|
-
it '
|
21
|
+
it 'raises FileNotFoundError' do
|
21
22
|
expect { subject.call }.to raise_error(FileNotFoundError)
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
25
26
|
context 'with a callable tool path' do
|
26
|
-
let(:class_tool_path) {
|
27
|
-
it '
|
27
|
+
let(:class_tool_path) { ->(filename) { [filename, :output] } }
|
28
|
+
it 'raises FileNotFoundError' do
|
28
29
|
expect(subject.call).to eq [filename, :output]
|
29
30
|
end
|
30
31
|
end
|
@@ -1,19 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/ffprobe'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
|
-
|
6
6
|
describe Ffprobe do
|
7
|
-
|
8
|
-
subject { Ffprobe.new(filename) }
|
7
|
+
subject { described_class.new(filename) }
|
9
8
|
|
10
9
|
describe 'invalidFile' do
|
11
10
|
let(:filename) { fixture_file('nofile.pdf') }
|
12
|
-
it "
|
13
|
-
expect {subject.call}.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
11
|
+
it "raises an error if the path does not contain the file" do
|
12
|
+
expect { subject.call }.to raise_error(Hydra::FileCharacterization::FileNotFoundError)
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
18
|
-
|
19
16
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/fits_servlet'
|
3
4
|
|
@@ -27,7 +28,7 @@ module Hydra::FileCharacterization::Characterizers
|
|
27
28
|
|
28
29
|
context 'zip file should be characterized not its contents' do
|
29
30
|
let(:filename) { fixture_file('archive.zip') }
|
30
|
-
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip")) }
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -41,7 +42,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
41
42
|
'READBOX seen=true
|
42
43
|
<?xml version="1.0" encoding="UTF-8"?>
|
43
44
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
44
|
-
<identification/></fits>'
|
45
|
+
<identification/></fits>'
|
46
|
+
)
|
45
47
|
end
|
46
48
|
|
47
49
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -58,7 +60,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
58
60
|
'2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
|
59
61
|
<?xml version="1.0" encoding="UTF-8"?>
|
60
62
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
61
|
-
<identification/></fits>'
|
63
|
+
<identification/></fits>'
|
64
|
+
)
|
62
65
|
end
|
63
66
|
|
64
67
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -1,9 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers/fits'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization::Characterizers
|
5
6
|
describe Fits do
|
6
|
-
let(:fits) {
|
7
|
+
let(:fits) { described_class.new(filename) }
|
7
8
|
|
8
9
|
describe "#call", unless: ENV['TRAVIS'] do
|
9
10
|
subject { fits.call }
|
@@ -27,7 +28,7 @@ module Hydra::FileCharacterization::Characterizers
|
|
27
28
|
|
28
29
|
context 'zip file should be characterized not its contents' do
|
29
30
|
let(:filename) { fixture_file('archive.zip') }
|
30
|
-
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip"))}
|
31
|
+
it { is_expected.to include(%(<identity format="ZIP Format" mimetype="application/zip")) }
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -41,7 +42,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
41
42
|
'READBOX seen=true
|
42
43
|
<?xml version="1.0" encoding="UTF-8"?>
|
43
44
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
44
|
-
<identification/></fits>'
|
45
|
+
<identification/></fits>'
|
46
|
+
)
|
45
47
|
end
|
46
48
|
|
47
49
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -58,7 +60,8 @@ module Hydra::FileCharacterization::Characterizers
|
|
58
60
|
'2015-10-15 17:14:25,761 ERROR [main] ToolBelt:79 - Thread 1 error initializing edu.harvard.hul.ois.fits.tools.droid.Droid: edu.harvard.hul.ois.fits.exceptions.FitsToolException Message: DROID cannot run under Java 8
|
59
61
|
<?xml version="1.0" encoding="UTF-8"?>
|
60
62
|
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.2" timestamp="15/09/14 10:00 AM">
|
61
|
-
<identification/></fits>'
|
63
|
+
<identification/></fits>'
|
64
|
+
)
|
62
65
|
end
|
63
66
|
|
64
67
|
let(:filename) { fixture_file('brendan_behan.jpeg') }
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/characterizers'
|
3
4
|
|
@@ -7,36 +8,35 @@ module Hydra::FileCharacterization
|
|
7
8
|
|
8
9
|
describe 'with :fits tool_name' do
|
9
10
|
let(:tool_name) { :fits }
|
10
|
-
it {
|
11
|
+
it { is_expected.to eq(Characterizers::Fits) }
|
11
12
|
end
|
12
13
|
|
13
14
|
describe 'with :ffprobe tool_name' do
|
14
15
|
let(:tool_name) { :ffprobe }
|
15
|
-
it {
|
16
|
+
it { is_expected.to eq(Characterizers::Ffprobe) }
|
16
17
|
end
|
17
18
|
|
18
19
|
context '.characterize_with' do
|
20
|
+
subject { Hydra::FileCharacterization.characterize_with(tool_name, filename, tool_path) }
|
19
21
|
let(:tool_name) { :fits }
|
20
22
|
let(:filename) { __FILE__ }
|
21
23
|
let(:tool_path) { nil }
|
22
|
-
subject { Hydra::FileCharacterization.characterize_with(tool_name, filename, tool_path) }
|
23
24
|
|
24
25
|
context 'with callable tool_path and missing tool name' do
|
25
|
-
let(:tool_path) {
|
26
|
+
let(:tool_path) { ->(filename) { [filename, :tool_path] } }
|
26
27
|
let(:tool_name) { :chunky_salsa }
|
27
|
-
it {
|
28
|
+
it { is_expected.to eq [filename, :tool_path] }
|
28
29
|
end
|
29
30
|
|
30
31
|
context 'with missing tool name and non-callable tool_path' do
|
31
32
|
let(:tool_name) { :chunky_salsa }
|
32
33
|
let(:tool_path) { '/path' }
|
33
|
-
it '
|
34
|
-
expect
|
34
|
+
it 'raises exception' do
|
35
|
+
expect do
|
35
36
|
subject
|
36
|
-
|
37
|
+
end.to raise_error(ToolNotFoundError)
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
40
|
-
|
41
41
|
end
|
42
|
-
end
|
42
|
+
end
|
@@ -1,10 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'spec_helper'
|
2
3
|
require 'hydra/file_characterization/to_temp_file'
|
3
4
|
|
4
5
|
module Hydra::FileCharacterization
|
5
|
-
|
6
6
|
describe 'ToTempFile' do
|
7
|
-
|
8
7
|
let(:content) { "This is the content of the file." }
|
9
8
|
let(:filename) { "hello.rb" }
|
10
9
|
|
@@ -44,6 +43,5 @@ module Hydra::FileCharacterization
|
|
44
43
|
end
|
45
44
|
end
|
46
45
|
end
|
47
|
-
|
48
46
|
end
|
49
|
-
end
|
47
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
3
|
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
4
|
# Require this file using `require "spec_helper"` to ensure that it is only
|
@@ -6,7 +7,7 @@
|
|
6
7
|
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
8
|
|
8
9
|
GEM_ROOT = File.expand_path("../../", __FILE__)
|
9
|
-
|
10
|
+
$LOAD_PATH.unshift File.join(GEM_ROOT, "lib")
|
10
11
|
|
11
12
|
require 'coveralls'
|
12
13
|
Coveralls.wear!
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hydra-file_characterization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Treacy
|
8
8
|
- Jeremy Friesen
|
9
9
|
- Sue Richeson
|
10
10
|
- Rajesh Balekai
|
11
|
-
autorequire:
|
11
|
+
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
14
|
date: 2020-06-10 00:00:00.000000000 Z
|
@@ -27,6 +27,20 @@ dependencies:
|
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 3.0.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bixby
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - "~>"
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 3.0.0
|
37
|
+
type: :development
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 3.0.0
|
30
44
|
- !ruby/object:Gem::Dependency
|
31
45
|
name: coveralls
|
32
46
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,7 +153,8 @@ files:
|
|
139
153
|
- ".github_changelog_generator"
|
140
154
|
- ".gitignore"
|
141
155
|
- ".rspec"
|
142
|
-
- ".
|
156
|
+
- ".rubocop.yml"
|
157
|
+
- ".rubocop_todo.yml"
|
143
158
|
- CHANGELOG.md
|
144
159
|
- CODE_OF_CONDUCT.md
|
145
160
|
- CONTRIBUTING.md
|
@@ -178,7 +193,7 @@ homepage: https://github.com/projecthydra/hydra-file_characterization
|
|
178
193
|
licenses:
|
179
194
|
- APACHE2
|
180
195
|
metadata: {}
|
181
|
-
post_install_message:
|
196
|
+
post_install_message:
|
182
197
|
rdoc_options: []
|
183
198
|
require_paths:
|
184
199
|
- lib
|
@@ -193,8 +208,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
208
|
- !ruby/object:Gem::Version
|
194
209
|
version: '0'
|
195
210
|
requirements: []
|
196
|
-
rubygems_version: 3.1.
|
197
|
-
signing_key:
|
211
|
+
rubygems_version: 3.1.4
|
212
|
+
signing_key:
|
198
213
|
specification_version: 4
|
199
214
|
summary: To provide a wrapper for file characterization
|
200
215
|
test_files:
|