hydra-file_characterization 0.3.2 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +91 -0
- data/.github_changelog_generator +2 -0
- data/.rubocop.yml +10 -0
- data/.rubocop_todo.yml +92 -0
- data/CHANGELOG.md +189 -0
- data/CODE_OF_CONDUCT.md +36 -0
- data/CONTRIBUTING.md +70 -22
- data/Gemfile +9 -0
- data/Guardfile +1 -0
- data/LICENSE +14 -16
- data/README.md +54 -13
- data/Rakefile +7 -0
- data/SUPPORT.md +5 -0
- data/hydra-file_characterization.gemspec +12 -6
- data/lib/hydra-file_characterization.rb +1 -0
- data/lib/hydra/file_characterization.rb +32 -36
- data/lib/hydra/file_characterization/characterizer.rb +40 -33
- data/lib/hydra/file_characterization/characterizers.rb +5 -3
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +2 -2
- data/lib/hydra/file_characterization/characterizers/fits.rb +14 -9
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
- data/lib/hydra/file_characterization/exceptions.rb +1 -2
- data/lib/hydra/file_characterization/to_temp_file.rb +3 -3
- data/lib/hydra/file_characterization/version.rb +2 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +9 -8
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +4 -7
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +71 -0
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +35 -12
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +10 -10
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +2 -4
- data/spec/lib/hydra/file_characterization_spec.rb +8 -3
- data/spec/spec_helper.rb +7 -2
- metadata +84 -6
data/Gemfile
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
source 'https://rubygems.org'
|
2
3
|
|
3
4
|
# Specify your gem's dependencies in hydra/file_characterization.gemspec
|
4
5
|
gemspec
|
6
|
+
|
7
|
+
if ENV['RAILS_VERSION']
|
8
|
+
if ENV['RAILS_VERSION'] == 'edge'
|
9
|
+
gem 'rails', github: 'rails/rails'
|
10
|
+
else
|
11
|
+
gem 'rails', ENV['RAILS_VERSION']
|
12
|
+
end
|
13
|
+
end
|
data/Guardfile
CHANGED
data/LICENSE
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# See the License for the specific language governing permissions and
|
16
|
-
# limitations under the License.
|
1
|
+
Copyright 2013 Notre Dame
|
2
|
+
Additional copyright may be held by others, as reflected in the commit history.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,12 +1,28 @@
|
|
1
|
-
# hydra-
|
1
|
+
# hydra-file_characterization
|
2
2
|
|
3
|
-
|
3
|
+
Code: [Gem Version](http://badge.fury.io/rb/hydra-file_characterization) [Build Status](https://circleci.com/gh/samvera/hydra-file_characterization) [Coverage Status](https://coveralls.io/github/samvera/hydra-file_characterization?branch=master)
|
4
4
|
|
5
|
-
|
5
|
+
Docs: [Contribution Guidelines](./CONTRIBUTING.md) [Apache 2.0 License](./LICENSE)
|
6
6
|
|
7
|
-
|
7
|
+
Jump in: [Samvera Slack](http://slack.samvera.org/)
|
8
8
|
|
9
|
-
|
9
|
+
# What is hydra-file_characterization?
|
10
|
+
|
11
|
+
Provides a wrapper for file characterization.
|
12
|
+
|
13
|
+
## Product Owner & Maintenance
|
14
|
+
|
15
|
+
hydra-file_characterization is a Core Component of the Samvera community. The documentation for what this means can be found [here](http://samvera.github.io/core_components.html#requirements-for-a-core-component).
|
16
|
+
|
17
|
+
### Product Owner
|
18
|
+
|
19
|
+
[little9](https://github.com/little9)
|
20
|
+
|
21
|
+
# Help
|
22
|
+
|
23
|
+
The Samvera community is here to help. Please see our [support guide](./SUPPORT.md).
|
24
|
+
|
25
|
+
# Getting Started
|
10
26
|
|
11
27
|
If you are using Rails add the following to an initializer (./config/initializers/hydra-file_characterization_config.rb):
|
12
28
|
|
@@ -16,12 +32,20 @@ Hydra::FileCharacterization.configure do |config|
|
|
16
32
|
end
|
17
33
|
```
|
18
34
|
|
19
|
-
You can call a single characterizer…
|
20
35
|
```ruby
|
21
|
-
|
36
|
+
Hydra::FileCharacterization.characterize(File.read(filename), File.basename(filename), :fits)
|
22
37
|
```
|
23
38
|
|
24
|
-
|
39
|
+
- Why `file.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
|
40
|
+
- Why `file.basename`? In the case of Fits, the characterization takes cues from the extension name.
|
41
|
+
|
42
|
+
You can call a single characterizer...
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
xml_string = Hydra::FileCharacterization.characterize(File.read("/path/to/my/file.rb"), 'file.rb', :fits)
|
46
|
+
```
|
47
|
+
|
48
|
+
...for this particular call, you can specify custom fits path...
|
25
49
|
|
26
50
|
```ruby
|
27
51
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
|
@@ -29,7 +53,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
|
|
29
53
|
end
|
30
54
|
```
|
31
55
|
|
32
|
-
|
56
|
+
...or even make the path callable...
|
33
57
|
|
34
58
|
```ruby
|
35
59
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
|
@@ -37,7 +61,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
|
|
37
61
|
end
|
38
62
|
```
|
39
63
|
|
40
|
-
|
64
|
+
...or even create your custom characterizer on the file...
|
41
65
|
|
42
66
|
```ruby
|
43
67
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :my_characterizer) do |config|
|
@@ -51,9 +75,26 @@ You can also call multiple characterizers at the same time.
|
|
51
75
|
fits_xml, ffprobe_xml = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits, :ffprobe)
|
52
76
|
```
|
53
77
|
|
54
|
-
* Why `file.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
|
55
|
-
* Why `file.basename`? In the case of Fits, the characterization takes cues from the extension name.
|
56
|
-
|
57
78
|
## Registering New Characterizers
|
58
79
|
|
59
80
|
This is possible by adding a characterizer to the `Hydra::FileCharacterization::Characterizers`' namespace.
|
81
|
+
|
82
|
+
## Releasing
|
83
|
+
|
84
|
+
1. `bundle install`
|
85
|
+
2. Increase the version number in `lib/hydra/file_characterization/version.rb`
|
86
|
+
3. Increase the same version number in `.github_changelog_generator`
|
87
|
+
4. Update `CHANGELOG.md` by running this command:
|
88
|
+
|
89
|
+
```
|
90
|
+
github_changelog_generator --user samvera --project hydra-file_characterization --token YOUR_GITHUB_TOKEN_HERE
|
91
|
+
```
|
92
|
+
|
93
|
+
5. Commit these changes to the master branch
|
94
|
+
6. Run `rake release`
|
95
|
+
|
96
|
+
# Acknowledgments
|
97
|
+
|
98
|
+
This software has been developed by and is brought to you by the Samvera community. Learn more at the [Samvera website](http://samvera.org/).
|
99
|
+
|
100
|
+

|
data/Rakefile
CHANGED
data/SUPPORT.md
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
If you would like to report an issue, first search [the list of issues](https://github.com/samvera/hydra-file_characterization/issues/) to see if someone else has already reported it, and then feel free to [create a new issue](https://github.com/samvera/hydra-file_characterization/issues/new).
|
2
|
+
i
|
3
|
+
If you have questions or need help, please email [the Samvera community tech list](https://groups.google.com/forum/#!forum/samvera-tech) or stop by the #dev channel in [the Samvera community Slack team](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391#Getintouch!-Slack).
|
4
|
+
|
5
|
+
You can learn more about the various Samvera communication channels on the [Get in touch!](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391) wiki page.
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'hydra/file_characterization/version'
|
@@ -12,24 +13,29 @@ Gem::Specification.new do |gem|
|
|
12
13
|
"Sue Richeson",
|
13
14
|
"Rajesh Balekai"
|
14
15
|
]
|
15
|
-
gem.email
|
16
|
+
gem.email = [
|
16
17
|
"jatr@kb.dk",
|
17
18
|
"jeremy.n.friesen@gmail.com",
|
18
19
|
"spr7b@virginia.edu",
|
19
20
|
"rbalekai@gmail.com"
|
20
21
|
]
|
21
|
-
gem.description =
|
22
|
-
gem.summary =
|
22
|
+
gem.description = 'To provide a wrapper for file characterization'
|
23
|
+
gem.summary = 'To provide a wrapper for file characterization'
|
23
24
|
gem.homepage = "https://github.com/projecthydra/hydra-file_characterization"
|
24
25
|
gem.license = "APACHE2"
|
25
26
|
|
26
|
-
gem.files = `git ls-files`.split(
|
27
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
27
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
28
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
28
29
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
29
30
|
gem.require_paths = ["lib"]
|
30
31
|
|
31
32
|
gem.add_dependency "activesupport", ">= 3.0.0"
|
32
|
-
gem.add_development_dependency
|
33
|
+
gem.add_development_dependency 'bixby', '~> 3.0.0'
|
34
|
+
gem.add_development_dependency 'coveralls'
|
35
|
+
gem.add_development_dependency 'github_changelog_generator'
|
33
36
|
gem.add_development_dependency "guard"
|
34
37
|
gem.add_development_dependency 'guard-rspec'
|
38
|
+
gem.add_development_dependency "rake"
|
39
|
+
gem.add_development_dependency "rspec"
|
40
|
+
gem.add_development_dependency 'rspec_junit_formatter'
|
35
41
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require "hydra/file_characterization/version"
|
2
3
|
require "hydra/file_characterization/exceptions"
|
3
4
|
require "hydra/file_characterization/to_temp_file"
|
@@ -6,7 +7,6 @@ require "hydra/file_characterization/characterizers"
|
|
6
7
|
require "active_support/configurable"
|
7
8
|
|
8
9
|
module Hydra
|
9
|
-
|
10
10
|
module_function
|
11
11
|
|
12
12
|
# A convenience method
|
@@ -15,7 +15,6 @@ module Hydra
|
|
15
15
|
end
|
16
16
|
|
17
17
|
module FileCharacterization
|
18
|
-
|
19
18
|
class << self
|
20
19
|
attr_accessor :configuration
|
21
20
|
end
|
@@ -66,7 +65,7 @@ module Hydra
|
|
66
65
|
tool_names = Array(tool_names).flatten.compact
|
67
66
|
custom_paths = {}
|
68
67
|
yield(custom_paths) if block_given?
|
69
|
-
|
68
|
+
|
70
69
|
tool_outputs = run_characterizers(content, filename, tool_names, custom_paths)
|
71
70
|
tool_names.size == 1 ? tool_outputs.first : tool_outputs
|
72
71
|
end
|
@@ -76,50 +75,47 @@ module Hydra
|
|
76
75
|
yield(configuration)
|
77
76
|
end
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
args.shift
|
92
|
-
end
|
93
|
-
tool_names = args
|
94
|
-
return content, filename, tool_names
|
78
|
+
# Break up a list of arguments into two possible lists:
|
79
|
+
# option1: [String] content, [String] filename, [Array] tool_names
|
80
|
+
# option2: [File] content, [Array] tool_names
|
81
|
+
# In the case of option2, derive the filename from the file's path
|
82
|
+
# @return [String, File], [String], [Array]
|
83
|
+
def self.extract_arguments(args)
|
84
|
+
content = args.shift
|
85
|
+
filename = if content.is_a?(File) && !args[0].is_a?(String)
|
86
|
+
File.basename(content.path)
|
87
|
+
else
|
88
|
+
args.shift
|
95
89
|
end
|
90
|
+
tool_names = args
|
91
|
+
[content, filename, tool_names]
|
92
|
+
end
|
96
93
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
end
|
94
|
+
# @param [File, String] content Either an open file or a string. If a string is passed
|
95
|
+
# a temp file will be created
|
96
|
+
# @param [String] filename Used in creating a temp file name
|
97
|
+
# @param [Array<Symbol>] tool_names A list of symbols referencing the characterization tools to run
|
98
|
+
# @param [Hash] custom_paths The paths to the executables of the tool.
|
99
|
+
def self.run_characterizers(content, filename, tool_names, custom_paths)
|
100
|
+
if content.is_a? File
|
101
|
+
run_characterizers_on_file(content, tool_names, custom_paths)
|
102
|
+
else
|
103
|
+
FileCharacterization::ToTempFile.open(filename, content) do |f|
|
104
|
+
run_characterizers_on_file(f, tool_names, custom_paths)
|
109
105
|
end
|
110
106
|
end
|
107
|
+
end
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
109
|
+
def self.run_characterizers_on_file(f, tool_names, custom_paths)
|
110
|
+
tool_names.map do |tool_name|
|
111
|
+
FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
|
116
112
|
end
|
113
|
+
end
|
117
114
|
|
118
115
|
class Configuration
|
119
116
|
def tool_path(tool_name, tool_path)
|
120
117
|
Hydra::FileCharacterization.characterizer(tool_name).tool_path = tool_path
|
121
118
|
end
|
122
119
|
end
|
123
|
-
|
124
120
|
end
|
125
121
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'open3'
|
3
4
|
require 'active_support/core_ext/class/attribute'
|
@@ -15,9 +16,7 @@ module Hydra::FileCharacterization
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def call
|
18
|
-
unless File.
|
19
|
-
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
|
-
end
|
19
|
+
raise Hydra::FileCharacterization::FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)
|
21
20
|
|
22
21
|
post_process(output)
|
23
22
|
end
|
@@ -26,45 +25,53 @@ module Hydra::FileCharacterization
|
|
26
25
|
@tool_path || self.class.tool_path || convention_based_tool_name
|
27
26
|
end
|
28
27
|
|
28
|
+
def logger
|
29
|
+
@logger ||= activefedora_logger || Logger.new(STDERR)
|
30
|
+
end
|
31
|
+
|
29
32
|
protected
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
# Override this method if you want your processor to mutate the
|
35
|
+
# raw output
|
36
|
+
def post_process(raw_output)
|
37
|
+
raw_output
|
38
|
+
end
|
36
39
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
+
def convention_based_tool_name
|
41
|
+
self.class.name.split("::").last.downcase
|
42
|
+
end
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
44
|
+
def internal_call
|
45
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
46
|
+
begin
|
47
|
+
out = stdout.read
|
48
|
+
err = stderr.read
|
49
|
+
exit_status = wait_thr.value
|
50
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
51
|
+
out
|
52
|
+
ensure
|
53
|
+
stdin.close
|
54
|
+
stdout.close
|
55
|
+
stderr.close
|
54
56
|
end
|
57
|
+
end
|
55
58
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
def command
|
60
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
61
|
+
end
|
59
62
|
|
60
63
|
private
|
61
64
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
end
|
65
|
+
def output
|
66
|
+
if tool_path.respond_to?(:call)
|
67
|
+
tool_path.call(filename)
|
68
|
+
else
|
69
|
+
internal_call
|
68
70
|
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def activefedora_logger
|
74
|
+
ActiveFedora::Base.logger if defined? ActiveFedora
|
75
|
+
end
|
69
76
|
end
|
70
77
|
end
|
@@ -1,19 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
3
|
module Characterizers
|
3
4
|
end
|
4
5
|
|
5
6
|
module_function
|
7
|
+
|
6
8
|
def characterizer(tool_name)
|
7
9
|
characterizer_name = characterizer_name_from(tool_name)
|
8
10
|
if Characterizers.const_defined?(characterizer_name)
|
9
11
|
Characterizers.const_get(characterizer_name)
|
10
12
|
else
|
11
|
-
raise ToolNotFoundError
|
13
|
+
raise ToolNotFoundError, tool_name
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
15
17
|
def characterizer_name_from(tool_name)
|
16
|
-
tool_name.to_s.gsub(/(?:^|_)([a-z])/) {
|
18
|
+
tool_name.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
|
17
19
|
end
|
18
20
|
|
19
21
|
def characterize_with(tool_name, path_to_file, path_to_tool)
|
@@ -24,8 +26,8 @@ module Hydra::FileCharacterization
|
|
24
26
|
tool_obj.call
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
28
29
|
end
|
29
30
|
|
30
31
|
require 'hydra/file_characterization/characterizers/fits'
|
31
32
|
require 'hydra/file_characterization/characterizers/ffprobe'
|
33
|
+
require 'hydra/file_characterization/characterizers/fits_servlet'
|