hydra-file_characterization 0.3.2 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +91 -0
- data/.github_changelog_generator +2 -0
- data/.rubocop.yml +10 -0
- data/.rubocop_todo.yml +92 -0
- data/CHANGELOG.md +189 -0
- data/CODE_OF_CONDUCT.md +36 -0
- data/CONTRIBUTING.md +70 -22
- data/Gemfile +9 -0
- data/Guardfile +1 -0
- data/LICENSE +14 -16
- data/README.md +54 -13
- data/Rakefile +7 -0
- data/SUPPORT.md +5 -0
- data/hydra-file_characterization.gemspec +12 -6
- data/lib/hydra-file_characterization.rb +1 -0
- data/lib/hydra/file_characterization.rb +32 -36
- data/lib/hydra/file_characterization/characterizer.rb +40 -33
- data/lib/hydra/file_characterization/characterizers.rb +5 -3
- data/lib/hydra/file_characterization/characterizers/ffprobe.rb +2 -2
- data/lib/hydra/file_characterization/characterizers/fits.rb +14 -9
- data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
- data/lib/hydra/file_characterization/exceptions.rb +1 -2
- data/lib/hydra/file_characterization/to_temp_file.rb +3 -3
- data/lib/hydra/file_characterization/version.rb +2 -1
- data/spec/lib/hydra/file_characterization/characterizer_spec.rb +9 -8
- data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +4 -7
- data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +71 -0
- data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +35 -12
- data/spec/lib/hydra/file_characterization/characterizers_spec.rb +10 -10
- data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +2 -4
- data/spec/lib/hydra/file_characterization_spec.rb +8 -3
- data/spec/spec_helper.rb +7 -2
- metadata +84 -6
data/Gemfile
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
source 'https://rubygems.org'
|
2
3
|
|
3
4
|
# Specify your gem's dependencies in hydra/file_characterization.gemspec
|
4
5
|
gemspec
|
6
|
+
|
7
|
+
if ENV['RAILS_VERSION']
|
8
|
+
if ENV['RAILS_VERSION'] == 'edge'
|
9
|
+
gem 'rails', github: 'rails/rails'
|
10
|
+
else
|
11
|
+
gem 'rails', ENV['RAILS_VERSION']
|
12
|
+
end
|
13
|
+
end
|
data/Guardfile
CHANGED
data/LICENSE
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# See the License for the specific language governing permissions and
|
16
|
-
# limitations under the License.
|
1
|
+
Copyright 2013 Notre Dame
|
2
|
+
Additional copyright may be held by others, as reflected in the commit history.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,12 +1,28 @@
|
|
1
|
-
# hydra-
|
1
|
+
# hydra-file_characterization
|
2
2
|
|
3
|
-
|
3
|
+
Code: [![Version](https://badge.fury.io/rb/hydra-file_characterization.png)](http://badge.fury.io/rb/hydra-file_characterization) [![CircleCI](https://circleci.com/gh/samvera/hydra-file_characterization.svg?style=svg)](https://circleci.com/gh/samvera/hydra-file_characterization) [![Coverage Status](https://coveralls.io/repos/github/samvera/hydra-file_characterization/badge.svg?branch=master)](https://coveralls.io/github/samvera/hydra-file_characterization?branch=master)
|
4
4
|
|
5
|
-
|
5
|
+
Docs: [![Contribution Guidelines](http://img.shields.io/badge/CONTRIBUTING-Guidelines-blue.svg)](./CONTRIBUTING.md) [![Apache 2.0 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
|
6
6
|
|
7
|
-
|
7
|
+
Jump in: [![Slack Status](http://slack.samvera.org/badge.svg)](http://slack.samvera.org/)
|
8
8
|
|
9
|
-
|
9
|
+
# What is hydra-file_characterization?
|
10
|
+
|
11
|
+
Provides a wrapper for file characterization.
|
12
|
+
|
13
|
+
## Product Owner & Maintenance
|
14
|
+
|
15
|
+
hydra-file_characterization is a Core Component of the Samvera community. The documentation for what this means can be found [here](http://samvera.github.io/core_components.html#requirements-for-a-core-component).
|
16
|
+
|
17
|
+
### Product Owner
|
18
|
+
|
19
|
+
[little9](https://github.com/little9)
|
20
|
+
|
21
|
+
# Help
|
22
|
+
|
23
|
+
The Samvera community is here to help. Please see our [support guide](./SUPPORT.md).
|
24
|
+
|
25
|
+
# Getting Started
|
10
26
|
|
11
27
|
If you are using Rails add the following to an initializer (./config/initializers/hydra-file_characterization_config.rb):
|
12
28
|
|
@@ -16,12 +32,20 @@ Hydra::FileCharacterization.configure do |config|
|
|
16
32
|
end
|
17
33
|
```
|
18
34
|
|
19
|
-
You can call a single characterizer…
|
20
35
|
```ruby
|
21
|
-
|
36
|
+
Hydra::FileCharacterization.characterize(File.read(filename), File.basename(filename), :fits)
|
22
37
|
```
|
23
38
|
|
24
|
-
|
39
|
+
- Why `File.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
|
40
|
+
- Why `File.basename`? In the case of Fits, the characterization takes cues from the extension name.
|
41
|
+
|
42
|
+
You can call a single characterizer...
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
xml_string = Hydra::FileCharacterization.characterize(File.read("/path/to/my/file.rb"), 'file.rb', :fits)
|
46
|
+
```
|
47
|
+
|
48
|
+
...for this particular call, you can specify a custom fits path...
|
25
49
|
|
26
50
|
```ruby
|
27
51
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
|
@@ -29,7 +53,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
|
|
29
53
|
end
|
30
54
|
```
|
31
55
|
|
32
|
-
|
56
|
+
...or even make the path callable...
|
33
57
|
|
34
58
|
```ruby
|
35
59
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
|
@@ -37,7 +61,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
|
|
37
61
|
end
|
38
62
|
```
|
39
63
|
|
40
|
-
|
64
|
+
...or even create your custom characterizer on the file...
|
41
65
|
|
42
66
|
```ruby
|
43
67
|
xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :my_characterizer) do |config|
|
@@ -51,9 +75,26 @@ You can also call multiple characterizers at the same time.
|
|
51
75
|
fits_xml, ffprobe_xml = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits, :ffprobe)
|
52
76
|
```
|
53
77
|
|
54
|
-
* Why `file.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
|
55
|
-
* Why `file.basename`? In the case of Fits, the characterization takes cues from the extension name.
|
56
|
-
|
57
78
|
## Registering New Characterizers
|
58
79
|
|
59
80
|
This is possible by adding a characterizer to the `Hydra::FileCharacterization::Characterizers`' namespace.
|
81
|
+
|
82
|
+
## Releasing
|
83
|
+
|
84
|
+
1. `bundle install`
|
85
|
+
2. Increase the version number in `lib/hydra/file_characterization/version.rb`
|
86
|
+
3. Increase the same version number in `.github_changelog_generator`
|
87
|
+
4. Update `CHANGELOG.md` by running this command:
|
88
|
+
|
89
|
+
```
|
90
|
+
github_changelog_generator --user samvera --project hydra-file_characterization --token YOUR_GITHUB_TOKEN_HERE
|
91
|
+
```
|
92
|
+
|
93
|
+
5. Commit these changes to the master branch
|
94
|
+
6. Run `rake release`
|
95
|
+
|
96
|
+
# Acknowledgments
|
97
|
+
|
98
|
+
This software has been developed by and is brought to you by the Samvera community. Learn more at the [Samvera website](http://samvera.org/).
|
99
|
+
|
100
|
+
![Samvera Logo](https://wiki.duraspace.org/download/thumbnails/87459292/samvera-fall-font2-200w.png?version=1&modificationDate=1498550535816&api=v2)
|
data/Rakefile
CHANGED
data/SUPPORT.md
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
If you would like to report an issue, first search [the list of issues](https://github.com/samvera/hydra-file_characterization/issues/) to see if someone else has already reported it, and then feel free to [create a new issue](https://github.com/samvera/hydra-file_characterization/issues/new).
|
2
|
+
|
3
|
+
If you have questions or need help, please email [the Samvera community tech list](https://groups.google.com/forum/#!forum/samvera-tech) or stop by the #dev channel in [the Samvera community Slack team](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391#Getintouch!-Slack).
|
4
|
+
|
5
|
+
You can learn more about the various Samvera communication channels on the [Get in touch!](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391) wiki page.
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'hydra/file_characterization/version'
|
@@ -12,24 +13,29 @@ Gem::Specification.new do |gem|
|
|
12
13
|
"Sue Richeson",
|
13
14
|
"Rajesh Balekai"
|
14
15
|
]
|
15
|
-
gem.email
|
16
|
+
gem.email = [
|
16
17
|
"jatr@kb.dk",
|
17
18
|
"jeremy.n.friesen@gmail.com",
|
18
19
|
"spr7b@virginia.edu",
|
19
20
|
"rbalekai@gmail.com"
|
20
21
|
]
|
21
|
-
gem.description =
|
22
|
-
gem.summary =
|
22
|
+
gem.description = 'To provide a wrapper for file characterization'
|
23
|
+
gem.summary = 'To provide a wrapper for file characterization'
|
23
24
|
gem.homepage = "https://github.com/projecthydra/hydra-file_characterization"
|
24
25
|
gem.license = "APACHE2"
|
25
26
|
|
26
|
-
gem.files = `git ls-files`.split(
|
27
|
-
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
27
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
28
|
+
gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
28
29
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
29
30
|
gem.require_paths = ["lib"]
|
30
31
|
|
31
32
|
gem.add_dependency "activesupport", ">= 3.0.0"
|
32
|
-
gem.add_development_dependency
|
33
|
+
gem.add_development_dependency 'bixby', '~> 3.0.0'
|
34
|
+
gem.add_development_dependency 'coveralls'
|
35
|
+
gem.add_development_dependency 'github_changelog_generator'
|
33
36
|
gem.add_development_dependency "guard"
|
34
37
|
gem.add_development_dependency 'guard-rspec'
|
38
|
+
gem.add_development_dependency "rake"
|
39
|
+
gem.add_development_dependency "rspec"
|
40
|
+
gem.add_development_dependency 'rspec_junit_formatter'
|
35
41
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require "hydra/file_characterization/version"
|
2
3
|
require "hydra/file_characterization/exceptions"
|
3
4
|
require "hydra/file_characterization/to_temp_file"
|
@@ -6,7 +7,6 @@ require "hydra/file_characterization/characterizers"
|
|
6
7
|
require "active_support/configurable"
|
7
8
|
|
8
9
|
module Hydra
|
9
|
-
|
10
10
|
module_function
|
11
11
|
|
12
12
|
# A convenience method
|
@@ -15,7 +15,6 @@ module Hydra
|
|
15
15
|
end
|
16
16
|
|
17
17
|
module FileCharacterization
|
18
|
-
|
19
18
|
class << self
|
20
19
|
attr_accessor :configuration
|
21
20
|
end
|
@@ -66,7 +65,7 @@ module Hydra
|
|
66
65
|
tool_names = Array(tool_names).flatten.compact
|
67
66
|
custom_paths = {}
|
68
67
|
yield(custom_paths) if block_given?
|
69
|
-
|
68
|
+
|
70
69
|
tool_outputs = run_characterizers(content, filename, tool_names, custom_paths)
|
71
70
|
tool_names.size == 1 ? tool_outputs.first : tool_outputs
|
72
71
|
end
|
@@ -76,50 +75,47 @@ module Hydra
|
|
76
75
|
yield(configuration)
|
77
76
|
end
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
else
|
91
|
-
args.shift
|
92
|
-
end
|
93
|
-
tool_names = args
|
94
|
-
return content, filename, tool_names
|
78
|
+
# Break up a list of arguments into two possible lists:
|
79
|
+
# option1: [String] content, [String] filename, [Array] tool_names
|
80
|
+
# option2: [File] content, [Array] tool_names
|
81
|
+
# In the case of option2, derive the filename from the file's path
|
82
|
+
# @return [String, File], [String], [Array]
|
83
|
+
def self.extract_arguments(args)
|
84
|
+
content = args.shift
|
85
|
+
filename = if content.is_a?(File) && !args[0].is_a?(String)
|
86
|
+
File.basename(content.path)
|
87
|
+
else
|
88
|
+
args.shift
|
95
89
|
end
|
90
|
+
tool_names = args
|
91
|
+
[content, filename, tool_names]
|
92
|
+
end
|
96
93
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
end
|
94
|
+
# @param [File, String] content Either an open file or a string. If a string is passed
|
95
|
+
# a temp file will be created
|
96
|
+
# @param [String] filename Used in creating a temp file name
|
97
|
+
# @param [Array<Symbol>] tool_names A list of symbols referencing the characterization tools to run
|
98
|
+
# @param [Hash] custom_paths The paths to the executables of the tool.
|
99
|
+
def self.run_characterizers(content, filename, tool_names, custom_paths)
|
100
|
+
if content.is_a? File
|
101
|
+
run_characterizers_on_file(content, tool_names, custom_paths)
|
102
|
+
else
|
103
|
+
FileCharacterization::ToTempFile.open(filename, content) do |f|
|
104
|
+
run_characterizers_on_file(f, tool_names, custom_paths)
|
109
105
|
end
|
110
106
|
end
|
107
|
+
end
|
111
108
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
109
|
+
def self.run_characterizers_on_file(f, tool_names, custom_paths)
|
110
|
+
tool_names.map do |tool_name|
|
111
|
+
FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
|
116
112
|
end
|
113
|
+
end
|
117
114
|
|
118
115
|
class Configuration
|
119
116
|
def tool_path(tool_name, tool_path)
|
120
117
|
Hydra::FileCharacterization.characterizer(tool_name).tool_path = tool_path
|
121
118
|
end
|
122
119
|
end
|
123
|
-
|
124
120
|
end
|
125
121
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'hydra/file_characterization/exceptions'
|
2
3
|
require 'open3'
|
3
4
|
require 'active_support/core_ext/class/attribute'
|
@@ -15,9 +16,7 @@ module Hydra::FileCharacterization
|
|
15
16
|
end
|
16
17
|
|
17
18
|
def call
|
18
|
-
unless File.
|
19
|
-
raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
|
20
|
-
end
|
19
|
+
raise Hydra::FileCharacterization::FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)
|
21
20
|
|
22
21
|
post_process(output)
|
23
22
|
end
|
@@ -26,45 +25,53 @@ module Hydra::FileCharacterization
|
|
26
25
|
@tool_path || self.class.tool_path || convention_based_tool_name
|
27
26
|
end
|
28
27
|
|
28
|
+
def logger
|
29
|
+
@logger ||= activefedora_logger || Logger.new(STDERR)
|
30
|
+
end
|
31
|
+
|
29
32
|
protected
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
# Override this method if you want your processor to mutate the
|
35
|
+
# raw output
|
36
|
+
def post_process(raw_output)
|
37
|
+
raw_output
|
38
|
+
end
|
36
39
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
+
def convention_based_tool_name
|
41
|
+
self.class.name.split("::").last.downcase
|
42
|
+
end
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
44
|
+
def internal_call
|
45
|
+
stdin, stdout, stderr, wait_thr = popen3(command)
|
46
|
+
begin
|
47
|
+
out = stdout.read
|
48
|
+
err = stderr.read
|
49
|
+
exit_status = wait_thr.value
|
50
|
+
raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
|
51
|
+
out
|
52
|
+
ensure
|
53
|
+
stdin.close
|
54
|
+
stdout.close
|
55
|
+
stderr.close
|
54
56
|
end
|
57
|
+
end
|
55
58
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
def command
|
60
|
+
raise NotImplementedError, "Method #command should be overriden in child classes"
|
61
|
+
end
|
59
62
|
|
60
63
|
private
|
61
64
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
end
|
65
|
+
def output
|
66
|
+
if tool_path.respond_to?(:call)
|
67
|
+
tool_path.call(filename)
|
68
|
+
else
|
69
|
+
internal_call
|
68
70
|
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def activefedora_logger
|
74
|
+
ActiveFedora::Base.logger if defined? ActiveFedora
|
75
|
+
end
|
69
76
|
end
|
70
77
|
end
|
@@ -1,19 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Hydra::FileCharacterization
|
2
3
|
module Characterizers
|
3
4
|
end
|
4
5
|
|
5
6
|
module_function
|
7
|
+
|
6
8
|
def characterizer(tool_name)
|
7
9
|
characterizer_name = characterizer_name_from(tool_name)
|
8
10
|
if Characterizers.const_defined?(characterizer_name)
|
9
11
|
Characterizers.const_get(characterizer_name)
|
10
12
|
else
|
11
|
-
raise ToolNotFoundError
|
13
|
+
raise ToolNotFoundError, tool_name
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
15
17
|
def characterizer_name_from(tool_name)
|
16
|
-
tool_name.to_s.gsub(/(?:^|_)([a-z])/) {
|
18
|
+
tool_name.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
|
17
19
|
end
|
18
20
|
|
19
21
|
def characterize_with(tool_name, path_to_file, path_to_tool)
|
@@ -24,8 +26,8 @@ module Hydra::FileCharacterization
|
|
24
26
|
tool_obj.call
|
25
27
|
end
|
26
28
|
end
|
27
|
-
|
28
29
|
end
|
29
30
|
|
30
31
|
require 'hydra/file_characterization/characterizers/fits'
|
31
32
|
require 'hydra/file_characterization/characterizers/ffprobe'
|
33
|
+
require 'hydra/file_characterization/characterizers/fits_servlet'
|