hydra-file_characterization 0.3.2 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +91 -0
  3. data/.github_changelog_generator +2 -0
  4. data/.rubocop.yml +10 -0
  5. data/.rubocop_todo.yml +92 -0
  6. data/CHANGELOG.md +189 -0
  7. data/CODE_OF_CONDUCT.md +36 -0
  8. data/CONTRIBUTING.md +70 -22
  9. data/Gemfile +9 -0
  10. data/Guardfile +1 -0
  11. data/LICENSE +14 -16
  12. data/README.md +54 -13
  13. data/Rakefile +7 -0
  14. data/SUPPORT.md +5 -0
  15. data/hydra-file_characterization.gemspec +12 -6
  16. data/lib/hydra-file_characterization.rb +1 -0
  17. data/lib/hydra/file_characterization.rb +32 -36
  18. data/lib/hydra/file_characterization/characterizer.rb +40 -33
  19. data/lib/hydra/file_characterization/characterizers.rb +5 -3
  20. data/lib/hydra/file_characterization/characterizers/ffprobe.rb +2 -2
  21. data/lib/hydra/file_characterization/characterizers/fits.rb +14 -9
  22. data/lib/hydra/file_characterization/characterizers/fits_servlet.rb +23 -0
  23. data/lib/hydra/file_characterization/exceptions.rb +1 -2
  24. data/lib/hydra/file_characterization/to_temp_file.rb +3 -3
  25. data/lib/hydra/file_characterization/version.rb +2 -1
  26. data/spec/lib/hydra/file_characterization/characterizer_spec.rb +9 -8
  27. data/spec/lib/hydra/file_characterization/characterizers/ffprobe_spec.rb +4 -7
  28. data/spec/lib/hydra/file_characterization/characterizers/fit_servlet_spec.rb +71 -0
  29. data/spec/lib/hydra/file_characterization/characterizers/fits_spec.rb +35 -12
  30. data/spec/lib/hydra/file_characterization/characterizers_spec.rb +10 -10
  31. data/spec/lib/hydra/file_characterization/to_temp_file_spec.rb +2 -4
  32. data/spec/lib/hydra/file_characterization_spec.rb +8 -3
  33. data/spec/spec_helper.rb +7 -2
  34. metadata +84 -6
data/Gemfile CHANGED
@@ -1,4 +1,13 @@
1
+ # frozen_string_literal: true
1
2
  source 'https://rubygems.org'
2
3
 
3
4
  # Specify your gem's dependencies in hydra/file_characterization.gemspec
4
5
  gemspec
6
+
7
+ if ENV['RAILS_VERSION']
8
+ if ENV['RAILS_VERSION'] == 'edge'
9
+ gem 'rails', github: 'rails/rails'
10
+ else
11
+ gem 'rails', ENV['RAILS_VERSION']
12
+ end
13
+ end
data/Guardfile CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  # A sample Guardfile
2
3
  # More info at https://github.com/guard/guard#readme
3
4
 
data/LICENSE CHANGED
@@ -1,16 +1,14 @@
1
- ##########################################################################
2
- #
3
- # Copyright 2013 Notre Dame
4
- # Additional copyright may be held by others, as reflected in the commit log
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
1
+ Copyright 2013 Notre Dame
2
+ Additional copyright may be held by others, as reflected in the commit history.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
data/README.md CHANGED
@@ -1,12 +1,28 @@
1
- # hydra-file_chracterization [![Version](https://badge.fury.io/rb/hydra-file_characterization.png)](http://badge.fury.io/rb/hydra-file_characterization) [![Build Status](https://travis-ci.org/projecthydra/hydra-file_characterization.png?branch=master)](https://travis-ci.org/projecthydra/hydra-file_characterization) [![Dependency Status](https://gemnasium.com/projecthydra/hydra-file_characterization.png)](https://gemnasium.com/projecthydra/hydra-file_characterization)
1
+ # hydra-file_characterization
2
2
 
3
- Hydra::FileCharacterization as (extracted from Sufia and Hydra::Derivatives)
3
+ Code: [![Version](https://badge.fury.io/rb/hydra-file_characterization.png)](http://badge.fury.io/rb/hydra-file_characterization) [![CircleCI](https://circleci.com/gh/samvera/hydra-file_characterization.svg?style=svg)](https://circleci.com/gh/samvera/hydra-file_characterization) [![Coverage Status](https://coveralls.io/repos/github/samvera/hydra-file_characterization/badge.svg?branch=master)](https://coveralls.io/github/samvera/hydra-file_characterization?branch=master)
4
4
 
5
- ## Purpose
5
+ Docs: [![Contribution Guidelines](http://img.shields.io/badge/CONTRIBUTING-Guidelines-blue.svg)](./CONTRIBUTING.md) [![Apache 2.0 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
6
6
 
7
- To provide a wrapper for file characterization
7
+ Jump in: [![Slack Status](http://slack.samvera.org/badge.svg)](http://slack.samvera.org/)
8
8
 
9
- ## How To Use
9
+ # What is hydra-file_characterization?
10
+
11
+ Provides a wrapper for file characterization.
12
+
13
+ ## Product Owner & Maintenance
14
+
15
+ hydra-file_characterization is a Core Component of the Samvera community. The documentation for what this means can be found [here](http://samvera.github.io/core_components.html#requirements-for-a-core-component).
16
+
17
+ ### Product Owner
18
+
19
+ [little9](https://github.com/little9)
20
+
21
+ # Help
22
+
23
+ The Samvera community is here to help. Please see our [support guide](./SUPPORT.md).
24
+
25
+ # Getting Started
10
26
 
11
27
  If you are using Rails add the following to an initializer (./config/initializers/hydra-file_characterization_config.rb):
12
28
 
@@ -16,12 +32,20 @@ Hydra::FileCharacterization.configure do |config|
16
32
  end
17
33
  ```
18
34
 
19
- You can call a single characterizer…
20
35
  ```ruby
21
- xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits)
36
+ Hydra::FileCharacterization.characterize(File.read(filename), File.basename(filename), :fits)
22
37
  ```
23
38
 
24
- …for this particular call, you can specify custom fits path…
39
+ - Why `file.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
40
+ - Why `file.basename`? In the case of Fits, the characterization takes cues from the extension name.
41
+
42
+ You can call a single characterizer...
43
+
44
+ ```ruby
45
+ xml_string = Hydra::FileCharacterization.characterize(File.read("/path/to/my/file.rb"), 'file.rb', :fits)
46
+ ```
47
+
48
+ ...for this particular call, you can specify custom fits path...
25
49
 
26
50
  ```ruby
27
51
  xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
@@ -29,7 +53,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
29
53
  end
30
54
  ```
31
55
 
32
- or even make the path callable
56
+ ...or even make the path callable...
33
57
 
34
58
  ```ruby
35
59
  xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits) do |config|
@@ -37,7 +61,7 @@ xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.
37
61
  end
38
62
  ```
39
63
 
40
- or even create your custom characterizer on the file
64
+ ...or even create your custom characterizer on the file...
41
65
 
42
66
  ```ruby
43
67
  xml_string = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :my_characterizer) do |config|
@@ -51,9 +75,26 @@ You can also call multiple characterizers at the same time.
51
75
  fits_xml, ffprobe_xml = Hydra::FileCharacterization.characterize(contents_of_a_file, 'file.rb', :fits, :ffprobe)
52
76
  ```
53
77
 
54
- * Why `file.read`? To highlight that we want a string. In the case of ActiveFedora, we have a StringIO instead of a file.
55
- * Why `file.basename`? In the case of Fits, the characterization takes cues from the extension name.
56
-
57
78
  ## Registering New Characterizers
58
79
 
59
80
  This is possible by adding a characterizer to the `Hydra::FileCharacterization::Characterizers`' namespace.
81
+
82
+ ## Releasing
83
+
84
+ 1. `bundle install`
85
+ 2. Increase the version number in `lib/hydra/file_characterization/version.rb`
86
+ 3. Increase the same version number in `.github_changelog_generator`
87
+ 4. Update `CHANGELOG.md` by running this command:
88
+
89
+ ```
90
+ github_changelog_generator --user samvera --project hydra-file_characterization --token YOUR_GITHUB_TOKEN_HERE
91
+ ```
92
+
93
+ 5. Commit these changes to the master branch
94
+ 6. Run `rake release`
95
+
96
+ # Acknowledgments
97
+
98
+ This software has been developed by and is brought to you by the Samvera community. Learn more at the [Samvera website](http://samvera.org/).
99
+
100
+ ![Samvera Logo](https://wiki.duraspace.org/download/thumbnails/87459292/samvera-fall-font2-200w.png?version=1&modificationDate=1498550535816&api=v2)
data/Rakefile CHANGED
@@ -1 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  require "bundler/gem_tasks"
3
+ require 'rspec/core'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
@@ -0,0 +1,5 @@
1
+ If you would like to report an issue, first search [the list of issues](https://github.com/samvera/hydra-file_characterization/issues/) to see if someone else has already reported it, and then feel free to [create a new issue](https://github.com/samvera/hydra-file_characterization/issues/new).
2
+
3
+ If you have questions or need help, please email [the Samvera community tech list](https://groups.google.com/forum/#!forum/samvera-tech) or stop by the #dev channel in [the Samvera community Slack team](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391#Getintouch!-Slack).
4
+
5
+ You can learn more about the various Samvera communication channels on the [Get in touch!](https://wiki.duraspace.org/pages/viewpage.action?pageId=87460391) wiki page.
@@ -1,4 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  lib = File.expand_path('../lib', __FILE__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
  require 'hydra/file_characterization/version'
@@ -12,24 +13,29 @@ Gem::Specification.new do |gem|
12
13
  "Sue Richeson",
13
14
  "Rajesh Balekai"
14
15
  ]
15
- gem.email = [
16
+ gem.email = [
16
17
  "jatr@kb.dk",
17
18
  "jeremy.n.friesen@gmail.com",
18
19
  "spr7b@virginia.edu",
19
20
  "rbalekai@gmail.com"
20
21
  ]
21
- gem.description = %q{To provide a wrapper for file characterization}
22
- gem.summary = %q{To provide a wrapper for file characterization}
22
+ gem.description = 'To provide a wrapper for file characterization'
23
+ gem.summary = 'To provide a wrapper for file characterization'
23
24
  gem.homepage = "https://github.com/projecthydra/hydra-file_characterization"
24
25
  gem.license = "APACHE2"
25
26
 
26
- gem.files = `git ls-files`.split($/)
27
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
27
+ gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
28
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
28
29
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
29
30
  gem.require_paths = ["lib"]
30
31
 
31
32
  gem.add_dependency "activesupport", ">= 3.0.0"
32
- gem.add_development_dependency "rspec"
33
+ gem.add_development_dependency 'bixby', '~> 3.0.0'
34
+ gem.add_development_dependency 'coveralls'
35
+ gem.add_development_dependency 'github_changelog_generator'
33
36
  gem.add_development_dependency "guard"
34
37
  gem.add_development_dependency 'guard-rspec'
38
+ gem.add_development_dependency "rake"
39
+ gem.add_development_dependency "rspec"
40
+ gem.add_development_dependency 'rspec_junit_formatter'
35
41
  end
@@ -1 +1,2 @@
1
+ # frozen_string_literal: true
1
2
  require "hydra/file_characterization"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require "hydra/file_characterization/version"
2
3
  require "hydra/file_characterization/exceptions"
3
4
  require "hydra/file_characterization/to_temp_file"
@@ -6,7 +7,6 @@ require "hydra/file_characterization/characterizers"
6
7
  require "active_support/configurable"
7
8
 
8
9
  module Hydra
9
-
10
10
  module_function
11
11
 
12
12
  # A convenience method
@@ -15,7 +15,6 @@ module Hydra
15
15
  end
16
16
 
17
17
  module FileCharacterization
18
-
19
18
  class << self
20
19
  attr_accessor :configuration
21
20
  end
@@ -66,7 +65,7 @@ module Hydra
66
65
  tool_names = Array(tool_names).flatten.compact
67
66
  custom_paths = {}
68
67
  yield(custom_paths) if block_given?
69
-
68
+
70
69
  tool_outputs = run_characterizers(content, filename, tool_names, custom_paths)
71
70
  tool_names.size == 1 ? tool_outputs.first : tool_outputs
72
71
  end
@@ -76,50 +75,47 @@ module Hydra
76
75
  yield(configuration)
77
76
  end
78
77
 
79
- private
80
-
81
- # Break up a list of arguments into two possible lists:
82
- # option1: [String] content, [String] filename, [Array] tool_names
83
- # option2: [File] content, [Array] tool_names
84
- # In the case of option2, derive the filename from the file's path
85
- # @return [String, File], [String], [Array]
86
- def self.extract_arguments(args)
87
- content = args.shift
88
- filename = if content.is_a?(File) && !args[0].is_a?(String)
89
- File.basename(content.path)
90
- else
91
- args.shift
92
- end
93
- tool_names = args
94
- return content, filename, tool_names
78
+ # Break up a list of arguments into two possible lists:
79
+ # option1: [String] content, [String] filename, [Array] tool_names
80
+ # option2: [File] content, [Array] tool_names
81
+ # In the case of option2, derive the filename from the file's path
82
+ # @return [String, File], [String], [Array]
83
+ def self.extract_arguments(args)
84
+ content = args.shift
85
+ filename = if content.is_a?(File) && !args[0].is_a?(String)
86
+ File.basename(content.path)
87
+ else
88
+ args.shift
95
89
  end
90
+ tool_names = args
91
+ [content, filename, tool_names]
92
+ end
96
93
 
97
- # @param [File, String] content Either an open file or a string. If a string is passed
98
- # a temp file will be created
99
- # @param [String] filename Used in creating a temp file name
100
- # @param [Array<Symbol>] tool_names A list of symbols referencing the characterization tools to run
101
- # @param [Hash] custom_paths The paths to the executables of the tool.
102
- def self.run_characterizers(content, filename, tool_names, custom_paths)
103
- if content.is_a? File
104
- run_characterizers_on_file(content, tool_names, custom_paths)
105
- else
106
- FileCharacterization::ToTempFile.open(filename, content) do |f|
107
- run_characterizers_on_file(f, tool_names, custom_paths)
108
- end
94
+ # @param [File, String] content Either an open file or a string. If a string is passed
95
+ # a temp file will be created
96
+ # @param [String] filename Used in creating a temp file name
97
+ # @param [Array<Symbol>] tool_names A list of symbols referencing the characterization tools to run
98
+ # @param [Hash] custom_paths The paths to the executables of the tool.
99
+ def self.run_characterizers(content, filename, tool_names, custom_paths)
100
+ if content.is_a? File
101
+ run_characterizers_on_file(content, tool_names, custom_paths)
102
+ else
103
+ FileCharacterization::ToTempFile.open(filename, content) do |f|
104
+ run_characterizers_on_file(f, tool_names, custom_paths)
109
105
  end
110
106
  end
107
+ end
111
108
 
112
- def self.run_characterizers_on_file(f, tool_names, custom_paths)
113
- tool_names.map do |tool_name|
114
- FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
115
- end
109
+ def self.run_characterizers_on_file(f, tool_names, custom_paths)
110
+ tool_names.map do |tool_name|
111
+ FileCharacterization.characterize_with(tool_name, f.path, custom_paths[tool_name])
116
112
  end
113
+ end
117
114
 
118
115
  class Configuration
119
116
  def tool_path(tool_name, tool_path)
120
117
  Hydra::FileCharacterization.characterizer(tool_name).tool_path = tool_path
121
118
  end
122
119
  end
123
-
124
120
  end
125
121
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'hydra/file_characterization/exceptions'
2
3
  require 'open3'
3
4
  require 'active_support/core_ext/class/attribute'
@@ -15,9 +16,7 @@ module Hydra::FileCharacterization
15
16
  end
16
17
 
17
18
  def call
18
- unless File.exists?(filename)
19
- raise Hydra::FileCharacterization::FileNotFoundError.new("File: #{filename} does not exist.")
20
- end
19
+ raise Hydra::FileCharacterization::FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)
21
20
 
22
21
  post_process(output)
23
22
  end
@@ -26,45 +25,53 @@ module Hydra::FileCharacterization
26
25
  @tool_path || self.class.tool_path || convention_based_tool_name
27
26
  end
28
27
 
28
+ def logger
29
+ @logger ||= activefedora_logger || Logger.new(STDERR)
30
+ end
31
+
29
32
  protected
30
33
 
31
- # Override this method if you want your processor to mutate the
32
- # raw output
33
- def post_process(raw_output)
34
- raw_output
35
- end
34
+ # Override this method if you want your processor to mutate the
35
+ # raw output
36
+ def post_process(raw_output)
37
+ raw_output
38
+ end
36
39
 
37
- def convention_based_tool_name
38
- self.class.name.split("::").last.downcase
39
- end
40
+ def convention_based_tool_name
41
+ self.class.name.split("::").last.downcase
42
+ end
40
43
 
41
- def internal_call
42
- stdin, stdout, stderr, wait_thr = popen3(command)
43
- begin
44
- out = stdout.read
45
- err = stderr.read
46
- exit_status = wait_thr.value
47
- raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
48
- out
49
- ensure
50
- stdin.close
51
- stdout.close
52
- stderr.close
53
- end
44
+ def internal_call
45
+ stdin, stdout, stderr, wait_thr = popen3(command)
46
+ begin
47
+ out = stdout.read
48
+ err = stderr.read
49
+ exit_status = wait_thr.value
50
+ raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
51
+ out
52
+ ensure
53
+ stdin.close
54
+ stdout.close
55
+ stderr.close
54
56
  end
57
+ end
55
58
 
56
- def command
57
- raise NotImplementedError, "Method #command should be overriden in child classes"
58
- end
59
+ def command
60
+ raise NotImplementedError, "Method #command should be overriden in child classes"
61
+ end
59
62
 
60
63
  private
61
64
 
62
- def output
63
- if tool_path.respond_to?(:call)
64
- tool_path.call(filename)
65
- else
66
- internal_call
67
- end
65
+ def output
66
+ if tool_path.respond_to?(:call)
67
+ tool_path.call(filename)
68
+ else
69
+ internal_call
68
70
  end
71
+ end
72
+
73
+ def activefedora_logger
74
+ ActiveFedora::Base.logger if defined? ActiveFedora
75
+ end
69
76
  end
70
77
  end
@@ -1,19 +1,21 @@
1
+ # frozen_string_literal: true
1
2
  module Hydra::FileCharacterization
2
3
  module Characterizers
3
4
  end
4
5
 
5
6
  module_function
7
+
6
8
  def characterizer(tool_name)
7
9
  characterizer_name = characterizer_name_from(tool_name)
8
10
  if Characterizers.const_defined?(characterizer_name)
9
11
  Characterizers.const_get(characterizer_name)
10
12
  else
11
- raise ToolNotFoundError.new(tool_name)
13
+ raise ToolNotFoundError, tool_name
12
14
  end
13
15
  end
14
16
 
15
17
  def characterizer_name_from(tool_name)
16
- tool_name.to_s.gsub(/(?:^|_)([a-z])/) { $1.upcase }
18
+ tool_name.to_s.gsub(/(?:^|_)([a-z])/) { Regexp.last_match(1).upcase }
17
19
  end
18
20
 
19
21
  def characterize_with(tool_name, path_to_file, path_to_tool)
@@ -24,8 +26,8 @@ module Hydra::FileCharacterization
24
26
  tool_obj.call
25
27
  end
26
28
  end
27
-
28
29
  end
29
30
 
30
31
  require 'hydra/file_characterization/characterizers/fits'
31
32
  require 'hydra/file_characterization/characterizers/ffprobe'
33
+ require 'hydra/file_characterization/characterizers/fits_servlet'