hydra-file_characterization 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b48cc683c771d57e58d8e3ba222f55822d1b2aca
4
- data.tar.gz: 384687f9e2a2dd88304addc3c34681a5c05d065e
3
+ metadata.gz: 3042462b8a14d3ec133d9ccc86fa20663fe4260d
4
+ data.tar.gz: 12e6b321515dfdcbf5621d107bf8b9a28ea0a597
5
5
  SHA512:
6
- metadata.gz: c2febb48269319ea4d22a31d07da41a5d5fa3ccf8293f878d333339c90bf1ad2c69379587de1751db4d6a7953614f3e4f634e42b66aa1d2e7b04d792a009b871
7
- data.tar.gz: 46ba44281c3773fb119c2ff3b196fcfce7e83e4a03b4711d4590d210ac04d786f455322df0ca6969b6a3d84408d590daafc1ee2b8654777eda7d334a656ac21f
6
+ metadata.gz: 53c9607e32ab504221c9f8752450af260ac109ebd497d85c546e5fd35dc22293169b16ddef8884351a34e4a1759636a45461a8172f8ac774d867a57d09080568
7
+ data.tar.gz: 4b1bdb304e398b2f78e5ce039d94e666373d6f247d11d7912fba2db35ac51f1daed7dcad21924df53be7fa875e9048aab9091f74c21cbc50816ad1277c826309
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec' do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ end
data/README.md CHANGED
@@ -13,6 +13,13 @@ How others are using the extract_metadata method
13
13
 
14
14
  ## Todo Steps
15
15
 
16
- - Given a filename, characterize the file and return a raw XML stream
16
+ - ~~Given a filename, characterize the file and return a raw XML stream~~
17
+ - Provide a method for converting a StringIO and original file name to a temp file with a comparable file name, then running the characterizer against the tempfile
18
+ - Provide a configuration option for the fits path; This would be the default for the characterizer
19
+ - Update existing Sufia implementation
20
+ - Deprecate Hydra::Derivatives direct method call
21
+ - Instead call the characterizer with the content
17
22
  - Allow characterization services to be chained together
23
+ - This would involve renaming the Characterizer to something else (i.e. Characterizers::Fits)
18
24
  - Provide an ActiveFedora Datastream that maps the raw XML stream to a datastructure
25
+
@@ -8,11 +8,15 @@ Gem::Specification.new do |gem|
8
8
  gem.version = Hydra::FileCharacterization::VERSION
9
9
  gem.authors = [
10
10
  "James Treacy",
11
- "Jeremy Friesen"
11
+ "Jeremy Friesen",
12
+ "Sue Richeson",
13
+ "Rajesh Balekai"
12
14
  ]
13
15
  gem.email = [
14
16
  "jatr@kb.dk",
15
- "jeremy.n.friesen@gmail.com"
17
+ "jeremy.n.friesen@gmail.com",
18
+ "spr7b@virginia.edu",
19
+ "rbalekai@gmail.com"
16
20
  ]
17
21
  gem.description = %q{To provide a wrapper for file characterization}
18
22
  gem.summary = %q{To provide a wrapper for file characterization}
@@ -23,4 +27,9 @@ Gem::Specification.new do |gem|
23
27
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
24
28
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
25
29
  gem.require_paths = ["lib"]
30
+
31
+ gem.add_dependency "activesupport", ">= 3.0.0"
32
+ gem.add_development_dependency "rspec"
33
+ gem.add_development_dependency "guard"
34
+ gem.add_development_dependency 'guard-rspec'
26
35
  end
@@ -1,7 +1,14 @@
1
1
  require "hydra-file_characterization/version"
2
+ require "hydra-file_characterization/characterizer"
3
+ require "active_support/configurable"
2
4
 
3
5
  module Hydra
4
6
  module FileCharacterization
5
- # Your code goes here...
7
+
8
# Settings holder for the gem, built on ActiveSupport::Configurable.
class Configuration
  include ActiveSupport::Configurable

  # Declares the fits_path setting; instances expose it through
  # #fits_path / #fits_path= and store it under the :fits_path config key.
  config_accessor(:fits_path)
end
6
12
  end
7
13
  end
14
+
@@ -0,0 +1,36 @@
1
+ require 'open3'
2
+
3
+ module Hydra::FileCharacterization
4
+
5
require 'shellwords'

# Runs the FITS command-line tool against one file and returns whatever the
# tool printed on standard output (expected to be the FITS XML report).
class Characterizer
  # Kept so existing code relying on the mixed-in Open3 helpers keeps working.
  include Open3

  # Raised by #call when the file to characterize is not present on disk.
  class FileNotFoundError < RuntimeError
  end

  attr_reader :filename, :fits_path

  # @param filename [String] path of the file to characterize
  # @param fits_path [String] command used to launch FITS
  def initialize(filename, fits_path)
    @filename = filename
    @fits_path = fits_path
  end

  # Executes "<fits_path> -i <filename>" and returns its standard output.
  #
  # @return [String] the command's standard output
  # @raise [FileNotFoundError] if +filename+ does not exist
  # @raise [RuntimeError] if the command exits unsuccessfully; the message
  #   contains the command line and the captured standard error
  def call
    raise FileNotFoundError, "File: #{filename} does not exist." unless File.exist?(filename)

    # Escape the file name so spaces, quotes, `$(...)` and similar characters
    # reach the tool literally instead of being interpreted by the shell.
    command = "#{fits_path} -i #{Shellwords.escape(filename)}"

    # capture3 drains stdout and stderr concurrently and closes every pipe,
    # so a chatty stderr cannot block the child while stdout is being read.
    out, err, exit_status = Open3.capture3(command)
    raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?

    out
  end
end
36
+ end
@@ -0,0 +1,25 @@
1
+ require 'open3'
2
+
3
+ module Hydra::FileCharacterization
4
require 'tempfile'

# Writes in-memory content to a temporary file whose name is derived from an
# original file name, hands that file to a block, and removes it afterwards.
class ToTempFile
  # Not used by this class; kept so the set of mixed-in methods is unchanged.
  include Open3

  attr_accessor :data, :filename

  # @param data [String] content to write into the temporary file
  # @param filename [String] original file name; its base name and extension
  #   are reused for the temporary file
  def initialize(data, filename)
    @data = data
    @filename = filename
  end

  # Creates the temporary file, yields it (rewound, in binary mode) and
  # deletes it once the block finishes, even if the block raises.
  #
  # @yieldparam temp_file [Tempfile] the populated temporary file
  # @return [Object, nil] the block's value, or nil when +data+ is empty
  #   (in which case nothing is created and the block is not called)
  def call
    return if data.empty?

    extension = File.extname(filename)
    timestamp = Time.now.strftime("%Y%m%d%H%M%S")
    # The [prefix, suffix] form makes Tempfile put its random part before the
    # extension, so the temporary file keeps the original extension.
    temp_file = Tempfile.new(["#{timestamp}_#{File.basename(filename, extension)}", extension])
    begin
      temp_file.binmode
      temp_file.write(data)
      temp_file.rewind
      yield(temp_file)
    ensure
      # close! both closes and unlinks, independent of Ruby version.
      temp_file.close!
    end
  end
end
25
+ end
@@ -1,5 +1,5 @@
1
1
  module Hydra
2
2
  module FileCharacterization
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
Binary file
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 3:46 PM">
3
+ <identification>
4
+ <identity format="ZIP Format" mimetype="application/zip" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="file utility" toolversion="5.04" />
6
+ <tool toolname="Exiftool" toolversion="9.06" />
7
+ <tool toolname="Droid" toolversion="3.0" />
8
+ <tool toolname="ffident" toolversion="0.2" />
9
+ <version toolname="file utility" toolversion="5.04">2.0</version>
10
+ <externalIdentifier toolname="Droid" toolversion="3.0" type="puid">x-fmt/263</externalIdentifier>
11
+ </identity>
12
+ </identification>
13
+ <fileinfo>
14
+ <lastmodified toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2013:09:17 15:45:51-04:00</lastmodified>
15
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/archive.zip</filepath>
16
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">spec/fixtures/archive.zip</filename>
17
+ <size toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">302</size>
18
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">7c5b22ea09ba0eb837f70e2e8094b26f</md5checksum>
19
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379447151000</fslastmodified>
20
+ </fileinfo>
21
+ <filestatus />
22
+ <metadata />
23
+ </fits>
24
+
@@ -0,0 +1,43 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 1:28 PM">
3
+ <identification>
4
+ <identity format="JPEG File Interchange Format" mimetype="image/jpeg" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="Jhove" toolversion="1.5" />
6
+ <tool toolname="file utility" toolversion="5.04" />
7
+ <tool toolname="Exiftool" toolversion="9.06" />
8
+ <tool toolname="Droid" toolversion="3.0" />
9
+ <tool toolname="NLNZ Metadata Extractor" toolversion="3.4GA" />
10
+ <version toolname="Jhove" toolversion="1.5">1.01</version>
11
+ <externalIdentifier toolname="Droid" toolversion="3.0" type="puid">fmt/43</externalIdentifier>
12
+ </identity>
13
+ </identification>
14
+ <fileinfo>
15
+ <size toolname="Jhove" toolversion="1.5">8744</size>
16
+ <lastmodified toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2013:09:17 13:15:45-04:00</lastmodified>
17
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_behan.jpeg</filepath>
18
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_behan.jpeg</filename>
19
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">66a51b92863dd8e2c71c38979d84161c</md5checksum>
20
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379438145000</fslastmodified>
21
+ </fileinfo>
22
+ <filestatus>
23
+ <well-formed toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">true</well-formed>
24
+ <valid toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">true</valid>
25
+ </filestatus>
26
+ <metadata>
27
+ <image>
28
+ <byteOrder toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">big endian</byteOrder>
29
+ <compressionScheme toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">JPEG (old-style)</compressionScheme>
30
+ <imageWidth toolname="Jhove" toolversion="1.5">189</imageWidth>
31
+ <imageHeight toolname="Jhove" toolversion="1.5">267</imageHeight>
32
+ <colorSpace toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">YCbCr</colorSpace>
33
+ <YCbCrSubSampling toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2 2</YCbCrSubSampling>
34
+ <samplingFrequencyUnit toolname="Jhove" toolversion="1.5" status="CONFLICT">no absolute unit of measurement</samplingFrequencyUnit>
35
+ <samplingFrequencyUnit toolname="Exiftool" toolversion="9.06" status="CONFLICT">None</samplingFrequencyUnit>
36
+ <xSamplingFrequency toolname="Exiftool" toolversion="9.06">1</xSamplingFrequency>
37
+ <ySamplingFrequency toolname="Exiftool" toolversion="9.06">1</ySamplingFrequency>
38
+ <bitsPerSample toolname="Jhove" toolversion="1.5">8 8 8</bitsPerSample>
39
+ <samplesPerPixel toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">3</samplesPerPixel>
40
+ <lightSource toolname="NLNZ Metadata Extractor" toolversion="3.4GA" status="SINGLE_RESULT">unknown</lightSource>
41
+ </image>
42
+ </metadata>
43
+ </fits>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 3:44 PM">
3
+ <identification status="UNKNOWN">
4
+ <identity format="Unknown Binary" mimetype="application/octet-stream" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="Jhove" toolversion="1.5" />
6
+ </identity>
7
+ </identification>
8
+ <fileinfo>
9
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_broken.dxxd</filepath>
10
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_broken.dxxd</filename>
11
+ <size toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">9574</size>
12
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">ca786ec0489e53c945d1fa7b584bac7f</md5checksum>
13
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379446883000</fslastmodified>
14
+ </fileinfo>
15
+ <filestatus />
16
+ <metadata />
17
+ </fits>
18
+
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+ require 'hydra-file_characterization'
3
+
4
+ describe Hydra::FileCharacterization do
5
+ describe 'Characterizer' do
6
+ def fixture_file(filename)
7
+ File.expand_path(File.join('../../../fixtures', filename), __FILE__)
8
+ end
9
+
10
+ let(:fits_path) { `which fits || which fits.sh`.strip }
11
+ subject { Hydra::FileCharacterization::Characterizer.new(filename, fits_path) }
12
+
13
+ describe 'validfile' do
14
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
15
+ it '#call' do
16
+ expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
17
+ end
18
+ end
19
+
20
+ describe 'invalidFile' do
21
+ let(:filename) { fixture_file('nofile.pdf') }
22
+ it "should raise an error if the path does not contain the file" do
23
+ expect {subject.call}.to raise_error(Hydra::FileCharacterization::Characterizer::FileNotFoundError)
24
+ end
25
+ end
26
+
27
+ describe 'corruptFile' do
28
+ let(:filename) { fixture_file('brendan_broken.dxxd') }
29
+ it "should return xml showing Unknown Binary and application/octet-stream mimetype" do
30
+ expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
31
+ end
32
+ end
33
+
34
+ describe 'zip file should be characterized not its contents' do
35
+ let(:filename) { fixture_file('archive.zip') }
36
+ its(:call) { should include(%(<identity format="ZIP Format" mimetype="application/zip"))}
37
+ end
38
+
39
+ end
40
+ end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+ require 'hydra-file_characterization/to_temp_file'
3
+
4
+ describe Hydra::FileCharacterization do
5
+
6
+ describe 'ToTempFile' do
7
+ let(:string) { "This is the content of the file." }
8
+ subject { Hydra::FileCharacterization::ToTempFile.new(string, "hello.rb") }
9
+
10
+ it 'create a tempfile that exists' do
11
+ @temp_file = ''
12
+ subject.call do |temp_file|
13
+ @temp_file = temp_file
14
+ expect(File.exist?(@temp_file)).to eq true
15
+ expect(File.extname(@temp_file)).to eq '.rb'
16
+ end
17
+ expect(File.exist?(@temp_file)).to eq false
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ require 'hydra-file_characterization'
3
+
4
+ describe Hydra::FileCharacterization do
5
+
6
+
7
+
8
+ describe 'config' do
9
+ subject { Hydra::FileCharacterization::Configuration.new }
10
+ let (:expected_fits_path) {"string"}
11
+ before(:each) do
12
+ subject.fits_path = expected_fits_path
13
+ end
14
+ its(:config) {should have_key :fits_path}
15
+ its(:fits_path) {should == expected_fits_path}
16
+ end
17
+
18
+ end
@@ -0,0 +1,17 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+ RSpec.configure do |config|
8
+ config.treat_symbols_as_metadata_keys_with_true_values = true
9
+ config.run_all_when_everything_filtered = true
10
+ config.filter_run :focus
11
+
12
+ # Run specs in random order to surface order dependencies. If you find an
13
+ # order dependency and want to debug it, you can fix the order by providing
14
+ # the seed, which is printed after each run.
15
+ # --seed 1234
16
+ config.order = 'random'
17
+ end
metadata CHANGED
@@ -1,32 +1,106 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hydra-file_characterization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Treacy
8
8
  - Jeremy Friesen
9
+ - Sue Richeson
10
+ - Rajesh Balekai
9
11
  autorequire:
10
12
  bindir: bin
11
13
  cert_chain: []
12
- date: 2013-09-17 00:00:00.000000000 Z
13
- dependencies: []
14
+ date: 2013-09-18 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ requirement: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - '>='
21
+ - !ruby/object:Gem::Version
22
+ version: 3.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 3.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - '>='
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ - !ruby/object:Gem::Dependency
45
+ name: guard
46
+ requirement: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ type: :development
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ - !ruby/object:Gem::Dependency
59
+ name: guard-rspec
60
+ requirement: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ type: :development
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
14
72
  description: To provide a wrapper for file characterization
15
73
  email:
16
74
  - jatr@kb.dk
17
75
  - jeremy.n.friesen@gmail.com
76
+ - spr7b@virginia.edu
77
+ - rbalekai@gmail.com
18
78
  executables: []
19
79
  extensions: []
20
80
  extra_rdoc_files: []
21
81
  files:
22
82
  - .gitignore
83
+ - .rspec
23
84
  - Gemfile
85
+ - Guardfile
24
86
  - LICENSE
25
87
  - README.md
26
88
  - Rakefile
27
89
  - hydra-file_characterization.gemspec
28
90
  - lib/hydra-file_characterization.rb
91
+ - lib/hydra-file_characterization/characterizer.rb
92
+ - lib/hydra-file_characterization/to_temp_file.rb
29
93
  - lib/hydra-file_characterization/version.rb
94
+ - spec/fixtures/archive.zip
95
+ - spec/fixtures/archive.zip.fits.xml
96
+ - spec/fixtures/brendan_behan.jpeg
97
+ - spec/fixtures/brendan_behan.jpeg.fits.xml
98
+ - spec/fixtures/brendan_broken.dxxd
99
+ - spec/fixtures/brendan_broken.dxxd.fits.xml
100
+ - spec/lib/hydra-file_characterization/characterizer_spec.rb
101
+ - spec/lib/hydra-file_characterization/to_temp_file_spec.rb
102
+ - spec/lib/hydra-file_characterization_spec.rb
103
+ - spec/spec_helper.rb
30
104
  homepage: https://github.com/projecthydra/hydra-file_characterization
31
105
  licenses:
32
106
  - APACHE2
@@ -51,4 +125,14 @@ rubygems_version: 2.0.3
51
125
  signing_key:
52
126
  specification_version: 4
53
127
  summary: To provide a wrapper for file characterization
54
- test_files: []
128
+ test_files:
129
+ - spec/fixtures/archive.zip
130
+ - spec/fixtures/archive.zip.fits.xml
131
+ - spec/fixtures/brendan_behan.jpeg
132
+ - spec/fixtures/brendan_behan.jpeg.fits.xml
133
+ - spec/fixtures/brendan_broken.dxxd
134
+ - spec/fixtures/brendan_broken.dxxd.fits.xml
135
+ - spec/lib/hydra-file_characterization/characterizer_spec.rb
136
+ - spec/lib/hydra-file_characterization/to_temp_file_spec.rb
137
+ - spec/lib/hydra-file_characterization_spec.rb
138
+ - spec/spec_helper.rb