hydra-file_characterization 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b48cc683c771d57e58d8e3ba222f55822d1b2aca
4
- data.tar.gz: 384687f9e2a2dd88304addc3c34681a5c05d065e
3
+ metadata.gz: 3042462b8a14d3ec133d9ccc86fa20663fe4260d
4
+ data.tar.gz: 12e6b321515dfdcbf5621d107bf8b9a28ea0a597
5
5
  SHA512:
6
- metadata.gz: c2febb48269319ea4d22a31d07da41a5d5fa3ccf8293f878d333339c90bf1ad2c69379587de1751db4d6a7953614f3e4f634e42b66aa1d2e7b04d792a009b871
7
- data.tar.gz: 46ba44281c3773fb119c2ff3b196fcfce7e83e4a03b4711d4590d210ac04d786f455322df0ca6969b6a3d84408d590daafc1ee2b8654777eda7d334a656ac21f
6
+ metadata.gz: 53c9607e32ab504221c9f8752450af260ac109ebd497d85c546e5fd35dc22293169b16ddef8884351a34e4a1759636a45461a8172f8ac774d867a57d09080568
7
+ data.tar.gz: 4b1bdb304e398b2f78e5ce039d94e666373d6f247d11d7912fba2db35ac51f1daed7dcad21924df53be7fa875e9048aab9091f74c21cbc50816ad1277c826309
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec' do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ end
data/README.md CHANGED
@@ -13,6 +13,13 @@ How others are using the extract_metadata method
13
13
 
14
14
  ## Todo Steps
15
15
 
16
- - Given a filename, characterize the file and return a raw XML stream
16
+ - ~~Given a filename, characterize the file and return a raw XML stream~~
17
+ - Provide a method for converting a StringIO and original file name to a temp file with a comparable name (same extension), then running the characterizer against the tempfile
18
+ - Provide a configuration option for the fits path; This would be the default for the characterizer
19
+ - Update existing Sufia implementation
20
+ - Deprecate Hydra::Derivatives direct method call
21
+ - Instead call the characterizer with the content
17
22
  - Allow characterization services to be chained together
23
+ - This would involve renaming the Characterizer to something else (e.g. Characterizers::Fits)
18
24
  - Provide an ActiveFedora Datastream that maps the raw XML stream to a datastructure
25
+
@@ -8,11 +8,15 @@ Gem::Specification.new do |gem|
8
8
  gem.version = Hydra::FileCharacterization::VERSION
9
9
  gem.authors = [
10
10
  "James Treacy",
11
- "Jeremy Friesen"
11
+ "Jeremy Friesen",
12
+ "Sue Richeson",
13
+ "Rajesh Balekai"
12
14
  ]
13
15
  gem.email = [
14
16
  "jatr@kb.dk",
15
- "jeremy.n.friesen@gmail.com"
17
+ "jeremy.n.friesen@gmail.com",
18
+ "spr7b@virginia.edu",
19
+ "rbalekai@gmail.com"
16
20
  ]
17
21
  gem.description = %q{To provide a wrapper for file characterization}
18
22
  gem.summary = %q{To provide a wrapper for file characterization}
@@ -23,4 +27,9 @@ Gem::Specification.new do |gem|
23
27
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
24
28
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
25
29
  gem.require_paths = ["lib"]
30
+
31
+ gem.add_dependency "activesupport", ">= 3.0.0"
32
+ gem.add_development_dependency "rspec"
33
+ gem.add_development_dependency "guard"
34
+ gem.add_development_dependency 'guard-rspec'
26
35
  end
@@ -1,7 +1,14 @@
1
1
  require "hydra-file_characterization/version"
2
+ require "hydra-file_characterization/characterizer"
3
+ require "active_support/configurable"
2
4
 
3
5
  module Hydra
4
6
  module FileCharacterization
5
- # Your code goes here...
7
+
8
+ class Configuration
9
+ include ActiveSupport::Configurable
10
+ config_accessor :fits_path
11
+ end
6
12
  end
7
13
  end
14
+
@@ -0,0 +1,36 @@
1
require 'open3'

module Hydra
  module FileCharacterization
    # Runs an external FITS executable against a file on disk and returns
    # whatever the tool writes to standard output (the raw FITS XML).
    class Characterizer
      # Retained so the Open3 helpers stay available as private instance
      # methods, exactly as they were for existing subclasses/callers.
      include Open3

      # Raised by #call when +filename+ does not exist on disk.
      class FileNotFoundError < RuntimeError
      end

      attr_reader :filename, :fits_path

      # @param filename [String] path of the file to characterize
      # @param fits_path [String] path (or PATH-resolvable name) of the FITS executable
      def initialize(filename, fits_path)
        @filename = filename
        @fits_path = fits_path
      end

      # Invokes `<fits_path> -i <filename>` and returns its standard output.
      #
      # The executable is started with an argument vector rather than through
      # a shell, so spaces, quotes or `$(...)` in +filename+ are passed to the
      # tool verbatim instead of being interpreted.
      #
      # @return [String] everything the command wrote to stdout
      # @raise [FileNotFoundError] if +filename+ does not exist
      # @raise [RuntimeError] if the command cannot be started or exits non-zero
      def call
        unless File.exist?(filename)
          raise FileNotFoundError, "File: #{filename} does not exist."
        end

        # Only used for the error message; the process itself never sees this string.
        command = "#{fits_path} -i \"#{filename}\""

        begin
          # capture3 drains stdout and stderr concurrently (no pipe-buffer
          # deadlock) and closes all three streams for us.
          out, err, exit_status = Open3.capture3(fits_path.to_s, '-i', filename.to_s)
        rescue SystemCallError => e
          # A missing/non-executable tool used to surface as a RuntimeError via
          # the shell's exit status; keep that exception type for callers.
          raise "Unable to execute command \"#{command}\"\n#{e.message}"
        end

        raise "Unable to execute command \"#{command}\"\n#{err}" unless exit_status.success?
        out
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'open3'
require 'tempfile'

module Hydra
  module FileCharacterization
    # Writes in-memory content to a temporary file that keeps the original
    # file's extension, yields that file to the caller, and removes it
    # once the block finishes.
    class ToTempFile
      # Retained for backward compatibility; nothing in this class uses it.
      include Open3

      attr_accessor :data, :filename

      # @param data [String] content to write to the temporary file
      # @param filename [String] original file name; its basename and
      #   extension are used to name the temporary file
      def initialize(data, filename)
        @data = data
        @filename = filename
      end

      # Creates the temporary file, yields it (rewound, in binary mode) and
      # deletes it afterwards, even if the block raises.
      #
      # Does nothing and returns nil when there is no content to write.
      #
      # @yieldparam tempfile [Tempfile] the populated temporary file
      # @return [Object, nil] the block's return value, or nil for empty data
      def call
        return if data.nil? || data.empty?

        # Passing [prefix, suffix] makes Tempfile append its unique token
        # between the two, so the path still ends in the original extension.
        extension = File.extname(filename)
        stem = File.basename(filename, extension)
        timestamp = Time.now.strftime("%Y%m%d%H%M%S")

        tempfile = Tempfile.new(["#{timestamp}_#{stem}", extension])
        begin
          tempfile.binmode
          tempfile.write(data)
          tempfile.rewind
          yield(tempfile)
        ensure
          # Close and unlink explicitly rather than relying on Tempfile.open's
          # block form, so removal does not depend on the Ruby version.
          tempfile.close!
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Hydra
2
2
  module FileCharacterization
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
Binary file
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 3:46 PM">
3
+ <identification>
4
+ <identity format="ZIP Format" mimetype="application/zip" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="file utility" toolversion="5.04" />
6
+ <tool toolname="Exiftool" toolversion="9.06" />
7
+ <tool toolname="Droid" toolversion="3.0" />
8
+ <tool toolname="ffident" toolversion="0.2" />
9
+ <version toolname="file utility" toolversion="5.04">2.0</version>
10
+ <externalIdentifier toolname="Droid" toolversion="3.0" type="puid">x-fmt/263</externalIdentifier>
11
+ </identity>
12
+ </identification>
13
+ <fileinfo>
14
+ <lastmodified toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2013:09:17 15:45:51-04:00</lastmodified>
15
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/archive.zip</filepath>
16
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">spec/fixtures/archive.zip</filename>
17
+ <size toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">302</size>
18
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">7c5b22ea09ba0eb837f70e2e8094b26f</md5checksum>
19
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379447151000</fslastmodified>
20
+ </fileinfo>
21
+ <filestatus />
22
+ <metadata />
23
+ </fits>
24
+
@@ -0,0 +1,43 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 1:28 PM">
3
+ <identification>
4
+ <identity format="JPEG File Interchange Format" mimetype="image/jpeg" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="Jhove" toolversion="1.5" />
6
+ <tool toolname="file utility" toolversion="5.04" />
7
+ <tool toolname="Exiftool" toolversion="9.06" />
8
+ <tool toolname="Droid" toolversion="3.0" />
9
+ <tool toolname="NLNZ Metadata Extractor" toolversion="3.4GA" />
10
+ <version toolname="Jhove" toolversion="1.5">1.01</version>
11
+ <externalIdentifier toolname="Droid" toolversion="3.0" type="puid">fmt/43</externalIdentifier>
12
+ </identity>
13
+ </identification>
14
+ <fileinfo>
15
+ <size toolname="Jhove" toolversion="1.5">8744</size>
16
+ <lastmodified toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2013:09:17 13:15:45-04:00</lastmodified>
17
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_behan.jpeg</filepath>
18
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_behan.jpeg</filename>
19
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">66a51b92863dd8e2c71c38979d84161c</md5checksum>
20
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379438145000</fslastmodified>
21
+ </fileinfo>
22
+ <filestatus>
23
+ <well-formed toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">true</well-formed>
24
+ <valid toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">true</valid>
25
+ </filestatus>
26
+ <metadata>
27
+ <image>
28
+ <byteOrder toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">big endian</byteOrder>
29
+ <compressionScheme toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">JPEG (old-style)</compressionScheme>
30
+ <imageWidth toolname="Jhove" toolversion="1.5">189</imageWidth>
31
+ <imageHeight toolname="Jhove" toolversion="1.5">267</imageHeight>
32
+ <colorSpace toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">YCbCr</colorSpace>
33
+ <YCbCrSubSampling toolname="Exiftool" toolversion="9.06" status="SINGLE_RESULT">2 2</YCbCrSubSampling>
34
+ <samplingFrequencyUnit toolname="Jhove" toolversion="1.5" status="CONFLICT">no absolute unit of measurement</samplingFrequencyUnit>
35
+ <samplingFrequencyUnit toolname="Exiftool" toolversion="9.06" status="CONFLICT">None</samplingFrequencyUnit>
36
+ <xSamplingFrequency toolname="Exiftool" toolversion="9.06">1</xSamplingFrequency>
37
+ <ySamplingFrequency toolname="Exiftool" toolversion="9.06">1</ySamplingFrequency>
38
+ <bitsPerSample toolname="Jhove" toolversion="1.5">8 8 8</bitsPerSample>
39
+ <samplesPerPixel toolname="Jhove" toolversion="1.5" status="SINGLE_RESULT">3</samplesPerPixel>
40
+ <lightSource toolname="NLNZ Metadata Extractor" toolversion="3.4GA" status="SINGLE_RESULT">unknown</lightSource>
41
+ </image>
42
+ </metadata>
43
+ </fits>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.6.2" timestamp="9/17/13 3:44 PM">
3
+ <identification status="UNKNOWN">
4
+ <identity format="Unknown Binary" mimetype="application/octet-stream" toolname="FITS" toolversion="0.6.2">
5
+ <tool toolname="Jhove" toolversion="1.5" />
6
+ </identity>
7
+ </identification>
8
+ <fileinfo>
9
+ <filepath toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_broken.dxxd</filepath>
10
+ <filename toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">/Users/jfriesen/Repositories/hydra-file_characterization/spec/fixtures/brendan_broken.dxxd</filename>
11
+ <size toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">9574</size>
12
+ <md5checksum toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">ca786ec0489e53c945d1fa7b584bac7f</md5checksum>
13
+ <fslastmodified toolname="OIS File Information" toolversion="0.1" status="SINGLE_RESULT">1379446883000</fslastmodified>
14
+ </fileinfo>
15
+ <filestatus />
16
+ <metadata />
17
+ </fits>
18
+
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+ require 'hydra-file_characterization'
3
+
4
+ describe Hydra::FileCharacterization do
5
+ describe 'Characterizer' do
6
+ def fixture_file(filename)
7
+ File.expand_path(File.join('../../../fixtures', filename), __FILE__)
8
+ end
9
+
10
+ let(:fits_path) { `which fits || which fits.sh`.strip }
11
+ subject { Hydra::FileCharacterization::Characterizer.new(filename, fits_path) }
12
+
13
+ describe 'validfile' do
14
+ let(:filename) { fixture_file('brendan_behan.jpeg') }
15
+ it '#call' do
16
+ expect(subject.call).to include(%(<identity format="JPEG File Interchange Format" mimetype="image/jpeg"))
17
+ end
18
+ end
19
+
20
+ describe 'invalidFile' do
21
+ let(:filename) { fixture_file('nofile.pdf') }
22
+ it "should raise an error if the path does not contain the file" do
23
+ expect {subject.call}.to raise_error(Hydra::FileCharacterization::Characterizer::FileNotFoundError)
24
+ end
25
+ end
26
+
27
+ describe 'corruptFile' do
28
+ let(:filename) { fixture_file('brendan_broken.dxxd') }
29
+ it "should return xml showing Unknown Binary and application/octet-stream mimetype" do
30
+ expect(subject.call).to include(%(<identity format="Unknown Binary" mimetype="application/octet-stream"))
31
+ end
32
+ end
33
+
34
+ describe 'zip file should be characterized not its contents' do
35
+ let(:filename) { fixture_file('archive.zip') }
36
+ its(:call) { should include(%(<identity format="ZIP Format" mimetype="application/zip"))}
37
+ end
38
+
39
+ end
40
+ end
@@ -0,0 +1,20 @@
1
require 'spec_helper'
require 'hydra-file_characterization/to_temp_file'

describe Hydra::FileCharacterization do

  describe 'ToTempFile' do
    let(:string) { "This is the content of the file." }
    subject { Hydra::FileCharacterization::ToTempFile.new(string, "hello.rb") }

    it 'create a tempfile that exists' do
      # Track whether the block actually ran and remember the path as a plain
      # string: without these, the example passes vacuously when #call never
      # yields, and the final check would depend on a closed Tempfile object.
      yielded = false
      temp_path = nil

      subject.call do |temp_file|
        yielded = true
        temp_path = temp_file.path
        expect(File.exist?(temp_path)).to eq true
        expect(File.extname(temp_path)).to eq '.rb'
      end

      expect(yielded).to eq true
      expect(File.exist?(temp_path)).to eq false
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ require 'hydra-file_characterization'
3
+
4
+ describe Hydra::FileCharacterization do
5
+
6
+
7
+
8
+ describe 'config' do
9
+ subject { Hydra::FileCharacterization::Configuration.new }
10
+ let (:expected_fits_path) {"string"}
11
+ before(:each) do
12
+ subject.fits_path = expected_fits_path
13
+ end
14
+ its(:config) {should have_key :fits_path}
15
+ its(:fits_path) {should == expected_fits_path}
16
+ end
17
+
18
+ end
@@ -0,0 +1,17 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+ RSpec.configure do |config|
8
+ config.treat_symbols_as_metadata_keys_with_true_values = true
9
+ config.run_all_when_everything_filtered = true
10
+ config.filter_run :focus
11
+
12
+ # Run specs in random order to surface order dependencies. If you find an
13
+ # order dependency and want to debug it, you can fix the order by providing
14
+ # the seed, which is printed after each run.
15
+ # --seed 1234
16
+ config.order = 'random'
17
+ end
metadata CHANGED
@@ -1,32 +1,106 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hydra-file_characterization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Treacy
8
8
  - Jeremy Friesen
9
+ - Sue Richeson
10
+ - Rajesh Balekai
9
11
  autorequire:
10
12
  bindir: bin
11
13
  cert_chain: []
12
- date: 2013-09-17 00:00:00.000000000 Z
13
- dependencies: []
14
+ date: 2013-09-18 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activesupport
18
+ requirement: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - '>='
21
+ - !ruby/object:Gem::Version
22
+ version: 3.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 3.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - '>='
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ type: :development
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ - !ruby/object:Gem::Dependency
45
+ name: guard
46
+ requirement: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ type: :development
52
+ prerelease: false
53
+ version_requirements: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ - !ruby/object:Gem::Dependency
59
+ name: guard-rspec
60
+ requirement: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - '>='
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ type: :development
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
14
72
  description: To provide a wrapper for file characterization
15
73
  email:
16
74
  - jatr@kb.dk
17
75
  - jeremy.n.friesen@gmail.com
76
+ - spr7b@virginia.edu
77
+ - rbalekai@gmail.com
18
78
  executables: []
19
79
  extensions: []
20
80
  extra_rdoc_files: []
21
81
  files:
22
82
  - .gitignore
83
+ - .rspec
23
84
  - Gemfile
85
+ - Guardfile
24
86
  - LICENSE
25
87
  - README.md
26
88
  - Rakefile
27
89
  - hydra-file_characterization.gemspec
28
90
  - lib/hydra-file_characterization.rb
91
+ - lib/hydra-file_characterization/characterizer.rb
92
+ - lib/hydra-file_characterization/to_temp_file.rb
29
93
  - lib/hydra-file_characterization/version.rb
94
+ - spec/fixtures/archive.zip
95
+ - spec/fixtures/archive.zip.fits.xml
96
+ - spec/fixtures/brendan_behan.jpeg
97
+ - spec/fixtures/brendan_behan.jpeg.fits.xml
98
+ - spec/fixtures/brendan_broken.dxxd
99
+ - spec/fixtures/brendan_broken.dxxd.fits.xml
100
+ - spec/lib/hydra-file_characterization/characterizer_spec.rb
101
+ - spec/lib/hydra-file_characterization/to_temp_file_spec.rb
102
+ - spec/lib/hydra-file_characterization_spec.rb
103
+ - spec/spec_helper.rb
30
104
  homepage: https://github.com/projecthydra/hydra-file_characterization
31
105
  licenses:
32
106
  - APACHE2
@@ -51,4 +125,14 @@ rubygems_version: 2.0.3
51
125
  signing_key:
52
126
  specification_version: 4
53
127
  summary: To provide a wrapper for file characterization
54
- test_files: []
128
+ test_files:
129
+ - spec/fixtures/archive.zip
130
+ - spec/fixtures/archive.zip.fits.xml
131
+ - spec/fixtures/brendan_behan.jpeg
132
+ - spec/fixtures/brendan_behan.jpeg.fits.xml
133
+ - spec/fixtures/brendan_broken.dxxd
134
+ - spec/fixtures/brendan_broken.dxxd.fits.xml
135
+ - spec/lib/hydra-file_characterization/characterizer_spec.rb
136
+ - spec/lib/hydra-file_characterization/to_temp_file_spec.rb
137
+ - spec/lib/hydra-file_characterization_spec.rb
138
+ - spec/spec_helper.rb