ddr-extraction 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23d86fadee99408637cfc774cdd62efdaff04e7d
4
- data.tar.gz: 03b9386486aa83632f5d96fcc65f209b7ad32952
3
+ metadata.gz: 1e6c32f2c82cc25e4a8c7b65b98becae648925ad
4
+ data.tar.gz: 9f8f4da7e5e4c7db8fc66dcddda8bff671642572
5
5
  SHA512:
6
- metadata.gz: afd68b70ea7ee45bed52c1debfabd88c4ebef23d40d997232c4452061f19ac64d0d2791fc90990807d456e468c660ed553538aaaddf59de731643126e3ab89bb
7
- data.tar.gz: df8032a81665c2a4592df5a3ff96d3ff8f490dc1c8f16e4f1de482895fe119f48a5dc796e51d0b98aefea78080e1418d2c685a08ba73e94d7df2752322d07098
6
+ metadata.gz: 8411bd09f0cb81d7cb16827fd3098f474c21fcf407da1ec38e2936fc3fc9982e892de08cf6ed747901fce92b6aef3d8cfa22c4a6f4bb2a4269fbb659adca539c
7
+ data.tar.gz: 30967c3ecb2bd79fa5895e7198cb15d29b45be5eceb876ce2a60dd94371b4387296daac635b1fea7a0f38270dd59e62fdc11ed0da2a9be087a4067a2b64cffb9
data/.gitignore CHANGED
@@ -1,5 +1,5 @@
1
1
  Gemfile.lock
2
2
  bin/tika-*
3
- bin/fits-*
3
+ bin/fits
4
4
  pkg
5
5
  tmp
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ before_script: "bundle exec rake tika:download fits:download"
5
+ cache:
6
+ - bundler
7
+ notifications:
8
+ email:
9
+ - lib-drs@duke.edu
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # Ddr::Extractor
1
+ # Ddr::Extraction
2
2
 
3
- Generic file text and metadata extraction service.
3
+ Pluggable file text and metadata extraction service.
4
4
 
5
5
  ## Installation
6
6
 
data/Rakefile CHANGED
@@ -1,3 +1,9 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
2
3
 
3
- load "tasks/ddr_extraction.rake"
4
+ load "tasks/ddr_extraction.rake"
5
+
6
+ desc "Run all specs in spec directory"
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task :default => :spec
@@ -1,6 +1,6 @@
1
- require "ddr/extraction/version"
2
- require "ddr/extraction/configuration"
3
- require "ddr/extraction/extractor"
1
+ require_relative "extraction/version"
2
+ require_relative "extraction/configuration"
3
+ require_relative "extraction/extractor"
4
4
 
5
5
  module Ddr
6
6
  #
@@ -10,14 +10,15 @@ module Ddr
10
10
 
11
11
  class << self
12
12
 
13
- attr_reader :config
13
+ def config
14
+ @config ||= Configuration.new
15
+ end
14
16
 
15
17
  # Yields a configuration object for the service
16
18
  def configure
17
- @config ||= Configuration.new
18
- yield @config
19
+ yield config
19
20
  end
20
-
21
+
21
22
  end
22
23
 
23
24
  end
@@ -2,11 +2,23 @@ module Ddr
2
2
  module Extraction
3
3
  module Adapters
4
4
 
5
- def self.get_adapter(adapter_name)
6
- require_relative "adapters/#{adapter_name}_adapter"
7
- class_name = "#{adapter_name.to_s.capitalize}Adapter"
8
- const_get(class_name.to_sym, false)
9
- end
5
+ KNOWN_ADAPTERS = [:fits, :tika]
6
+
7
+ class << self
8
+
9
+ def get_adapter(adapter_name)
10
+ require_relative "adapters/#{adapter_name}_adapter"
11
+ class_name = "#{adapter_name.to_s.capitalize}Adapter"
12
+ const_get(class_name.to_sym, false)
13
+ end
14
+
15
+ KNOWN_ADAPTERS.each do |adapter|
16
+ define_method(adapter) do
17
+ get_adapter(adapter)
18
+ end
19
+ end
20
+
21
+ end
10
22
 
11
23
  end
12
24
  end
@@ -12,14 +12,8 @@ module Ddr
12
12
  end
13
13
 
14
14
  class << self
15
- # FITS version
16
- attr_accessor :version
17
-
18
15
  # Path to FITS executable (fits.sh or fits.bat)
19
16
  attr_accessor :path
20
-
21
- # URL to download distribution
22
- attr_accessor :download_url
23
17
  end
24
18
 
25
19
  end
@@ -12,27 +12,9 @@ module Ddr
12
12
  end
13
13
 
14
14
  class << self
15
- # Tika version
16
- attr_accessor :version
17
-
18
15
  # Path to tika-app.jar
19
16
  attr_accessor :path
20
17
 
21
- # Base command
22
- attr_accessor :command
23
-
24
- # URL to download distribution
25
- attr_accessor :download_url
26
-
27
- # Verify checksum?
28
- attr_accessor :verify_checksum
29
-
30
- # Tika distribution checksum
31
- attr_accessor :checksum_value
32
-
33
- # Tika distribution checksum type
34
- attr_accessor :checksum_type
35
-
36
18
  # Tika server port (optional, required for server)
37
19
  attr_accessor :port
38
20
  end
@@ -5,8 +5,8 @@ module Ddr
5
5
  module Extraction
6
6
  class Configuration
7
7
 
8
- def adapters(name)
9
- config = Adapters.get_adapter(name)
8
+ def adapters
9
+ config = Adapters
10
10
  yield config if block_given?
11
11
  config
12
12
  end
@@ -1,26 +1,11 @@
1
1
  require "ddr-extraction"
2
2
 
3
- BIN_DIR = File.expand_path("../../../../bin", __FILE__)
4
- TIKA_VERSION = "1.6"
5
- FITS_VERSION = "0.8.3"
3
+ bin_dir = File.expand_path("../../../../bin", __FILE__)
6
4
 
7
5
  Ddr::Extraction.configure do |config|
8
6
  config.adapter.text = :tika
9
7
  config.adapter.metadata = :fits
10
-
11
- config.adapters(:tika) do |tika|
12
- tika.version = TIKA_VERSION
13
- tika.path = File.join(BIN_DIR, "tika-app.jar")
14
- tika.download_url = "http://archive.apache.org/dist/tika/tika-app-#{TIKA_VERSION}.jar"
15
- tika.verify_checksum = true
16
- tika.checksum_value = "99df0d8c3f6a2be498d275053e611fb5afdf0a9d"
17
- tika.checksum_type = :SHA1
18
- end
19
-
20
- config.adapters(:fits) do |fits|
21
- fits.version = "0.8.3"
22
- fits.path = File.join(BIN_DIR, "fits-#{FITS_VERSION}", "fits.sh")
23
- fits.download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{FITS_VERSION}.zip"
24
- end
8
+ config.adapters.tika.path = File.join(bin_dir, "tika-app.jar")
9
+ config.adapters.fits.path = File.join(bin_dir, "fits", "fits.sh")
25
10
  end
26
11
 
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module Extraction
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
@@ -1,26 +1,40 @@
1
1
  require "ddr/extraction/defaults"
2
2
  require "openssl"
3
+ require "net/http"
3
4
 
4
5
  DOWNLOAD_DIR = File.absolute_path("tmp")
6
+ BIN_DIR = File.absolute_path("bin")
7
+ TIKA_VERSION = "1.6"
8
+ FITS_VERSION = "0.8.3"
9
+
10
+ tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
11
+ tika_path = Ddr::Extraction.config.adapters.tika.path
12
+ tika_app = File.basename(tika_path)
13
+ tika_download_url = "http://archive.apache.org/dist/tika/tika-app-#{tika_version}.jar"
14
+ tika_checksum_url = "#{tika_download_url}.sha"
15
+ tika_checksum_type = :SHA1
16
+
17
+ fits_version = ENV["FITS_VERSION"] || FITS_VERSION
18
+ fits_path = Ddr::Extraction.config.adapters.fits.path
19
+ fits_download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{fits_version}.zip"
5
20
 
6
21
  namespace :tika do
7
22
  desc "Download Tika app"
8
- task :download => :download_dir do
9
- tika_app = File.join(DOWNLOAD_DIR, "tika-app.jar")
10
- tika_config = Ddr::Extraction.config.adapters(:tika)
11
- puts "Downloading Tika app ... "
12
- system "curl -L #{tika_config.download_url} -o #{tika_app}"
13
- if tika_config.verify_checksum
23
+ task :download => [:download_dir] do
24
+ FileUtils.cd(DOWNLOAD_DIR) do
25
+ puts "Downloading Tika app ... "
26
+ system "curl -L #{tika_download_url} -o #{tika_app}"
27
+ checksum = Net::HTTP.get(URI(tika_checksum_url)).chomp
14
28
  puts "Verifiying checksum ... "
15
- digest = OpenSSL::Digest.const_get(tika_config.checksum_type).new
16
- digest << File.read(tika_config.path)
17
- if digest.to_s != tika_config.checksum_value
29
+ digest = OpenSSL::Digest.const_get(tika_checksum_type).new
30
+ digest << File.read(tika_app)
31
+ if digest.to_s != checksum
18
32
  puts "Checksums do not match -- aborting!"
19
33
  FileUtils.remove_entry_secure(tika_app)
20
34
  abort
21
35
  end
36
+ FileUtils.mv(tika_app, tika_path)
22
37
  end
23
- FileUtils.mv(tika_app, tika_config.path)
24
38
  end
25
39
 
26
40
  # namespace :server do
@@ -33,13 +47,13 @@ end
33
47
  namespace :fits do
34
48
  desc "Download FITS tool"
35
49
  task :download => :download_dir do
36
- fits_tool = File.join(DOWNLOAD_DIR, "fits.zip")
37
- fits_config = Ddr::Extraction.config.adapters(:fits)
38
- puts "Downloading FITS tool ... "
39
- system "curl -L #{fits_config.download_url} -o #{fits_tool}"
40
- # Unzip options: convert text files, force overwrite, extra quiet
41
- system "unzip -a -o -qq -d bin #{fits_tool}"
42
- FileUtils.chmod(0755, fits_config.path)
50
+ FileUtils.cd(DOWNLOAD_DIR) do
51
+ puts "Downloading FITS tool ... "
52
+ system "curl -L #{fits_download_url} -o fits.zip"
53
+ system "unzip -a -o -q fits.zip"
54
+ FileUtils.mv("fits-#{fits_version}", File.dirname(fits_path))
55
+ end
56
+ FileUtils.chmod(0755, fits_path)
43
57
  end
44
58
  end
45
59
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-extraction
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Chandek-Stark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-12 00:00:00.000000000 Z
11
+ date: 2014-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -62,6 +62,7 @@ extra_rdoc_files: []
62
62
  files:
63
63
  - ".gitignore"
64
64
  - ".rspec"
65
+ - ".travis.yml"
65
66
  - Gemfile
66
67
  - LICENSE.txt
67
68
  - README.md