ddr-extraction 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23d86fadee99408637cfc774cdd62efdaff04e7d
4
- data.tar.gz: 03b9386486aa83632f5d96fcc65f209b7ad32952
3
+ metadata.gz: 1e6c32f2c82cc25e4a8c7b65b98becae648925ad
4
+ data.tar.gz: 9f8f4da7e5e4c7db8fc66dcddda8bff671642572
5
5
  SHA512:
6
- metadata.gz: afd68b70ea7ee45bed52c1debfabd88c4ebef23d40d997232c4452061f19ac64d0d2791fc90990807d456e468c660ed553538aaaddf59de731643126e3ab89bb
7
- data.tar.gz: df8032a81665c2a4592df5a3ff96d3ff8f490dc1c8f16e4f1de482895fe119f48a5dc796e51d0b98aefea78080e1418d2c685a08ba73e94d7df2752322d07098
6
+ metadata.gz: 8411bd09f0cb81d7cb16827fd3098f474c21fcf407da1ec38e2936fc3fc9982e892de08cf6ed747901fce92b6aef3d8cfa22c4a6f4bb2a4269fbb659adca539c
7
+ data.tar.gz: 30967c3ecb2bd79fa5895e7198cb15d29b45be5eceb876ce2a60dd94371b4387296daac635b1fea7a0f38270dd59e62fdc11ed0da2a9be087a4067a2b64cffb9
data/.gitignore CHANGED
@@ -1,5 +1,5 @@
1
1
  Gemfile.lock
2
2
  bin/tika-*
3
- bin/fits-*
3
+ bin/fits
4
4
  pkg
5
5
  tmp
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ before_script: "bundle exec rake tika:download fits:download"
5
+ cache:
6
+ - bundler
7
+ notifications:
8
+ email:
9
+ - lib-drs@duke.edu
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # Ddr::Extractor
1
+ # Ddr::Extraction
2
2
 
3
- Generic file text and metadata extraction service.
3
+ Pluggable file text and metadata extraction service.
4
4
 
5
5
  ## Installation
6
6
 
data/Rakefile CHANGED
@@ -1,3 +1,9 @@
1
1
  require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
2
3
 
3
- load "tasks/ddr_extraction.rake"
4
+ load "tasks/ddr_extraction.rake"
5
+
6
+ desc "Run all specs in spec directory"
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task :default => :spec
@@ -1,6 +1,6 @@
1
- require "ddr/extraction/version"
2
- require "ddr/extraction/configuration"
3
- require "ddr/extraction/extractor"
1
+ require_relative "extraction/version"
2
+ require_relative "extraction/configuration"
3
+ require_relative "extraction/extractor"
4
4
 
5
5
  module Ddr
6
6
  #
@@ -10,14 +10,15 @@ module Ddr
10
10
 
11
11
  class << self
12
12
 
13
- attr_reader :config
13
+ def config
14
+ @config ||= Configuration.new
15
+ end
14
16
 
15
17
  # Yields a configuration object for the service
16
18
  def configure
17
- @config ||= Configuration.new
18
- yield @config
19
+ yield config
19
20
  end
20
-
21
+
21
22
  end
22
23
 
23
24
  end
@@ -2,11 +2,23 @@ module Ddr
2
2
  module Extraction
3
3
  module Adapters
4
4
 
5
- def self.get_adapter(adapter_name)
6
- require_relative "adapters/#{adapter_name}_adapter"
7
- class_name = "#{adapter_name.to_s.capitalize}Adapter"
8
- const_get(class_name.to_sym, false)
9
- end
5
+ KNOWN_ADAPTERS = [:fits, :tika]
6
+
7
+ class << self
8
+
9
+ def get_adapter(adapter_name)
10
+ require_relative "adapters/#{adapter_name}_adapter"
11
+ class_name = "#{adapter_name.to_s.capitalize}Adapter"
12
+ const_get(class_name.to_sym, false)
13
+ end
14
+
15
+ KNOWN_ADAPTERS.each do |adapter|
16
+ define_method(adapter) do
17
+ get_adapter(adapter)
18
+ end
19
+ end
20
+
21
+ end
10
22
 
11
23
  end
12
24
  end
@@ -12,14 +12,8 @@ module Ddr
12
12
  end
13
13
 
14
14
  class << self
15
- # FITS version
16
- attr_accessor :version
17
-
18
15
  # Path to FITS executable (fits.sh or fits.bat)
19
16
  attr_accessor :path
20
-
21
- # URL to download distribution
22
- attr_accessor :download_url
23
17
  end
24
18
 
25
19
  end
@@ -12,27 +12,9 @@ module Ddr
12
12
  end
13
13
 
14
14
  class << self
15
- # Tika version
16
- attr_accessor :version
17
-
18
15
  # Path to tika-app.jar
19
16
  attr_accessor :path
20
17
 
21
- # Base command
22
- attr_accessor :command
23
-
24
- # URL to download distribution
25
- attr_accessor :download_url
26
-
27
- # Verify checksum?
28
- attr_accessor :verify_checksum
29
-
30
- # Tika distribution checksum
31
- attr_accessor :checksum_value
32
-
33
- # Tika distribution checksum type
34
- attr_accessor :checksum_type
35
-
36
18
  # Tika server port (optional, required for server)
37
19
  attr_accessor :port
38
20
  end
@@ -5,8 +5,8 @@ module Ddr
5
5
  module Extraction
6
6
  class Configuration
7
7
 
8
- def adapters(name)
9
- config = Adapters.get_adapter(name)
8
+ def adapters
9
+ config = Adapters
10
10
  yield config if block_given?
11
11
  config
12
12
  end
@@ -1,26 +1,11 @@
1
1
  require "ddr-extraction"
2
2
 
3
- BIN_DIR = File.expand_path("../../../../bin", __FILE__)
4
- TIKA_VERSION = "1.6"
5
- FITS_VERSION = "0.8.3"
3
+ bin_dir = File.expand_path("../../../../bin", __FILE__)
6
4
 
7
5
  Ddr::Extraction.configure do |config|
8
6
  config.adapter.text = :tika
9
7
  config.adapter.metadata = :fits
10
-
11
- config.adapters(:tika) do |tika|
12
- tika.version = TIKA_VERSION
13
- tika.path = File.join(BIN_DIR, "tika-app.jar")
14
- tika.download_url = "http://archive.apache.org/dist/tika/tika-app-#{TIKA_VERSION}.jar"
15
- tika.verify_checksum = true
16
- tika.checksum_value = "99df0d8c3f6a2be498d275053e611fb5afdf0a9d"
17
- tika.checksum_type = :SHA1
18
- end
19
-
20
- config.adapters(:fits) do |fits|
21
- fits.version = "0.8.3"
22
- fits.path = File.join(BIN_DIR, "fits-#{FITS_VERSION}", "fits.sh")
23
- fits.download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{FITS_VERSION}.zip"
24
- end
8
+ config.adapters.tika.path = File.join(bin_dir, "tika-app.jar")
9
+ config.adapters.fits.path = File.join(bin_dir, "fits", "fits.sh")
25
10
  end
26
11
 
@@ -1,5 +1,5 @@
1
1
  module Ddr
2
2
  module Extraction
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
@@ -1,26 +1,40 @@
1
1
  require "ddr/extraction/defaults"
2
2
  require "openssl"
3
+ require "net/http"
3
4
 
4
5
  DOWNLOAD_DIR = File.absolute_path("tmp")
6
+ BIN_DIR = File.absolute_path("bin")
7
+ TIKA_VERSION = "1.6"
8
+ FITS_VERSION = "0.8.3"
9
+
10
+ tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
11
+ tika_path = Ddr::Extraction.config.adapters.tika.path
12
+ tika_app = File.basename(tika_path)
13
+ tika_download_url = "http://archive.apache.org/dist/tika/tika-app-#{tika_version}.jar"
14
+ tika_checksum_url = "#{tika_download_url}.sha"
15
+ tika_checksum_type = :SHA1
16
+
17
+ fits_version = ENV["FITS_VERSION"] || FITS_VERSION
18
+ fits_path = Ddr::Extraction.config.adapters.fits.path
19
+ fits_download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{fits_version}.zip"
5
20
 
6
21
  namespace :tika do
7
22
  desc "Download Tika app"
8
- task :download => :download_dir do
9
- tika_app = File.join(DOWNLOAD_DIR, "tika-app.jar")
10
- tika_config = Ddr::Extraction.config.adapters(:tika)
11
- puts "Downloading Tika app ... "
12
- system "curl -L #{tika_config.download_url} -o #{tika_app}"
13
- if tika_config.verify_checksum
23
+ task :download => [:download_dir] do
24
+ FileUtils.cd(DOWNLOAD_DIR) do
25
+ puts "Downloading Tika app ... "
26
+ system "curl -L #{tika_download_url} -o #{tika_app}"
27
+ checksum = Net::HTTP.get(URI(tika_checksum_url)).chomp
14
28
  puts "Verifiying checksum ... "
15
- digest = OpenSSL::Digest.const_get(tika_config.checksum_type).new
16
- digest << File.read(tika_config.path)
17
- if digest.to_s != tika_config.checksum_value
29
+ digest = OpenSSL::Digest.const_get(tika_checksum_type).new
30
+ digest << File.read(tika_app)
31
+ if digest.to_s != checksum
18
32
  puts "Checksums do not match -- aborting!"
19
33
  FileUtils.remove_entry_secure(tika_app)
20
34
  abort
21
35
  end
36
+ FileUtils.mv(tika_app, tika_path)
22
37
  end
23
- FileUtils.mv(tika_app, tika_config.path)
24
38
  end
25
39
 
26
40
  # namespace :server do
@@ -33,13 +47,13 @@ end
33
47
  namespace :fits do
34
48
  desc "Download FITS tool"
35
49
  task :download => :download_dir do
36
- fits_tool = File.join(DOWNLOAD_DIR, "fits.zip")
37
- fits_config = Ddr::Extraction.config.adapters(:fits)
38
- puts "Downloading FITS tool ... "
39
- system "curl -L #{fits_config.download_url} -o #{fits_tool}"
40
- # Unzip options: convert text files, force overwrite, extra quiet
41
- system "unzip -a -o -qq -d bin #{fits_tool}"
42
- FileUtils.chmod(0755, fits_config.path)
50
+ FileUtils.cd(DOWNLOAD_DIR) do
51
+ puts "Downloading FITS tool ... "
52
+ system "curl -L #{fits_download_url} -o fits.zip"
53
+ system "unzip -a -o -q fits.zip"
54
+ FileUtils.mv("fits-#{fits_version}", File.dirname(fits_path))
55
+ end
56
+ FileUtils.chmod(0755, fits_path)
43
57
  end
44
58
  end
45
59
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddr-extraction
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Chandek-Stark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-12 00:00:00.000000000 Z
11
+ date: 2014-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -62,6 +62,7 @@ extra_rdoc_files: []
62
62
  files:
63
63
  - ".gitignore"
64
64
  - ".rspec"
65
+ - ".travis.yml"
65
66
  - Gemfile
66
67
  - LICENSE.txt
67
68
  - README.md