ddr-extraction 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.travis.yml +9 -0
- data/README.md +2 -2
- data/Rakefile +7 -1
- data/lib/ddr/extraction.rb +8 -7
- data/lib/ddr/extraction/adapters.rb +17 -5
- data/lib/ddr/extraction/adapters/fits_adapter.rb +0 -6
- data/lib/ddr/extraction/adapters/tika_adapter.rb +0 -18
- data/lib/ddr/extraction/configuration.rb +2 -2
- data/lib/ddr/extraction/defaults.rb +3 -18
- data/lib/ddr/extraction/version.rb +1 -1
- data/lib/tasks/ddr_extraction.rake +31 -17
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6c32f2c82cc25e4a8c7b65b98becae648925ad
|
4
|
+
data.tar.gz: 9f8f4da7e5e4c7db8fc66dcddda8bff671642572
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8411bd09f0cb81d7cb16827fd3098f474c21fcf407da1ec38e2936fc3fc9982e892de08cf6ed747901fce92b6aef3d8cfa22c4a6f4bb2a4269fbb659adca539c
|
7
|
+
data.tar.gz: 30967c3ecb2bd79fa5895e7198cb15d29b45be5eceb876ce2a60dd94371b4387296daac635b1fea7a0f38270dd59e62fdc11ed0da2a9be087a4067a2b64cffb9
|
data/.gitignore
CHANGED
data/.travis.yml
ADDED
data/README.md
CHANGED
data/Rakefile
CHANGED
data/lib/ddr/extraction.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require_relative "extraction/version"
|
2
|
+
require_relative "extraction/configuration"
|
3
|
+
require_relative "extraction/extractor"
|
4
4
|
|
5
5
|
module Ddr
|
6
6
|
#
|
@@ -10,14 +10,15 @@ module Ddr
|
|
10
10
|
|
11
11
|
class << self
|
12
12
|
|
13
|
-
|
13
|
+
def config
|
14
|
+
@config ||= Configuration.new
|
15
|
+
end
|
14
16
|
|
15
17
|
# Yields a configuration object for the service
|
16
18
|
def configure
|
17
|
-
|
18
|
-
yield @config
|
19
|
+
yield config
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
end
|
22
23
|
|
23
24
|
end
|
@@ -2,11 +2,23 @@ module Ddr
|
|
2
2
|
module Extraction
|
3
3
|
module Adapters
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
KNOWN_ADAPTERS = [:fits, :tika]
|
6
|
+
|
7
|
+
class << self
|
8
|
+
|
9
|
+
def get_adapter(adapter_name)
|
10
|
+
require_relative "adapters/#{adapter_name}_adapter"
|
11
|
+
class_name = "#{adapter_name.to_s.capitalize}Adapter"
|
12
|
+
const_get(class_name.to_sym, false)
|
13
|
+
end
|
14
|
+
|
15
|
+
KNOWN_ADAPTERS.each do |adapter|
|
16
|
+
define_method(adapter) do
|
17
|
+
get_adapter(adapter)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
10
22
|
|
11
23
|
end
|
12
24
|
end
|
@@ -12,27 +12,9 @@ module Ddr
|
|
12
12
|
end
|
13
13
|
|
14
14
|
class << self
|
15
|
-
# Tika version
|
16
|
-
attr_accessor :version
|
17
|
-
|
18
15
|
# Path to tika-app.jar
|
19
16
|
attr_accessor :path
|
20
17
|
|
21
|
-
# Base command
|
22
|
-
attr_accessor :command
|
23
|
-
|
24
|
-
# URL to download distribution
|
25
|
-
attr_accessor :download_url
|
26
|
-
|
27
|
-
# Verify checksum?
|
28
|
-
attr_accessor :verify_checksum
|
29
|
-
|
30
|
-
# Tika distribution checksum
|
31
|
-
attr_accessor :checksum_value
|
32
|
-
|
33
|
-
# Tika distribution checksum type
|
34
|
-
attr_accessor :checksum_type
|
35
|
-
|
36
18
|
# Tika server port (optional, required for server)
|
37
19
|
attr_accessor :port
|
38
20
|
end
|
@@ -1,26 +1,11 @@
|
|
1
1
|
require "ddr-extraction"
|
2
2
|
|
3
|
-
|
4
|
-
TIKA_VERSION = "1.6"
|
5
|
-
FITS_VERSION = "0.8.3"
|
3
|
+
bin_dir = File.expand_path("../../../../bin", __FILE__)
|
6
4
|
|
7
5
|
Ddr::Extraction.configure do |config|
|
8
6
|
config.adapter.text = :tika
|
9
7
|
config.adapter.metadata = :fits
|
10
|
-
|
11
|
-
config.adapters(
|
12
|
-
tika.version = TIKA_VERSION
|
13
|
-
tika.path = File.join(BIN_DIR, "tika-app.jar")
|
14
|
-
tika.download_url = "http://archive.apache.org/dist/tika/tika-app-#{TIKA_VERSION}.jar"
|
15
|
-
tika.verify_checksum = true
|
16
|
-
tika.checksum_value = "99df0d8c3f6a2be498d275053e611fb5afdf0a9d"
|
17
|
-
tika.checksum_type = :SHA1
|
18
|
-
end
|
19
|
-
|
20
|
-
config.adapters(:fits) do |fits|
|
21
|
-
fits.version = "0.8.3"
|
22
|
-
fits.path = File.join(BIN_DIR, "fits-#{FITS_VERSION}", "fits.sh")
|
23
|
-
fits.download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{FITS_VERSION}.zip"
|
24
|
-
end
|
8
|
+
config.adapters.tika.path = File.join(bin_dir, "tika-app.jar")
|
9
|
+
config.adapters.fits.path = File.join(bin_dir, "fits", "fits.sh")
|
25
10
|
end
|
26
11
|
|
@@ -1,26 +1,40 @@
|
|
1
1
|
require "ddr/extraction/defaults"
|
2
2
|
require "openssl"
|
3
|
+
require "net/http"
|
3
4
|
|
4
5
|
DOWNLOAD_DIR = File.absolute_path("tmp")
|
6
|
+
BIN_DIR = File.absolute_path("bin")
|
7
|
+
TIKA_VERSION = "1.6"
|
8
|
+
FITS_VERSION = "0.8.3"
|
9
|
+
|
10
|
+
tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
|
11
|
+
tika_path = Ddr::Extraction.config.adapters.tika.path
|
12
|
+
tika_app = File.basename(tika_path)
|
13
|
+
tika_download_url = "http://archive.apache.org/dist/tika/tika-app-#{tika_version}.jar"
|
14
|
+
tika_checksum_url = "#{tika_download_url}.sha"
|
15
|
+
tika_checksum_type = :SHA1
|
16
|
+
|
17
|
+
fits_version = ENV["FITS_VERSION"] || FITS_VERSION
|
18
|
+
fits_path = Ddr::Extraction.config.adapters.fits.path
|
19
|
+
fits_download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{fits_version}.zip"
|
5
20
|
|
6
21
|
namespace :tika do
|
7
22
|
desc "Download Tika app"
|
8
|
-
task :download => :download_dir do
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
if tika_config.verify_checksum
|
23
|
+
task :download => [:download_dir] do
|
24
|
+
FileUtils.cd(DOWNLOAD_DIR) do
|
25
|
+
puts "Downloading Tika app ... "
|
26
|
+
system "curl -L #{tika_download_url} -o #{tika_app}"
|
27
|
+
checksum = Net::HTTP.get(URI(tika_checksum_url)).chomp
|
14
28
|
puts "Verifiying checksum ... "
|
15
|
-
digest = OpenSSL::Digest.const_get(
|
16
|
-
digest << File.read(
|
17
|
-
if digest.to_s !=
|
29
|
+
digest = OpenSSL::Digest.const_get(tika_checksum_type).new
|
30
|
+
digest << File.read(tika_app)
|
31
|
+
if digest.to_s != checksum
|
18
32
|
puts "Checksums do not match -- aborting!"
|
19
33
|
FileUtils.remove_entry_secure(tika_app)
|
20
34
|
abort
|
21
35
|
end
|
36
|
+
FileUtils.mv(tika_app, tika_path)
|
22
37
|
end
|
23
|
-
FileUtils.mv(tika_app, tika_config.path)
|
24
38
|
end
|
25
39
|
|
26
40
|
# namespace :server do
|
@@ -33,13 +47,13 @@ end
|
|
33
47
|
namespace :fits do
|
34
48
|
desc "Download FITS tool"
|
35
49
|
task :download => :download_dir do
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
FileUtils.chmod(0755,
|
50
|
+
FileUtils.cd(DOWNLOAD_DIR) do
|
51
|
+
puts "Downloading FITS tool ... "
|
52
|
+
system "curl -L #{fits_download_url} -o fits.zip"
|
53
|
+
system "unzip -a -o -q fits.zip"
|
54
|
+
FileUtils.mv("fits-#{fits_version}", File.dirname(fits_path))
|
55
|
+
end
|
56
|
+
FileUtils.chmod(0755, fits_path)
|
43
57
|
end
|
44
58
|
end
|
45
59
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-extraction
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Chandek-Stark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -62,6 +62,7 @@ extra_rdoc_files: []
|
|
62
62
|
files:
|
63
63
|
- ".gitignore"
|
64
64
|
- ".rspec"
|
65
|
+
- ".travis.yml"
|
65
66
|
- Gemfile
|
66
67
|
- LICENSE.txt
|
67
68
|
- README.md
|