docsplit 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/docsplit.gemspec +1 -1
- data/lib/docsplit.rb +4 -2
- metadata +3 -3
data/docsplit.gemspec
CHANGED
data/lib/docsplit.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# The Docsplit module delegates to the Java PDF extractors.
|
2
2
|
module Docsplit
|
3
3
|
|
4
|
-
VERSION = '0.4.0' # Keep in sync with gemspec.
|
4
|
+
VERSION = '0.4.1' # Keep in sync with gemspec.
|
5
5
|
|
6
6
|
ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
7
7
|
|
@@ -11,6 +11,8 @@ module Docsplit
|
|
11
11
|
|
12
12
|
HEADLESS = "-Djava.awt.headless=true"
|
13
13
|
|
14
|
+
OFFICE = RUBY_PLATFORM.match(/darwin/i) ? '' : '-Doffice.home=/usr/lib/openoffice'
|
15
|
+
|
14
16
|
METADATA_KEYS = [:author, :date, :creator, :keywords, :producer, :subject, :title, :length]
|
15
17
|
|
16
18
|
DEPENDENCIES = {:java => false, :gm => false, :pdftotext => false, :pdftk => false, :tesseract => false}
|
@@ -76,7 +78,7 @@ module Docsplit
|
|
76
78
|
# Runs a Java command, with quieted logging, and the classpath set properly.
|
77
79
|
def self.run(command, pdfs, opts, return_output=false)
|
78
80
|
pdfs = [pdfs].flatten.map{|pdf| "\"#{pdf}\""}.join(' ')
|
79
|
-
cmd = "java #{HEADLESS} #{LOGGING} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1"
|
81
|
+
cmd = "java #{HEADLESS} #{LOGGING} #{OFFICE} -cp #{CLASSPATH} #{command} #{pdfs} 2>&1"
|
80
82
|
result = `#{cmd}`.chomp
|
81
83
|
raise ExtractionFailed, result if $? != 0
|
82
84
|
return return_output ? (result.empty? ? nil : result) : true
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: docsplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 4
|
9
|
-
- 0
|
10
|
-
version: 0.4.0
|
9
|
+
- 1
|
10
|
+
version: 0.4.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jeremy Ashkenas
|