burisu-docsplit 0.7.6 → 0.7.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/docsplit.gemspec +1 -2
- data/lib/docsplit.rb +1 -1
- data/lib/docsplit/page_extractor.rb +4 -4
- data/lib/docsplit/pdf_extractor.rb +1 -1
- data/lib/docsplit/transparent_pdfs.rb +6 -3
- metadata +20 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 370a33126963926b13bef202fb15e05127a02db0
|
4
|
+
data.tar.gz: 76024f613e3ad9a339cc207ac428037c4ba6f7ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3564ec6ea484e25fd09f8e3b135bdbfb31c02ed64e74f5f3f269c38fbd58ab9f2c0d63cf9387cedd7eb10549832d583819f2caf09e9d2c2b3316da1c31243e4
|
7
|
+
data.tar.gz: 6a88a1820ab2bf23a0dacab2d54d5949de9bee18d1ba2bda86ce67948dfec97f2dee8dd3195a01033199d2e0b5c28b8e530c7d493aaafa910d2781af82371ee8
|
data/docsplit.gemspec
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'burisu-docsplit'
|
3
|
-
s.version = '0.7.6'
|
4
|
-
|
3
|
+
s.version = '0.7.7' # Keep version in sync with docsplit.rb
|
5
4
|
s.homepage = "http://documentcloud.github.com/docsplit/"
|
6
5
|
s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
|
7
6
|
s.description = <<-EOS
|
data/lib/docsplit.rb
CHANGED
@@ -9,13 +9,13 @@ module Docsplit
|
|
9
9
|
extract_options opts
|
10
10
|
[pdfs].flatten.each do |pdf|
|
11
11
|
pdf_name = File.basename(pdf, File.extname(pdf))
|
12
|
-
page_path = File.join(@output, "#{pdf_name}_%d.pdf")
|
12
|
+
page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
|
13
13
|
FileUtils.mkdir_p @output unless File.exists?(@output)
|
14
14
|
|
15
15
|
cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatability
|
16
|
-
"pdftailor unstitch --output #{ESCAPE[page_path]} #{ESCAPE[pdf]} 2>&1"
|
16
|
+
"pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
|
17
17
|
else
|
18
|
-
"pdftk #{ESCAPE[pdf]} burst output #{ESCAPE[page_path]} 2>&1"
|
18
|
+
"pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
|
19
19
|
end
|
20
20
|
result = `#{cmd}`.chomp
|
21
21
|
FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
|
@@ -33,4 +33,4 @@ module Docsplit
|
|
33
33
|
|
34
34
|
end
|
35
35
|
|
36
|
-
end
|
36
|
+
end
|
@@ -23,7 +23,7 @@ module Docsplit
|
|
23
23
|
unless @@version_string
|
24
24
|
null = windows? ? "NUL" : "/dev/null"
|
25
25
|
@@version_string = `#{office_executable} -h 2>#{null}`.split("\n").first
|
26
|
-
if !!@@version_string.match(/[0-9]*/)
|
26
|
+
if !!@@version_string.to_s.match(/[0-9]*/)
|
27
27
|
@@version_string = `#{office_executable} --version`.split("\n").first
|
28
28
|
end
|
29
29
|
end
|
@@ -8,17 +8,20 @@ module Docsplit
|
|
8
8
|
# through further extraction.
|
9
9
|
def ensure_pdfs(docs)
|
10
10
|
[docs].flatten.map do |doc|
|
11
|
-
|
12
|
-
if ext.downcase == '.pdf' || File.open(doc, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
|
11
|
+
if is_pdf?(doc)
|
13
12
|
doc
|
14
13
|
else
|
15
14
|
tempdir = File.join(Dir.tmpdir, 'docsplit')
|
16
15
|
extract_pdf([doc], {:output => tempdir})
|
17
|
-
File.join(tempdir, File.basename(doc,
|
16
|
+
File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
|
18
17
|
end
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
21
|
+
def is_pdf?(doc)
|
22
|
+
File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
|
23
|
+
end
|
24
|
+
|
22
25
|
end
|
23
26
|
|
24
27
|
extend TransparentPDFs
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: burisu-docsplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.6
|
4
|
+
version: 0.7.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-
|
13
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
14
14
|
dependencies: []
|
15
15
|
description: |2
|
16
16
|
Docsplit is a command-line utility and Ruby library for splitting apart
|
@@ -23,29 +23,29 @@ executables:
|
|
23
23
|
extensions: []
|
24
24
|
extra_rdoc_files: []
|
25
25
|
files:
|
26
|
+
- LICENSE
|
27
|
+
- README
|
28
|
+
- bin/docsplit
|
29
|
+
- docsplit.gemspec
|
30
|
+
- lib/docsplit.rb
|
31
|
+
- lib/docsplit/command_line.rb
|
26
32
|
- lib/docsplit/image_extractor.rb
|
27
33
|
- lib/docsplit/info_extractor.rb
|
28
|
-
- lib/docsplit/transparent_pdfs.rb
|
29
|
-
- lib/docsplit/text_extractor.rb
|
30
|
-
- lib/docsplit/text_cleaner.rb
|
31
34
|
- lib/docsplit/page_extractor.rb
|
32
35
|
- lib/docsplit/pdf_extractor.rb
|
33
|
-
- lib/docsplit/
|
34
|
-
- lib/docsplit.rb
|
35
|
-
-
|
36
|
-
- vendor/logging.properties
|
36
|
+
- lib/docsplit/text_cleaner.rb
|
37
|
+
- lib/docsplit/text_extractor.rb
|
38
|
+
- lib/docsplit/transparent_pdfs.rb
|
37
39
|
- vendor/conf/document-formats.js
|
38
|
-
- vendor/jodconverter/jurt-3.2.1.jar
|
39
|
-
- vendor/jodconverter/unoil-3.2.1.jar
|
40
40
|
- vendor/jodconverter/commons-cli-1.1.jar
|
41
|
-
- vendor/jodconverter/json-20090211.jar
|
42
|
-
- vendor/jodconverter/ridl-3.2.1.jar
|
43
41
|
- vendor/jodconverter/commons-io-1.4.jar
|
44
|
-
- vendor/jodconverter/juh-3.2.1.jar
|
45
42
|
- vendor/jodconverter/jodconverter-core-3.0-beta-4.jar
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
43
|
+
- vendor/jodconverter/json-20090211.jar
|
44
|
+
- vendor/jodconverter/juh-3.2.1.jar
|
45
|
+
- vendor/jodconverter/jurt-3.2.1.jar
|
46
|
+
- vendor/jodconverter/ridl-3.2.1.jar
|
47
|
+
- vendor/jodconverter/unoil-3.2.1.jar
|
48
|
+
- vendor/logging.properties
|
49
49
|
homepage: http://documentcloud.github.com/docsplit/
|
50
50
|
licenses:
|
51
51
|
- MIT
|
@@ -56,17 +56,17 @@ require_paths:
|
|
56
56
|
- lib
|
57
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
63
|
requirements:
|
64
|
-
- -
|
64
|
+
- - ">="
|
65
65
|
- !ruby/object:Gem::Version
|
66
66
|
version: '0'
|
67
67
|
requirements: []
|
68
68
|
rubyforge_project:
|
69
|
-
rubygems_version: 2.
|
69
|
+
rubygems_version: 2.2.2
|
70
70
|
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: Break Apart Documents into Images, Text, Pages and PDFs
|