burisu-docsplit 0.7.6 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/docsplit.gemspec +1 -2
- data/lib/docsplit.rb +1 -1
- data/lib/docsplit/page_extractor.rb +4 -4
- data/lib/docsplit/pdf_extractor.rb +1 -1
- data/lib/docsplit/transparent_pdfs.rb +6 -3
- metadata +20 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 370a33126963926b13bef202fb15e05127a02db0
|
4
|
+
data.tar.gz: 76024f613e3ad9a339cc207ac428037c4ba6f7ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3564ec6ea484e25fd09f8e3b135bdbfb31c02ed64e74f5f3f269c38fbd58ab9f2c0d63cf9387cedd7eb10549832d583819f2caf09e9d2c2b3316da1c31243e4
|
7
|
+
data.tar.gz: 6a88a1820ab2bf23a0dacab2d54d5949de9bee18d1ba2bda86ce67948dfec97f2dee8dd3195a01033199d2e0b5c28b8e530c7d493aaafa910d2781af82371ee8
|
data/docsplit.gemspec
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'burisu-docsplit'
|
3
|
-
s.version = '0.7.6'
|
4
|
-
|
3
|
+
s.version = '0.7.7' # Keep version in sync with docsplit.rb
|
5
4
|
s.homepage = "http://documentcloud.github.com/docsplit/"
|
6
5
|
s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
|
7
6
|
s.description = <<-EOS
|
data/lib/docsplit.rb
CHANGED
data/lib/docsplit/page_extractor.rb
CHANGED
@@ -9,13 +9,13 @@ module Docsplit
|
|
9
9
|
extract_options opts
|
10
10
|
[pdfs].flatten.each do |pdf|
|
11
11
|
pdf_name = File.basename(pdf, File.extname(pdf))
|
12
|
-
page_path = File.join(@output, "#{pdf_name}_%d.pdf"
|
12
|
+
page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
|
13
13
|
FileUtils.mkdir_p @output unless File.exists?(@output)
|
14
14
|
|
15
15
|
cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatability
|
16
|
-
"pdftailor unstitch --output #{
|
16
|
+
"pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
|
17
17
|
else
|
18
|
-
"pdftk #{ESCAPE[pdf]} burst output #{
|
18
|
+
"pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
|
19
19
|
end
|
20
20
|
result = `#{cmd}`.chomp
|
21
21
|
FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
|
@@ -33,4 +33,4 @@ module Docsplit
|
|
33
33
|
|
34
34
|
end
|
35
35
|
|
36
|
-
end
|
36
|
+
end
|
data/lib/docsplit/pdf_extractor.rb
CHANGED
@@ -23,7 +23,7 @@ module Docsplit
|
|
23
23
|
unless @@version_string
|
24
24
|
null = windows? ? "NUL" : "/dev/null"
|
25
25
|
@@version_string = `#{office_executable} -h 2>#{null}`.split("\n").first
|
26
|
-
if !!@@version_string.match(/[0-9]*/)
|
26
|
+
if !!@@version_string.to_s.match(/[0-9]*/)
|
27
27
|
@@version_string = `#{office_executable} --version`.split("\n").first
|
28
28
|
end
|
29
29
|
end
|
data/lib/docsplit/transparent_pdfs.rb
CHANGED
@@ -8,17 +8,20 @@ module Docsplit
|
|
8
8
|
# through further extraction.
|
9
9
|
def ensure_pdfs(docs)
|
10
10
|
[docs].flatten.map do |doc|
|
11
|
-
|
12
|
-
if ext.downcase == '.pdf' || File.open(doc, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
|
11
|
+
if is_pdf?(doc)
|
13
12
|
doc
|
14
13
|
else
|
15
14
|
tempdir = File.join(Dir.tmpdir, 'docsplit')
|
16
15
|
extract_pdf([doc], {:output => tempdir})
|
17
|
-
File.join(tempdir, File.basename(doc,
|
16
|
+
File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
|
18
17
|
end
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
21
|
+
def is_pdf?(doc)
|
22
|
+
File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
|
23
|
+
end
|
24
|
+
|
22
25
|
end
|
23
26
|
|
24
27
|
extend TransparentPDFs
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: burisu-docsplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.6
|
4
|
+
version: 0.7.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy Ashkenas
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-
|
13
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
14
14
|
dependencies: []
|
15
15
|
description: |2
|
16
16
|
Docsplit is a command-line utility and Ruby library for splitting apart
|
@@ -23,29 +23,29 @@ executables:
|
|
23
23
|
extensions: []
|
24
24
|
extra_rdoc_files: []
|
25
25
|
files:
|
26
|
+
- LICENSE
|
27
|
+
- README
|
28
|
+
- bin/docsplit
|
29
|
+
- docsplit.gemspec
|
30
|
+
- lib/docsplit.rb
|
31
|
+
- lib/docsplit/command_line.rb
|
26
32
|
- lib/docsplit/image_extractor.rb
|
27
33
|
- lib/docsplit/info_extractor.rb
|
28
|
-
- lib/docsplit/transparent_pdfs.rb
|
29
|
-
- lib/docsplit/text_extractor.rb
|
30
|
-
- lib/docsplit/text_cleaner.rb
|
31
34
|
- lib/docsplit/page_extractor.rb
|
32
35
|
- lib/docsplit/pdf_extractor.rb
|
33
|
-
- lib/docsplit/
|
34
|
-
- lib/docsplit.rb
|
35
|
-
-
|
36
|
-
- vendor/logging.properties
|
36
|
+
- lib/docsplit/text_cleaner.rb
|
37
|
+
- lib/docsplit/text_extractor.rb
|
38
|
+
- lib/docsplit/transparent_pdfs.rb
|
37
39
|
- vendor/conf/document-formats.js
|
38
|
-
- vendor/jodconverter/jurt-3.2.1.jar
|
39
|
-
- vendor/jodconverter/unoil-3.2.1.jar
|
40
40
|
- vendor/jodconverter/commons-cli-1.1.jar
|
41
|
-
- vendor/jodconverter/json-20090211.jar
|
42
|
-
- vendor/jodconverter/ridl-3.2.1.jar
|
43
41
|
- vendor/jodconverter/commons-io-1.4.jar
|
44
|
-
- vendor/jodconverter/juh-3.2.1.jar
|
45
42
|
- vendor/jodconverter/jodconverter-core-3.0-beta-4.jar
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
43
|
+
- vendor/jodconverter/json-20090211.jar
|
44
|
+
- vendor/jodconverter/juh-3.2.1.jar
|
45
|
+
- vendor/jodconverter/jurt-3.2.1.jar
|
46
|
+
- vendor/jodconverter/ridl-3.2.1.jar
|
47
|
+
- vendor/jodconverter/unoil-3.2.1.jar
|
48
|
+
- vendor/logging.properties
|
49
49
|
homepage: http://documentcloud.github.com/docsplit/
|
50
50
|
licenses:
|
51
51
|
- MIT
|
@@ -56,17 +56,17 @@ require_paths:
|
|
56
56
|
- lib
|
57
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
63
|
requirements:
|
64
|
-
- -
|
64
|
+
- - ">="
|
65
65
|
- !ruby/object:Gem::Version
|
66
66
|
version: '0'
|
67
67
|
requirements: []
|
68
68
|
rubyforge_project:
|
69
|
-
rubygems_version: 2.
|
69
|
+
rubygems_version: 2.2.2
|
70
70
|
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: Break Apart Documents into Images, Text, Pages and PDFs
|