burisu-docsplit 0.7.6 → 0.7.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e8e83657d34d345bf2187c4d97f62a68dbc9f67
4
- data.tar.gz: 6235ba1a2fc9a737f7df72dfd4aa57afb8b416ad
3
+ metadata.gz: 370a33126963926b13bef202fb15e05127a02db0
4
+ data.tar.gz: 76024f613e3ad9a339cc207ac428037c4ba6f7ef
5
5
  SHA512:
6
- metadata.gz: b669fbd7c68f94b0451cbb5753f83aaa7ef77d13247bb696e0222467310600b91b5a7ac1255a512873b79c5c78c5a21f3f5006a1b6b34626021cdca34f360fc4
7
- data.tar.gz: b2f80cca13a78498c2742bc6ce14f2c778485f5c514600c37a43119901fba503a6fcc00dcc55d687b8fbecc4a9b4f1754ed8e95372e2e46576c8a1fb166648e6
6
+ metadata.gz: d3564ec6ea484e25fd09f8e3b135bdbfb31c02ed64e74f5f3f269c38fbd58ab9f2c0d63cf9387cedd7eb10549832d583819f2caf09e9d2c2b3316da1c31243e4
7
+ data.tar.gz: 6a88a1820ab2bf23a0dacab2d54d5949de9bee18d1ba2bda86ce67948dfec97f2dee8dd3195a01033199d2e0b5c28b8e530c7d493aaafa910d2781af82371ee8
@@ -1,7 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'burisu-docsplit'
3
- s.version = '0.7.6' # Keep version in sync with docsplit.rb
4
-
3
+ s.version = '0.7.7' # Keep version in sync with docsplit.rb
5
4
  s.homepage = "http://documentcloud.github.com/docsplit/"
6
5
  s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
7
6
  s.description = <<-EOS
@@ -5,7 +5,7 @@ require 'shellwords'
5
5
  # The Docsplit module delegates to the Java PDF extractors.
6
6
  module Docsplit
7
7
 
8
- VERSION = '0.7.4' # Keep in sync with gemspec.
8
+ VERSION = '0.7.5' # Keep in sync with gemspec.
9
9
 
10
10
  ESCAPE = lambda {|x| Shellwords.shellescape(x) }
11
11
 
@@ -9,13 +9,13 @@ module Docsplit
9
9
  extract_options opts
10
10
  [pdfs].flatten.each do |pdf|
11
11
  pdf_name = File.basename(pdf, File.extname(pdf))
12
- page_path = File.join(@output, "#{pdf_name}_%d.pdf")
12
+ page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
13
13
  FileUtils.mkdir_p @output unless File.exists?(@output)
14
14
 
15
15
  cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatibility
16
- "pdftailor unstitch --output #{ESCAPE[page_path]} #{ESCAPE[pdf]} 2>&1"
16
+ "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
17
17
  else
18
- "pdftk #{ESCAPE[pdf]} burst output #{ESCAPE[page_path]} 2>&1"
18
+ "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
19
19
  end
20
20
  result = `#{cmd}`.chomp
21
21
  FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
@@ -33,4 +33,4 @@ module Docsplit
33
33
 
34
34
  end
35
35
 
36
- end
36
+ end
@@ -23,7 +23,7 @@ module Docsplit
23
23
  unless @@version_string
24
24
  null = windows? ? "NUL" : "/dev/null"
25
25
  @@version_string = `#{office_executable} -h 2>#{null}`.split("\n").first
26
- if !!@@version_string.match(/[0-9]*/)
26
+ if !!@@version_string.to_s.match(/[0-9]*/)
27
27
  @@version_string = `#{office_executable} --version`.split("\n").first
28
28
  end
29
29
  end
@@ -8,17 +8,20 @@ module Docsplit
8
8
  # through further extraction.
9
9
  def ensure_pdfs(docs)
10
10
  [docs].flatten.map do |doc|
11
- ext = File.extname(doc)
12
- if ext.downcase == '.pdf' || File.open(doc, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
11
+ if is_pdf?(doc)
13
12
  doc
14
13
  else
15
14
  tempdir = File.join(Dir.tmpdir, 'docsplit')
16
15
  extract_pdf([doc], {:output => tempdir})
17
- File.join(tempdir, File.basename(doc, ext) + '.pdf')
16
+ File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
18
17
  end
19
18
  end
20
19
  end
21
20
 
21
+ def is_pdf?(doc)
22
+ File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
23
+ end
24
+
22
25
  end
23
26
 
24
27
  extend TransparentPDFs
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: burisu-docsplit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.6
4
+ version: 0.7.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-03-27 00:00:00.000000000 Z
13
+ date: 2014-10-18 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: |2
16
16
  Docsplit is a command-line utility and Ruby library for splitting apart
@@ -23,29 +23,29 @@ executables:
23
23
  extensions: []
24
24
  extra_rdoc_files: []
25
25
  files:
26
+ - LICENSE
27
+ - README
28
+ - bin/docsplit
29
+ - docsplit.gemspec
30
+ - lib/docsplit.rb
31
+ - lib/docsplit/command_line.rb
26
32
  - lib/docsplit/image_extractor.rb
27
33
  - lib/docsplit/info_extractor.rb
28
- - lib/docsplit/transparent_pdfs.rb
29
- - lib/docsplit/text_extractor.rb
30
- - lib/docsplit/text_cleaner.rb
31
34
  - lib/docsplit/page_extractor.rb
32
35
  - lib/docsplit/pdf_extractor.rb
33
- - lib/docsplit/command_line.rb
34
- - lib/docsplit.rb
35
- - bin/docsplit
36
- - vendor/logging.properties
36
+ - lib/docsplit/text_cleaner.rb
37
+ - lib/docsplit/text_extractor.rb
38
+ - lib/docsplit/transparent_pdfs.rb
37
39
  - vendor/conf/document-formats.js
38
- - vendor/jodconverter/jurt-3.2.1.jar
39
- - vendor/jodconverter/unoil-3.2.1.jar
40
40
  - vendor/jodconverter/commons-cli-1.1.jar
41
- - vendor/jodconverter/json-20090211.jar
42
- - vendor/jodconverter/ridl-3.2.1.jar
43
41
  - vendor/jodconverter/commons-io-1.4.jar
44
- - vendor/jodconverter/juh-3.2.1.jar
45
42
  - vendor/jodconverter/jodconverter-core-3.0-beta-4.jar
46
- - docsplit.gemspec
47
- - LICENSE
48
- - README
43
+ - vendor/jodconverter/json-20090211.jar
44
+ - vendor/jodconverter/juh-3.2.1.jar
45
+ - vendor/jodconverter/jurt-3.2.1.jar
46
+ - vendor/jodconverter/ridl-3.2.1.jar
47
+ - vendor/jodconverter/unoil-3.2.1.jar
48
+ - vendor/logging.properties
49
49
  homepage: http://documentcloud.github.com/docsplit/
50
50
  licenses:
51
51
  - MIT
@@ -56,17 +56,17 @@ require_paths:
56
56
  - lib
57
57
  required_ruby_version: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  required_rubygems_version: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - '>='
64
+ - - ">="
65
65
  - !ruby/object:Gem::Version
66
66
  version: '0'
67
67
  requirements: []
68
68
  rubyforge_project:
69
- rubygems_version: 2.0.14
69
+ rubygems_version: 2.2.2
70
70
  signing_key:
71
71
  specification_version: 4
72
72
  summary: Break Apart Documents into Images, Text, Pages and PDFs