burisu-docsplit 0.7.6 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e8e83657d34d345bf2187c4d97f62a68dbc9f67
4
- data.tar.gz: 6235ba1a2fc9a737f7df72dfd4aa57afb8b416ad
3
+ metadata.gz: 370a33126963926b13bef202fb15e05127a02db0
4
+ data.tar.gz: 76024f613e3ad9a339cc207ac428037c4ba6f7ef
5
5
  SHA512:
6
- metadata.gz: b669fbd7c68f94b0451cbb5753f83aaa7ef77d13247bb696e0222467310600b91b5a7ac1255a512873b79c5c78c5a21f3f5006a1b6b34626021cdca34f360fc4
7
- data.tar.gz: b2f80cca13a78498c2742bc6ce14f2c778485f5c514600c37a43119901fba503a6fcc00dcc55d687b8fbecc4a9b4f1754ed8e95372e2e46576c8a1fb166648e6
6
+ metadata.gz: d3564ec6ea484e25fd09f8e3b135bdbfb31c02ed64e74f5f3f269c38fbd58ab9f2c0d63cf9387cedd7eb10549832d583819f2caf09e9d2c2b3316da1c31243e4
7
+ data.tar.gz: 6a88a1820ab2bf23a0dacab2d54d5949de9bee18d1ba2bda86ce67948dfec97f2dee8dd3195a01033199d2e0b5c28b8e530c7d493aaafa910d2781af82371ee8
@@ -1,7 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'burisu-docsplit'
3
- s.version = '0.7.6' # Keep version in sync with docsplit.rb
4
-
3
+ s.version = '0.7.7' # Keep version in sync with docsplit.rb
5
4
  s.homepage = "http://documentcloud.github.com/docsplit/"
6
5
  s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
7
6
  s.description = <<-EOS
@@ -5,7 +5,7 @@ require 'shellwords'
5
5
  # The Docsplit module delegates to the Java PDF extractors.
6
6
  module Docsplit
7
7
 
8
- VERSION = '0.7.4' # Keep in sync with gemspec.
8
+ VERSION = '0.7.5' # Keep in sync with gemspec.
9
9
 
10
10
  ESCAPE = lambda {|x| Shellwords.shellescape(x) }
11
11
 
@@ -9,13 +9,13 @@ module Docsplit
9
9
  extract_options opts
10
10
  [pdfs].flatten.each do |pdf|
11
11
  pdf_name = File.basename(pdf, File.extname(pdf))
12
- page_path = File.join(@output, "#{pdf_name}_%d.pdf")
12
+ page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
13
13
  FileUtils.mkdir_p @output unless File.exists?(@output)
14
14
 
15
15
  cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatability
16
- "pdftailor unstitch --output #{ESCAPE[page_path]} #{ESCAPE[pdf]} 2>&1"
16
+ "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
17
17
  else
18
- "pdftk #{ESCAPE[pdf]} burst output #{ESCAPE[page_path]} 2>&1"
18
+ "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
19
19
  end
20
20
  result = `#{cmd}`.chomp
21
21
  FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
@@ -33,4 +33,4 @@ module Docsplit
33
33
 
34
34
  end
35
35
 
36
- end
36
+ end
@@ -23,7 +23,7 @@ module Docsplit
23
23
  unless @@version_string
24
24
  null = windows? ? "NUL" : "/dev/null"
25
25
  @@version_string = `#{office_executable} -h 2>#{null}`.split("\n").first
26
- if !!@@version_string.match(/[0-9]*/)
26
+ if !!@@version_string.to_s.match(/[0-9]*/)
27
27
  @@version_string = `#{office_executable} --version`.split("\n").first
28
28
  end
29
29
  end
@@ -8,17 +8,20 @@ module Docsplit
8
8
  # through further extraction.
9
9
  def ensure_pdfs(docs)
10
10
  [docs].flatten.map do |doc|
11
- ext = File.extname(doc)
12
- if ext.downcase == '.pdf' || File.open(doc, "rb", &:readline) =~ /\A\%PDF-\d+(\.\d+)?$/
11
+ if is_pdf?(doc)
13
12
  doc
14
13
  else
15
14
  tempdir = File.join(Dir.tmpdir, 'docsplit')
16
15
  extract_pdf([doc], {:output => tempdir})
17
- File.join(tempdir, File.basename(doc, ext) + '.pdf')
16
+ File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
18
17
  end
19
18
  end
20
19
  end
21
20
 
21
+ def is_pdf?(doc)
22
+ File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
23
+ end
24
+
22
25
  end
23
26
 
24
27
  extend TransparentPDFs
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: burisu-docsplit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.6
4
+ version: 0.7.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Ashkenas
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-03-27 00:00:00.000000000 Z
13
+ date: 2014-10-18 00:00:00.000000000 Z
14
14
  dependencies: []
15
15
  description: |2
16
16
  Docsplit is a command-line utility and Ruby library for splitting apart
@@ -23,29 +23,29 @@ executables:
23
23
  extensions: []
24
24
  extra_rdoc_files: []
25
25
  files:
26
+ - LICENSE
27
+ - README
28
+ - bin/docsplit
29
+ - docsplit.gemspec
30
+ - lib/docsplit.rb
31
+ - lib/docsplit/command_line.rb
26
32
  - lib/docsplit/image_extractor.rb
27
33
  - lib/docsplit/info_extractor.rb
28
- - lib/docsplit/transparent_pdfs.rb
29
- - lib/docsplit/text_extractor.rb
30
- - lib/docsplit/text_cleaner.rb
31
34
  - lib/docsplit/page_extractor.rb
32
35
  - lib/docsplit/pdf_extractor.rb
33
- - lib/docsplit/command_line.rb
34
- - lib/docsplit.rb
35
- - bin/docsplit
36
- - vendor/logging.properties
36
+ - lib/docsplit/text_cleaner.rb
37
+ - lib/docsplit/text_extractor.rb
38
+ - lib/docsplit/transparent_pdfs.rb
37
39
  - vendor/conf/document-formats.js
38
- - vendor/jodconverter/jurt-3.2.1.jar
39
- - vendor/jodconverter/unoil-3.2.1.jar
40
40
  - vendor/jodconverter/commons-cli-1.1.jar
41
- - vendor/jodconverter/json-20090211.jar
42
- - vendor/jodconverter/ridl-3.2.1.jar
43
41
  - vendor/jodconverter/commons-io-1.4.jar
44
- - vendor/jodconverter/juh-3.2.1.jar
45
42
  - vendor/jodconverter/jodconverter-core-3.0-beta-4.jar
46
- - docsplit.gemspec
47
- - LICENSE
48
- - README
43
+ - vendor/jodconverter/json-20090211.jar
44
+ - vendor/jodconverter/juh-3.2.1.jar
45
+ - vendor/jodconverter/jurt-3.2.1.jar
46
+ - vendor/jodconverter/ridl-3.2.1.jar
47
+ - vendor/jodconverter/unoil-3.2.1.jar
48
+ - vendor/logging.properties
49
49
  homepage: http://documentcloud.github.com/docsplit/
50
50
  licenses:
51
51
  - MIT
@@ -56,17 +56,17 @@ require_paths:
56
56
  - lib
57
57
  required_ruby_version: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  required_rubygems_version: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - '>='
64
+ - - ">="
65
65
  - !ruby/object:Gem::Version
66
66
  version: '0'
67
67
  requirements: []
68
68
  rubyforge_project:
69
- rubygems_version: 2.0.14
69
+ rubygems_version: 2.2.2
70
70
  signing_key:
71
71
  specification_version: 4
72
72
  summary: Break Apart Documents into Images, Text, Pages and PDFs