docsplit 0.7.4 → 0.7.5

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
- ---
2
- SHA512:
3
- metadata.gz: b275fe3c2adc1c24cdd0f82d443e4d435ca94b872763649519b2d30bd66a3af5b6e184d8dda521f7ae2bdb5f5e4054a16d157f97498dfbb4decbd195497153fc
4
- data.tar.gz: 8630bc27e04716940919f00014e6d4cd2bfd9830a8d132c6ce6ad8b01f9068dd63579041b8d281e752e83a9b77641a0e7f53cd639d63db0699205cee77d4ae76
5
- SHA1:
6
- metadata.gz: a31b827c0439da61d1d38584a0c482866dca0cb0
7
- data.tar.gz: 25afa3c6aabc5190c6850f4da2016494df7ef4ed
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ebdecba4d9b5b3a19244e08a2f3bcbaff8d8fab1
4
+ data.tar.gz: adb719d204a184313c1d282c837a7d1c929977ff
5
+ SHA512:
6
+ metadata.gz: 8e58b660472bbff77ce500e50007c1dcdeec9adce59d527d9e331b42bde76d4ff9f2df9aece56e3793eef9d49795aafeddc8a7dfc9bd56a4fa07ca69fa080ca2
7
+ data.tar.gz: 1ca0706ec0b4eca050c9a1d0a45858365f5bde18b639c3ff641278dc777a1edf17128585f264b2a128e36a5388d4b2c7b9386e8f0b1993e850bc995d625c731b
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'docsplit'
3
- s.version = '0.7.4' # Keep version in sync with docsplit.rb
4
- s.date = '2014-02-16'
3
+ s.version = '0.7.5' # Keep version in sync with docsplit.rb
4
+ s.date = '2014-05-28'
5
5
 
6
6
  s.homepage = "http://documentcloud.github.com/docsplit/"
7
7
  s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
@@ -5,7 +5,7 @@ require 'shellwords'
5
5
  # The Docsplit module delegates to the Java PDF extractors.
6
6
  module Docsplit
7
7
 
8
- VERSION = '0.7.4' # Keep in sync with gemspec.
8
+ VERSION = '0.7.5' # Keep in sync with gemspec.
9
9
 
10
10
  ESCAPE = lambda {|x| Shellwords.shellescape(x) }
11
11
 
@@ -52,6 +52,7 @@ module Docsplit
52
52
  /Applications/OpenOffice.org.app/Contents
53
53
  )
54
54
  else # probably linux/unix
55
+ # heroku libreoffice buildpack: https://github.com/rishihahs/heroku-buildpack-libreoffice
55
56
  search_paths = %w(
56
57
  /usr/lib/libreoffice
57
58
  /usr/lib64/libreoffice
@@ -59,6 +60,7 @@ module Docsplit
59
60
  /usr/lib/openoffice
60
61
  /usr/lib64/openoffice
61
62
  /opt/openoffice.org3
63
+ /app/vendor/libreoffice
62
64
  )
63
65
  end
64
66
  search_paths
@@ -8,19 +8,22 @@ module Docsplit
8
8
  # through further extraction.
9
9
  def ensure_pdfs(docs)
10
10
  [docs].flatten.map do |doc|
11
- ext = File.extname(doc)
12
- if ext.downcase == '.pdf'
11
+ if is_pdf?(doc)
13
12
  doc
14
13
  else
15
14
  tempdir = File.join(Dir.tmpdir, 'docsplit')
16
15
  extract_pdf([doc], {:output => tempdir})
17
- File.join(tempdir, File.basename(doc, ext) + '.pdf')
16
+ File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
18
17
  end
19
18
  end
20
19
  end
21
20
 
21
+ def is_pdf?(doc)
22
+ File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
23
+ end
24
+
22
25
  end
23
26
 
24
27
  extend TransparentPDFs
25
28
 
26
- end
29
+ end
metadata CHANGED
@@ -1,28 +1,33 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: docsplit
3
- version: !ruby/object:Gem::Version
4
- version: 0.7.4
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.5
5
5
  platform: ruby
6
- authors:
6
+ authors:
7
7
  - Jeremy Ashkenas
8
8
  - Samuel Clay
9
9
  - Ted Han
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
-
14
- date: 2014-02-16 00:00:00 Z
13
+ date: 2014-05-28 00:00:00.000000000 Z
15
14
  dependencies: []
16
-
17
- description: " Docsplit is a command-line utility and Ruby library for splitting apart\n documents into their component parts: searchable UTF-8 plain text, page\n images or thumbnails in any format, PDFs, single pages, and document\n metadata (title, author, number of pages...)\n"
15
+ description: |2
16
+ Docsplit is a command-line utility and Ruby library for splitting apart
17
+ documents into their component parts: searchable UTF-8 plain text, page
18
+ images or thumbnails in any format, PDFs, single pages, and document
19
+ metadata (title, author, number of pages...)
18
20
  email: opensource@documentcloud.org
19
- executables:
21
+ executables:
20
22
  - docsplit
21
23
  extensions: []
22
-
23
24
  extra_rdoc_files: []
24
-
25
- files:
25
+ files:
26
+ - LICENSE
27
+ - README
28
+ - bin/docsplit
29
+ - docsplit.gemspec
30
+ - lib/docsplit.rb
26
31
  - lib/docsplit/command_line.rb
27
32
  - lib/docsplit/image_extractor.rb
28
33
  - lib/docsplit/info_extractor.rb
@@ -31,8 +36,6 @@ files:
31
36
  - lib/docsplit/text_cleaner.rb
32
37
  - lib/docsplit/text_extractor.rb
33
38
  - lib/docsplit/transparent_pdfs.rb
34
- - lib/docsplit.rb
35
- - bin/docsplit
36
39
  - vendor/conf/document-formats.js
37
40
  - vendor/jodconverter/commons-cli-1.1.jar
38
41
  - vendor/jodconverter/commons-io-1.4.jar
@@ -43,34 +46,28 @@ files:
43
46
  - vendor/jodconverter/ridl-3.2.1.jar
44
47
  - vendor/jodconverter/unoil-3.2.1.jar
45
48
  - vendor/logging.properties
46
- - docsplit.gemspec
47
- - LICENSE
48
- - README
49
49
  homepage: http://documentcloud.github.com/docsplit/
50
- licenses:
50
+ licenses:
51
51
  - MIT
52
52
  metadata: {}
53
-
54
53
  post_install_message:
55
54
  rdoc_options: []
56
-
57
- require_paths:
55
+ require_paths:
58
56
  - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
60
- requirements:
61
- - &id001
62
- - ">="
63
- - !ruby/object:Gem::Version
64
- version: "0"
65
- required_rubygems_version: !ruby/object:Gem::Requirement
66
- requirements:
67
- - *id001
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
68
67
  requirements: []
69
-
70
68
  rubyforge_project: docsplit
71
- rubygems_version: 2.0.13
69
+ rubygems_version: 2.2.2
72
70
  signing_key:
73
71
  specification_version: 4
74
72
  summary: Break Apart Documents into Images, Text, Pages and PDFs
75
73
  test_files: []
76
-