docsplit 0.7.4 → 0.7.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
- ---
2
- SHA512:
3
- metadata.gz: b275fe3c2adc1c24cdd0f82d443e4d435ca94b872763649519b2d30bd66a3af5b6e184d8dda521f7ae2bdb5f5e4054a16d157f97498dfbb4decbd195497153fc
4
- data.tar.gz: 8630bc27e04716940919f00014e6d4cd2bfd9830a8d132c6ce6ad8b01f9068dd63579041b8d281e752e83a9b77641a0e7f53cd639d63db0699205cee77d4ae76
5
- SHA1:
6
- metadata.gz: a31b827c0439da61d1d38584a0c482866dca0cb0
7
- data.tar.gz: 25afa3c6aabc5190c6850f4da2016494df7ef4ed
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ebdecba4d9b5b3a19244e08a2f3bcbaff8d8fab1
4
+ data.tar.gz: adb719d204a184313c1d282c837a7d1c929977ff
5
+ SHA512:
6
+ metadata.gz: 8e58b660472bbff77ce500e50007c1dcdeec9adce59d527d9e331b42bde76d4ff9f2df9aece56e3793eef9d49795aafeddc8a7dfc9bd56a4fa07ca69fa080ca2
7
+ data.tar.gz: 1ca0706ec0b4eca050c9a1d0a45858365f5bde18b639c3ff641278dc777a1edf17128585f264b2a128e36a5388d4b2c7b9386e8f0b1993e850bc995d625c731b
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'docsplit'
3
- s.version = '0.7.4' # Keep version in sync with docsplit.rb
4
- s.date = '2014-02-16'
3
+ s.version = '0.7.5' # Keep version in sync with docsplit.rb
4
+ s.date = '2014-05-28'
5
5
 
6
6
  s.homepage = "http://documentcloud.github.com/docsplit/"
7
7
  s.summary = "Break Apart Documents into Images, Text, Pages and PDFs"
@@ -5,7 +5,7 @@ require 'shellwords'
5
5
  # The Docsplit module delegates to the Java PDF extractors.
6
6
  module Docsplit
7
7
 
8
- VERSION = '0.7.4' # Keep in sync with gemspec.
8
+ VERSION = '0.7.5' # Keep in sync with gemspec.
9
9
 
10
10
  ESCAPE = lambda {|x| Shellwords.shellescape(x) }
11
11
 
@@ -52,6 +52,7 @@ module Docsplit
52
52
  /Applications/OpenOffice.org.app/Contents
53
53
  )
54
54
  else # probably linux/unix
55
+ # heroku libreoffice buildpack: https://github.com/rishihahs/heroku-buildpack-libreoffice
55
56
  search_paths = %w(
56
57
  /usr/lib/libreoffice
57
58
  /usr/lib64/libreoffice
@@ -59,6 +60,7 @@ module Docsplit
59
60
  /usr/lib/openoffice
60
61
  /usr/lib64/openoffice
61
62
  /opt/openoffice.org3
63
+ /app/vendor/libreoffice
62
64
  )
63
65
  end
64
66
  search_paths
@@ -8,19 +8,22 @@ module Docsplit
8
8
  # through further extraction.
9
9
  def ensure_pdfs(docs)
10
10
  [docs].flatten.map do |doc|
11
- ext = File.extname(doc)
12
- if ext.downcase == '.pdf'
11
+ if is_pdf?(doc)
13
12
  doc
14
13
  else
15
14
  tempdir = File.join(Dir.tmpdir, 'docsplit')
16
15
  extract_pdf([doc], {:output => tempdir})
17
- File.join(tempdir, File.basename(doc, ext) + '.pdf')
16
+ File.join(tempdir, File.basename(doc, File.extname(doc)) + '.pdf')
18
17
  end
19
18
  end
20
19
  end
21
20
 
21
+ def is_pdf?(doc)
22
+ File.extname(doc).downcase == '.pdf' || File.open(doc, 'rb', &:readline) =~ /\A\%PDF-\d+(\.\d+)?/
23
+ end
24
+
22
25
  end
23
26
 
24
27
  extend TransparentPDFs
25
28
 
26
- end
29
+ end
metadata CHANGED
@@ -1,28 +1,33 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: docsplit
3
- version: !ruby/object:Gem::Version
4
- version: 0.7.4
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.5
5
5
  platform: ruby
6
- authors:
6
+ authors:
7
7
  - Jeremy Ashkenas
8
8
  - Samuel Clay
9
9
  - Ted Han
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
-
14
- date: 2014-02-16 00:00:00 Z
13
+ date: 2014-05-28 00:00:00.000000000 Z
15
14
  dependencies: []
16
-
17
- description: " Docsplit is a command-line utility and Ruby library for splitting apart\n documents into their component parts: searchable UTF-8 plain text, page\n images or thumbnails in any format, PDFs, single pages, and document\n metadata (title, author, number of pages...)\n"
15
+ description: |2
16
+ Docsplit is a command-line utility and Ruby library for splitting apart
17
+ documents into their component parts: searchable UTF-8 plain text, page
18
+ images or thumbnails in any format, PDFs, single pages, and document
19
+ metadata (title, author, number of pages...)
18
20
  email: opensource@documentcloud.org
19
- executables:
21
+ executables:
20
22
  - docsplit
21
23
  extensions: []
22
-
23
24
  extra_rdoc_files: []
24
-
25
- files:
25
+ files:
26
+ - LICENSE
27
+ - README
28
+ - bin/docsplit
29
+ - docsplit.gemspec
30
+ - lib/docsplit.rb
26
31
  - lib/docsplit/command_line.rb
27
32
  - lib/docsplit/image_extractor.rb
28
33
  - lib/docsplit/info_extractor.rb
@@ -31,8 +36,6 @@ files:
31
36
  - lib/docsplit/text_cleaner.rb
32
37
  - lib/docsplit/text_extractor.rb
33
38
  - lib/docsplit/transparent_pdfs.rb
34
- - lib/docsplit.rb
35
- - bin/docsplit
36
39
  - vendor/conf/document-formats.js
37
40
  - vendor/jodconverter/commons-cli-1.1.jar
38
41
  - vendor/jodconverter/commons-io-1.4.jar
@@ -43,34 +46,28 @@ files:
43
46
  - vendor/jodconverter/ridl-3.2.1.jar
44
47
  - vendor/jodconverter/unoil-3.2.1.jar
45
48
  - vendor/logging.properties
46
- - docsplit.gemspec
47
- - LICENSE
48
- - README
49
49
  homepage: http://documentcloud.github.com/docsplit/
50
- licenses:
50
+ licenses:
51
51
  - MIT
52
52
  metadata: {}
53
-
54
53
  post_install_message:
55
54
  rdoc_options: []
56
-
57
- require_paths:
55
+ require_paths:
58
56
  - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
60
- requirements:
61
- - &id001
62
- - ">="
63
- - !ruby/object:Gem::Version
64
- version: "0"
65
- required_rubygems_version: !ruby/object:Gem::Requirement
66
- requirements:
67
- - *id001
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
68
67
  requirements: []
69
-
70
68
  rubyforge_project: docsplit
71
- rubygems_version: 2.0.13
69
+ rubygems_version: 2.2.2
72
70
  signing_key:
73
71
  specification_version: 4
74
72
  summary: Break Apart Documents into Images, Text, Pages and PDFs
75
73
  test_files: []
76
-