pdftoimage 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +70 -0
- data/lib/pdftoimage/image.rb +30 -11
- data/lib/pdftoimage/version.rb +1 -1
- data/lib/pdftoimage.rb +53 -10
- metadata +12 -15
- data/ChangeLog.rdoc +0 -28
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 63b80a5a933c53e13c0928aeca1fc5592d91d2c6d93d6e1bc0712e33180469f0
|
|
4
|
+
data.tar.gz: aa20e7f2a5e45ae52ad1a40c826ba1b0bfdb5c4ead284e1332243d3ddcfd7efa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2bde807b5302cf3f0de8fac2beb888b69b6a6b557e9a8a206405feb2b31e7cdc262e51c3bf6b32887a7580c1cea31b3c9aaa329500c12be25e7a237d681fc4cd
|
|
7
|
+
data.tar.gz: e5857849b1b9a4f60a0ba07501429eceae4a1973331caee859e6d31ae43efc52a48d87e7556e7509af2f1e401724e129c1f921d785d059438f8827814e81c005
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com).
|
|
6
|
+
|
|
7
|
+
## [0.3.0] - 2026-03-20
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- `PDFToImage.open` now accepts IO objects in addition to file paths (#11)
|
|
11
|
+
- `PDFToImage.from_blob` for opening PDFs from raw binary data (#11)
|
|
12
|
+
- `Image#save` now accepts IO objects for output, enabling fully in-memory workflows (#11)
|
|
13
|
+
- Support for opening PDFs from remote URLs (#16)
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Replaced direct ImageMagick CLI calls with MiniMagick (#15)
|
|
17
|
+
|
|
18
|
+
### Removed
|
|
19
|
+
- Removed iconv dependency (#17)
|
|
20
|
+
|
|
21
|
+
## [0.2.1] - 2025-04-08
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
- "Error determining page count" (#13)
|
|
25
|
+
- Updated shellwords dependency to 0.2.0+ (#7)
|
|
26
|
+
|
|
27
|
+
## [0.2.0] - 2023-04-20
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- Use of deprecated `File.exists?` method (#4)
|
|
31
|
+
- File paths are now escaped to properly handle spaces and special characters (#3)
|
|
32
|
+
|
|
33
|
+
### Added
|
|
34
|
+
- Specifying dpi resolution is now supported (#5)
|
|
35
|
+
|
|
36
|
+
## [0.1.7] - 2018-05-01
|
|
37
|
+
|
|
38
|
+
### Fixed
|
|
39
|
+
- Updated yard to resolve a vulnerability
|
|
40
|
+
|
|
41
|
+
## [0.1.6] - 2011-07-13
|
|
42
|
+
|
|
43
|
+
### Fixed
|
|
44
|
+
- Buggy PDF generators encoding CreationDate and ModDate as UTF-16 instead of ASCII, causing parsing errors
|
|
45
|
+
|
|
46
|
+
## [0.1.5] - 2011-03-08
|
|
47
|
+
|
|
48
|
+
### Fixed
|
|
49
|
+
- Bug caused by poppler_utils no longer leaving off the extra padded zero
|
|
50
|
+
|
|
51
|
+
## [0.1.4] - 2010-11-15
|
|
52
|
+
|
|
53
|
+
### Fixed
|
|
54
|
+
- Documents with page counts that are exact powers of 10 not parsing properly due to poppler_utils zero-padding behavior
|
|
55
|
+
|
|
56
|
+
## [0.1.3] - 2010-11-12
|
|
57
|
+
|
|
58
|
+
### Fixed
|
|
59
|
+
- PDF documents with more than 9 pages not parsing properly (zero-padding issue)
|
|
60
|
+
|
|
61
|
+
## [0.1.2] - 2010-11-11
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
- Support for blocks upon opening a PDF
|
|
65
|
+
- `quality` method for JPEG/MIFF/PNG compression levels
|
|
66
|
+
- Lazy conversion: PDF conversion is now deferred until saving, improving performance for partial conversions
|
|
67
|
+
|
|
68
|
+
## [0.1.1] - 2010-11-10
|
|
69
|
+
|
|
70
|
+
- Initial release
|
data/lib/pdftoimage/image.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
require 'tempfile'
|
|
2
|
+
|
|
1
3
|
module PDFToImage
|
|
2
4
|
# A class which is instantiated by PDFToImage when a PDF document
|
|
3
5
|
# is opened.
|
|
@@ -26,7 +28,7 @@ module PDFToImage
|
|
|
26
28
|
|
|
27
29
|
CUSTOM_IMAGE_METHODS.each do |method|
|
|
28
30
|
define_method(method.to_sym) do |*args|
|
|
29
|
-
@args <<
|
|
31
|
+
@args << [method, args]
|
|
30
32
|
|
|
31
33
|
self
|
|
32
34
|
end
|
|
@@ -67,18 +69,12 @@ module PDFToImage
|
|
|
67
69
|
# @param outname [String, IO] The output filename of the image, or an IO object (anything responding to `write`) to write the image data to
|
|
68
70
|
#
|
|
69
71
|
def save(outname)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if not @args.empty?
|
|
75
|
-
cmd += "#{@args.join(' ')} "
|
|
72
|
+
if outname.respond_to?(:write)
|
|
73
|
+
save_to_io(outname)
|
|
74
|
+
else
|
|
75
|
+
save_to_file(outname)
|
|
76
76
|
end
|
|
77
77
|
|
|
78
|
-
cmd += "#{Shellwords.escape(@filename)} #{Shellwords.escape(outname)}"
|
|
79
|
-
|
|
80
|
-
PDFToImage.exec(cmd)
|
|
81
|
-
|
|
82
78
|
return true
|
|
83
79
|
end
|
|
84
80
|
|
|
@@ -94,6 +90,29 @@ module PDFToImage
|
|
|
94
90
|
|
|
95
91
|
private
|
|
96
92
|
|
|
93
|
+
def save_to_file(outname)
|
|
94
|
+
generate_temp_file
|
|
95
|
+
|
|
96
|
+
image = MiniMagick::Image.open(@filename)
|
|
97
|
+
@args.each do |method, args|
|
|
98
|
+
image.send(method, *args)
|
|
99
|
+
end
|
|
100
|
+
image.write(outname)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def save_to_io(io)
|
|
104
|
+
tempfile = Tempfile.new(['pdftoimage', '.png'])
|
|
105
|
+
tempfile.binmode
|
|
106
|
+
begin
|
|
107
|
+
save_to_file(tempfile.path)
|
|
108
|
+
tempfile.rewind
|
|
109
|
+
IO.copy_stream(tempfile, io)
|
|
110
|
+
ensure
|
|
111
|
+
tempfile.close
|
|
112
|
+
tempfile.unlink
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
97
116
|
def generate_temp_file
|
|
98
117
|
if @opened == false
|
|
99
118
|
cmd = "pdftoppm -png -f #{@page} #{@pdf_args.join(" ")} -l #{@page} #{Shellwords.escape(@pdf_name)} #{Shellwords.escape(@filename)}"
|
data/lib/pdftoimage/version.rb
CHANGED
data/lib/pdftoimage.rb
CHANGED
|
@@ -2,8 +2,11 @@ require 'pdftoimage/version'
|
|
|
2
2
|
require 'pdftoimage/image'
|
|
3
3
|
|
|
4
4
|
require 'tmpdir'
|
|
5
|
-
require 'iconv'
|
|
6
5
|
require 'shellwords'
|
|
6
|
+
require 'mini_magick'
|
|
7
|
+
require 'open-uri'
|
|
8
|
+
require 'uri'
|
|
9
|
+
require 'stringio'
|
|
7
10
|
|
|
8
11
|
module PDFToImage
|
|
9
12
|
class PDFError < RuntimeError; end
|
|
@@ -20,20 +23,15 @@ module PDFToImage
|
|
|
20
23
|
raise PDFToImage::PDFError, "poppler_utils not installed"
|
|
21
24
|
end
|
|
22
25
|
|
|
23
|
-
begin
|
|
24
|
-
tmp = `identify -version 2>&1`
|
|
25
|
-
raise(PDFToImage::PDFError, "ImageMagick not installed") unless tmp.index('ImageMagick')
|
|
26
|
-
rescue Errno::ENOENT
|
|
27
|
-
raise PDFToImage::PDFError, "ImageMagick not installed"
|
|
28
|
-
end
|
|
29
|
-
|
|
30
26
|
class << self
|
|
31
27
|
# Opens a PDF document and prepares it for splitting into images.
|
|
32
28
|
#
|
|
33
|
-
# @param
|
|
29
|
+
# @param source [String, IO] A filename, URL, or IO object containing PDF data
|
|
34
30
|
#
|
|
35
31
|
# @return [Array] An array of images
|
|
36
|
-
def open(
|
|
32
|
+
def open(source, &block)
|
|
33
|
+
filename = resolve_source(source)
|
|
34
|
+
|
|
37
35
|
if not File.exist?(filename)
|
|
38
36
|
raise PDFError, "File '#{filename}' not found."
|
|
39
37
|
end
|
|
@@ -54,6 +52,16 @@ module PDFToImage
|
|
|
54
52
|
return images
|
|
55
53
|
end
|
|
56
54
|
|
|
55
|
+
# Opens a PDF from raw binary data.
|
|
56
|
+
#
|
|
57
|
+
# @param data [String] Binary string of PDF content
|
|
58
|
+
#
|
|
59
|
+
# @return [Array] An array of images
|
|
60
|
+
def from_blob(data, &block)
|
|
61
|
+
filename = write_to_tempfile(data)
|
|
62
|
+
open(filename, &block)
|
|
63
|
+
end
|
|
64
|
+
|
|
57
65
|
# Executes the specified command, returning the output.
|
|
58
66
|
#
|
|
59
67
|
# @param cmd [String] The command to run
|
|
@@ -102,6 +110,41 @@ module PDFToImage
|
|
|
102
110
|
return matches[1].to_i
|
|
103
111
|
end
|
|
104
112
|
|
|
113
|
+
def resolve_source(source)
|
|
114
|
+
if source.respond_to?(:read)
|
|
115
|
+
write_to_tempfile(source.read)
|
|
116
|
+
elsif url?(source)
|
|
117
|
+
download_file(source)
|
|
118
|
+
else
|
|
119
|
+
source
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def write_to_tempfile(data)
|
|
124
|
+
tempfile = File.join(@@pdf_temp_dir, "#{random_name}.pdf")
|
|
125
|
+
File.open(tempfile, 'wb') { |f| f.write(data) }
|
|
126
|
+
tempfile
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def url?(filename)
|
|
130
|
+
uri = URI.parse(filename)
|
|
131
|
+
uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS)
|
|
132
|
+
rescue URI::InvalidURIError
|
|
133
|
+
false
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def download_file(url)
|
|
137
|
+
tempfile = File.join(@@pdf_temp_dir, "#{random_name}.pdf")
|
|
138
|
+
remote = URI.open(url)
|
|
139
|
+
File.open(tempfile, 'wb') do |file|
|
|
140
|
+
file.write(remote.read)
|
|
141
|
+
end
|
|
142
|
+
remote.close
|
|
143
|
+
tempfile
|
|
144
|
+
rescue OpenURI::HTTPError, SocketError, Errno::ECONNREFUSED => e
|
|
145
|
+
raise PDFError, "Failed to download '#{url}': #{e.message}"
|
|
146
|
+
end
|
|
147
|
+
|
|
105
148
|
# Generate a random file name in the system's tmp folder
|
|
106
149
|
def random_filename
|
|
107
150
|
File.join(@@pdf_temp_dir, random_name)
|
metadata
CHANGED
|
@@ -1,43 +1,42 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pdftoimage
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Rob Flynn
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
13
|
+
name: shellwords
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
15
|
requirements:
|
|
17
16
|
- - "~>"
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version:
|
|
18
|
+
version: 0.2.2
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - "~>"
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version:
|
|
25
|
+
version: 0.2.2
|
|
27
26
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
27
|
+
name: mini_magick
|
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
|
30
29
|
requirements:
|
|
31
30
|
- - "~>"
|
|
32
31
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 0
|
|
32
|
+
version: '4.0'
|
|
34
33
|
type: :runtime
|
|
35
34
|
prerelease: false
|
|
36
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
36
|
requirements:
|
|
38
37
|
- - "~>"
|
|
39
38
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 0
|
|
39
|
+
version: '4.0'
|
|
41
40
|
description: A ruby gem for converting PDF documents into a series of images. This
|
|
42
41
|
module is based off poppler_utils and ImageMagick.
|
|
43
42
|
email: rob@thingerly.com
|
|
@@ -45,7 +44,7 @@ executables: []
|
|
|
45
44
|
extensions: []
|
|
46
45
|
extra_rdoc_files: []
|
|
47
46
|
files:
|
|
48
|
-
-
|
|
47
|
+
- CHANGELOG.md
|
|
49
48
|
- LICENSE
|
|
50
49
|
- README.rdoc
|
|
51
50
|
- lib/pdftoimage.rb
|
|
@@ -55,9 +54,8 @@ homepage: https://github.com/robflynn/pdftoimage
|
|
|
55
54
|
licenses:
|
|
56
55
|
- MIT
|
|
57
56
|
metadata:
|
|
58
|
-
changelog_uri: https://github.com/robflynn/pdftoimage/blob/master/
|
|
57
|
+
changelog_uri: https://github.com/robflynn/pdftoimage/blob/master/CHANGELOG.md
|
|
59
58
|
source_code_uri: https://github.com/robflynn/pdftoimage/
|
|
60
|
-
post_install_message:
|
|
61
59
|
rdoc_options: []
|
|
62
60
|
require_paths:
|
|
63
61
|
- lib
|
|
@@ -65,15 +63,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
65
63
|
requirements:
|
|
66
64
|
- - ">="
|
|
67
65
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: '
|
|
66
|
+
version: '2.7'
|
|
69
67
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
68
|
requirements:
|
|
71
69
|
- - ">="
|
|
72
70
|
- !ruby/object:Gem::Version
|
|
73
71
|
version: '0'
|
|
74
72
|
requirements: []
|
|
75
|
-
rubygems_version: 3.
|
|
76
|
-
signing_key:
|
|
73
|
+
rubygems_version: 3.6.7
|
|
77
74
|
specification_version: 4
|
|
78
75
|
summary: A ruby gem for converting PDF documents into a series of images.
|
|
79
76
|
test_files: []
|
data/ChangeLog.rdoc
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
=== 0.2.0 / 2023-04-20
|
|
2
|
-
* Fixed use of deprecated File.exists? method (pr#4 from Thornolf)
|
|
3
|
-
* File paths are now escaped to properly handle spaces and special characters (pr#3 from drnic)
|
|
4
|
-
* Specifying dpi resolution is now supported (pr#5 from lehf)
|
|
5
|
-
|
|
6
|
-
=== 0.1.7 / 2018-05-01
|
|
7
|
-
* Updated yard to resolve a vulnerability.
|
|
8
|
-
|
|
9
|
-
=== 0.1.6 / 2011-07-13
|
|
10
|
-
* Buggy PDF generators try to encode CreationDate and ModDate as UTF-16 as opposed to ASCII. This leads to parsing errors where the code was assuming UTF-8 encoding was in use.
|
|
11
|
-
|
|
12
|
-
=== 0.1.5 / 2011-03-08
|
|
13
|
-
* Fixed a bug due to the fact that poppler_utils no longer leaves off the extra padded zero.
|
|
14
|
-
|
|
15
|
-
=== 0.1.4 / 2010-11-15
|
|
16
|
-
* Fixed a bug concerning documents with page counts that are exact powers of 10. poppler_utils prepends one less zero to the page counts when a document count is a power of 10. This is now fixed in PDFToImage.
|
|
17
|
-
|
|
18
|
-
=== 0.1.3 / 2010-11-12
|
|
19
|
-
* Fixed a problem where PDF documents with more than 9 pages were not parsing properly. (embarrassing 0 padding problem.)
|
|
20
|
-
|
|
21
|
-
=== 0.1.2 / 2010-11-11
|
|
22
|
-
* Added support for blocks upon opening a PDF
|
|
23
|
-
* Image objects now support the "quality" method for JPEG/MIFF/PNG compression levels.
|
|
24
|
-
* PDF conversion is now deferred until saving. This greatly speeds up the conversion process in cases where you only want a few pages out of a large document converted.
|
|
25
|
-
|
|
26
|
-
=== 0.1.1 / 2010-11-10
|
|
27
|
-
|
|
28
|
-
* Initial release:
|