hexapdf 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/README.md +8 -7
- data/examples/022-outline.rb +5 -1
- data/lib/hexapdf/cli/debug_info.rb +98 -0
- data/lib/hexapdf/cli/images.rb +2 -2
- data/lib/hexapdf/cli/inspect.rb +5 -1
- data/lib/hexapdf/cli.rb +2 -0
- data/lib/hexapdf/encryption/security_handler.rb +3 -1
- data/lib/hexapdf/font/cmap.rb +10 -6
- data/lib/hexapdf/parser.rb +29 -4
- data/lib/hexapdf/revision.rb +6 -2
- data/lib/hexapdf/type/acro_form/field.rb +4 -1
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/encryption/test_security_handler.rb +7 -5
- data/test/hexapdf/test_parser.rb +55 -3
- data/test/hexapdf/test_revision.rb +27 -6
- data/test/hexapdf/type/acro_form/test_field.rb +5 -0
- data/test/test_helper.rb +6 -0
- metadata +20 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 35bbb5d1780d07ecf6098cc40359ff2cc02cd89231a124b6ff1a0a13c760d116
|
|
4
|
+
data.tar.gz: 8664f2ac8a6651ee83e7292d005ea10d89b7ea738de47cc62dbf219f4eae0cb4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 232aefc90eb4f9f9a913d27affa95a0c9eff43a72e04eeb1adc0fbe11e865033c6fd0b7779930b15a982afdd909d6ffa98640db6db668f95ce0c26332749cfae
|
|
7
|
+
data.tar.gz: e1b836a23d58e92ceb70f5b892d023edcf585288583f2254d35394688204bfdbf4401edea6562a96d1583a71a302d8d50e8a175262ff5077a3b4a2200ec922a4
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,29 @@
|
|
|
1
|
+
## 1.6.0 - 2026-02-10
|
|
2
|
+
|
|
3
|
+
### Added
|
|
4
|
+
|
|
5
|
+
* CLI command `hexapdf debug-info` for creating debugging information,
|
|
6
|
+
especially for malformed files
|
|
7
|
+
|
|
8
|
+
### Changed
|
|
9
|
+
|
|
10
|
+
* Optimized decoding character codes with a CMap to drastically lower memory
|
|
11
|
+
usage
|
|
12
|
+
* CLI command `hexapdf inspect rev` to show whether the cross-reference table
|
|
13
|
+
was reconstructed
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
* Path generation for image extraction in CLI command `hexapdf images`
|
|
18
|
+
* Handling of certain invalid PDFs where the generation number for object
|
|
19
|
+
identifiers don't match their cross-reference section value
|
|
20
|
+
* AES 256bit encryption to include unnecessary field /Length in encryption
|
|
21
|
+
dictionary to work around buggy PDF libraries
|
|
22
|
+
* Parsing of invalid /Filter and /DecodeParms stream keys in case they resolve
|
|
23
|
+
to a recursive structure
|
|
24
|
+
* [HexaPDF::Type::AcroForm::Field#each_widget] to only yield widget objects
|
|
25
|
+
|
|
26
|
+
|
|
1
27
|
## 1.5.0 - 2025-12-08
|
|
2
28
|
|
|
3
29
|
### Added
|
data/README.md
CHANGED
|
@@ -13,7 +13,7 @@ In short, it allows
|
|
|
13
13
|
* **securing** PDF files by encrypting or signing them and
|
|
14
14
|
* **optimizing** PDF files for smaller file size or other criteria.
|
|
15
15
|
|
|
16
|
-
HexaPDF is available under two
|
|
16
|
+
HexaPDF is available under two licenses, the AGPL and a commercial license, see the [License
|
|
17
17
|
section](#License) for details.
|
|
18
18
|
|
|
19
19
|
|
|
@@ -93,12 +93,13 @@ with example graphics and PDF files and tightly integrated into the rest of the
|
|
|
93
93
|
|
|
94
94
|
## Requirements and Installation
|
|
95
95
|
|
|
96
|
-
Since HexaPDF is written in Ruby, a working Ruby installation is needed - see the
|
|
97
|
-
|
|
98
|
-
|
|
96
|
+
Since HexaPDF is written in Ruby, a working Ruby installation is needed - see the [official
|
|
97
|
+
installation documentation][rbinstall] for details. Note that you need Ruby version 3.0 or higher as
|
|
98
|
+
prior versions are not supported!
|
|
99
99
|
|
|
100
|
-
HexaPDF works on all Ruby implementations that are CRuby compatible
|
|
101
|
-
|
|
100
|
+
HexaPDF works on all Ruby implementations that are CRuby compatible and on any platform supported by
|
|
101
|
+
Ruby (Linux, macOS, Windows, ...). Implementations like JRuby and TruffleRuby should work but
|
|
102
|
+
HexaPDF is not actively tested against them.
|
|
102
103
|
|
|
103
104
|
Apart from Ruby itself the HexaPDF library has only one external dependency `geom2d` which is
|
|
104
105
|
written and provided by the HexaPDF authors. The `hexapdf` application has an additional dependency
|
|
@@ -117,7 +118,7 @@ Prawn is a **library for generating content**.
|
|
|
117
118
|
|
|
118
119
|
To be more specific, it is easily possible to read an existing PDF with HexaPDF and modify parts of
|
|
119
120
|
it before writing it out again. The modifications can be to the PDF object structure like removing
|
|
120
|
-
superfluous annotations or the
|
|
121
|
+
superfluous annotations or the content itself.
|
|
121
122
|
|
|
122
123
|
Prawn has no such functionality. There is basic support for using a PDF as a template using the
|
|
123
124
|
`pdf-reader` and `prawn-template` gems but support is very limited. However, Prawn has a very
|
data/examples/022-outline.rb
CHANGED
|
@@ -10,7 +10,11 @@
|
|
|
10
10
|
require 'hexapdf'
|
|
11
11
|
|
|
12
12
|
doc = HexaPDF::Document.new
|
|
13
|
-
6.times
|
|
13
|
+
6.times do |i|
|
|
14
|
+
doc.pages.add.canvas.
|
|
15
|
+
font("Helvetica", size: 150).
|
|
16
|
+
text("Page #{i + 1}", at: [10, 660])
|
|
17
|
+
end
|
|
14
18
|
|
|
15
19
|
doc.outline.add_item("Main") do |main|
|
|
16
20
|
main.add_item("Page 1", destination: 0)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
|
2
|
+
#
|
|
3
|
+
#--
|
|
4
|
+
# This file is part of HexaPDF.
|
|
5
|
+
#
|
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
+
# Copyright (C) 2014-2025 Thomas Leitner
|
|
8
|
+
#
|
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
16
|
+
#
|
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
|
20
|
+
# License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
24
|
+
#
|
|
25
|
+
# The interactive user interfaces in modified source and object code
|
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
|
28
|
+
#
|
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
|
31
|
+
# is created or manipulated using HexaPDF.
|
|
32
|
+
#
|
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
|
35
|
+
#++
|
|
36
|
+
|
|
37
|
+
require 'hexapdf/cli/command'
|
|
38
|
+
|
|
39
|
+
module HexaPDF
|
|
40
|
+
module CLI
|
|
41
|
+
|
|
42
|
+
# Creates debugging information for adding to an issue.
|
|
43
|
+
class DebugInfo < Command
|
|
44
|
+
|
|
45
|
+
def initialize #:nodoc:
|
|
46
|
+
super('debug-info', takes_commands: false)
|
|
47
|
+
short_desc("Create debug information for a PDF file")
|
|
48
|
+
long_desc(<<~EOF)
|
|
49
|
+
Creates debug information for a possibly malformed PDF file that can be attached to an
|
|
50
|
+
issue.
|
|
51
|
+
|
|
52
|
+
Two files are created: anonymized-FILE where all strings are replaced with zeroes and
|
|
53
|
+
debug_info.txt with additional debug information.
|
|
54
|
+
EOF
|
|
55
|
+
|
|
56
|
+
options.on("--password PASSWORD", "-p", String,
|
|
57
|
+
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
58
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
@password = nil
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def execute(file) #:nodoc:
|
|
65
|
+
output_name = "anonymized-#{file}"
|
|
66
|
+
puts "Creating anonymized file '#{output_name}'"
|
|
67
|
+
data = File.binread(file)
|
|
68
|
+
data.gsub!(/(>>\s*stream\s*)(.*?)(\s*endstream)/m) {|m| "#{$1}#{'0' * $2.length}#{$3}" }
|
|
69
|
+
data.gsub!(/([^<]<)([0-9A-Fa-f#{Tokenizer::WHITESPACE}]*?)>/m) {|m| "#{$1}#{'0' * $2.length}>" }
|
|
70
|
+
data.gsub!(/\((.*?)\)/m) {|m| "(#{'0' * $1.length})" }
|
|
71
|
+
File.binwrite(output_name, data)
|
|
72
|
+
|
|
73
|
+
debug_info = +''
|
|
74
|
+
puts "Collecting debug information in debug_info.txt"
|
|
75
|
+
begin
|
|
76
|
+
output = capture_output { HexaPDF::CLI::Application.new.parse(['info', '--check', file]) }
|
|
77
|
+
debug_info << "Output:\n"<< output
|
|
78
|
+
rescue
|
|
79
|
+
debug_info << "Error collecting info: #{$!.message}\n"
|
|
80
|
+
end
|
|
81
|
+
File.write('debug_info.txt', debug_info)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
def capture_output
|
|
87
|
+
stdout, stderr = $stdout, $stderr
|
|
88
|
+
$stdout = $stderr = StringIO.new
|
|
89
|
+
yield
|
|
90
|
+
$stdout.string
|
|
91
|
+
ensure
|
|
92
|
+
$stdout, $stderr = stdout, stderr
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
end
|
|
98
|
+
end
|
data/lib/hexapdf/cli/images.rb
CHANGED
|
@@ -147,7 +147,7 @@ module HexaPDF
|
|
|
147
147
|
# Extracts the images with the given indices.
|
|
148
148
|
def extract_images(doc)
|
|
149
149
|
FileUtils.mkdir_p(File.dirname("#{@prefix}filename"))
|
|
150
|
-
prefix = File.directory?(@prefix) ? @prefix : "@
|
|
150
|
+
prefix = File.directory?(@prefix) ? @prefix : "#{@prefix}-"
|
|
151
151
|
|
|
152
152
|
done = Set.new
|
|
153
153
|
count = total = 0
|
|
@@ -157,7 +157,7 @@ module HexaPDF
|
|
|
157
157
|
info = image.info
|
|
158
158
|
if info.writable
|
|
159
159
|
count += 1
|
|
160
|
-
path = "#{
|
|
160
|
+
path = "#{prefix}#{index}.#{image.info.extension}"
|
|
161
161
|
maybe_raise_on_existing_file(path)
|
|
162
162
|
if command_parser.verbosity_info?
|
|
163
163
|
puts "Extracting image #{index} (#{image.width}x#{image.height}, " \
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
|
@@ -293,6 +293,10 @@ module HexaPDF
|
|
|
293
293
|
IO.copy_stream(@doc.revisions.parser.io, $stdout, length, 0)
|
|
294
294
|
else
|
|
295
295
|
puts "Document has #{@doc.revisions.count} revision#{@doc.revisions.count == 1 ? '' : 's'}"
|
|
296
|
+
if @doc.revisions.parser.reconstructed? && @doc.revisions.count == 1 &&
|
|
297
|
+
@doc.revisions.current == @doc.revisions.parser.reconstructed_revision
|
|
298
|
+
puts "Document cross-reference table has been reconstructed"
|
|
299
|
+
end
|
|
296
300
|
revision_information do |rev, index, count, signature, end_offset|
|
|
297
301
|
type = if rev.trailer[:XRefStm]
|
|
298
302
|
"xref table + stream"
|
|
@@ -415,7 +419,7 @@ module HexaPDF
|
|
|
415
419
|
sig = signatures[rev]
|
|
416
420
|
if sig
|
|
417
421
|
end_index = sig[:ByteRange][-2] + sig[:ByteRange][-1]
|
|
418
|
-
|
|
422
|
+
elsif rev != @doc.revisions.parser.reconstructed_revision
|
|
419
423
|
io.seek(startxrefs[index], IO::SEEK_SET)
|
|
420
424
|
buffer = ''.b
|
|
421
425
|
while io.pos < startxrefs[index + 1]
|
data/lib/hexapdf/cli.rb
CHANGED
|
@@ -49,6 +49,7 @@ require 'hexapdf/cli/image2pdf'
|
|
|
49
49
|
require 'hexapdf/cli/form'
|
|
50
50
|
require 'hexapdf/cli/fonts'
|
|
51
51
|
require 'hexapdf/cli/usage'
|
|
52
|
+
require 'hexapdf/cli/debug_info'
|
|
52
53
|
require 'hexapdf/version'
|
|
53
54
|
require 'hexapdf/document'
|
|
54
55
|
|
|
@@ -125,6 +126,7 @@ module HexaPDF
|
|
|
125
126
|
add_command(HexaPDF::CLI::Form.new)
|
|
126
127
|
add_command(HexaPDF::CLI::Fonts.new)
|
|
127
128
|
add_command(HexaPDF::CLI::Usage.new)
|
|
129
|
+
add_command(HexaPDF::CLI::DebugInfo.new)
|
|
128
130
|
add_command(CmdParse::HelpCommand.new)
|
|
129
131
|
version_command = CmdParse::VersionCommand.new(add_switches: false)
|
|
130
132
|
add_command(version_command)
|
|
@@ -363,7 +363,9 @@ module HexaPDF
|
|
|
363
363
|
raise(HexaPDF::UnsupportedEncryptionError,
|
|
364
364
|
"Invalid key length #{key_length} specified")
|
|
365
365
|
end
|
|
366
|
-
|
|
366
|
+
# /Length should only be set for V=2 as per the spec. However, software like Adobe Reader
|
|
367
|
+
# fails if this is not set for V=5 or V=4.
|
|
368
|
+
dict[:Length] = key_length if dict[:V] == 5 || dict[:V] == 4 || dict[:V] == 2
|
|
367
369
|
|
|
368
370
|
if ![:aes, :arc4].include?(algorithm)
|
|
369
371
|
raise(HexaPDF::UnsupportedEncryptionError,
|
data/lib/hexapdf/font/cmap.rb
CHANGED
|
@@ -143,10 +143,13 @@ module HexaPDF
|
|
|
143
143
|
# An error is raised if the string contains invalid bytes.
|
|
144
144
|
def read_codes(string)
|
|
145
145
|
codes = []
|
|
146
|
-
bytes = string.
|
|
146
|
+
bytes = string.bytes
|
|
147
|
+
length = bytes.length
|
|
148
|
+
i = 0
|
|
147
149
|
|
|
148
|
-
|
|
149
|
-
byte = bytes
|
|
150
|
+
while i < length
|
|
151
|
+
byte = bytes[i]
|
|
152
|
+
i += 1
|
|
150
153
|
code = 0
|
|
151
154
|
|
|
152
155
|
found = @codespace_ranges.any? do |first_byte_range, rest_ranges|
|
|
@@ -154,9 +157,10 @@ module HexaPDF
|
|
|
154
157
|
|
|
155
158
|
code = (code << 8) + byte
|
|
156
159
|
valid = rest_ranges.all? do |range|
|
|
157
|
-
|
|
158
|
-
byte = bytes
|
|
159
|
-
|
|
160
|
+
if i < length
|
|
161
|
+
byte = bytes[i]
|
|
162
|
+
i += 1
|
|
163
|
+
else
|
|
160
164
|
raise HexaPDF::Error, "Missing bytes while reading codes via CMap"
|
|
161
165
|
end
|
|
162
166
|
code = (code << 8) + byte
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -112,8 +112,18 @@ module HexaPDF
|
|
|
112
112
|
end
|
|
113
113
|
|
|
114
114
|
if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
msg = "The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
|
|
116
|
+
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref"
|
|
117
|
+
# Some invalid PDFs contain entries where the generation number in the xref is different
|
|
118
|
+
# from the one found in the indirect object. If the file were reconstructed the generation
|
|
119
|
+
# number from the indirect object itself would be used.
|
|
120
|
+
# To gracefully handle such invalid PDFs they need to have a single revision.
|
|
121
|
+
# The other code part that handles this is in Revision#object.
|
|
122
|
+
if oid == xref_entry.oid && @document.revisions.count == 1
|
|
123
|
+
maybe_raise(msg, pos: xref_entry.pos)
|
|
124
|
+
else
|
|
125
|
+
raise_malformed(msg)
|
|
126
|
+
end
|
|
117
127
|
end
|
|
118
128
|
|
|
119
129
|
if obj.kind_of?(Reference)
|
|
@@ -209,9 +219,24 @@ module HexaPDF
|
|
|
209
219
|
tok = @tokenizer.next_token
|
|
210
220
|
|
|
211
221
|
object[:Length] = length
|
|
222
|
+
if object.key?(:Filter)
|
|
223
|
+
begin
|
|
224
|
+
object[:Filter] = @document.unwrap(object[:Filter])
|
|
225
|
+
rescue HexaPDF::Error
|
|
226
|
+
maybe_raise("Invalid /Filter entry for stream", pos: @tokenizer.pos)
|
|
227
|
+
object.delete(:Filter)
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
if object.key?(:DecodeParms)
|
|
231
|
+
begin
|
|
232
|
+
object[:DecodeParms] = @document.unwrap(object[:DecodeParms])
|
|
233
|
+
rescue HexaPDF::Error
|
|
234
|
+
maybe_raise("Invalid /DecodeParms entry for stream", pos: @tokenizer.pos)
|
|
235
|
+
object.delete(:DecodeParms)
|
|
236
|
+
end
|
|
237
|
+
end
|
|
212
238
|
stream = StreamData.new(@tokenizer.io, offset: pos, length: length,
|
|
213
|
-
filter:
|
|
214
|
-
decode_parms: @document.unwrap(object[:DecodeParms]))
|
|
239
|
+
filter: object[:Filter], decode_parms: object[:DecodeParms])
|
|
215
240
|
end
|
|
216
241
|
|
|
217
242
|
unless tok.kind_of?(Tokenizer::Token) && tok == 'endobj'
|
data/lib/hexapdf/revision.rb
CHANGED
|
@@ -128,6 +128,11 @@ module HexaPDF
|
|
|
128
128
|
@objects[oid, gen]
|
|
129
129
|
elsif (xref_entry = @xref_section[oid, gen])
|
|
130
130
|
load_object(xref_entry)
|
|
131
|
+
elsif (xref_entry = @xref_section[oid]) && (obj = load_object(xref_entry))&.gen == gen
|
|
132
|
+
# This branch handles invalid PDFs with a single revision containing xref entries where the
|
|
133
|
+
# gen doesn't match the gen of the indirect object. Also see the special handling in
|
|
134
|
+
# Parser#load_object.
|
|
135
|
+
obj
|
|
131
136
|
else
|
|
132
137
|
nil
|
|
133
138
|
end
|
|
@@ -219,8 +224,7 @@ module HexaPDF
|
|
|
219
224
|
seen = {}
|
|
220
225
|
@objects.each {|oid, _gen, data| seen[oid] = true; yield(data) }
|
|
221
226
|
@xref_section.each do |oid, _gen, data|
|
|
222
|
-
|
|
223
|
-
yield(@objects[oid] || load_object(data))
|
|
227
|
+
yield(@objects[oid] || load_object(data)) unless seen.key?(oid)
|
|
224
228
|
end
|
|
225
229
|
@all_objects_loaded = true
|
|
226
230
|
end
|
|
@@ -291,7 +291,10 @@ module HexaPDF
|
|
|
291
291
|
if embedded_widget?
|
|
292
292
|
yield(document.wrap(self))
|
|
293
293
|
elsif terminal_field?
|
|
294
|
-
self[:Kids]&.each
|
|
294
|
+
self[:Kids]&.each do |kid|
|
|
295
|
+
kid = document.wrap(kid)
|
|
296
|
+
yield(kid) if kid.type == :Annot && kid[:Subtype] == :Widget
|
|
297
|
+
end
|
|
295
298
|
end
|
|
296
299
|
|
|
297
300
|
unless direct_only
|
data/lib/hexapdf/version.rb
CHANGED
|
@@ -129,16 +129,18 @@ describe HexaPDF::Encryption::SecurityHandler do
|
|
|
129
129
|
end
|
|
130
130
|
|
|
131
131
|
it "sets the correct /Length value for the given key length" do
|
|
132
|
-
[[40, nil], [48, 48], [128, 128]
|
|
133
|
-
|
|
134
|
-
@handler.
|
|
135
|
-
assert(result == @handler.dict[:Length])
|
|
132
|
+
[[40, nil], [48, 48], [128, 128]].each do |key_length, result|
|
|
133
|
+
@handler.set_up_encryption(key_length: key_length, algorithm: :arc4)
|
|
134
|
+
result.nil? ? assert_nil(@handler.dict[:Length]) : assert_equal(result, @handler.dict[:Length])
|
|
136
135
|
end
|
|
137
136
|
|
|
138
|
-
# Work-around buggy software
|
|
137
|
+
# Work-around for buggy software needing the /Length key
|
|
139
138
|
@handler.set_up_encryption(key_length: 128, algorithm: :aes)
|
|
140
139
|
assert_equal(4, @handler.dict[:V])
|
|
141
140
|
assert_equal(128, @handler.dict[:Length])
|
|
141
|
+
@handler.set_up_encryption(key_length: 256, algorithm: :aes)
|
|
142
|
+
assert_equal(5, @handler.dict[:V])
|
|
143
|
+
assert_equal(256, @handler.dict[:Length])
|
|
142
144
|
end
|
|
143
145
|
|
|
144
146
|
it "calls the prepare_encryption method" do
|
data/test/hexapdf/test_parser.rb
CHANGED
|
@@ -10,6 +10,7 @@ describe HexaPDF::Parser do
|
|
|
10
10
|
@document = HexaPDF::Document.new
|
|
11
11
|
@document.config['parser.try_xref_reconstruction'] = false
|
|
12
12
|
@document.add(@document.wrap(10, oid: 1, gen: 0))
|
|
13
|
+
@document.add(@document.wrap({Recurse: HexaPDF::Reference.new(3)}, oid: 3))
|
|
13
14
|
|
|
14
15
|
create_parser(+<<~EOF)
|
|
15
16
|
%PDF-1.7
|
|
@@ -173,6 +174,18 @@ describe HexaPDF::Parser do
|
|
|
173
174
|
assert_equal({Length: 4}, object)
|
|
174
175
|
end
|
|
175
176
|
|
|
177
|
+
it "recovers in case of an invalid /Filter leading to indirect object recursion" do
|
|
178
|
+
create_parser("1 0 obj<</Length 1/Filter 3 0 R>>stream\n1\nendstream endobj")
|
|
179
|
+
object, * = @parser.parse_indirect_object
|
|
180
|
+
assert_equal({Length: 1}, object)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it "recovers in case of an invalid /DecodeParms leading to indirect object recursion" do
|
|
184
|
+
create_parser("1 0 obj<</Length 1/DecodeParms 3 0 R>>stream\n1\nendstream endobj")
|
|
185
|
+
object, * = @parser.parse_indirect_object
|
|
186
|
+
assert_equal({Length: 1}, object)
|
|
187
|
+
end
|
|
188
|
+
|
|
176
189
|
it "fails if the oid, gen or 'obj' keyword is invalid" do
|
|
177
190
|
create_parser("a 0 obj\n5\nendobj")
|
|
178
191
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
@@ -267,6 +280,18 @@ describe HexaPDF::Parser do
|
|
|
267
280
|
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
|
|
268
281
|
assert_match(/keyword endobj/, exp.message)
|
|
269
282
|
end
|
|
283
|
+
|
|
284
|
+
it "fails if an invalid /Filter leads to indirect object recursion" do
|
|
285
|
+
create_parser("1 0 obj<</Length 1/Filter 3 0 R>>stream\n1\nendstream endobj")
|
|
286
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
287
|
+
assert_match(/Invalid \/Filter/, exp.message)
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
it "fails if an invalid /DecodeParms leads to indirect object recursion" do
|
|
291
|
+
create_parser("1 0 obj<</Length 1/DecodeParms 3 0 R>>stream\n1\nendstream endobj")
|
|
292
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
|
|
293
|
+
assert_match(/Invalid \/DecodeParms/, exp.message)
|
|
294
|
+
end
|
|
270
295
|
end
|
|
271
296
|
end
|
|
272
297
|
|
|
@@ -315,14 +340,32 @@ describe HexaPDF::Parser do
|
|
|
315
340
|
assert_equal(1, obj.oid)
|
|
316
341
|
end
|
|
317
342
|
|
|
343
|
+
it "handles the case when generation numbers don't match with a single revision" do
|
|
344
|
+
@entry.gen = 2
|
|
345
|
+
obj = @parser.load_object(@entry)
|
|
346
|
+
assert_equal(2, obj.oid)
|
|
347
|
+
assert_equal(5, obj[0])
|
|
348
|
+
end
|
|
349
|
+
|
|
318
350
|
describe "with strict parsing" do
|
|
319
|
-
|
|
351
|
+
before do
|
|
320
352
|
@document.config['parser.on_correctable_error'] = proc { true }
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
it "raises an error if an indirect object has an offset of 0" do
|
|
321
356
|
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
|
322
357
|
@parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
|
|
323
358
|
end
|
|
324
359
|
assert_match(/has offset 0/, exp.message)
|
|
325
360
|
end
|
|
361
|
+
|
|
362
|
+
it "fails if the generation numbers don't match with a single revision" do
|
|
363
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
|
364
|
+
@entry.gen = 2
|
|
365
|
+
@parser.load_object(@entry)
|
|
366
|
+
end
|
|
367
|
+
assert_match(/oid,gen.*don't match/, exp.message)
|
|
368
|
+
end
|
|
326
369
|
end
|
|
327
370
|
|
|
328
371
|
it "fails if another object is found instead of an object stream" do
|
|
@@ -342,9 +385,18 @@ describe HexaPDF::Parser do
|
|
|
342
385
|
assert_match(/invalid cross-reference type/i, exp.message)
|
|
343
386
|
end
|
|
344
387
|
|
|
345
|
-
it "fails if the object
|
|
388
|
+
it "fails if the object numbers don't match" do
|
|
389
|
+
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
|
390
|
+
@entry.oid = 5
|
|
391
|
+
@parser.load_object(@entry)
|
|
392
|
+
end
|
|
393
|
+
assert_match(/oid,gen.*don't match/, exp.message)
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
it "fails if the generation numbers don't match for multiple revisions" do
|
|
397
|
+
@document.revisions.add
|
|
346
398
|
exp = assert_raises(HexaPDF::MalformedPDFError) do
|
|
347
|
-
@entry.gen =
|
|
399
|
+
@entry.gen = 5
|
|
348
400
|
@parser.load_object(@entry)
|
|
349
401
|
end
|
|
350
402
|
assert_match(/oid,gen.*don't match/, exp.message)
|
|
@@ -17,6 +17,7 @@ describe HexaPDF::Revision do
|
|
|
17
17
|
@xref_section.add_in_use_entry(5, 0, 1000)
|
|
18
18
|
@xref_section.add_in_use_entry(6, 0, 5000)
|
|
19
19
|
@xref_section.add_in_use_entry(7, 0, 5000)
|
|
20
|
+
@xref_section.add_in_use_entry(8, 2, 5000)
|
|
20
21
|
@obj = HexaPDF::Object.new(:val, oid: 1, gen: 0)
|
|
21
22
|
@ref = HexaPDF::Reference.new(1, 0)
|
|
22
23
|
|
|
@@ -30,6 +31,7 @@ describe HexaPDF::Revision do
|
|
|
30
31
|
when 5 then HexaPDF::Dictionary.new({Type: :ObjStm}, oid: entry.oid, gen: entry.gen)
|
|
31
32
|
when 7 then HexaPDF::Type::Catalog.new({Type: :Catalog}, oid: entry.oid, gen: entry.gen,
|
|
32
33
|
document: self)
|
|
34
|
+
when 8 then HexaPDF::Object.new(:DifferentGen, oid: entry.oid, gen: 0)
|
|
33
35
|
when 6 then HexaPDF::Dictionary.new({Array: HexaPDF::PDFArray.new([1, 2])},
|
|
34
36
|
oid: entry.oid, gen: entry.gen)
|
|
35
37
|
else HexaPDF::Object.new(:Test, oid: entry.oid, gen: entry.gen)
|
|
@@ -50,10 +52,10 @@ describe HexaPDF::Revision do
|
|
|
50
52
|
end
|
|
51
53
|
|
|
52
54
|
it "returns the next free object number" do
|
|
53
|
-
assert_equal(8, @rev.next_free_oid)
|
|
54
|
-
@obj.oid = 8
|
|
55
|
-
@rev.add(@obj)
|
|
56
55
|
assert_equal(9, @rev.next_free_oid)
|
|
56
|
+
@obj.oid = 9
|
|
57
|
+
@rev.add(@obj)
|
|
58
|
+
assert_equal(10, @rev.next_free_oid)
|
|
57
59
|
end
|
|
58
60
|
|
|
59
61
|
describe "add" do
|
|
@@ -113,6 +115,12 @@ describe HexaPDF::Revision do
|
|
|
113
115
|
refute_nil(obj)
|
|
114
116
|
end
|
|
115
117
|
|
|
118
|
+
it "loads an object that is defined in the cross-reference section with an invalid generation number" do
|
|
119
|
+
obj = @rev.object(HexaPDF::Reference.new(8, 0))
|
|
120
|
+
assert_equal(0, obj.gen)
|
|
121
|
+
assert_equal(:DifferentGen, obj.value)
|
|
122
|
+
end
|
|
123
|
+
|
|
116
124
|
it "loads free entries in the cross-reference section as special PDF null objects" do
|
|
117
125
|
obj = @rev.object(HexaPDF::Reference.new(3, 0))
|
|
118
126
|
assert_nil(obj.value)
|
|
@@ -172,7 +180,20 @@ describe HexaPDF::Revision do
|
|
|
172
180
|
describe "object iteration" do
|
|
173
181
|
it "iterates over all objects via each" do
|
|
174
182
|
@rev.add(@obj)
|
|
175
|
-
assert_equal([@obj, *(2..
|
|
183
|
+
assert_equal([@obj, *(2..8).map {|i| @rev.object(i) }], @rev.each.to_a)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
it "ensures no object is loaded multiple times" do
|
|
187
|
+
obj_2_data = nil
|
|
188
|
+
@rev.add(@obj) # ensures this is yielded first
|
|
189
|
+
@rev.each do |obj|
|
|
190
|
+
if obj == @obj
|
|
191
|
+
obj_2_data = @rev.object(2).data
|
|
192
|
+
elsif obj.oid == 2
|
|
193
|
+
assert_same(obj_2_data, obj.data)
|
|
194
|
+
break
|
|
195
|
+
end
|
|
196
|
+
end
|
|
176
197
|
end
|
|
177
198
|
|
|
178
199
|
it "iterates only over loaded objects" do
|
|
@@ -216,8 +237,8 @@ describe HexaPDF::Revision do
|
|
|
216
237
|
end
|
|
217
238
|
|
|
218
239
|
it "handles object and xref streams that were added appropriately depending on the 'all' arg" do
|
|
219
|
-
xref = @rev.add(HexaPDF::Dictionary.new({Type: :XRef}, oid:
|
|
220
|
-
objstm = @rev.add(HexaPDF::Dictionary.new({Type: :ObjStm}, oid:
|
|
240
|
+
xref = @rev.add(HexaPDF::Dictionary.new({Type: :XRef}, oid: 20))
|
|
241
|
+
objstm = @rev.add(HexaPDF::Dictionary.new({Type: :ObjStm}, oid: 21))
|
|
221
242
|
assert_equal([], @rev.each_modified_object.to_a)
|
|
222
243
|
assert_equal([xref, objstm], @rev.each_modified_object(all: true).to_a)
|
|
223
244
|
end
|
|
@@ -147,6 +147,11 @@ describe HexaPDF::Type::AcroForm::Field do
|
|
|
147
147
|
it "yields nothing if no widgets are defined" do
|
|
148
148
|
assert_equal([], @field.each_widget.to_a)
|
|
149
149
|
end
|
|
150
|
+
|
|
151
|
+
it "ignores entries in the /Kids array that are not widgets" do
|
|
152
|
+
@field[:Kids] = [{Subtype: :Widget, Rect: [0, 0, 0, 0], X: 1}, {FT: :Tx, Kids: []}]
|
|
153
|
+
assert_equal(1, @field.each_widget.to_a.size)
|
|
154
|
+
end
|
|
150
155
|
end
|
|
151
156
|
|
|
152
157
|
describe "create_widget" do
|
data/test/test_helper.rb
CHANGED
|
@@ -11,6 +11,12 @@ rescue LoadError
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
gem 'minitest'
|
|
14
|
+
begin
|
|
15
|
+
gem 'minitest-mock'
|
|
16
|
+
require 'minitest/mock'
|
|
17
|
+
rescue Gem::MissingSpecError
|
|
18
|
+
# Assume Minitest < 6 is in use for older Rubies
|
|
19
|
+
end
|
|
14
20
|
gem 'strscan'
|
|
15
21
|
require 'minitest/autorun'
|
|
16
22
|
require 'fiber'
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: hexapdf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Thomas Leitner
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: cmdparse
|
|
@@ -97,14 +97,28 @@ dependencies:
|
|
|
97
97
|
requirements:
|
|
98
98
|
- - "~>"
|
|
99
99
|
- !ruby/object:Gem::Version
|
|
100
|
-
version: '
|
|
100
|
+
version: '6.0'
|
|
101
101
|
type: :development
|
|
102
102
|
prerelease: false
|
|
103
103
|
version_requirements: !ruby/object:Gem::Requirement
|
|
104
104
|
requirements:
|
|
105
105
|
- - "~>"
|
|
106
106
|
- !ruby/object:Gem::Version
|
|
107
|
-
version: '
|
|
107
|
+
version: '6.0'
|
|
108
|
+
- !ruby/object:Gem::Dependency
|
|
109
|
+
name: minitest-mock
|
|
110
|
+
requirement: !ruby/object:Gem::Requirement
|
|
111
|
+
requirements:
|
|
112
|
+
- - "~>"
|
|
113
|
+
- !ruby/object:Gem::Version
|
|
114
|
+
version: '5.27'
|
|
115
|
+
type: :development
|
|
116
|
+
prerelease: false
|
|
117
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
118
|
+
requirements:
|
|
119
|
+
- - "~>"
|
|
120
|
+
- !ruby/object:Gem::Version
|
|
121
|
+
version: '5.27'
|
|
108
122
|
- !ruby/object:Gem::Dependency
|
|
109
123
|
name: reline
|
|
110
124
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -327,6 +341,7 @@ files:
|
|
|
327
341
|
- lib/hexapdf/cli.rb
|
|
328
342
|
- lib/hexapdf/cli/batch.rb
|
|
329
343
|
- lib/hexapdf/cli/command.rb
|
|
344
|
+
- lib/hexapdf/cli/debug_info.rb
|
|
330
345
|
- lib/hexapdf/cli/files.rb
|
|
331
346
|
- lib/hexapdf/cli/fonts.rb
|
|
332
347
|
- lib/hexapdf/cli/form.rb
|
|
@@ -864,7 +879,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
864
879
|
- !ruby/object:Gem::Version
|
|
865
880
|
version: '0'
|
|
866
881
|
requirements: []
|
|
867
|
-
rubygems_version:
|
|
882
|
+
rubygems_version: 4.0.3
|
|
868
883
|
specification_version: 4
|
|
869
884
|
summary: HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
870
885
|
test_files: []
|