hexapdf 0.14.3 → 0.15.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -0
- data/lib/hexapdf/cli/form.rb +30 -8
- data/lib/hexapdf/configuration.rb +19 -4
- data/lib/hexapdf/content/canvas.rb +1 -0
- data/lib/hexapdf/encryption/security_handler.rb +7 -2
- data/lib/hexapdf/encryption/standard_security_handler.rb +16 -0
- data/lib/hexapdf/error.rb +4 -3
- data/lib/hexapdf/filter.rb +1 -0
- data/lib/hexapdf/filter/crypt.rb +60 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
- data/lib/hexapdf/parser.rb +35 -11
- data/lib/hexapdf/revision.rb +16 -0
- data/lib/hexapdf/serializer.rb +7 -1
- data/lib/hexapdf/tokenizer.rb +22 -3
- data/lib/hexapdf/type/acro_form.rb +1 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +29 -17
- data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
- data/lib/hexapdf/type/acro_form/field.rb +1 -0
- data/lib/hexapdf/type/acro_form/form.rb +37 -0
- data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
- data/lib/hexapdf/type/annotation.rb +13 -9
- data/lib/hexapdf/type/annotations/widget.rb +3 -1
- data/lib/hexapdf/type/font_descriptor.rb +9 -2
- data/lib/hexapdf/type/page.rb +81 -0
- data/lib/hexapdf/type/xref_stream.rb +7 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/content/test_canvas.rb +21 -0
- data/test/hexapdf/encryption/test_security_handler.rb +15 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +27 -0
- data/test/hexapdf/filter/test_crypt.rb +21 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
- data/test/hexapdf/test_parser.rb +47 -3
- data/test/hexapdf/test_revision.rb +21 -0
- data/test/hexapdf/test_serializer.rb +3 -0
- data/test/hexapdf/test_tokenizer.rb +22 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +21 -2
- data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
- data/test/hexapdf/type/acro_form/test_field.rb +5 -0
- data/test/hexapdf/type/acro_form/test_form.rb +46 -2
- data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
- data/test/hexapdf/type/annotations/test_widget.rb +2 -0
- data/test/hexapdf/type/test_annotation.rb +20 -10
- data/test/hexapdf/type/test_font_descriptor.rb +7 -0
- data/test/hexapdf/type/test_page.rb +187 -49
- data/test/hexapdf/type/test_xref_stream.rb +7 -0
- data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 592ea8ae7648df43e92ba50effdf3f8f34163e4acf7fb9567c3b38db46eb598e
|
4
|
+
data.tar.gz: 6c3b7d32a1499f2e2133fbafdf46b9d3cd4d1df41b9ae308c0c32ea39aefff2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fdf9edf53c0443d459008634ddbff7cd80fc1422fa558df41db04af0d9eeb512ea050d5b4a10987b824c675203e39bc851d1b2a68d0178f2cd12fada66b31245
|
7
|
+
data.tar.gz: 8e6a7b91da0ed2b63f7bc6d52c3993553f439edf986253b3508d0510310195c2a6f3721c2cfed735afc3d60dacacedc8207a6fac361bedc354bc6bd779207eac
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,88 @@
|
|
1
|
+
## 0.15.3 - 2021-05-01
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Handling of general (not document-level), unencrypted metadata streams
|
6
|
+
|
7
|
+
|
8
|
+
## 0.15.2 - 2021-05-01
|
9
|
+
|
10
|
+
### Fixed
|
11
|
+
|
12
|
+
* Handling of unencrypted metadata streams
|
13
|
+
|
14
|
+
|
15
|
+
## 0.15.1 - 2021-04-15
|
16
|
+
|
17
|
+
### Fixed
|
18
|
+
|
19
|
+
* Potential division by zero when calculating the scaling for XObjects
|
20
|
+
* Handling of XObjects with a width or height of zero when drawing on canvas
|
21
|
+
|
22
|
+
|
23
|
+
## 0.15.0 - 2021-04-12
|
24
|
+
|
25
|
+
### Added
|
26
|
+
|
27
|
+
* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
|
28
|
+
page
|
29
|
+
* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
|
30
|
+
* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
|
31
|
+
object
|
32
|
+
* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
|
33
|
+
fields
|
34
|
+
* Support for form field flattening to the `hexapdf form` CLI command
|
35
|
+
|
36
|
+
### Changed
|
37
|
+
|
38
|
+
* **Breaking change**: Overhauled the interface for accessing appearances of
|
39
|
+
annotations to make it more convenient
|
40
|
+
* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
|
41
|
+
value
|
42
|
+
* [HexaPDF::MalformedPDFError#pos] an accessor instead of a reader and update
|
43
|
+
the exception message
|
44
|
+
* Configuration option 'acro_form.fallback_font' to allow a callable object for
|
45
|
+
more advanced fallback font handling
|
46
|
+
|
47
|
+
### Fixed
|
48
|
+
|
49
|
+
* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
|
50
|
+
empty background color arrays
|
51
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
|
52
|
+
stored in the document in case the widget is embedded
|
53
|
+
* Processing of invalid PDF files containing a space,CR,LF combination after
|
54
|
+
the 'stream' keyword
|
55
|
+
* Cross-reference stream reconstruction with respect to detection of linearized
|
56
|
+
files
|
57
|
+
* Detection of existing appearances for AcroForm push button fields when
|
58
|
+
creating appearances
|
59
|
+
|
60
|
+
|
61
|
+
## 0.14.4 - 2021-02-27
|
62
|
+
|
63
|
+
### Added
|
64
|
+
|
65
|
+
* Support for the Crypt filters
|
66
|
+
|
67
|
+
### Changed
|
68
|
+
|
69
|
+
* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
|
70
|
+
|
71
|
+
### Fixed
|
72
|
+
|
73
|
+
* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
|
74
|
+
* Processing of invalid PDF files containing NaN and Inf instead of numbers
|
75
|
+
* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
|
76
|
+
line character
|
77
|
+
* Cross-reference table reconstruction to handle the case of an entry specifying
|
78
|
+
a non-existent indirect object
|
79
|
+
* Cross-reference table reconstruction to handle trailers specified by cross-
|
80
|
+
reference streams
|
81
|
+
* Cross-reference table reconstruction to use the set security handler for
|
82
|
+
decrypting indirect objects
|
83
|
+
* Parsing of cross-reference streams where data is missing
|
84
|
+
|
85
|
+
|
1
86
|
## 0.14.3 - 2021-02-16
|
2
87
|
|
3
88
|
### Fixed
|
data/lib/hexapdf/cli/form.rb
CHANGED
@@ -52,18 +52,26 @@ module HexaPDF
|
|
52
52
|
If the the output file name is not given, all form fields are listed in page order. Use
|
53
53
|
the global --verbose option to show additional information like field type and location.
|
54
54
|
|
55
|
-
If the output file name is given, the fields can be
|
56
|
-
|
57
|
-
|
55
|
+
If the output file name is given, the fields can be filled out interactively, via a
|
56
|
+
template or just flattened by using the respective options. Form field flattening can also
|
57
|
+
be activated in addition to filling out the form. If neither --fill, --template nor
|
58
|
+
--flatten is specified, --fill is implied.
|
58
59
|
EOF
|
59
60
|
|
60
61
|
options.on("--password PASSWORD", "-p", String,
|
61
62
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
62
63
|
@password = (pwd == '-' ? read_password : pwd)
|
63
64
|
end
|
65
|
+
options.on("--fill", "Fill out the form") do
|
66
|
+
@fill = true
|
67
|
+
end
|
64
68
|
options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
|
65
|
-
"Use the template file for the field values") do |template|
|
69
|
+
"Use the template file for the field values (implies --fill)") do |template|
|
66
70
|
@template = template
|
71
|
+
@fill = true
|
72
|
+
end
|
73
|
+
options.on('--flatten', 'Flatten the form fields') do
|
74
|
+
@flatten = true
|
67
75
|
end
|
68
76
|
options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
|
69
77
|
"appearance. Default: use setting from input PDF") do |need_appearances|
|
@@ -75,6 +83,8 @@ module HexaPDF
|
|
75
83
|
end
|
76
84
|
|
77
85
|
@password = nil
|
86
|
+
@fill = false
|
87
|
+
@flatten = false
|
78
88
|
@template = nil
|
79
89
|
@need_appearances = nil
|
80
90
|
@incremental = true
|
@@ -82,16 +92,28 @@ module HexaPDF
|
|
82
92
|
|
83
93
|
def execute(in_file, out_file = nil) #:nodoc:
|
84
94
|
maybe_raise_on_existing_file(out_file) if out_file
|
95
|
+
if (@fill || @flatten) && !out_file
|
96
|
+
raise "Output file missing"
|
97
|
+
end
|
85
98
|
with_document(in_file, password: @password, out_file: out_file,
|
86
99
|
incremental: @incremental) do |doc|
|
87
100
|
if !doc.acro_form
|
88
101
|
raise "This PDF doesn't contain an interactive form"
|
89
102
|
elsif out_file
|
90
103
|
doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
|
91
|
-
if @
|
92
|
-
|
93
|
-
|
94
|
-
|
104
|
+
if @fill || !@flatten
|
105
|
+
if @template
|
106
|
+
fill_form_with_template(doc)
|
107
|
+
else
|
108
|
+
fill_form(doc)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
if @flatten
|
112
|
+
unless doc.acro_form.flatten.empty?
|
113
|
+
$stderr.puts "Warning: Not all form fields could be flattened"
|
114
|
+
doc.catalog.delete(:AcroForm)
|
115
|
+
doc.delete(doc.acro_form)
|
116
|
+
end
|
95
117
|
end
|
96
118
|
else
|
97
119
|
list_form_fields(doc)
|
@@ -164,9 +164,20 @@ module HexaPDF
|
|
164
164
|
# acro_form.fallback_font::
|
165
165
|
# The font that should be used when a variable text field references a font that cannot be used.
|
166
166
|
#
|
167
|
-
# Can
|
168
|
-
#
|
169
|
-
#
|
167
|
+
# Can be one of the following:
|
168
|
+
#
|
169
|
+
# * The name of a font, like 'Helvetica'.
|
170
|
+
#
|
171
|
+
# * An array consisting of the font name and a hash of font options, like ['Helvetica',
|
172
|
+
# variant: :italic].
|
173
|
+
#
|
174
|
+
# * A callable object receiving the field and the font object (or +nil+ if no valid font object
|
175
|
+
# was found) and which has to return either a font name or an array consisting of the font
|
176
|
+
# name and a hash of font options. This way the response can be different depending on the
|
177
|
+
# original font and it would also allow e.g. modifying the configured fonts to add custom
|
178
|
+
# ones.
|
179
|
+
#
|
180
|
+
# If set to +nil+, the use of the fallback font is disabled.
|
170
181
|
#
|
171
182
|
# Default is 'Helvetica'.
|
172
183
|
#
|
@@ -393,7 +404,7 @@ module HexaPDF
|
|
393
404
|
DCTDecode: 'HexaPDF::Filter::PassThrough',
|
394
405
|
DCT: 'HexaPDF::Filter::PassThrough',
|
395
406
|
JPXDecode: 'HexaPDF::Filter::PassThrough',
|
396
|
-
Crypt:
|
407
|
+
Crypt: 'HexaPDF::Filter::Crypt',
|
397
408
|
Encryption: 'HexaPDF::Filter::Encryption',
|
398
409
|
},
|
399
410
|
'font.map' => {},
|
@@ -516,6 +527,9 @@ module HexaPDF
|
|
516
527
|
XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
|
517
528
|
XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
|
518
529
|
Border: 'HexaPDF::Type::Annotation::Border',
|
530
|
+
SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
|
531
|
+
SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
|
532
|
+
SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
|
519
533
|
},
|
520
534
|
'object.subtype_map' => {
|
521
535
|
nil => {
|
@@ -561,6 +575,7 @@ module HexaPDF
|
|
561
575
|
Tx: 'HexaPDF::Type::AcroForm::TextField',
|
562
576
|
Btn: 'HexaPDF::Type::AcroForm::ButtonField',
|
563
577
|
Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
|
578
|
+
Sig: 'HexaPDF::Type::AcroForm::SignatureField',
|
564
579
|
},
|
565
580
|
})
|
566
581
|
|
@@ -1260,6 +1260,7 @@ module HexaPDF
|
|
1260
1260
|
unless obj.kind_of?(HexaPDF::Stream)
|
1261
1261
|
obj = context.document.images.add(obj)
|
1262
1262
|
end
|
1263
|
+
return obj if obj.width == 0 || obj.height == 0
|
1263
1264
|
|
1264
1265
|
width, height = calculate_dimensions(obj.width, obj.height,
|
1265
1266
|
rwidth: width, rheight: height)
|
@@ -268,7 +268,7 @@ module HexaPDF
|
|
268
268
|
str.replace(string_algorithm.decrypt(key, str))
|
269
269
|
end
|
270
270
|
|
271
|
-
if obj.kind_of?(HexaPDF::Stream)
|
271
|
+
if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
|
272
272
|
unless string_algorithm == stream_algorithm
|
273
273
|
key = object_key(obj.oid, obj.gen, stream_algorithm)
|
274
274
|
end
|
@@ -300,7 +300,12 @@ module HexaPDF
|
|
300
300
|
obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
|
301
301
|
obj.raw_stream.undecrypted_fiber
|
302
302
|
else
|
303
|
-
|
303
|
+
filter = obj[:Filter]
|
304
|
+
if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
|
305
|
+
result
|
306
|
+
else
|
307
|
+
stream_algorithm.encryption_fiber(key, result)
|
308
|
+
end
|
304
309
|
end
|
305
310
|
end
|
306
311
|
|
@@ -240,6 +240,22 @@ module HexaPDF
|
|
240
240
|
end
|
241
241
|
end
|
242
242
|
|
243
|
+
def decrypt(obj) #:nodoc:
|
244
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
245
|
+
obj
|
246
|
+
else
|
247
|
+
super
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
def encrypt_stream(obj) #:nodoc
|
252
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
253
|
+
obj.stream_encoder
|
254
|
+
else
|
255
|
+
super
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
243
259
|
private
|
244
260
|
|
245
261
|
# Prepares the security handler for use in encrypting the document.
|
data/lib/hexapdf/error.rb
CHANGED
@@ -43,12 +43,13 @@ module HexaPDF
|
|
43
43
|
class MalformedPDFError < Error
|
44
44
|
|
45
45
|
# The byte position in the PDF file where the error occurred.
|
46
|
-
|
46
|
+
attr_accessor :pos
|
47
47
|
|
48
48
|
# Creates a new malformed PDF error object for the given exception message.
|
49
49
|
#
|
50
|
-
# The byte position where the error occured
|
51
|
-
|
50
|
+
# The byte position where the error occurred can either be given via the +pos+ argument or later
|
51
|
+
# via the #pos accessor but must be set before the exception message is retrieved.
|
52
|
+
def initialize(message, pos: nil)
|
52
53
|
super(message)
|
53
54
|
@pos = pos
|
54
55
|
end
|
data/lib/hexapdf/filter.rb
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2020 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'hexapdf/error'
|
38
|
+
|
39
|
+
module HexaPDF
|
40
|
+
module Filter
|
41
|
+
|
42
|
+
# This filter module implements the Crypt filter. The only supported part is using the Identity
|
43
|
+
# filter.
|
44
|
+
module Crypt
|
45
|
+
|
46
|
+
# See HexaPDF::Filter
|
47
|
+
def self.decoder(source, options)
|
48
|
+
if !options || !options.key?(:Name) || options[:Name] == :Identity
|
49
|
+
source
|
50
|
+
else
|
51
|
+
raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
singleton_class.send(:alias_method, :encoder, :decoder)
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -56,10 +56,12 @@ module HexaPDF
|
|
56
56
|
# PDF references are resolved using the associated Document object.
|
57
57
|
def initialize(io, document)
|
58
58
|
@io = io
|
59
|
-
|
59
|
+
on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
|
60
|
+
@tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
|
60
61
|
@document = document
|
61
62
|
@object_stream_data = {}
|
62
63
|
@reconstructed_revision = nil
|
64
|
+
@in_reconstruct_revision = false
|
63
65
|
retrieve_pdf_header_offset_and_version
|
64
66
|
end
|
65
67
|
|
@@ -94,7 +96,8 @@ module HexaPDF
|
|
94
96
|
|
95
97
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
96
98
|
rescue HexaPDF::MalformedPDFError
|
97
|
-
reconstructed_revision.object(xref_entry)
|
99
|
+
reconstructed_revision.object(xref_entry) ||
|
100
|
+
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
98
101
|
end
|
99
102
|
|
100
103
|
# Parses the indirect object at the specified offset.
|
@@ -137,11 +140,13 @@ module HexaPDF
|
|
137
140
|
raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
|
138
141
|
end
|
139
142
|
tok1 = @tokenizer.next_byte
|
140
|
-
|
143
|
+
if tok1 == 32 # space
|
144
|
+
maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
|
145
|
+
tok1 = @tokenizer.next_byte
|
146
|
+
end
|
147
|
+
tok2 = @tokenizer.next_byte if tok1 == 13 # CR
|
141
148
|
if tok1 != 10 && tok1 != 13
|
142
|
-
|
143
|
-
maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
|
144
|
-
force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
|
149
|
+
raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
|
145
150
|
elsif tok1 == 13 && tok2 != 10
|
146
151
|
maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
|
147
152
|
pos: @tokenizer.pos)
|
@@ -211,7 +216,12 @@ module HexaPDF
|
|
211
216
|
unless obj.respond_to?(:xref_section)
|
212
217
|
raise_malformed("Object is not a cross-reference stream", pos: pos)
|
213
218
|
end
|
214
|
-
|
219
|
+
begin
|
220
|
+
xref_section = obj.xref_section
|
221
|
+
rescue MalformedPDFError => e
|
222
|
+
e.pos = pos
|
223
|
+
raise
|
224
|
+
end
|
215
225
|
trailer = obj.trailer
|
216
226
|
unless xref_section.entry?(obj.oid, obj.gen)
|
217
227
|
maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
|
@@ -389,12 +399,16 @@ module HexaPDF
|
|
389
399
|
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
390
400
|
# cross-reference table, later objects overwriting prior ones.
|
391
401
|
def reconstruct_revision
|
402
|
+
return if @in_reconstruct_revision
|
403
|
+
@in_reconstruct_revision = true
|
404
|
+
|
392
405
|
raise unless @document.config['parser.try_xref_reconstruction']
|
393
406
|
msg = "#{$!} - trying cross-reference table reconstruction"
|
394
407
|
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
395
408
|
|
396
409
|
xref = XRefSection.new
|
397
410
|
@tokenizer.pos = 0
|
411
|
+
linearized = nil
|
398
412
|
while true
|
399
413
|
@tokenizer.skip_whitespace
|
400
414
|
pos = @tokenizer.pos
|
@@ -410,13 +424,17 @@ module HexaPDF
|
|
410
424
|
@tokenizer.pos = next_new_line_pos
|
411
425
|
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
412
426
|
xref.add_in_use_entry(token, gen, pos)
|
427
|
+
if linearized.nil?
|
428
|
+
obj = @tokenizer.next_object rescue nil
|
429
|
+
linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
|
430
|
+
end
|
413
431
|
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
414
432
|
end
|
415
433
|
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
416
434
|
obj = @tokenizer.next_object rescue nil
|
417
435
|
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
418
436
|
# linearized file.
|
419
|
-
trailer = obj if obj.kind_of?(Hash) && (
|
437
|
+
trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
|
420
438
|
elsif token == Tokenizer::NO_MORE_TOKENS
|
421
439
|
break
|
422
440
|
else
|
@@ -424,16 +442,22 @@ module HexaPDF
|
|
424
442
|
end
|
425
443
|
end
|
426
444
|
|
427
|
-
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
428
445
|
if !trailer || trailer.empty?
|
429
|
-
|
446
|
+
_, trailer = load_revision(startxref_offset) rescue nil
|
447
|
+
unless trailer
|
448
|
+
@in_reconstruct_revision = false
|
449
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
450
|
+
end
|
430
451
|
end
|
452
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
431
453
|
|
432
454
|
loader = lambda do |xref_entry|
|
433
455
|
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
434
|
-
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
456
|
+
obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
457
|
+
@document.security_handler ? @document.security_handler.decrypt(obj) : obj
|
435
458
|
end
|
436
459
|
|
460
|
+
@in_reconstruct_revision = false
|
437
461
|
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
438
462
|
loader: loader)
|
439
463
|
end
|