hexapdf 0.14.3 → 0.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +85 -0
- data/lib/hexapdf/cli/form.rb +30 -8
- data/lib/hexapdf/configuration.rb +19 -4
- data/lib/hexapdf/content/canvas.rb +1 -0
- data/lib/hexapdf/encryption/security_handler.rb +7 -2
- data/lib/hexapdf/encryption/standard_security_handler.rb +16 -0
- data/lib/hexapdf/error.rb +4 -3
- data/lib/hexapdf/filter.rb +1 -0
- data/lib/hexapdf/filter/crypt.rb +60 -0
- data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
- data/lib/hexapdf/parser.rb +35 -11
- data/lib/hexapdf/revision.rb +16 -0
- data/lib/hexapdf/serializer.rb +7 -1
- data/lib/hexapdf/tokenizer.rb +22 -3
- data/lib/hexapdf/type/acro_form.rb +1 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +29 -17
- data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
- data/lib/hexapdf/type/acro_form/field.rb +1 -0
- data/lib/hexapdf/type/acro_form/form.rb +37 -0
- data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
- data/lib/hexapdf/type/annotation.rb +13 -9
- data/lib/hexapdf/type/annotations/widget.rb +3 -1
- data/lib/hexapdf/type/font_descriptor.rb +9 -2
- data/lib/hexapdf/type/page.rb +81 -0
- data/lib/hexapdf/type/xref_stream.rb +7 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/content/test_canvas.rb +21 -0
- data/test/hexapdf/encryption/test_security_handler.rb +15 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +27 -0
- data/test/hexapdf/filter/test_crypt.rb +21 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
- data/test/hexapdf/test_parser.rb +47 -3
- data/test/hexapdf/test_revision.rb +21 -0
- data/test/hexapdf/test_serializer.rb +3 -0
- data/test/hexapdf/test_tokenizer.rb +22 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +21 -2
- data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
- data/test/hexapdf/type/acro_form/test_field.rb +5 -0
- data/test/hexapdf/type/acro_form/test_form.rb +46 -2
- data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
- data/test/hexapdf/type/annotations/test_widget.rb +2 -0
- data/test/hexapdf/type/test_annotation.rb +20 -10
- data/test/hexapdf/type/test_font_descriptor.rb +7 -0
- data/test/hexapdf/type/test_page.rb +187 -49
- data/test/hexapdf/type/test_xref_stream.rb +7 -0
- data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 592ea8ae7648df43e92ba50effdf3f8f34163e4acf7fb9567c3b38db46eb598e
|
|
4
|
+
data.tar.gz: 6c3b7d32a1499f2e2133fbafdf46b9d3cd4d1df41b9ae308c0c32ea39aefff2d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fdf9edf53c0443d459008634ddbff7cd80fc1422fa558df41db04af0d9eeb512ea050d5b4a10987b824c675203e39bc851d1b2a68d0178f2cd12fada66b31245
|
|
7
|
+
data.tar.gz: 8e6a7b91da0ed2b63f7bc6d52c3993553f439edf986253b3508d0510310195c2a6f3721c2cfed735afc3d60dacacedc8207a6fac361bedc354bc6bd779207eac
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,88 @@
|
|
|
1
|
+
## 0.15.3 - 2021-05-01
|
|
2
|
+
|
|
3
|
+
### Fixed
|
|
4
|
+
|
|
5
|
+
* Handling of general (not document-level), unencrypted metadata streams
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
## 0.15.2 - 2021-05-01
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
* Handling of unencrypted metadata streams
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
## 0.15.1 - 2021-04-15
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
* Potential division by zero when calculating the scaling for XObjects
|
|
20
|
+
* Handling of XObjects with a width or height of zero when drawing on canvas
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
## 0.15.0 - 2021-04-12
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
|
|
28
|
+
page
|
|
29
|
+
* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
|
|
30
|
+
* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
|
|
31
|
+
object
|
|
32
|
+
* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
|
|
33
|
+
fields
|
|
34
|
+
* Support for form field flattening to the `hexapdf form` CLI command
|
|
35
|
+
|
|
36
|
+
### Changed
|
|
37
|
+
|
|
38
|
+
* **Breaking change**: Overhauled the interface for accessing appearances of
|
|
39
|
+
annotations to make it more convenient
|
|
40
|
+
* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
|
|
41
|
+
value
|
|
42
|
+
* [HexaPDF::MalformedPDFError#pos] an accessor instead of a reader and update
|
|
43
|
+
the exception message
|
|
44
|
+
* Configuration option 'acro_form.fallback_font' to allow a callable object for
|
|
45
|
+
more advanced fallback font handling
|
|
46
|
+
|
|
47
|
+
### Fixed
|
|
48
|
+
|
|
49
|
+
* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
|
|
50
|
+
empty background color arrays
|
|
51
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
|
|
52
|
+
stored in the document in case the widget is embedded
|
|
53
|
+
* Processing of invalid PDF files containing a space,CR,LF combination after
|
|
54
|
+
the 'stream' keyword
|
|
55
|
+
* Cross-reference stream reconstruction with respect to detection of linearized
|
|
56
|
+
files
|
|
57
|
+
* Detection of existing appearances for AcroForm push button fields when
|
|
58
|
+
creating appearances
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
## 0.14.4 - 2021-02-27
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
|
|
65
|
+
* Support for the Crypt filters
|
|
66
|
+
|
|
67
|
+
### Changed
|
|
68
|
+
|
|
69
|
+
* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
|
|
70
|
+
|
|
71
|
+
### Fixed
|
|
72
|
+
|
|
73
|
+
* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
|
|
74
|
+
* Processing of invalid PDF files containing NaN and Inf instead of numbers
|
|
75
|
+
* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
|
|
76
|
+
line character
|
|
77
|
+
* Cross-reference table reconstruction to handle the case of an entry specifying
|
|
78
|
+
a non-existent indirect object
|
|
79
|
+
* Cross-reference table reconstruction to handle trailers specified by cross-
|
|
80
|
+
reference streams
|
|
81
|
+
* Cross-reference table reconstruction to use the set security handler for
|
|
82
|
+
decrypting indirect objects
|
|
83
|
+
* Parsing of cross-reference streams where data is missing
|
|
84
|
+
|
|
85
|
+
|
|
1
86
|
## 0.14.3 - 2021-02-16
|
|
2
87
|
|
|
3
88
|
### Fixed
|
data/lib/hexapdf/cli/form.rb
CHANGED
|
@@ -52,18 +52,26 @@ module HexaPDF
|
|
|
52
52
|
If the output file name is not given, all form fields are listed in page order. Use
|
|
53
53
|
the global --verbose option to show additional information like field type and location.
|
|
54
54
|
|
|
55
|
-
If the output file name is given, the fields can be
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
If the output file name is given, the fields can be filled out interactively, via a
|
|
56
|
+
template or just flattened by using the respective options. Form field flattening can also
|
|
57
|
+
be activated in addition to filling out the form. If neither --fill, --template nor
|
|
58
|
+
--flatten is specified, --fill is implied.
|
|
58
59
|
EOF
|
|
59
60
|
|
|
60
61
|
options.on("--password PASSWORD", "-p", String,
|
|
61
62
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
62
63
|
@password = (pwd == '-' ? read_password : pwd)
|
|
63
64
|
end
|
|
65
|
+
options.on("--fill", "Fill out the form") do
|
|
66
|
+
@fill = true
|
|
67
|
+
end
|
|
64
68
|
options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
|
|
65
|
-
"Use the template file for the field values") do |template|
|
|
69
|
+
"Use the template file for the field values (implies --fill)") do |template|
|
|
66
70
|
@template = template
|
|
71
|
+
@fill = true
|
|
72
|
+
end
|
|
73
|
+
options.on('--flatten', 'Flatten the form fields') do
|
|
74
|
+
@flatten = true
|
|
67
75
|
end
|
|
68
76
|
options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
|
|
69
77
|
"appearance. Default: use setting from input PDF") do |need_appearances|
|
|
@@ -75,6 +83,8 @@ module HexaPDF
|
|
|
75
83
|
end
|
|
76
84
|
|
|
77
85
|
@password = nil
|
|
86
|
+
@fill = false
|
|
87
|
+
@flatten = false
|
|
78
88
|
@template = nil
|
|
79
89
|
@need_appearances = nil
|
|
80
90
|
@incremental = true
|
|
@@ -82,16 +92,28 @@ module HexaPDF
|
|
|
82
92
|
|
|
83
93
|
def execute(in_file, out_file = nil) #:nodoc:
|
|
84
94
|
maybe_raise_on_existing_file(out_file) if out_file
|
|
95
|
+
if (@fill || @flatten) && !out_file
|
|
96
|
+
raise "Output file missing"
|
|
97
|
+
end
|
|
85
98
|
with_document(in_file, password: @password, out_file: out_file,
|
|
86
99
|
incremental: @incremental) do |doc|
|
|
87
100
|
if !doc.acro_form
|
|
88
101
|
raise "This PDF doesn't contain an interactive form"
|
|
89
102
|
elsif out_file
|
|
90
103
|
doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
|
|
91
|
-
if @
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
104
|
+
if @fill || !@flatten
|
|
105
|
+
if @template
|
|
106
|
+
fill_form_with_template(doc)
|
|
107
|
+
else
|
|
108
|
+
fill_form(doc)
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
if @flatten
|
|
112
|
+
unless doc.acro_form.flatten.empty?
|
|
113
|
+
$stderr.puts "Warning: Not all form fields could be flattened"
|
|
114
|
+
doc.catalog.delete(:AcroForm)
|
|
115
|
+
doc.delete(doc.acro_form)
|
|
116
|
+
end
|
|
95
117
|
end
|
|
96
118
|
else
|
|
97
119
|
list_form_fields(doc)
|
|
@@ -164,9 +164,20 @@ module HexaPDF
|
|
|
164
164
|
# acro_form.fallback_font::
|
|
165
165
|
# The font that should be used when a variable text field references a font that cannot be used.
|
|
166
166
|
#
|
|
167
|
-
# Can
|
|
168
|
-
#
|
|
169
|
-
#
|
|
167
|
+
# Can be one of the following:
|
|
168
|
+
#
|
|
169
|
+
# * The name of a font, like 'Helvetica'.
|
|
170
|
+
#
|
|
171
|
+
# * An array consisting of the font name and a hash of font options, like ['Helvetica',
|
|
172
|
+
# variant: :italic].
|
|
173
|
+
#
|
|
174
|
+
# * A callable object receiving the field and the font object (or +nil+ if no valid font object
|
|
175
|
+
# was found) and which has to return either a font name or an array consisting of the font
|
|
176
|
+
# name and a hash of font options. This way the response can be different depending on the
|
|
177
|
+
# original font and it would also allow e.g. modifying the configured fonts to add custom
|
|
178
|
+
# ones.
|
|
179
|
+
#
|
|
180
|
+
# If set to +nil+, the use of the fallback font is disabled.
|
|
170
181
|
#
|
|
171
182
|
# Default is 'Helvetica'.
|
|
172
183
|
#
|
|
@@ -393,7 +404,7 @@ module HexaPDF
|
|
|
393
404
|
DCTDecode: 'HexaPDF::Filter::PassThrough',
|
|
394
405
|
DCT: 'HexaPDF::Filter::PassThrough',
|
|
395
406
|
JPXDecode: 'HexaPDF::Filter::PassThrough',
|
|
396
|
-
Crypt:
|
|
407
|
+
Crypt: 'HexaPDF::Filter::Crypt',
|
|
397
408
|
Encryption: 'HexaPDF::Filter::Encryption',
|
|
398
409
|
},
|
|
399
410
|
'font.map' => {},
|
|
@@ -516,6 +527,9 @@ module HexaPDF
|
|
|
516
527
|
XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
|
|
517
528
|
XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
|
|
518
529
|
Border: 'HexaPDF::Type::Annotation::Border',
|
|
530
|
+
SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
|
|
531
|
+
SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
|
|
532
|
+
SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
|
|
519
533
|
},
|
|
520
534
|
'object.subtype_map' => {
|
|
521
535
|
nil => {
|
|
@@ -561,6 +575,7 @@ module HexaPDF
|
|
|
561
575
|
Tx: 'HexaPDF::Type::AcroForm::TextField',
|
|
562
576
|
Btn: 'HexaPDF::Type::AcroForm::ButtonField',
|
|
563
577
|
Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
|
|
578
|
+
Sig: 'HexaPDF::Type::AcroForm::SignatureField',
|
|
564
579
|
},
|
|
565
580
|
})
|
|
566
581
|
|
|
@@ -1260,6 +1260,7 @@ module HexaPDF
|
|
|
1260
1260
|
unless obj.kind_of?(HexaPDF::Stream)
|
|
1261
1261
|
obj = context.document.images.add(obj)
|
|
1262
1262
|
end
|
|
1263
|
+
return obj if obj.width == 0 || obj.height == 0
|
|
1263
1264
|
|
|
1264
1265
|
width, height = calculate_dimensions(obj.width, obj.height,
|
|
1265
1266
|
rwidth: width, rheight: height)
|
|
@@ -268,7 +268,7 @@ module HexaPDF
|
|
|
268
268
|
str.replace(string_algorithm.decrypt(key, str))
|
|
269
269
|
end
|
|
270
270
|
|
|
271
|
-
if obj.kind_of?(HexaPDF::Stream)
|
|
271
|
+
if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
|
|
272
272
|
unless string_algorithm == stream_algorithm
|
|
273
273
|
key = object_key(obj.oid, obj.gen, stream_algorithm)
|
|
274
274
|
end
|
|
@@ -300,7 +300,12 @@ module HexaPDF
|
|
|
300
300
|
obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
|
|
301
301
|
obj.raw_stream.undecrypted_fiber
|
|
302
302
|
else
|
|
303
|
-
|
|
303
|
+
filter = obj[:Filter]
|
|
304
|
+
if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
|
|
305
|
+
result
|
|
306
|
+
else
|
|
307
|
+
stream_algorithm.encryption_fiber(key, result)
|
|
308
|
+
end
|
|
304
309
|
end
|
|
305
310
|
end
|
|
306
311
|
|
|
@@ -240,6 +240,22 @@ module HexaPDF
|
|
|
240
240
|
end
|
|
241
241
|
end
|
|
242
242
|
|
|
243
|
+
def decrypt(obj) #:nodoc:
|
|
244
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
|
245
|
+
obj
|
|
246
|
+
else
|
|
247
|
+
super
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def encrypt_stream(obj) #:nodoc:
|
|
252
|
+
if dict[:V] >= 4 && obj.type == :Metadata && obj[:Subtype] == :XML && !dict[:EncryptMetadata]
|
|
253
|
+
obj.stream_encoder
|
|
254
|
+
else
|
|
255
|
+
super
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
|
|
243
259
|
private
|
|
244
260
|
|
|
245
261
|
# Prepares the security handler for use in encrypting the document.
|
data/lib/hexapdf/error.rb
CHANGED
|
@@ -43,12 +43,13 @@ module HexaPDF
|
|
|
43
43
|
class MalformedPDFError < Error
|
|
44
44
|
|
|
45
45
|
# The byte position in the PDF file where the error occurred.
|
|
46
|
-
|
|
46
|
+
attr_accessor :pos
|
|
47
47
|
|
|
48
48
|
# Creates a new malformed PDF error object for the given exception message.
|
|
49
49
|
#
|
|
50
|
-
# The byte position where the error occured
|
|
51
|
-
|
|
50
|
+
# The byte position where the error occurred can either be given via the +pos+ argument or later
|
|
51
|
+
# via the #pos accessor but must be set before the exception message is retrieved.
|
|
52
|
+
def initialize(message, pos: nil)
|
|
52
53
|
super(message)
|
|
53
54
|
@pos = pos
|
|
54
55
|
end
|
data/lib/hexapdf/filter.rb
CHANGED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
|
2
|
+
#
|
|
3
|
+
#--
|
|
4
|
+
# This file is part of HexaPDF.
|
|
5
|
+
#
|
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
+
# Copyright (C) 2014-2020 Thomas Leitner
|
|
8
|
+
#
|
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
16
|
+
#
|
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
|
20
|
+
# License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
24
|
+
#
|
|
25
|
+
# The interactive user interfaces in modified source and object code
|
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
|
28
|
+
#
|
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
|
31
|
+
# is created or manipulated using HexaPDF.
|
|
32
|
+
#
|
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
|
35
|
+
#++
|
|
36
|
+
|
|
37
|
+
require 'hexapdf/error'
|
|
38
|
+
|
|
39
|
+
module HexaPDF
|
|
40
|
+
module Filter
|
|
41
|
+
|
|
42
|
+
# This filter module implements the Crypt filter. The only supported part is using the Identity
|
|
43
|
+
# filter.
|
|
44
|
+
module Crypt
|
|
45
|
+
|
|
46
|
+
# See HexaPDF::Filter
|
|
47
|
+
def self.decoder(source, options)
|
|
48
|
+
if !options || !options.key?(:Name) || options[:Name] == :Identity
|
|
49
|
+
source
|
|
50
|
+
else
|
|
51
|
+
raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
singleton_class.send(:alias_method, :encoder, :decoder)
|
|
56
|
+
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
|
60
|
+
end
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -56,10 +56,12 @@ module HexaPDF
|
|
|
56
56
|
# PDF references are resolved using the associated Document object.
|
|
57
57
|
def initialize(io, document)
|
|
58
58
|
@io = io
|
|
59
|
-
|
|
59
|
+
on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
|
|
60
|
+
@tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
|
|
60
61
|
@document = document
|
|
61
62
|
@object_stream_data = {}
|
|
62
63
|
@reconstructed_revision = nil
|
|
64
|
+
@in_reconstruct_revision = false
|
|
63
65
|
retrieve_pdf_header_offset_and_version
|
|
64
66
|
end
|
|
65
67
|
|
|
@@ -94,7 +96,8 @@ module HexaPDF
|
|
|
94
96
|
|
|
95
97
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
96
98
|
rescue HexaPDF::MalformedPDFError
|
|
97
|
-
reconstructed_revision.object(xref_entry)
|
|
99
|
+
reconstructed_revision.object(xref_entry) ||
|
|
100
|
+
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
|
98
101
|
end
|
|
99
102
|
|
|
100
103
|
# Parses the indirect object at the specified offset.
|
|
@@ -137,11 +140,13 @@ module HexaPDF
|
|
|
137
140
|
raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
|
|
138
141
|
end
|
|
139
142
|
tok1 = @tokenizer.next_byte
|
|
140
|
-
|
|
143
|
+
if tok1 == 32 # space
|
|
144
|
+
maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
|
|
145
|
+
tok1 = @tokenizer.next_byte
|
|
146
|
+
end
|
|
147
|
+
tok2 = @tokenizer.next_byte if tok1 == 13 # CR
|
|
141
148
|
if tok1 != 10 && tok1 != 13
|
|
142
|
-
|
|
143
|
-
maybe_raise("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos,
|
|
144
|
-
force: tok1 != 32 || (tok2 != 10 && tok2 != 13)) # 32=space
|
|
149
|
+
raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
|
|
145
150
|
elsif tok1 == 13 && tok2 != 10
|
|
146
151
|
maybe_raise("Keyword stream must be followed by LF or CR/LF, not CR alone",
|
|
147
152
|
pos: @tokenizer.pos)
|
|
@@ -211,7 +216,12 @@ module HexaPDF
|
|
|
211
216
|
unless obj.respond_to?(:xref_section)
|
|
212
217
|
raise_malformed("Object is not a cross-reference stream", pos: pos)
|
|
213
218
|
end
|
|
214
|
-
|
|
219
|
+
begin
|
|
220
|
+
xref_section = obj.xref_section
|
|
221
|
+
rescue MalformedPDFError => e
|
|
222
|
+
e.pos = pos
|
|
223
|
+
raise
|
|
224
|
+
end
|
|
215
225
|
trailer = obj.trailer
|
|
216
226
|
unless xref_section.entry?(obj.oid, obj.gen)
|
|
217
227
|
maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
|
|
@@ -389,12 +399,16 @@ module HexaPDF
|
|
|
389
399
|
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
|
390
400
|
# cross-reference table, later objects overwriting prior ones.
|
|
391
401
|
def reconstruct_revision
|
|
402
|
+
return if @in_reconstruct_revision
|
|
403
|
+
@in_reconstruct_revision = true
|
|
404
|
+
|
|
392
405
|
raise unless @document.config['parser.try_xref_reconstruction']
|
|
393
406
|
msg = "#{$!} - trying cross-reference table reconstruction"
|
|
394
407
|
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
|
395
408
|
|
|
396
409
|
xref = XRefSection.new
|
|
397
410
|
@tokenizer.pos = 0
|
|
411
|
+
linearized = nil
|
|
398
412
|
while true
|
|
399
413
|
@tokenizer.skip_whitespace
|
|
400
414
|
pos = @tokenizer.pos
|
|
@@ -410,13 +424,17 @@ module HexaPDF
|
|
|
410
424
|
@tokenizer.pos = next_new_line_pos
|
|
411
425
|
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
|
412
426
|
xref.add_in_use_entry(token, gen, pos)
|
|
427
|
+
if linearized.nil?
|
|
428
|
+
obj = @tokenizer.next_object rescue nil
|
|
429
|
+
linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
|
|
430
|
+
end
|
|
413
431
|
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
|
414
432
|
end
|
|
415
433
|
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
|
416
434
|
obj = @tokenizer.next_object rescue nil
|
|
417
435
|
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
|
418
436
|
# linearized file.
|
|
419
|
-
trailer = obj if obj.kind_of?(Hash) && (
|
|
437
|
+
trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
|
|
420
438
|
elsif token == Tokenizer::NO_MORE_TOKENS
|
|
421
439
|
break
|
|
422
440
|
else
|
|
@@ -424,16 +442,22 @@ module HexaPDF
|
|
|
424
442
|
end
|
|
425
443
|
end
|
|
426
444
|
|
|
427
|
-
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
|
428
445
|
if !trailer || trailer.empty?
|
|
429
|
-
|
|
446
|
+
_, trailer = load_revision(startxref_offset) rescue nil
|
|
447
|
+
unless trailer
|
|
448
|
+
@in_reconstruct_revision = false
|
|
449
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
|
450
|
+
end
|
|
430
451
|
end
|
|
452
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
|
431
453
|
|
|
432
454
|
loader = lambda do |xref_entry|
|
|
433
455
|
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
|
434
|
-
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
456
|
+
obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
|
457
|
+
@document.security_handler ? @document.security_handler.decrypt(obj) : obj
|
|
435
458
|
end
|
|
436
459
|
|
|
460
|
+
@in_reconstruct_revision = false
|
|
437
461
|
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
|
438
462
|
loader: loader)
|
|
439
463
|
end
|