hexapdf 0.14.1 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +97 -0
- data/lib/hexapdf/cli/form.rb +30 -8
- data/lib/hexapdf/configuration.rb +19 -4
- data/lib/hexapdf/content/canvas.rb +1 -0
- data/lib/hexapdf/dictionary.rb +3 -0
- data/lib/hexapdf/dictionary_fields.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +7 -2
- data/lib/hexapdf/error.rb +4 -3
- data/lib/hexapdf/filter.rb +1 -0
- data/lib/hexapdf/filter/crypt.rb +60 -0
- data/lib/hexapdf/font/true_type/subsetter.rb +7 -3
- data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
- data/lib/hexapdf/parser.rb +46 -14
- data/lib/hexapdf/pdf_array.rb +3 -0
- data/lib/hexapdf/revision.rb +16 -0
- data/lib/hexapdf/serializer.rb +10 -3
- data/lib/hexapdf/tokenizer.rb +44 -3
- data/lib/hexapdf/type/acro_form.rb +1 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +32 -17
- data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
- data/lib/hexapdf/type/acro_form/field.rb +1 -0
- data/lib/hexapdf/type/acro_form/form.rb +37 -0
- data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
- data/lib/hexapdf/type/annotation.rb +13 -9
- data/lib/hexapdf/type/annotations/widget.rb +3 -1
- data/lib/hexapdf/type/font_descriptor.rb +9 -2
- data/lib/hexapdf/type/page.rb +81 -0
- data/lib/hexapdf/type/resources.rb +4 -0
- data/lib/hexapdf/type/xref_stream.rb +7 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/content/test_canvas.rb +21 -0
- data/test/hexapdf/encryption/test_security_handler.rb +15 -0
- data/test/hexapdf/filter/test_crypt.rb +21 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +7 -2
- data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
- data/test/hexapdf/test_dictionary_fields.rb +7 -0
- data/test/hexapdf/test_parser.rb +82 -2
- data/test/hexapdf/test_revision.rb +21 -0
- data/test/hexapdf/test_serializer.rb +10 -0
- data/test/hexapdf/test_tokenizer.rb +50 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +24 -3
- data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
- data/test/hexapdf/type/acro_form/test_field.rb +5 -0
- data/test/hexapdf/type/acro_form/test_form.rb +46 -2
- data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
- data/test/hexapdf/type/annotations/test_widget.rb +2 -0
- data/test/hexapdf/type/test_annotation.rb +20 -10
- data/test/hexapdf/type/test_font_descriptor.rb +7 -0
- data/test/hexapdf/type/test_page.rb +187 -49
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/type/test_xref_stream.rb +7 -0
- data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 310d9fc74134cb2840118b3637c35a3037909d707532116680ef2fe6f42c43d3
|
4
|
+
data.tar.gz: 76f05b220e101114d4a4136d8c07520cfe35e1e532652ea4e43593a4b812284c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e811e637b859f3e327ece6af28174bd6602cec0585af596c9b174127ab7276752c3e519e668757933945f2906f4a6856a96e68575eb9139f32365bfa6b8a36b
|
7
|
+
data.tar.gz: 54e94c7d6704a340d1e5a5ad7b35e0715b06b6604ecec4671a079cea6049f786308258a44378280597d59e66c17e4eefd6317e94fe9a465671c2d093a7395ad3
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,100 @@
|
|
1
|
+
## 0.15.1 - 2021-04-15
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Potential division by zero when calculating the scaling for XObjects
|
6
|
+
* Handling of XObjects with a width or height of zero when drawing on canvas
|
7
|
+
|
8
|
+
|
9
|
+
## 0.15.0 - 2021-04-12
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
|
14
|
+
page
|
15
|
+
* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
|
16
|
+
* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
|
17
|
+
object
|
18
|
+
* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
|
19
|
+
fields
|
20
|
+
* Support for form field flattening to the `hexapdf form` CLI command
|
21
|
+
|
22
|
+
### Changed
|
23
|
+
|
24
|
+
* **Breaking change**: Overhauled the interface for accessing appearances of
|
25
|
+
annotations to make it more convenient
|
26
|
+
* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
|
27
|
+
value
|
28
|
+
* [HexaPDF::MalformedPDFError#pos] to be an accessor instead of a reader and to update
|
29
|
+
the exception message
|
30
|
+
* Configuration option 'acro_form.fallback_font' to allow a callable object for
|
31
|
+
more advanced fallback font handling
|
32
|
+
|
33
|
+
### Fixed
|
34
|
+
|
35
|
+
* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
|
36
|
+
empty background color arrays
|
37
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
|
38
|
+
stored in the document in case the widget is embedded
|
39
|
+
* Processing of invalid PDF files containing a space,CR,LF combination after
|
40
|
+
the 'stream' keyword
|
41
|
+
* Cross-reference stream reconstruction with respect to detection of linearized
|
42
|
+
files
|
43
|
+
* Detection of existing appearances for AcroForm push button fields when
|
44
|
+
creating appearances
|
45
|
+
|
46
|
+
|
47
|
+
## 0.14.4 - 2021-02-27
|
48
|
+
|
49
|
+
### Added
|
50
|
+
|
51
|
+
* Support for the Crypt filters
|
52
|
+
|
53
|
+
### Changed
|
54
|
+
|
55
|
+
* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
|
56
|
+
|
57
|
+
### Fixed
|
58
|
+
|
59
|
+
* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
|
60
|
+
* Processing of invalid PDF files containing NaN and Inf instead of numbers
|
61
|
+
* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
|
62
|
+
line character
|
63
|
+
* Cross-reference table reconstruction to handle the case of an entry specifying
|
64
|
+
a non-existent indirect object
|
65
|
+
* Cross-reference table reconstruction to handle trailers specified by cross-
|
66
|
+
reference streams
|
67
|
+
* Cross-reference table reconstruction to use the set security handler for
|
68
|
+
decrypting indirect objects
|
69
|
+
* Parsing of cross-reference streams where data is missing
|
70
|
+
|
71
|
+
|
72
|
+
## 0.14.3 - 2021-02-16
|
73
|
+
|
74
|
+
### Fixed
|
75
|
+
|
76
|
+
* Bug in [HexaPDF::Font::TrueType::Subsetter#use_glyph] which led to corrupt
|
77
|
+
text output
|
78
|
+
* [HexaPDF::Serializer] to handle infinite recursion problem
|
79
|
+
* Cross-reference table reconstruction to avoid an O(n^2) performance problem
|
80
|
+
* [HexaPDF::Type::Resources] validation to handle an invalid `/ProcSet` entry
|
81
|
+
containing a single value instead of an array
|
82
|
+
* Processing of invalid PDF files missing a required value in appearance streams
|
83
|
+
* Processing of invalid empty arrays that should be rectangles by converting
|
84
|
+
them to PDF null objects
|
85
|
+
* Processing of invalid PDF files containing indirect objects with offset 0
|
86
|
+
* Processing of invalid PDF files containing a space/CR or space/LF combination
|
87
|
+
after the 'stream' keyword
|
88
|
+
|
89
|
+
|
90
|
+
## 0.14.2 - 2021-01-22
|
91
|
+
|
92
|
+
### Fixed
|
93
|
+
|
94
|
+
* [HexaPDF::Font::TrueType::Subsetter#use_glyph] to really avoid using subset
|
95
|
+
glyph ID 41 (`)`)
|
96
|
+
|
97
|
+
|
1
98
|
## 0.14.1 - 2021-01-21
|
2
99
|
|
3
100
|
### Changed
|
data/lib/hexapdf/cli/form.rb
CHANGED
@@ -52,18 +52,26 @@ module HexaPDF
|
|
52
52
|
If the the output file name is not given, all form fields are listed in page order. Use
|
53
53
|
the global --verbose option to show additional information like field type and location.
|
54
54
|
|
55
|
-
If the output file name is given, the fields can be
|
56
|
-
|
57
|
-
|
55
|
+
If the output file name is given, the fields can be filled out interactively, via a
|
56
|
+
template or just flattened by using the respective options. Form field flattening can also
|
57
|
+
be activated in addition to filling out the form. If neither --fill, --template nor
|
58
|
+
--flatten is specified, --fill is implied.
|
58
59
|
EOF
|
59
60
|
|
60
61
|
options.on("--password PASSWORD", "-p", String,
|
61
62
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
62
63
|
@password = (pwd == '-' ? read_password : pwd)
|
63
64
|
end
|
65
|
+
options.on("--fill", "Fill out the form") do
|
66
|
+
@fill = true
|
67
|
+
end
|
64
68
|
options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
|
65
|
-
"Use the template file for the field values") do |template|
|
69
|
+
"Use the template file for the field values (implies --fill)") do |template|
|
66
70
|
@template = template
|
71
|
+
@fill = true
|
72
|
+
end
|
73
|
+
options.on('--flatten', 'Flatten the form fields') do
|
74
|
+
@flatten = true
|
67
75
|
end
|
68
76
|
options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
|
69
77
|
"appearance. Default: use setting from input PDF") do |need_appearances|
|
@@ -75,6 +83,8 @@ module HexaPDF
|
|
75
83
|
end
|
76
84
|
|
77
85
|
@password = nil
|
86
|
+
@fill = false
|
87
|
+
@flatten = false
|
78
88
|
@template = nil
|
79
89
|
@need_appearances = nil
|
80
90
|
@incremental = true
|
@@ -82,16 +92,28 @@ module HexaPDF
|
|
82
92
|
|
83
93
|
def execute(in_file, out_file = nil) #:nodoc:
|
84
94
|
maybe_raise_on_existing_file(out_file) if out_file
|
95
|
+
if (@fill || @flatten) && !out_file
|
96
|
+
raise "Output file missing"
|
97
|
+
end
|
85
98
|
with_document(in_file, password: @password, out_file: out_file,
|
86
99
|
incremental: @incremental) do |doc|
|
87
100
|
if !doc.acro_form
|
88
101
|
raise "This PDF doesn't contain an interactive form"
|
89
102
|
elsif out_file
|
90
103
|
doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
|
91
|
-
if @
|
92
|
-
|
93
|
-
|
94
|
-
|
104
|
+
if @fill || !@flatten
|
105
|
+
if @template
|
106
|
+
fill_form_with_template(doc)
|
107
|
+
else
|
108
|
+
fill_form(doc)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
if @flatten
|
112
|
+
unless doc.acro_form.flatten.empty?
|
113
|
+
$stderr.puts "Warning: Not all form fields could be flattened"
|
114
|
+
doc.catalog.delete(:AcroForm)
|
115
|
+
doc.delete(doc.acro_form)
|
116
|
+
end
|
95
117
|
end
|
96
118
|
else
|
97
119
|
list_form_fields(doc)
|
@@ -164,9 +164,20 @@ module HexaPDF
|
|
164
164
|
# acro_form.fallback_font::
|
165
165
|
# The font that should be used when a variable text field references a font that cannot be used.
|
166
166
|
#
|
167
|
-
# Can
|
168
|
-
#
|
169
|
-
#
|
167
|
+
# Can be one of the following:
|
168
|
+
#
|
169
|
+
# * The name of a font, like 'Helvetica'.
|
170
|
+
#
|
171
|
+
# * An array consisting of the font name and a hash of font options, like ['Helvetica',
|
172
|
+
# variant: :italic].
|
173
|
+
#
|
174
|
+
# * A callable object receiving the field and the font object (or +nil+ if no valid font object
|
175
|
+
# was found) and which has to return either a font name or an array consisting of the font
|
176
|
+
# name and a hash of font options. This way the response can be different depending on the
|
177
|
+
# original font and it would also allow e.g. modifying the configured fonts to add custom
|
178
|
+
# ones.
|
179
|
+
#
|
180
|
+
# If set to +nil+, the use of the fallback font is disabled.
|
170
181
|
#
|
171
182
|
# Default is 'Helvetica'.
|
172
183
|
#
|
@@ -393,7 +404,7 @@ module HexaPDF
|
|
393
404
|
DCTDecode: 'HexaPDF::Filter::PassThrough',
|
394
405
|
DCT: 'HexaPDF::Filter::PassThrough',
|
395
406
|
JPXDecode: 'HexaPDF::Filter::PassThrough',
|
396
|
-
Crypt:
|
407
|
+
Crypt: 'HexaPDF::Filter::Crypt',
|
397
408
|
Encryption: 'HexaPDF::Filter::Encryption',
|
398
409
|
},
|
399
410
|
'font.map' => {},
|
@@ -516,6 +527,9 @@ module HexaPDF
|
|
516
527
|
XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
|
517
528
|
XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
|
518
529
|
Border: 'HexaPDF::Type::Annotation::Border',
|
530
|
+
SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
|
531
|
+
SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
|
532
|
+
SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
|
519
533
|
},
|
520
534
|
'object.subtype_map' => {
|
521
535
|
nil => {
|
@@ -561,6 +575,7 @@ module HexaPDF
|
|
561
575
|
Tx: 'HexaPDF::Type::AcroForm::TextField',
|
562
576
|
Btn: 'HexaPDF::Type::AcroForm::ButtonField',
|
563
577
|
Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
|
578
|
+
Sig: 'HexaPDF::Type::AcroForm::SignatureField',
|
564
579
|
},
|
565
580
|
})
|
566
581
|
|
@@ -1260,6 +1260,7 @@ module HexaPDF
|
|
1260
1260
|
unless obj.kind_of?(HexaPDF::Stream)
|
1261
1261
|
obj = context.document.images.add(obj)
|
1262
1262
|
end
|
1263
|
+
return obj if obj.width == 0 || obj.height == 0
|
1263
1264
|
|
1264
1265
|
width, height = calculate_dimensions(obj.width, obj.height,
|
1265
1266
|
rwidth: width, rheight: height)
|
data/lib/hexapdf/dictionary.rb
CHANGED
@@ -156,6 +156,9 @@ module HexaPDF
|
|
156
156
|
#
|
157
157
|
# * Returns the default value if one is specified and no value is available.
|
158
158
|
#
|
159
|
+
# Note: If field information is available for the entry, a Hash or Array value will always be
|
160
|
+
# wrapped by Dictionary or PDFArray. Otherwise, the value will be returned as-is.
|
161
|
+
#
|
159
162
|
# Note: This method may throw a "can't add a new key into hash during iteration" error in
|
160
163
|
# certain cases because it potentially modifies the underlying hash!
|
161
164
|
def [](name)
|
@@ -344,7 +344,7 @@ module HexaPDF
|
|
344
344
|
# Wraps a given array in the Rectangle class. Otherwise returns +nil+.
|
345
345
|
def self.convert(data, _type, document)
|
346
346
|
return unless data.kind_of?(Array) || data.kind_of?(HexaPDF::PDFArray)
|
347
|
-
document.wrap(data, type: Rectangle)
|
347
|
+
data.empty? ? document.wrap(nil) : document.wrap(data, type: Rectangle)
|
348
348
|
end
|
349
349
|
|
350
350
|
end
|
@@ -268,7 +268,7 @@ module HexaPDF
|
|
268
268
|
str.replace(string_algorithm.decrypt(key, str))
|
269
269
|
end
|
270
270
|
|
271
|
-
if obj.kind_of?(HexaPDF::Stream)
|
271
|
+
if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
|
272
272
|
unless string_algorithm == stream_algorithm
|
273
273
|
key = object_key(obj.oid, obj.gen, stream_algorithm)
|
274
274
|
end
|
@@ -300,7 +300,12 @@ module HexaPDF
|
|
300
300
|
obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
|
301
301
|
obj.raw_stream.undecrypted_fiber
|
302
302
|
else
|
303
|
-
|
303
|
+
filter = obj[:Filter]
|
304
|
+
if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
|
305
|
+
result
|
306
|
+
else
|
307
|
+
stream_algorithm.encryption_fiber(key, result)
|
308
|
+
end
|
304
309
|
end
|
305
310
|
end
|
306
311
|
|
data/lib/hexapdf/error.rb
CHANGED
@@ -43,12 +43,13 @@ module HexaPDF
|
|
43
43
|
class MalformedPDFError < Error
|
44
44
|
|
45
45
|
# The byte position in the PDF file where the error occurred.
|
46
|
-
|
46
|
+
attr_accessor :pos
|
47
47
|
|
48
48
|
# Creates a new malformed PDF error object for the given exception message.
|
49
49
|
#
|
50
|
-
# The byte position where the error occured
|
51
|
-
|
50
|
+
# The byte position where the error occurred can either be given via the +pos+ argument or later
|
51
|
+
# via the #pos accessor but must be set before the exception message is retrieved.
|
52
|
+
def initialize(message, pos: nil)
|
52
53
|
super(message)
|
53
54
|
@pos = pos
|
54
55
|
end
|
data/lib/hexapdf/filter.rb
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2020 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'hexapdf/error'
|
38
|
+
|
39
|
+
module HexaPDF
|
40
|
+
module Filter
|
41
|
+
|
42
|
+
# This filter module implements the Crypt filter. The only supported part is using the Identity
|
43
|
+
# filter.
|
44
|
+
module Crypt
|
45
|
+
|
46
|
+
# See HexaPDF::Filter
|
47
|
+
def self.decoder(source, options)
|
48
|
+
if !options || !options.key?(:Name) || options[:Name] == :Identity
|
49
|
+
source
|
50
|
+
else
|
51
|
+
raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
singleton_class.send(:alias_method, :encoder, :decoder)
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
@@ -63,10 +63,14 @@ module HexaPDF
|
|
63
63
|
def use_glyph(glyph_id)
|
64
64
|
return @glyph_map[glyph_id] if @glyph_map.key?(glyph_id)
|
65
65
|
@last_id += 1
|
66
|
-
# Handle codes for ASCII characters \r, (, ) and \ specially so that
|
67
|
-
# the output (PDF serialization would need to escape them)
|
68
|
-
if @last_id == 13 || @last_id == 40 || @last_id ==
|
66
|
+
# Handle codes for ASCII characters \r (13), (, ) (40, 41) and \ (92) specially so that
|
67
|
+
# they never appear in the output (PDF serialization would need to escape them)
|
68
|
+
if @last_id == 13 || @last_id == 40 || @last_id == 92
|
69
69
|
@glyph_map[:"s#{@last_id}"] = @last_id
|
70
|
+
if @last_id == 40
|
71
|
+
@last_id += 1
|
72
|
+
@glyph_map[:"s#{@last_id}"] = @last_id
|
73
|
+
end
|
70
74
|
@last_id += 1
|
71
75
|
end
|
72
76
|
@glyph_map[glyph_id] = @last_id
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -56,10 +56,12 @@ module HexaPDF
|
|
56
56
|
# PDF references are resolved using the associated Document object.
|
57
57
|
def initialize(io, document)
|
58
58
|
@io = io
|
59
|
-
|
59
|
+
on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
|
60
|
+
@tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
|
60
61
|
@document = document
|
61
62
|
@object_stream_data = {}
|
62
63
|
@reconstructed_revision = nil
|
64
|
+
@in_reconstruct_revision = false
|
63
65
|
retrieve_pdf_header_offset_and_version
|
64
66
|
end
|
65
67
|
|
@@ -72,7 +74,13 @@ module HexaPDF
|
|
72
74
|
obj, oid, gen, stream =
|
73
75
|
case xref_entry.type
|
74
76
|
when :in_use
|
75
|
-
|
77
|
+
if xref_entry.pos == 0 && xref_entry.oid != 0
|
78
|
+
# Handle seen-in-the-wild objects with invalid offset 0
|
79
|
+
maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
|
80
|
+
[nil, xref_entry.oid, xref_entry.gen, nil]
|
81
|
+
else
|
82
|
+
parse_indirect_object(xref_entry.pos)
|
83
|
+
end
|
76
84
|
when :free
|
77
85
|
[nil, xref_entry.oid, xref_entry.gen, nil]
|
78
86
|
when :compressed
|
@@ -83,12 +91,13 @@ module HexaPDF
|
|
83
91
|
|
84
92
|
if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
|
85
93
|
raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
|
86
|
-
|
94
|
+
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
|
87
95
|
end
|
88
96
|
|
89
97
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
90
98
|
rescue HexaPDF::MalformedPDFError
|
91
|
-
reconstructed_revision.object(xref_entry)
|
99
|
+
reconstructed_revision.object(xref_entry) ||
|
100
|
+
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
92
101
|
end
|
93
102
|
|
94
103
|
# Parses the indirect object at the specified offset.
|
@@ -131,7 +140,11 @@ module HexaPDF
|
|
131
140
|
raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
|
132
141
|
end
|
133
142
|
tok1 = @tokenizer.next_byte
|
134
|
-
|
143
|
+
if tok1 == 32 # space
|
144
|
+
maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
|
145
|
+
tok1 = @tokenizer.next_byte
|
146
|
+
end
|
147
|
+
tok2 = @tokenizer.next_byte if tok1 == 13 # CR
|
135
148
|
if tok1 != 10 && tok1 != 13
|
136
149
|
raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
|
137
150
|
elsif tok1 == 13 && tok2 != 10
|
@@ -203,7 +216,12 @@ module HexaPDF
|
|
203
216
|
unless obj.respond_to?(:xref_section)
|
204
217
|
raise_malformed("Object is not a cross-reference stream", pos: pos)
|
205
218
|
end
|
206
|
-
|
219
|
+
begin
|
220
|
+
xref_section = obj.xref_section
|
221
|
+
rescue MalformedPDFError => e
|
222
|
+
e.pos = pos
|
223
|
+
raise
|
224
|
+
end
|
207
225
|
trailer = obj.trailer
|
208
226
|
unless xref_section.entry?(obj.oid, obj.gen)
|
209
227
|
maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
|
@@ -381,34 +399,42 @@ module HexaPDF
|
|
381
399
|
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
382
400
|
# cross-reference table, later objects overwriting prior ones.
|
383
401
|
def reconstruct_revision
|
402
|
+
return if @in_reconstruct_revision
|
403
|
+
@in_reconstruct_revision = true
|
404
|
+
|
384
405
|
raise unless @document.config['parser.try_xref_reconstruction']
|
385
406
|
msg = "#{$!} - trying cross-reference table reconstruction"
|
386
407
|
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
387
408
|
|
388
409
|
xref = XRefSection.new
|
389
410
|
@tokenizer.pos = 0
|
411
|
+
linearized = nil
|
390
412
|
while true
|
391
413
|
@tokenizer.skip_whitespace
|
392
414
|
pos = @tokenizer.pos
|
393
|
-
@tokenizer.scan_until(/(\n|\r\n?)
|
415
|
+
@tokenizer.scan_until(/(\n|\r\n?)+|\z/)
|
394
416
|
next_new_line_pos = @tokenizer.pos
|
395
417
|
@tokenizer.pos = pos
|
396
418
|
|
397
|
-
token = @tokenizer.
|
419
|
+
token = @tokenizer.next_integer_or_keyword rescue nil
|
398
420
|
if token.kind_of?(Integer)
|
399
|
-
gen = @tokenizer.
|
400
|
-
tok = @tokenizer.
|
421
|
+
gen = @tokenizer.next_integer_or_keyword rescue nil
|
422
|
+
tok = @tokenizer.next_integer_or_keyword rescue nil
|
401
423
|
if @tokenizer.pos > next_new_line_pos
|
402
424
|
@tokenizer.pos = next_new_line_pos
|
403
425
|
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
404
426
|
xref.add_in_use_entry(token, gen, pos)
|
427
|
+
if linearized.nil?
|
428
|
+
obj = @tokenizer.next_object rescue nil
|
429
|
+
linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
|
430
|
+
end
|
405
431
|
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
406
432
|
end
|
407
433
|
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
408
434
|
obj = @tokenizer.next_object rescue nil
|
409
435
|
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
410
436
|
# linearized file.
|
411
|
-
trailer = obj if obj.kind_of?(Hash) && (
|
437
|
+
trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
|
412
438
|
elsif token == Tokenizer::NO_MORE_TOKENS
|
413
439
|
break
|
414
440
|
else
|
@@ -416,16 +442,22 @@ module HexaPDF
|
|
416
442
|
end
|
417
443
|
end
|
418
444
|
|
419
|
-
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
420
445
|
if !trailer || trailer.empty?
|
421
|
-
|
446
|
+
_, trailer = load_revision(startxref_offset) rescue nil
|
447
|
+
unless trailer
|
448
|
+
@in_reconstruct_revision = false
|
449
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
450
|
+
end
|
422
451
|
end
|
452
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
423
453
|
|
424
454
|
loader = lambda do |xref_entry|
|
425
455
|
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
426
|
-
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
456
|
+
obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
457
|
+
@document.security_handler ? @document.security_handler.decrypt(obj) : obj
|
427
458
|
end
|
428
459
|
|
460
|
+
@in_reconstruct_revision = false
|
429
461
|
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
430
462
|
loader: loader)
|
431
463
|
end
|