hexapdf 0.14.2 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +96 -0
- data/lib/hexapdf/cli/form.rb +30 -8
- data/lib/hexapdf/configuration.rb +19 -4
- data/lib/hexapdf/content/canvas.rb +1 -0
- data/lib/hexapdf/dictionary.rb +3 -0
- data/lib/hexapdf/dictionary_fields.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +7 -2
- data/lib/hexapdf/encryption/standard_security_handler.rb +12 -0
- data/lib/hexapdf/error.rb +4 -3
- data/lib/hexapdf/filter.rb +1 -0
- data/lib/hexapdf/filter/crypt.rb +60 -0
- data/lib/hexapdf/font/true_type/subsetter.rb +5 -1
- data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
- data/lib/hexapdf/parser.rb +46 -14
- data/lib/hexapdf/pdf_array.rb +3 -0
- data/lib/hexapdf/revision.rb +16 -0
- data/lib/hexapdf/serializer.rb +10 -3
- data/lib/hexapdf/tokenizer.rb +44 -3
- data/lib/hexapdf/type/acro_form.rb +1 -0
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +32 -17
- data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
- data/lib/hexapdf/type/acro_form/field.rb +1 -0
- data/lib/hexapdf/type/acro_form/form.rb +37 -0
- data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
- data/lib/hexapdf/type/annotation.rb +13 -9
- data/lib/hexapdf/type/annotations/widget.rb +3 -1
- data/lib/hexapdf/type/font_descriptor.rb +9 -2
- data/lib/hexapdf/type/page.rb +81 -0
- data/lib/hexapdf/type/resources.rb +4 -0
- data/lib/hexapdf/type/xref_stream.rb +7 -0
- data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/content/test_canvas.rb +21 -0
- data/test/hexapdf/encryption/test_security_handler.rb +15 -0
- data/test/hexapdf/encryption/test_standard_security_handler.rb +26 -0
- data/test/hexapdf/filter/test_crypt.rb +21 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +2 -0
- data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
- data/test/hexapdf/test_dictionary_fields.rb +7 -0
- data/test/hexapdf/test_parser.rb +82 -2
- data/test/hexapdf/test_revision.rb +21 -0
- data/test/hexapdf/test_serializer.rb +10 -0
- data/test/hexapdf/test_tokenizer.rb +50 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +24 -3
- data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
- data/test/hexapdf/type/acro_form/test_field.rb +5 -0
- data/test/hexapdf/type/acro_form/test_form.rb +46 -2
- data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
- data/test/hexapdf/type/annotations/test_widget.rb +2 -0
- data/test/hexapdf/type/test_annotation.rb +20 -10
- data/test/hexapdf/type/test_font_descriptor.rb +7 -0
- data/test/hexapdf/type/test_page.rb +187 -49
- data/test/hexapdf/type/test_resources.rb +6 -0
- data/test/hexapdf/type/test_xref_stream.rb +7 -0
- data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a6ef1bdd17664ef0b9474b931d31ad69c57c77928ce2b69a3bbd5dadda0dce6
|
4
|
+
data.tar.gz: 11b87436d19cc5498fd6a77f0b6c410e717809264153964668be0f5199b9354d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 525a55832758b5eecd1a7f2daf5f220e1afa7ff8e88ca2d65998e658585f290ff2018450e50423f2331b7f195865eab8b1c62562ecdbf3671b46d4da770aed12
|
7
|
+
data.tar.gz: 0b0e18c7f79f0e2a54080fefad1dd4d94e15157f72e5360a3ebd827fc0cc2037ae6e06302155426e7f0900e97ee0cee678e069bd8ef05a9333d684c50e1343a5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,99 @@
|
|
1
|
+
## 0.15.2 - 2021-05-01
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
* Handling of unencrypted metadata streams
|
6
|
+
|
7
|
+
|
8
|
+
## 0.15.1 - 2021-04-15
|
9
|
+
|
10
|
+
### Fixed
|
11
|
+
|
12
|
+
* Potential division by zero when calculating the scaling for XObjects
|
13
|
+
* Handling of XObjects with a width or height of zero when drawing on canvas
|
14
|
+
|
15
|
+
|
16
|
+
## 0.15.0 - 2021-04-12
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
|
21
|
+
page
|
22
|
+
* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
|
23
|
+
* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
|
24
|
+
object
|
25
|
+
* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
|
26
|
+
fields
|
27
|
+
* Support for form field flattening to the `hexapdf form` CLI command
|
28
|
+
|
29
|
+
### Changed
|
30
|
+
|
31
|
+
* **Breaking change**: Overhauled the interface for accessing appearances of
|
32
|
+
annotations to make it more convenient
|
33
|
+
* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
|
34
|
+
value
|
35
|
+
* [HexaPDF::MalformedPDFError#pos] an accessor instead of a reader and update
|
36
|
+
the exception message
|
37
|
+
* Configuration option 'acro_form.fallback_font' to allow a callable object for
|
38
|
+
more advanced fallback font handling
|
39
|
+
|
40
|
+
### Fixed
|
41
|
+
|
42
|
+
* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
|
43
|
+
empty background color arrays
|
44
|
+
* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
|
45
|
+
stored in the document in case the widget is embedded
|
46
|
+
* Processing of invalid PDF files containing a space,CR,LF combination after
|
47
|
+
the 'stream' keyword
|
48
|
+
* Cross-reference stream reconstruction with respect to detection of linearized
|
49
|
+
files
|
50
|
+
* Detection of existing appearances for AcroForm push button fields when
|
51
|
+
creating appearances
|
52
|
+
|
53
|
+
|
54
|
+
## 0.14.4 - 2021-02-27
|
55
|
+
|
56
|
+
### Added
|
57
|
+
|
58
|
+
* Support for the Crypt filters
|
59
|
+
|
60
|
+
### Changed
|
61
|
+
|
62
|
+
* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
|
63
|
+
|
64
|
+
### Fixed
|
65
|
+
|
66
|
+
* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
|
67
|
+
* Processing of invalid PDF files containing NaN and Inf instead of numbers
|
68
|
+
* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
|
69
|
+
line character
|
70
|
+
* Cross-reference table reconstruction to handle the case of an entry specifying
|
71
|
+
a non-existent indirect object
|
72
|
+
* Cross-reference table reconstruction to handle trailers specified by cross-
|
73
|
+
reference streams
|
74
|
+
* Cross-reference table reconstruction to use the set security handler for
|
75
|
+
decrypting indirect objects
|
76
|
+
* Parsing of cross-reference streams where data is missing
|
77
|
+
|
78
|
+
|
79
|
+
## 0.14.3 - 2021-02-16
|
80
|
+
|
81
|
+
### Fixed
|
82
|
+
|
83
|
+
* Bug in [HexaPDF::Font::TrueType::Subsetter#use_glyph] which led to corrupt
|
84
|
+
text output
|
85
|
+
* [HexaPDF::Serializer] to handle infinite recursion problem
|
86
|
+
* Cross-reference table reconstruction to avoid an O(n^2) performance problem
|
87
|
+
* [HexaPDF::Type::Resources] validation to handle an invalid `/ProcSet` entry
|
88
|
+
containing a single value instead of an array
|
89
|
+
* Processing of invalid PDF files missing a required value in appearance streams
|
90
|
+
* Processing of invalid empty arrays that should be rectangles by converting
|
91
|
+
them to PDF null objects
|
92
|
+
* Processing of invalid PDF files containing indirect objects with offset 0
|
93
|
+
* Processing of invalid PDF files containing a space/CR or space/LF combination
|
94
|
+
after the 'stream' keyword
|
95
|
+
|
96
|
+
|
1
97
|
## 0.14.2 - 2021-01-22
|
2
98
|
|
3
99
|
### Fixed
|
data/lib/hexapdf/cli/form.rb
CHANGED
@@ -52,18 +52,26 @@ module HexaPDF
|
|
52
52
|
If the the output file name is not given, all form fields are listed in page order. Use
|
53
53
|
the global --verbose option to show additional information like field type and location.
|
54
54
|
|
55
|
-
If the output file name is given, the fields can be
|
56
|
-
|
57
|
-
|
55
|
+
If the output file name is given, the fields can be filled out interactively, via a
|
56
|
+
template or just flattened by using the respective options. Form field flattening can also
|
57
|
+
be activated in addition to filling out the form. If neither --fill, --template nor
|
58
|
+
--flatten is specified, --fill is implied.
|
58
59
|
EOF
|
59
60
|
|
60
61
|
options.on("--password PASSWORD", "-p", String,
|
61
62
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
62
63
|
@password = (pwd == '-' ? read_password : pwd)
|
63
64
|
end
|
65
|
+
options.on("--fill", "Fill out the form") do
|
66
|
+
@fill = true
|
67
|
+
end
|
64
68
|
options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
|
65
|
-
"Use the template file for the field values") do |template|
|
69
|
+
"Use the template file for the field values (implies --fill)") do |template|
|
66
70
|
@template = template
|
71
|
+
@fill = true
|
72
|
+
end
|
73
|
+
options.on('--flatten', 'Flatten the form fields') do
|
74
|
+
@flatten = true
|
67
75
|
end
|
68
76
|
options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
|
69
77
|
"appearance. Default: use setting from input PDF") do |need_appearances|
|
@@ -75,6 +83,8 @@ module HexaPDF
|
|
75
83
|
end
|
76
84
|
|
77
85
|
@password = nil
|
86
|
+
@fill = false
|
87
|
+
@flatten = false
|
78
88
|
@template = nil
|
79
89
|
@need_appearances = nil
|
80
90
|
@incremental = true
|
@@ -82,16 +92,28 @@ module HexaPDF
|
|
82
92
|
|
83
93
|
def execute(in_file, out_file = nil) #:nodoc:
|
84
94
|
maybe_raise_on_existing_file(out_file) if out_file
|
95
|
+
if (@fill || @flatten) && !out_file
|
96
|
+
raise "Output file missing"
|
97
|
+
end
|
85
98
|
with_document(in_file, password: @password, out_file: out_file,
|
86
99
|
incremental: @incremental) do |doc|
|
87
100
|
if !doc.acro_form
|
88
101
|
raise "This PDF doesn't contain an interactive form"
|
89
102
|
elsif out_file
|
90
103
|
doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
|
91
|
-
if @
|
92
|
-
|
93
|
-
|
94
|
-
|
104
|
+
if @fill || !@flatten
|
105
|
+
if @template
|
106
|
+
fill_form_with_template(doc)
|
107
|
+
else
|
108
|
+
fill_form(doc)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
if @flatten
|
112
|
+
unless doc.acro_form.flatten.empty?
|
113
|
+
$stderr.puts "Warning: Not all form fields could be flattened"
|
114
|
+
doc.catalog.delete(:AcroForm)
|
115
|
+
doc.delete(doc.acro_form)
|
116
|
+
end
|
95
117
|
end
|
96
118
|
else
|
97
119
|
list_form_fields(doc)
|
@@ -164,9 +164,20 @@ module HexaPDF
|
|
164
164
|
# acro_form.fallback_font::
|
165
165
|
# The font that should be used when a variable text field references a font that cannot be used.
|
166
166
|
#
|
167
|
-
# Can
|
168
|
-
#
|
169
|
-
#
|
167
|
+
# Can be one of the following:
|
168
|
+
#
|
169
|
+
# * The name of a font, like 'Helvetica'.
|
170
|
+
#
|
171
|
+
# * An array consisting of the font name and a hash of font options, like ['Helvetica',
|
172
|
+
# variant: :italic].
|
173
|
+
#
|
174
|
+
# * A callable object receiving the field and the font object (or +nil+ if no valid font object
|
175
|
+
# was found) and which has to return either a font name or an array consisting of the font
|
176
|
+
# name and a hash of font options. This way the response can be different depending on the
|
177
|
+
# original font and it would also allow e.g. modifying the configured fonts to add custom
|
178
|
+
# ones.
|
179
|
+
#
|
180
|
+
# If set to +nil+, the use of the fallback font is disabled.
|
170
181
|
#
|
171
182
|
# Default is 'Helvetica'.
|
172
183
|
#
|
@@ -393,7 +404,7 @@ module HexaPDF
|
|
393
404
|
DCTDecode: 'HexaPDF::Filter::PassThrough',
|
394
405
|
DCT: 'HexaPDF::Filter::PassThrough',
|
395
406
|
JPXDecode: 'HexaPDF::Filter::PassThrough',
|
396
|
-
Crypt:
|
407
|
+
Crypt: 'HexaPDF::Filter::Crypt',
|
397
408
|
Encryption: 'HexaPDF::Filter::Encryption',
|
398
409
|
},
|
399
410
|
'font.map' => {},
|
@@ -516,6 +527,9 @@ module HexaPDF
|
|
516
527
|
XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
|
517
528
|
XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
|
518
529
|
Border: 'HexaPDF::Type::Annotation::Border',
|
530
|
+
SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
|
531
|
+
SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
|
532
|
+
SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
|
519
533
|
},
|
520
534
|
'object.subtype_map' => {
|
521
535
|
nil => {
|
@@ -561,6 +575,7 @@ module HexaPDF
|
|
561
575
|
Tx: 'HexaPDF::Type::AcroForm::TextField',
|
562
576
|
Btn: 'HexaPDF::Type::AcroForm::ButtonField',
|
563
577
|
Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
|
578
|
+
Sig: 'HexaPDF::Type::AcroForm::SignatureField',
|
564
579
|
},
|
565
580
|
})
|
566
581
|
|
@@ -1260,6 +1260,7 @@ module HexaPDF
|
|
1260
1260
|
unless obj.kind_of?(HexaPDF::Stream)
|
1261
1261
|
obj = context.document.images.add(obj)
|
1262
1262
|
end
|
1263
|
+
return obj if obj.width == 0 || obj.height == 0
|
1263
1264
|
|
1264
1265
|
width, height = calculate_dimensions(obj.width, obj.height,
|
1265
1266
|
rwidth: width, rheight: height)
|
data/lib/hexapdf/dictionary.rb
CHANGED
@@ -156,6 +156,9 @@ module HexaPDF
|
|
156
156
|
#
|
157
157
|
# * Returns the default value if one is specified and no value is available.
|
158
158
|
#
|
159
|
+
# Note: If field information is available for the entry, a Hash or Array value will always be
|
160
|
+
# wrapped by Dictionary or PDFArray. Otherwise, the value will be returned as-is.
|
161
|
+
#
|
159
162
|
# Note: This method may throw a "can't add a new key into hash during iteration" error in
|
160
163
|
# certain cases because it potentially modifies the underlying hash!
|
161
164
|
def [](name)
|
@@ -344,7 +344,7 @@ module HexaPDF
|
|
344
344
|
# Wraps a given array in the Rectangle class. Otherwise returns +nil+.
|
345
345
|
def self.convert(data, _type, document)
|
346
346
|
return unless data.kind_of?(Array) || data.kind_of?(HexaPDF::PDFArray)
|
347
|
-
document.wrap(data, type: Rectangle)
|
347
|
+
data.empty? ? document.wrap(nil) : document.wrap(data, type: Rectangle)
|
348
348
|
end
|
349
349
|
|
350
350
|
end
|
@@ -268,7 +268,7 @@ module HexaPDF
|
|
268
268
|
str.replace(string_algorithm.decrypt(key, str))
|
269
269
|
end
|
270
270
|
|
271
|
-
if obj.kind_of?(HexaPDF::Stream)
|
271
|
+
if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
|
272
272
|
unless string_algorithm == stream_algorithm
|
273
273
|
key = object_key(obj.oid, obj.gen, stream_algorithm)
|
274
274
|
end
|
@@ -300,7 +300,12 @@ module HexaPDF
|
|
300
300
|
obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
|
301
301
|
obj.raw_stream.undecrypted_fiber
|
302
302
|
else
|
303
|
-
|
303
|
+
filter = obj[:Filter]
|
304
|
+
if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
|
305
|
+
result
|
306
|
+
else
|
307
|
+
stream_algorithm.encryption_fiber(key, result)
|
308
|
+
end
|
304
309
|
end
|
305
310
|
end
|
306
311
|
|
@@ -240,6 +240,18 @@ module HexaPDF
|
|
240
240
|
end
|
241
241
|
end
|
242
242
|
|
243
|
+
def decrypt(obj) #:nodoc:
|
244
|
+
if obj.type == :Metadata && obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata]
|
245
|
+
obj
|
246
|
+
else
|
247
|
+
super
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
def encrypt_stream(obj) #:nodoc
|
252
|
+
obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata] ? obj.stream_encoder : super
|
253
|
+
end
|
254
|
+
|
243
255
|
private
|
244
256
|
|
245
257
|
# Prepares the security handler for use in encrypting the document.
|
data/lib/hexapdf/error.rb
CHANGED
@@ -43,12 +43,13 @@ module HexaPDF
|
|
43
43
|
class MalformedPDFError < Error
|
44
44
|
|
45
45
|
# The byte position in the PDF file where the error occurred.
|
46
|
-
|
46
|
+
attr_accessor :pos
|
47
47
|
|
48
48
|
# Creates a new malformed PDF error object for the given exception message.
|
49
49
|
#
|
50
|
-
# The byte position where the error occured
|
51
|
-
|
50
|
+
# The byte position where the error occurred can either be given via the +pos+ argument or later
|
51
|
+
# via the #pos accessor but must be set before the exception message is retrieved.
|
52
|
+
def initialize(message, pos: nil)
|
52
53
|
super(message)
|
53
54
|
@pos = pos
|
54
55
|
end
|
data/lib/hexapdf/filter.rb
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2020 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'hexapdf/error'
|
38
|
+
|
39
|
+
module HexaPDF
|
40
|
+
module Filter
|
41
|
+
|
42
|
+
# This filter module implements the Crypt filter. The only supported part is using the Identity
|
43
|
+
# filter.
|
44
|
+
module Crypt
|
45
|
+
|
46
|
+
# See HexaPDF::Filter
|
47
|
+
def self.decoder(source, options)
|
48
|
+
if !options || !options.key?(:Name) || options[:Name] == :Identity
|
49
|
+
source
|
50
|
+
else
|
51
|
+
raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
singleton_class.send(:alias_method, :encoder, :decoder)
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
@@ -67,7 +67,11 @@ module HexaPDF
|
|
67
67
|
# they never appear in the output (PDF serialization would need to escape them)
|
68
68
|
if @last_id == 13 || @last_id == 40 || @last_id == 92
|
69
69
|
@glyph_map[:"s#{@last_id}"] = @last_id
|
70
|
-
|
70
|
+
if @last_id == 40
|
71
|
+
@last_id += 1
|
72
|
+
@glyph_map[:"s#{@last_id}"] = @last_id
|
73
|
+
end
|
74
|
+
@last_id += 1
|
71
75
|
end
|
72
76
|
@glyph_map[glyph_id] = @last_id
|
73
77
|
end
|
data/lib/hexapdf/parser.rb
CHANGED
@@ -56,10 +56,12 @@ module HexaPDF
|
|
56
56
|
# PDF references are resolved using the associated Document object.
|
57
57
|
def initialize(io, document)
|
58
58
|
@io = io
|
59
|
-
|
59
|
+
on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
|
60
|
+
@tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
|
60
61
|
@document = document
|
61
62
|
@object_stream_data = {}
|
62
63
|
@reconstructed_revision = nil
|
64
|
+
@in_reconstruct_revision = false
|
63
65
|
retrieve_pdf_header_offset_and_version
|
64
66
|
end
|
65
67
|
|
@@ -72,7 +74,13 @@ module HexaPDF
|
|
72
74
|
obj, oid, gen, stream =
|
73
75
|
case xref_entry.type
|
74
76
|
when :in_use
|
75
|
-
|
77
|
+
if xref_entry.pos == 0 && xref_entry.oid != 0
|
78
|
+
# Handle seen-in-the-wild objects with invalid offset 0
|
79
|
+
maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
|
80
|
+
[nil, xref_entry.oid, xref_entry.gen, nil]
|
81
|
+
else
|
82
|
+
parse_indirect_object(xref_entry.pos)
|
83
|
+
end
|
76
84
|
when :free
|
77
85
|
[nil, xref_entry.oid, xref_entry.gen, nil]
|
78
86
|
when :compressed
|
@@ -83,12 +91,13 @@ module HexaPDF
|
|
83
91
|
|
84
92
|
if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
|
85
93
|
raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
|
86
|
-
|
94
|
+
"the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
|
87
95
|
end
|
88
96
|
|
89
97
|
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
90
98
|
rescue HexaPDF::MalformedPDFError
|
91
|
-
reconstructed_revision.object(xref_entry)
|
99
|
+
reconstructed_revision.object(xref_entry) ||
|
100
|
+
@document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
|
92
101
|
end
|
93
102
|
|
94
103
|
# Parses the indirect object at the specified offset.
|
@@ -131,7 +140,11 @@ module HexaPDF
|
|
131
140
|
raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
|
132
141
|
end
|
133
142
|
tok1 = @tokenizer.next_byte
|
134
|
-
|
143
|
+
if tok1 == 32 # space
|
144
|
+
maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
|
145
|
+
tok1 = @tokenizer.next_byte
|
146
|
+
end
|
147
|
+
tok2 = @tokenizer.next_byte if tok1 == 13 # CR
|
135
148
|
if tok1 != 10 && tok1 != 13
|
136
149
|
raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
|
137
150
|
elsif tok1 == 13 && tok2 != 10
|
@@ -203,7 +216,12 @@ module HexaPDF
|
|
203
216
|
unless obj.respond_to?(:xref_section)
|
204
217
|
raise_malformed("Object is not a cross-reference stream", pos: pos)
|
205
218
|
end
|
206
|
-
|
219
|
+
begin
|
220
|
+
xref_section = obj.xref_section
|
221
|
+
rescue MalformedPDFError => e
|
222
|
+
e.pos = pos
|
223
|
+
raise
|
224
|
+
end
|
207
225
|
trailer = obj.trailer
|
208
226
|
unless xref_section.entry?(obj.oid, obj.gen)
|
209
227
|
maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
|
@@ -381,34 +399,42 @@ module HexaPDF
|
|
381
399
|
# If the file contains multiple cross-reference sections, all objects will be put into a single
|
382
400
|
# cross-reference table, later objects overwriting prior ones.
|
383
401
|
def reconstruct_revision
|
402
|
+
return if @in_reconstruct_revision
|
403
|
+
@in_reconstruct_revision = true
|
404
|
+
|
384
405
|
raise unless @document.config['parser.try_xref_reconstruction']
|
385
406
|
msg = "#{$!} - trying cross-reference table reconstruction"
|
386
407
|
@document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
|
387
408
|
|
388
409
|
xref = XRefSection.new
|
389
410
|
@tokenizer.pos = 0
|
411
|
+
linearized = nil
|
390
412
|
while true
|
391
413
|
@tokenizer.skip_whitespace
|
392
414
|
pos = @tokenizer.pos
|
393
|
-
@tokenizer.scan_until(/(\n|\r\n?)
|
415
|
+
@tokenizer.scan_until(/(\n|\r\n?)+|\z/)
|
394
416
|
next_new_line_pos = @tokenizer.pos
|
395
417
|
@tokenizer.pos = pos
|
396
418
|
|
397
|
-
token = @tokenizer.
|
419
|
+
token = @tokenizer.next_integer_or_keyword rescue nil
|
398
420
|
if token.kind_of?(Integer)
|
399
|
-
gen = @tokenizer.
|
400
|
-
tok = @tokenizer.
|
421
|
+
gen = @tokenizer.next_integer_or_keyword rescue nil
|
422
|
+
tok = @tokenizer.next_integer_or_keyword rescue nil
|
401
423
|
if @tokenizer.pos > next_new_line_pos
|
402
424
|
@tokenizer.pos = next_new_line_pos
|
403
425
|
elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
|
404
426
|
xref.add_in_use_entry(token, gen, pos)
|
427
|
+
if linearized.nil?
|
428
|
+
obj = @tokenizer.next_object rescue nil
|
429
|
+
linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
|
430
|
+
end
|
405
431
|
@tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
|
406
432
|
end
|
407
433
|
elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
|
408
434
|
obj = @tokenizer.next_object rescue nil
|
409
435
|
# Use last trailer found in case of multiple revisions but use first trailer in case of
|
410
436
|
# linearized file.
|
411
|
-
trailer = obj if obj.kind_of?(Hash) && (
|
437
|
+
trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
|
412
438
|
elsif token == Tokenizer::NO_MORE_TOKENS
|
413
439
|
break
|
414
440
|
else
|
@@ -416,16 +442,22 @@ module HexaPDF
|
|
416
442
|
end
|
417
443
|
end
|
418
444
|
|
419
|
-
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
420
445
|
if !trailer || trailer.empty?
|
421
|
-
|
446
|
+
_, trailer = load_revision(startxref_offset) rescue nil
|
447
|
+
unless trailer
|
448
|
+
@in_reconstruct_revision = false
|
449
|
+
raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
|
450
|
+
end
|
422
451
|
end
|
452
|
+
trailer&.delete(:Prev) # no need for this and may wreak havoc
|
423
453
|
|
424
454
|
loader = lambda do |xref_entry|
|
425
455
|
obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
|
426
|
-
@document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
456
|
+
obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
|
457
|
+
@document.security_handler ? @document.security_handler.decrypt(obj) : obj
|
427
458
|
end
|
428
459
|
|
460
|
+
@in_reconstruct_revision = false
|
429
461
|
Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
|
430
462
|
loader: loader)
|
431
463
|
end
|