hexapdf 0.14.2 → 0.15.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +96 -0
  3. data/lib/hexapdf/cli/form.rb +30 -8
  4. data/lib/hexapdf/configuration.rb +19 -4
  5. data/lib/hexapdf/content/canvas.rb +1 -0
  6. data/lib/hexapdf/dictionary.rb +3 -0
  7. data/lib/hexapdf/dictionary_fields.rb +1 -1
  8. data/lib/hexapdf/encryption/security_handler.rb +7 -2
  9. data/lib/hexapdf/encryption/standard_security_handler.rb +12 -0
  10. data/lib/hexapdf/error.rb +4 -3
  11. data/lib/hexapdf/filter.rb +1 -0
  12. data/lib/hexapdf/filter/crypt.rb +60 -0
  13. data/lib/hexapdf/font/true_type/subsetter.rb +5 -1
  14. data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
  15. data/lib/hexapdf/parser.rb +46 -14
  16. data/lib/hexapdf/pdf_array.rb +3 -0
  17. data/lib/hexapdf/revision.rb +16 -0
  18. data/lib/hexapdf/serializer.rb +10 -3
  19. data/lib/hexapdf/tokenizer.rb +44 -3
  20. data/lib/hexapdf/type/acro_form.rb +1 -0
  21. data/lib/hexapdf/type/acro_form/appearance_generator.rb +32 -17
  22. data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
  23. data/lib/hexapdf/type/acro_form/field.rb +1 -0
  24. data/lib/hexapdf/type/acro_form/form.rb +37 -0
  25. data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
  26. data/lib/hexapdf/type/annotation.rb +13 -9
  27. data/lib/hexapdf/type/annotations/widget.rb +3 -1
  28. data/lib/hexapdf/type/font_descriptor.rb +9 -2
  29. data/lib/hexapdf/type/page.rb +81 -0
  30. data/lib/hexapdf/type/resources.rb +4 -0
  31. data/lib/hexapdf/type/xref_stream.rb +7 -0
  32. data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
  33. data/lib/hexapdf/version.rb +1 -1
  34. data/test/hexapdf/content/test_canvas.rb +21 -0
  35. data/test/hexapdf/encryption/test_security_handler.rb +15 -0
  36. data/test/hexapdf/encryption/test_standard_security_handler.rb +26 -0
  37. data/test/hexapdf/filter/test_crypt.rb +21 -0
  38. data/test/hexapdf/font/true_type/test_subsetter.rb +2 -0
  39. data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
  40. data/test/hexapdf/test_dictionary_fields.rb +7 -0
  41. data/test/hexapdf/test_parser.rb +82 -2
  42. data/test/hexapdf/test_revision.rb +21 -0
  43. data/test/hexapdf/test_serializer.rb +10 -0
  44. data/test/hexapdf/test_tokenizer.rb +50 -0
  45. data/test/hexapdf/test_writer.rb +2 -2
  46. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +24 -3
  47. data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
  48. data/test/hexapdf/type/acro_form/test_field.rb +5 -0
  49. data/test/hexapdf/type/acro_form/test_form.rb +46 -2
  50. data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
  51. data/test/hexapdf/type/annotations/test_widget.rb +2 -0
  52. data/test/hexapdf/type/test_annotation.rb +20 -10
  53. data/test/hexapdf/type/test_font_descriptor.rb +7 -0
  54. data/test/hexapdf/type/test_page.rb +187 -49
  55. data/test/hexapdf/type/test_resources.rb +6 -0
  56. data/test/hexapdf/type/test_xref_stream.rb +7 -0
  57. data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
  58. metadata +6 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 22367739c2160e7a5dbc9e8b20bfefb06ef96664ccb814e4ab719a9bce7b68f3
4
- data.tar.gz: 8f703fa1e8e7b9d0b966e37becbecbbb7a4c1d81d0a823d770fa3d531efb1a4a
3
+ metadata.gz: 1a6ef1bdd17664ef0b9474b931d31ad69c57c77928ce2b69a3bbd5dadda0dce6
4
+ data.tar.gz: 11b87436d19cc5498fd6a77f0b6c410e717809264153964668be0f5199b9354d
5
5
  SHA512:
6
- metadata.gz: b05954e3c3890cbbc40d8171e6f1d7f6375569d69b5c90fc0e74b5ea5553ff5a45f1090a3741f50bfab7a6e0725b1e79f5c0eab7b92e6f7e518f38eb1eb6a3f8
7
- data.tar.gz: 2a3441b7ee7ca89e1417ea4134c3ab7444b4f791f5cba274361e719626fe9b0b08c903425b53a7fb7b04be3cab96a4edbb5c502dc1e7b4e3cdc809f8a9ebafb6
6
+ metadata.gz: 525a55832758b5eecd1a7f2daf5f220e1afa7ff8e88ca2d65998e658585f290ff2018450e50423f2331b7f195865eab8b1c62562ecdbf3671b46d4da770aed12
7
+ data.tar.gz: 0b0e18c7f79f0e2a54080fefad1dd4d94e15157f72e5360a3ebd827fc0cc2037ae6e06302155426e7f0900e97ee0cee678e069bd8ef05a9333d684c50e1343a5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,99 @@
1
+ ## 0.15.2 - 2021-05-01
2
+
3
+ ### Fixed
4
+
5
+ * Handling of unencrypted metadata streams
6
+
7
+
8
+ ## 0.15.1 - 2021-04-15
9
+
10
+ ### Fixed
11
+
12
+ * Potential division by zero when calculating the scaling for XObjects
13
+ * Handling of XObjects with a width or height of zero when drawing on canvas
14
+
15
+
16
+ ## 0.15.0 - 2021-04-12
17
+
18
+ ### Added
19
+
20
+ * [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
21
+ page
22
+ * [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
23
+ * [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
24
+ object
25
+ * [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
26
+ fields
27
+ * Support for form field flattening to the `hexapdf form` CLI command
28
+
29
+ ### Changed
30
+
31
+ * **Breaking change**: Overhauled the interface for accessing appearances of
32
+ annotations to make it more convenient
33
+ * Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
34
+ value
35
+ * [HexaPDF::MalformedPDFError#pos] an accessor instead of a reader and update
36
+ the exception message
37
+ * Configuration option 'acro_form.fallback_font' to allow a callable object for
38
+ more advanced fallback font handling
39
+
40
+ ### Fixed
41
+
42
+ * [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
43
+ empty background color arrays
44
+ * [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
45
+ stored in the document in case the widget is embedded
46
+ * Processing of invalid PDF files containing a space,CR,LF combination after
47
+ the 'stream' keyword
48
+ * Cross-reference stream reconstruction with respect to detection of linearized
49
+ files
50
+ * Detection of existing appearances for AcroForm push button fields when
51
+ creating appearances
52
+
53
+
54
+ ## 0.14.4 - 2021-02-27
55
+
56
+ ### Added
57
+
58
+ * Support for the Crypt filters
59
+
60
+ ### Changed
61
+
62
+ * [HexaPDF::MalformedPDFError] to make the `pos` argument optional
63
+
64
+ ### Fixed
65
+
66
+ * Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
67
+ * Processing of invalid PDF files containing NaN and Inf instead of numbers
68
+ * Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
69
+ line character
70
+ * Cross-reference table reconstruction to handle the case of an entry specifying
71
+ a non-existent indirect object
72
+ * Cross-reference table reconstruction to handle trailers specified by cross-
73
+ reference streams
74
+ * Cross-reference table reconstruction to use the set security handler for
75
+ decrypting indirect objects
76
+ * Parsing of cross-reference streams where data is missing
77
+
78
+
79
+ ## 0.14.3 - 2021-02-16
80
+
81
+ ### Fixed
82
+
83
+ * Bug in [HexaPDF::Font::TrueType::Subsetter#use_glyph] which led to corrupt
84
+ text output
85
+ * [HexaPDF::Serializer] to handle infinite recursion problem
86
+ * Cross-reference table reconstruction to avoid an O(n^2) performance problem
87
+ * [HexaPDF::Type::Resources] validation to handle an invalid `/ProcSet` entry
88
+ containing a single value instead of an array
89
+ * Processing of invalid PDF files missing a required value in appearance streams
90
+ * Processing of invalid empty arrays that should be rectangles by converting
91
+ them to PDF null objects
92
+ * Processing of invalid PDF files containing indirect objects with offset 0
93
+ * Processing of invalid PDF files containing a space/CR or space/LF combination
94
+ after the 'stream' keyword
95
+
96
+
1
97
  ## 0.14.2 - 2021-01-22
2
98
 
3
99
  ### Fixed
@@ -52,18 +52,26 @@ module HexaPDF
52
52
  If the the output file name is not given, all form fields are listed in page order. Use
53
53
  the global --verbose option to show additional information like field type and location.
54
54
 
55
- If the output file name is given, the fields can be interactively filled out. By
56
- additionally using the --template option, the data for the fields is read from the given
57
- template file instead of the standard input.
55
+ If the output file name is given, the fields can be filled out interactively, via a
56
+ template or just flattened by using the respective options. Form field flattening can also
57
+ be activated in addition to filling out the form. If neither --fill, --template nor
58
+ --flatten is specified, --fill is implied.
58
59
  EOF
59
60
 
60
61
  options.on("--password PASSWORD", "-p", String,
61
62
  "The password for decryption. Use - for reading from standard input.") do |pwd|
62
63
  @password = (pwd == '-' ? read_password : pwd)
63
64
  end
65
+ options.on("--fill", "Fill out the form") do
66
+ @fill = true
67
+ end
64
68
  options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
65
- "Use the template file for the field values") do |template|
69
+ "Use the template file for the field values (implies --fill)") do |template|
66
70
  @template = template
71
+ @fill = true
72
+ end
73
+ options.on('--flatten', 'Flatten the form fields') do
74
+ @flatten = true
67
75
  end
68
76
  options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
69
77
  "appearance. Default: use setting from input PDF") do |need_appearances|
@@ -75,6 +83,8 @@ module HexaPDF
75
83
  end
76
84
 
77
85
  @password = nil
86
+ @fill = false
87
+ @flatten = false
78
88
  @template = nil
79
89
  @need_appearances = nil
80
90
  @incremental = true
@@ -82,16 +92,28 @@ module HexaPDF
82
92
 
83
93
  def execute(in_file, out_file = nil) #:nodoc:
84
94
  maybe_raise_on_existing_file(out_file) if out_file
95
+ if (@fill || @flatten) && !out_file
96
+ raise "Output file missing"
97
+ end
85
98
  with_document(in_file, password: @password, out_file: out_file,
86
99
  incremental: @incremental) do |doc|
87
100
  if !doc.acro_form
88
101
  raise "This PDF doesn't contain an interactive form"
89
102
  elsif out_file
90
103
  doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
91
- if @template
92
- fill_form_with_template(doc)
93
- else
94
- fill_form(doc)
104
+ if @fill || !@flatten
105
+ if @template
106
+ fill_form_with_template(doc)
107
+ else
108
+ fill_form(doc)
109
+ end
110
+ end
111
+ if @flatten
112
+ unless doc.acro_form.flatten.empty?
113
+ $stderr.puts "Warning: Not all form fields could be flattened"
114
+ doc.catalog.delete(:AcroForm)
115
+ doc.delete(doc.acro_form)
116
+ end
95
117
  end
96
118
  else
97
119
  list_form_fields(doc)
@@ -164,9 +164,20 @@ module HexaPDF
164
164
  # acro_form.fallback_font::
165
165
  # The font that should be used when a variable text field references a font that cannot be used.
166
166
  #
167
- # Can either be the name of a font, like 'Helvetica', or an array consisting of the font name
168
- # and a hash of font options, like ['Helvetica', variant: :italic]. If set to +nil+, the use of
169
- # the fallback font is disabled.
167
+ # Can be one of the following:
168
+ #
169
+ # * The name of a font, like 'Helvetica'.
170
+ #
171
+ # * An array consisting of the font name and a hash of font options, like ['Helvetica',
172
+ # variant: :italic].
173
+ #
174
+ # * A callable object receiving the field and the font object (or +nil+ if no valid font object
175
+ # was found) and which has to return either a font name or an array consisting of the font
176
+ # name and a hash of font options. This way the response can be different depending on the
177
+ # original font and it would also allow e.g. modifying the configured fonts to add custom
178
+ # ones.
179
+ #
180
+ # If set to +nil+, the use of the fallback font is disabled.
170
181
  #
171
182
  # Default is 'Helvetica'.
172
183
  #
@@ -393,7 +404,7 @@ module HexaPDF
393
404
  DCTDecode: 'HexaPDF::Filter::PassThrough',
394
405
  DCT: 'HexaPDF::Filter::PassThrough',
395
406
  JPXDecode: 'HexaPDF::Filter::PassThrough',
396
- Crypt: nil,
407
+ Crypt: 'HexaPDF::Filter::Crypt',
397
408
  Encryption: 'HexaPDF::Filter::Encryption',
398
409
  },
399
410
  'font.map' => {},
@@ -516,6 +527,9 @@ module HexaPDF
516
527
  XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
517
528
  XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
518
529
  Border: 'HexaPDF::Type::Annotation::Border',
530
+ SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
531
+ SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
532
+ SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
519
533
  },
520
534
  'object.subtype_map' => {
521
535
  nil => {
@@ -561,6 +575,7 @@ module HexaPDF
561
575
  Tx: 'HexaPDF::Type::AcroForm::TextField',
562
576
  Btn: 'HexaPDF::Type::AcroForm::ButtonField',
563
577
  Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
578
+ Sig: 'HexaPDF::Type::AcroForm::SignatureField',
564
579
  },
565
580
  })
566
581
 
@@ -1260,6 +1260,7 @@ module HexaPDF
1260
1260
  unless obj.kind_of?(HexaPDF::Stream)
1261
1261
  obj = context.document.images.add(obj)
1262
1262
  end
1263
+ return obj if obj.width == 0 || obj.height == 0
1263
1264
 
1264
1265
  width, height = calculate_dimensions(obj.width, obj.height,
1265
1266
  rwidth: width, rheight: height)
@@ -156,6 +156,9 @@ module HexaPDF
156
156
  #
157
157
  # * Returns the default value if one is specified and no value is available.
158
158
  #
159
+ # Note: If field information is available for the entry, a Hash or Array value will always be
160
+ # wrapped by Dictionary or PDFArray. Otherwise, the value will be returned as-is.
161
+ #
159
162
  # Note: This method may throw a "can't add a new key into hash during iteration" error in
160
163
  # certain cases because it potentially modifies the underlying hash!
161
164
  def [](name)
@@ -344,7 +344,7 @@ module HexaPDF
344
344
  # Wraps a given array in the Rectangle class. Otherwise returns +nil+.
345
345
  def self.convert(data, _type, document)
346
346
  return unless data.kind_of?(Array) || data.kind_of?(HexaPDF::PDFArray)
347
- document.wrap(data, type: Rectangle)
347
+ data.empty? ? document.wrap(nil) : document.wrap(data, type: Rectangle)
348
348
  end
349
349
 
350
350
  end
@@ -268,7 +268,7 @@ module HexaPDF
268
268
  str.replace(string_algorithm.decrypt(key, str))
269
269
  end
270
270
 
271
- if obj.kind_of?(HexaPDF::Stream)
271
+ if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
272
272
  unless string_algorithm == stream_algorithm
273
273
  key = object_key(obj.oid, obj.gen, stream_algorithm)
274
274
  end
@@ -300,7 +300,12 @@ module HexaPDF
300
300
  obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
301
301
  obj.raw_stream.undecrypted_fiber
302
302
  else
303
- stream_algorithm.encryption_fiber(key, result)
303
+ filter = obj[:Filter]
304
+ if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
305
+ result
306
+ else
307
+ stream_algorithm.encryption_fiber(key, result)
308
+ end
304
309
  end
305
310
  end
306
311
 
@@ -240,6 +240,18 @@ module HexaPDF
240
240
  end
241
241
  end
242
242
 
243
+ def decrypt(obj) #:nodoc:
244
+ if obj.type == :Metadata && obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata]
245
+ obj
246
+ else
247
+ super
248
+ end
249
+ end
250
+
251
+ def encrypt_stream(obj) #:nodoc:
252
+ obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata] ? obj.stream_encoder : super
253
+ end
254
+
243
255
  private
244
256
 
245
257
  # Prepares the security handler for use in encrypting the document.
data/lib/hexapdf/error.rb CHANGED
@@ -43,12 +43,13 @@ module HexaPDF
43
43
  class MalformedPDFError < Error
44
44
 
45
45
  # The byte position in the PDF file where the error occurred.
46
- attr_reader :pos
46
+ attr_accessor :pos
47
47
 
48
48
  # Creates a new malformed PDF error object for the given exception message.
49
49
  #
50
- # The byte position where the error occured has to be given via the +pos+ argument.
51
- def initialize(message, pos:)
50
+ # The byte position where the error occurred can either be given via the +pos+ argument or later
51
+ # via the #pos accessor but must be set before the exception message is retrieved.
52
+ def initialize(message, pos: nil)
52
53
  super(message)
53
54
  @pos = pos
54
55
  end
@@ -95,6 +95,7 @@ module HexaPDF
95
95
  autoload(:Predictor, 'hexapdf/filter/predictor')
96
96
 
97
97
  autoload(:Encryption, 'hexapdf/filter/encryption')
98
+ autoload(:Crypt, 'hexapdf/filter/crypt')
98
99
 
99
100
  autoload(:PassThrough, 'hexapdf/filter/pass_through')
100
101
 
@@ -0,0 +1,60 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2020 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/error'
38
+
39
+ module HexaPDF
40
+ module Filter
41
+
42
+ # This filter module implements the Crypt filter. The only supported part is using the Identity
43
+ # filter.
44
+ module Crypt
45
+
46
+ # See HexaPDF::Filter
47
+ def self.decoder(source, options)
48
+ if !options || !options.key?(:Name) || options[:Name] == :Identity
49
+ source
50
+ else
51
+ raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
52
+ end
53
+ end
54
+
55
+ singleton_class.send(:alias_method, :encoder, :decoder)
56
+
57
+ end
58
+
59
+ end
60
+ end
@@ -67,7 +67,11 @@ module HexaPDF
67
67
  # they never appear in the output (PDF serialization would need to escape them)
68
68
  if @last_id == 13 || @last_id == 40 || @last_id == 92
69
69
  @glyph_map[:"s#{@last_id}"] = @last_id
70
- @last_id += (@last_id == 40 ? 2 : 1)
70
+ if @last_id == 40
71
+ @last_id += 1
72
+ @glyph_map[:"s#{@last_id}"] = @last_id
73
+ end
74
+ @last_id += 1
71
75
  end
72
76
  @glyph_map[glyph_id] = @last_id
73
77
  end
@@ -207,7 +207,8 @@ module HexaPDF
207
207
 
208
208
  # Returns the rest of the line, with whitespace stripped.
209
209
  def parse_string
210
- line = @line.strip!
210
+ @line.strip!
211
+ line = @line
211
212
  @line = ''
212
213
  line
213
214
  end
@@ -56,10 +56,12 @@ module HexaPDF
56
56
  # PDF references are resolved using the associated Document object.
57
57
  def initialize(io, document)
58
58
  @io = io
59
- @tokenizer = Tokenizer.new(io)
59
+ on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
60
+ @tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
60
61
  @document = document
61
62
  @object_stream_data = {}
62
63
  @reconstructed_revision = nil
64
+ @in_reconstruct_revision = false
63
65
  retrieve_pdf_header_offset_and_version
64
66
  end
65
67
 
@@ -72,7 +74,13 @@ module HexaPDF
72
74
  obj, oid, gen, stream =
73
75
  case xref_entry.type
74
76
  when :in_use
75
- parse_indirect_object(xref_entry.pos)
77
+ if xref_entry.pos == 0 && xref_entry.oid != 0
78
+ # Handle seen-in-the-wild objects with invalid offset 0
79
+ maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
80
+ [nil, xref_entry.oid, xref_entry.gen, nil]
81
+ else
82
+ parse_indirect_object(xref_entry.pos)
83
+ end
76
84
  when :free
77
85
  [nil, xref_entry.oid, xref_entry.gen, nil]
78
86
  when :compressed
@@ -83,12 +91,13 @@ module HexaPDF
83
91
 
84
92
  if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
85
93
  raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
86
- "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
94
+ "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
87
95
  end
88
96
 
89
97
  @document.wrap(obj, oid: oid, gen: gen, stream: stream)
90
98
  rescue HexaPDF::MalformedPDFError
91
- reconstructed_revision.object(xref_entry)
99
+ reconstructed_revision.object(xref_entry) ||
100
+ @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
92
101
  end
93
102
 
94
103
  # Parses the indirect object at the specified offset.
@@ -131,7 +140,11 @@ module HexaPDF
131
140
  raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
132
141
  end
133
142
  tok1 = @tokenizer.next_byte
134
- tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
143
+ if tok1 == 32 # space
144
+ maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
145
+ tok1 = @tokenizer.next_byte
146
+ end
147
+ tok2 = @tokenizer.next_byte if tok1 == 13 # CR
135
148
  if tok1 != 10 && tok1 != 13
136
149
  raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
137
150
  elsif tok1 == 13 && tok2 != 10
@@ -203,7 +216,12 @@ module HexaPDF
203
216
  unless obj.respond_to?(:xref_section)
204
217
  raise_malformed("Object is not a cross-reference stream", pos: pos)
205
218
  end
206
- xref_section = obj.xref_section
219
+ begin
220
+ xref_section = obj.xref_section
221
+ rescue MalformedPDFError => e
222
+ e.pos = pos
223
+ raise
224
+ end
207
225
  trailer = obj.trailer
208
226
  unless xref_section.entry?(obj.oid, obj.gen)
209
227
  maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
@@ -381,34 +399,42 @@ module HexaPDF
381
399
  # If the file contains multiple cross-reference sections, all objects will be put into a single
382
400
  # cross-reference table, later objects overwriting prior ones.
383
401
  def reconstruct_revision
402
+ return if @in_reconstruct_revision
403
+ @in_reconstruct_revision = true
404
+
384
405
  raise unless @document.config['parser.try_xref_reconstruction']
385
406
  msg = "#{$!} - trying cross-reference table reconstruction"
386
407
  @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
387
408
 
388
409
  xref = XRefSection.new
389
410
  @tokenizer.pos = 0
411
+ linearized = nil
390
412
  while true
391
413
  @tokenizer.skip_whitespace
392
414
  pos = @tokenizer.pos
393
- @tokenizer.scan_until(/(\n|\r\n?)+/)
415
+ @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
394
416
  next_new_line_pos = @tokenizer.pos
395
417
  @tokenizer.pos = pos
396
418
 
397
- token = @tokenizer.next_token rescue nil
419
+ token = @tokenizer.next_integer_or_keyword rescue nil
398
420
  if token.kind_of?(Integer)
399
- gen = @tokenizer.next_token rescue nil
400
- tok = @tokenizer.next_token rescue nil
421
+ gen = @tokenizer.next_integer_or_keyword rescue nil
422
+ tok = @tokenizer.next_integer_or_keyword rescue nil
401
423
  if @tokenizer.pos > next_new_line_pos
402
424
  @tokenizer.pos = next_new_line_pos
403
425
  elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
404
426
  xref.add_in_use_entry(token, gen, pos)
427
+ if linearized.nil?
428
+ obj = @tokenizer.next_object rescue nil
429
+ linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
430
+ end
405
431
  @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
406
432
  end
407
433
  elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
408
434
  obj = @tokenizer.next_object rescue nil
409
435
  # Use last trailer found in case of multiple revisions but use first trailer in case of
410
436
  # linearized file.
411
- trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
437
+ trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
412
438
  elsif token == Tokenizer::NO_MORE_TOKENS
413
439
  break
414
440
  else
@@ -416,16 +442,22 @@ module HexaPDF
416
442
  end
417
443
  end
418
444
 
419
- trailer&.delete(:Prev) # no need for this and may wreak havoc
420
445
  if !trailer || trailer.empty?
421
- raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
446
+ _, trailer = load_revision(startxref_offset) rescue nil
447
+ unless trailer
448
+ @in_reconstruct_revision = false
449
+ raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
450
+ end
422
451
  end
452
+ trailer&.delete(:Prev) # no need for this and may wreak havoc
423
453
 
424
454
  loader = lambda do |xref_entry|
425
455
  obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
426
- @document.wrap(obj, oid: oid, gen: gen, stream: stream)
456
+ obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
457
+ @document.security_handler ? @document.security_handler.decrypt(obj) : obj
427
458
  end
428
459
 
460
+ @in_reconstruct_revision = false
429
461
  Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
430
462
  loader: loader)
431
463
  end