RubyGems - hexapdf - Versions diffs - 0.14.2 → 0.15.2 - Mend

hexapdf 0.14.2 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +96 -0
data/lib/hexapdf/cli/form.rb +30 -8
data/lib/hexapdf/configuration.rb +19 -4
data/lib/hexapdf/content/canvas.rb +1 -0
data/lib/hexapdf/dictionary.rb +3 -0
data/lib/hexapdf/dictionary_fields.rb +1 -1
data/lib/hexapdf/encryption/security_handler.rb +7 -2
data/lib/hexapdf/encryption/standard_security_handler.rb +12 -0
data/lib/hexapdf/error.rb +4 -3
data/lib/hexapdf/filter.rb +1 -0
data/lib/hexapdf/filter/crypt.rb +60 -0
data/lib/hexapdf/font/true_type/subsetter.rb +5 -1
data/lib/hexapdf/font/type1/afm_parser.rb +2 -1
data/lib/hexapdf/parser.rb +46 -14
data/lib/hexapdf/pdf_array.rb +3 -0
data/lib/hexapdf/revision.rb +16 -0
data/lib/hexapdf/serializer.rb +10 -3
data/lib/hexapdf/tokenizer.rb +44 -3
data/lib/hexapdf/type/acro_form.rb +1 -0
data/lib/hexapdf/type/acro_form/appearance_generator.rb +32 -17
data/lib/hexapdf/type/acro_form/button_field.rb +8 -4
data/lib/hexapdf/type/acro_form/field.rb +1 -0
data/lib/hexapdf/type/acro_form/form.rb +37 -0
data/lib/hexapdf/type/acro_form/signature_field.rb +223 -0
data/lib/hexapdf/type/annotation.rb +13 -9
data/lib/hexapdf/type/annotations/widget.rb +3 -1
data/lib/hexapdf/type/font_descriptor.rb +9 -2
data/lib/hexapdf/type/page.rb +81 -0
data/lib/hexapdf/type/resources.rb +4 -0
data/lib/hexapdf/type/xref_stream.rb +7 -0
data/lib/hexapdf/utils/graphics_helpers.rb +4 -4
data/lib/hexapdf/version.rb +1 -1
data/test/hexapdf/content/test_canvas.rb +21 -0
data/test/hexapdf/encryption/test_security_handler.rb +15 -0
data/test/hexapdf/encryption/test_standard_security_handler.rb +26 -0
data/test/hexapdf/filter/test_crypt.rb +21 -0
data/test/hexapdf/font/true_type/test_subsetter.rb +2 -0
data/test/hexapdf/font/type1/test_afm_parser.rb +5 -0
data/test/hexapdf/test_dictionary_fields.rb +7 -0
data/test/hexapdf/test_parser.rb +82 -2
data/test/hexapdf/test_revision.rb +21 -0
data/test/hexapdf/test_serializer.rb +10 -0
data/test/hexapdf/test_tokenizer.rb +50 -0
data/test/hexapdf/test_writer.rb +2 -2
data/test/hexapdf/type/acro_form/test_appearance_generator.rb +24 -3
data/test/hexapdf/type/acro_form/test_button_field.rb +13 -7
data/test/hexapdf/type/acro_form/test_field.rb +5 -0
data/test/hexapdf/type/acro_form/test_form.rb +46 -2
data/test/hexapdf/type/acro_form/test_signature_field.rb +38 -0
data/test/hexapdf/type/annotations/test_widget.rb +2 -0
data/test/hexapdf/type/test_annotation.rb +20 -10
data/test/hexapdf/type/test_font_descriptor.rb +7 -0
data/test/hexapdf/type/test_page.rb +187 -49
data/test/hexapdf/type/test_resources.rb +6 -0
data/test/hexapdf/type/test_xref_stream.rb +7 -0
data/test/hexapdf/utils/test_graphics_helpers.rb +8 -0
metadata +6 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 22367739c2160e7a5dbc9e8b20bfefb06ef96664ccb814e4ab719a9bce7b68f3
-  data.tar.gz: 8f703fa1e8e7b9d0b966e37becbecbbb7a4c1d81d0a823d770fa3d531efb1a4a
+  metadata.gz: 1a6ef1bdd17664ef0b9474b931d31ad69c57c77928ce2b69a3bbd5dadda0dce6
+  data.tar.gz: 11b87436d19cc5498fd6a77f0b6c410e717809264153964668be0f5199b9354d
 SHA512:
-  metadata.gz: b05954e3c3890cbbc40d8171e6f1d7f6375569d69b5c90fc0e74b5ea5553ff5a45f1090a3741f50bfab7a6e0725b1e79f5c0eab7b92e6f7e518f38eb1eb6a3f8
-  data.tar.gz: 2a3441b7ee7ca89e1417ea4134c3ab7444b4f791f5cba274361e719626fe9b0b08c903425b53a7fb7b04be3cab96a4edbb5c502dc1e7b4e3cdc809f8a9ebafb6
+  metadata.gz: 525a55832758b5eecd1a7f2daf5f220e1afa7ff8e88ca2d65998e658585f290ff2018450e50423f2331b7f195865eab8b1c62562ecdbf3671b46d4da770aed12
+  data.tar.gz: 0b0e18c7f79f0e2a54080fefad1dd4d94e15157f72e5360a3ebd827fc0cc2037ae6e06302155426e7f0900e97ee0cee678e069bd8ef05a9333d684c50e1343a5

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,99 @@
+## 0.15.2 - 2021-05-01
+### Fixed
+* Handling of unencrypted metadata streams
+## 0.15.1 - 2021-04-15
+### Fixed
+* Potential division by zero when calculating the scaling for XObjects
+* Handling of XObjects with a width or height of zero when drawing on canvas
+## 0.15.0 - 2021-04-12
+### Added
+* [HexaPDF::Type::Page#flatten_annotations] for flattening the annotations of a
+  page
+* [HexaPDF::Type::AcroForm::Form#flatten] for flattening interactive forms
+* [HexaPDF::Revision#update] for updating the stored wrapper class of a PDF
+  object
+* [HexaPDF::Type::AcroForm::SignatureField] for working with AcroForm signature
+  fields
+* Support for form field flattening to the `hexapdf form` CLI command
+### Changed
+* **Breaking change**: Overhauled the interface for accessing appearances of
+  annotations to make it more convenient
+* Validation of [HexaPDF::Type::FontDescriptor] to delete invalid `/FontWeight`
+  value
+* [HexaPDF::MalformedPDFError#pos] an accessor instead of a reader and update
+  the exception message
+* Configuration option 'acro_form.fallback_font' to allow a callable object for
+  more advanced fallback font handling
+### Fixed
+* [HexaPDF::Type::Annotations::Widget#background_color] to correctly handle
+  empty background color arrays
+* [HexaPDF::Type::AcroForm::Field#delete_widget] to update the wrapper object
+  stored in the document in case the widget is embedded
+* Processing of invalid PDF files containing a space,CR,LF combination after
+  the 'stream' keyword
+* Cross-reference stream reconstruction with respect to detection of linearized
+  files
+* Detection of existing appearances for AcroForm push button fields when
+  creating appearances
+## 0.14.4 - 2021-02-27
+### Added
+* Support for the Crypt filters
+### Changed
+* [HexaPDF::MalformedPDFError] to make the `pos` argument optional
+### Fixed
+* Handling of invalid floating point numbers NaN, Inf and -Inf when serializing
+* Processing of invalid PDF files containing NaN and Inf instead of numbers
+* Bug in Type1 font AFM parser that occurred if the file doesn't end with a new
+  line character
+* Cross-reference table reconstruction to handle the case of an entry specifying
+  a non-existent indirect object
+* Cross-reference table reconstruction to handle trailers specified by cross-
+  reference streams
+* Cross-reference table reconstruction to use the set security handler for
+  decrypting indirect objects
+* Parsing of cross-reference streams where data is missing
+## 0.14.3 - 2021-02-16
+### Fixed
+* Bug in [HexaPDF::Font::TrueType::Subsetter#use_glyph] which led to corrupt
+  text output
+* [HexaPDF::Serializer] to handle infinite recursion problem
+* Cross-reference table reconstruction to avoid an O(n^2) performance problem
+* [HexaPDF::Type::Resources] validation to handle an invalid `/ProcSet` entry
+  containing a single value instead of an array
+* Processing of invalid PDF files missing a required value in appearance streams
+* Processing of invalid empty arrays that should be rectangles by converting
+  them to PDF null objects
+* Processing of invalid PDF files containing indirect objects with offset 0
+* Processing of invalid PDF files containing a space/CR or space/LF combination
+  after the 'stream' keyword
 ## 0.14.2 - 2021-01-22
 ### Fixed

data/lib/hexapdf/cli/form.rb CHANGED Viewed

@@ -52,18 +52,26 @@ module HexaPDF
           If the the output file name is not given, all form fields are listed in page order. Use
           the global --verbose option to show additional information like field type and location.
-          If the output file name is given, the fields can be interactively filled out. By
-          additionally using the --template option, the data for the fields is read from the given
-          template file instead of the standard input.
+          If the output file name is given, the fields can be filled out interactively, via a
+          template or just flattened by using the respective options. Form field flattening can also
+          be activated in addition to filling out the form. If neither --fill, --template nor
+          --flatten is specified, --fill is implied.
         EOF
         options.on("--password PASSWORD", "-p", String,
                    "The password for decryption. Use - for reading from standard input.") do |pwd|
           @password = (pwd == '-' ? read_password : pwd)
         end
+        options.on("--fill", "Fill out the form") do
+          @fill = true
+        end
         options.on("--template TEMPLATE_FILE", "-t TEMPLATE_FILE",
-                   "Use the template file for the field values") do |template|
+                   "Use the template file for the field values (implies --fill)") do |template|
           @template = template
+          @fill = true
+        end
+        options.on('--flatten', 'Flatten the form fields') do
+          @flatten = true
         end
         options.on("--[no-]viewer-override", "Let the PDF viewer override the visual " \
                    "appearance. Default: use setting from input PDF") do |need_appearances|
@@ -75,6 +83,8 @@ module HexaPDF
         end
         @password = nil
+        @fill = false
+        @flatten = false
         @template = nil
         @need_appearances = nil
         @incremental = true
@@ -82,16 +92,28 @@ module HexaPDF
       def execute(in_file, out_file = nil) #:nodoc:
         maybe_raise_on_existing_file(out_file) if out_file
+        if (@fill || @flatten) && !out_file
+          raise "Output file missing"
+        end
         with_document(in_file, password: @password, out_file: out_file,
                       incremental: @incremental) do |doc|
           if !doc.acro_form
             raise "This PDF doesn't contain an interactive form"
           elsif out_file
             doc.acro_form[:NeedAppearances] = @need_appearances unless @need_appearances.nil?
-            if @template
-              fill_form_with_template(doc)
-            else
-              fill_form(doc)
+            if @fill || !@flatten
+              if @template
+                fill_form_with_template(doc)
+              else
+                fill_form(doc)
+              end
+            end
+            if @flatten
+              unless doc.acro_form.flatten.empty?
+                $stderr.puts "Warning: Not all form fields could be flattened"
+                doc.catalog.delete(:AcroForm)
+                doc.delete(doc.acro_form)
+              end
             end
           else
             list_form_fields(doc)

data/lib/hexapdf/configuration.rb CHANGED Viewed

@@ -164,9 +164,20 @@ module HexaPDF
   # acro_form.fallback_font::
   #    The font that should be used when a variable text field references a font that cannot be used.
   #
-  #    Can either be the name of a font, like 'Helvetica', or an array consisting of the font name
-  #    and a hash of font options, like ['Helvetica', variant: :italic]. If set to +nil+, the use of
-  #    the fallback font is disabled.
+  #    Can be one of the following:
+  #
+  #    * The name of a font, like 'Helvetica'.
+  #
+  #    * An array consisting of the font name and a hash of font options, like ['Helvetica',
+  #      variant: :italic].
+  #
+  #    * A callable object receiving the field and the font object (or +nil+ if no valid font object
+  #      was found) and which has to return either a font name or an array consisting of the font
+  #      name and a hash of font options. This way the response can be different depending on the
+  #      original font and it would also allow e.g. modifying the configured fonts to add custom
+  #      ones.
+  #
+  #    If set to +nil+, the use of the fallback font is disabled.
   #
   #    Default is 'Helvetica'.
   #
@@ -393,7 +404,7 @@ module HexaPDF
                         DCTDecode: 'HexaPDF::Filter::PassThrough',
                         DCT: 'HexaPDF::Filter::PassThrough',
                         JPXDecode: 'HexaPDF::Filter::PassThrough',
-                        Crypt: nil,
+                        Crypt: 'HexaPDF::Filter::Crypt',
                         Encryption: 'HexaPDF::Filter::Encryption',
                       },
                       'font.map' => {},
@@ -516,6 +527,9 @@ module HexaPDF
                         XXAcroFormField: 'HexaPDF::Type::AcroForm::Field',
                         XXAppearanceDictionary: 'HexaPDF::Type::Annotation::AppearanceDictionary',
                         Border: 'HexaPDF::Type::Annotation::Border',
+                        SigFieldLock: 'HexaPDF::Type::AcroForm::SignatureField::LockDictionary',
+                        SV: 'HexaPDF::Type::AcroForm::SignatureField::SeedValueDictionary',
+                        SVCert: 'HexaPDF::Type::AcroForm::SignatureField::CertificateSeedValueDictionary',
                       },
                       'object.subtype_map' => {
                         nil => {
@@ -561,6 +575,7 @@ module HexaPDF
                           Tx: 'HexaPDF::Type::AcroForm::TextField',
                           Btn: 'HexaPDF::Type::AcroForm::ButtonField',
                           Ch: 'HexaPDF::Type::AcroForm::ChoiceField',
+                          Sig: 'HexaPDF::Type::AcroForm::SignatureField',
                         },
                       })

data/lib/hexapdf/content/canvas.rb CHANGED Viewed

@@ -1260,6 +1260,7 @@ module HexaPDF
         unless obj.kind_of?(HexaPDF::Stream)
           obj = context.document.images.add(obj)
         end
+        return obj if obj.width == 0 || obj.height == 0
         width, height = calculate_dimensions(obj.width, obj.height,
                                              rwidth: width, rheight: height)

data/lib/hexapdf/dictionary.rb CHANGED Viewed

@@ -156,6 +156,9 @@ module HexaPDF
     #
     # * Returns the default value if one is specified and no value is available.
     #
+    # Note: If field information is available for the entry, a Hash or Array value will always be
+    # wrapped by Dictionary or PDFArray. Otherwise, the value will be returned as-is.
+    #
     # Note: This method may throw a "can't add a new key into hash during iteration" error in
     # certain cases because it potentially modifies the underlying hash!
     def [](name)

data/lib/hexapdf/dictionary_fields.rb CHANGED Viewed

@@ -344,7 +344,7 @@ module HexaPDF
       # Wraps a given array in the Rectangle class. Otherwise returns +nil+.
       def self.convert(data, _type, document)
         return unless data.kind_of?(Array) || data.kind_of?(HexaPDF::PDFArray)
-        document.wrap(data, type: Rectangle)
+        data.empty? ? document.wrap(nil) : document.wrap(data, type: Rectangle)
       end
     end

data/lib/hexapdf/encryption/security_handler.rb CHANGED Viewed

@@ -268,7 +268,7 @@ module HexaPDF
           str.replace(string_algorithm.decrypt(key, str))
         end
-        if obj.kind_of?(HexaPDF::Stream)
+        if obj.kind_of?(HexaPDF::Stream) && obj.raw_stream.filter[0] != :Crypt
           unless string_algorithm == stream_algorithm
             key = object_key(obj.oid, obj.gen, stream_algorithm)
           end
@@ -300,7 +300,12 @@ module HexaPDF
             obj.raw_stream.key == key && obj.raw_stream.algorithm == stream_algorithm
           obj.raw_stream.undecrypted_fiber
         else
-          stream_algorithm.encryption_fiber(key, result)
+          filter = obj[:Filter]
+          if filter == :Crypt || (filter.kind_of?(PDFArray) && filter[0] == :Crypt)
+            result
+          else
+            stream_algorithm.encryption_fiber(key, result)
+          end
         end
       end

data/lib/hexapdf/encryption/standard_security_handler.rb CHANGED Viewed

@@ -240,6 +240,18 @@ module HexaPDF
         end
       end
+      def decrypt(obj) #:nodoc:
+        if obj.type == :Metadata && obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata]
+          obj
+        else
+          super
+        end
+      end
+      def encrypt_stream(obj) #:nodoc
+        obj == document.catalog.value[:Metadata] && !dict[:EncryptMetadata] ? obj.stream_encoder : super
+      end
       private
       # Prepares the security handler for use in encrypting the document.

data/lib/hexapdf/error.rb CHANGED Viewed

@@ -43,12 +43,13 @@ module HexaPDF
   class MalformedPDFError < Error
     # The byte position in the PDF file where the error occurred.
-    attr_reader :pos
+    attr_accessor :pos
     # Creates a new malformed PDF error object for the given exception message.
     #
-    # The byte position where the error occured has to be given via the +pos+ argument.
-    def initialize(message, pos:)
+    # The byte position where the error occurred can either be given via the +pos+ argument or later
+    # via the #pos accessor but must be set before the exception message is retrieved.
+    def initialize(message, pos: nil)
       super(message)
       @pos = pos
     end

data/lib/hexapdf/filter.rb CHANGED Viewed

@@ -95,6 +95,7 @@ module HexaPDF
     autoload(:Predictor, 'hexapdf/filter/predictor')
     autoload(:Encryption, 'hexapdf/filter/encryption')
+    autoload(:Crypt, 'hexapdf/filter/crypt')
     autoload(:PassThrough, 'hexapdf/filter/pass_through')

data/lib/hexapdf/filter/crypt.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# -*- encoding: utf-8; frozen_string_literal: true -*-
+#
+#--
+# This file is part of HexaPDF.
+#
+# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
+# Copyright (C) 2014-2020 Thomas Leitner
+#
+# HexaPDF is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License version 3 as
+# published by the Free Software Foundation with the addition of the
+# following permission added to Section 15 as permitted in Section 7(a):
+# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
+# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
+# INFRINGEMENT OF THIRD PARTY RIGHTS.
+#
+# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
+#
+# The interactive user interfaces in modified source and object code
+# versions of HexaPDF must display Appropriate Legal Notices, as required
+# under Section 5 of the GNU Affero General Public License version 3.
+#
+# In accordance with Section 7(b) of the GNU Affero General Public
+# License, a covered work must retain the producer line in every PDF that
+# is created or manipulated using HexaPDF.
+#
+# If the GNU Affero General Public License doesn't fit your need,
+# commercial licenses are available at <https://gettalong.at/hexapdf/>.
+#++
+require 'hexapdf/error'
+module HexaPDF
+  module Filter
+    # This filter module implements the Crypt filter. The only supported part is using the Identity
+    # filter.
+    module Crypt
+      # See HexaPDF::Filter
+      def self.decoder(source, options)
+        if !options || !options.key?(:Name) || options[:Name] == :Identity
+          source
+        else
+          raise FilterError, "Handling of Crypt filters besides Identity is not implemented"
+        end
+      end
+      singleton_class.send(:alias_method, :encoder, :decoder)
+    end
+  end
+end

data/lib/hexapdf/font/true_type/subsetter.rb CHANGED Viewed

@@ -67,7 +67,11 @@ module HexaPDF
           # they never appear in the output (PDF serialization would need to escape them)
           if @last_id == 13 || @last_id == 40 || @last_id == 92
             @glyph_map[:"s#{@last_id}"] = @last_id
-            @last_id += (@last_id == 40 ? 2 : 1)
+            if @last_id == 40
+              @last_id += 1
+              @glyph_map[:"s#{@last_id}"] = @last_id
+            end
+            @last_id += 1
           end
           @glyph_map[glyph_id] = @last_id
         end

data/lib/hexapdf/font/type1/afm_parser.rb CHANGED Viewed

@@ -207,7 +207,8 @@ module HexaPDF
         # Returns the rest of the line, with whitespace stripped.
         def parse_string
-          line = @line.strip!
+          @line.strip!
+          line = @line
           @line = ''
           line
         end

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -56,10 +56,12 @@ module HexaPDF
     # PDF references are resolved using the associated Document object.
     def initialize(io, document)
       @io = io
-      @tokenizer = Tokenizer.new(io)
+      on_correctable_error = document.config['parser.on_correctable_error'].curry[document]
+      @tokenizer = Tokenizer.new(io, on_correctable_error: on_correctable_error)
       @document = document
       @object_stream_data = {}
       @reconstructed_revision = nil
+      @in_reconstruct_revision = false
       retrieve_pdf_header_offset_and_version
     end
@@ -72,7 +74,13 @@ module HexaPDF
       obj, oid, gen, stream =
         case xref_entry.type
         when :in_use
-          parse_indirect_object(xref_entry.pos)
+          if xref_entry.pos == 0 && xref_entry.oid != 0
+            # Handle seen-in-the-wild objects with invalid offset 0
+            maybe_raise("Indirect object (#{xref_entry.oid},#{xref_entry.gen}) has offset 0", pos: 0)
+            [nil, xref_entry.oid, xref_entry.gen, nil]
+          else
+            parse_indirect_object(xref_entry.pos)
+          end
         when :free
           [nil, xref_entry.oid, xref_entry.gen, nil]
         when :compressed
@@ -83,12 +91,13 @@ module HexaPDF
       if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
         raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
-          "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
+                        "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
       end
       @document.wrap(obj, oid: oid, gen: gen, stream: stream)
     rescue HexaPDF::MalformedPDFError
-      reconstructed_revision.object(xref_entry)
+      reconstructed_revision.object(xref_entry) ||
+        @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)
     end
     # Parses the indirect object at the specified offset.
@@ -131,7 +140,11 @@ module HexaPDF
           raise_malformed("A stream needs a dictionary, not a(n) #{object.class}", pos: offset)
         end
         tok1 = @tokenizer.next_byte
-        tok2 = @tokenizer.next_byte if tok1 == 13 # 13=CR, 10=LF
+        if tok1 == 32 # space
+          maybe_raise("Keyword stream followed by space instead of LF or CR/LF", pos: @tokenizer.pos)
+          tok1 = @tokenizer.next_byte
+        end
+        tok2 = @tokenizer.next_byte if tok1 == 13 # CR
         if tok1 != 10 && tok1 != 13
           raise_malformed("Keyword stream must be followed by LF or CR/LF", pos: @tokenizer.pos)
         elsif tok1 == 13 && tok2 != 10
@@ -203,7 +216,12 @@ module HexaPDF
         unless obj.respond_to?(:xref_section)
           raise_malformed("Object is not a cross-reference stream", pos: pos)
         end
-        xref_section = obj.xref_section
+        begin
+          xref_section = obj.xref_section
+        rescue MalformedPDFError => e
+          e.pos = pos
+          raise
+        end
         trailer = obj.trailer
         unless xref_section.entry?(obj.oid, obj.gen)
           maybe_raise("Cross-reference stream doesn't contain entry for itself", pos: pos)
@@ -381,34 +399,42 @@ module HexaPDF
     # If the file contains multiple cross-reference sections, all objects will be put into a single
     # cross-reference table, later objects overwriting prior ones.
     def reconstruct_revision
+      return if @in_reconstruct_revision
+      @in_reconstruct_revision = true
       raise unless @document.config['parser.try_xref_reconstruction']
       msg = "#{$!} - trying cross-reference table reconstruction"
       @document.config['parser.on_correctable_error'].call(@document, msg, @tokenizer.pos)
       xref = XRefSection.new
       @tokenizer.pos = 0
+      linearized = nil
       while true
         @tokenizer.skip_whitespace
         pos = @tokenizer.pos
-        @tokenizer.scan_until(/(\n|\r\n?)+/)
+        @tokenizer.scan_until(/(\n|\r\n?)+|\z/)
         next_new_line_pos = @tokenizer.pos
         @tokenizer.pos = pos
-        token = @tokenizer.next_token rescue nil
+        token = @tokenizer.next_integer_or_keyword rescue nil
         if token.kind_of?(Integer)
-          gen = @tokenizer.next_token rescue nil
-          tok = @tokenizer.next_token rescue nil
+          gen = @tokenizer.next_integer_or_keyword rescue nil
+          tok = @tokenizer.next_integer_or_keyword rescue nil
           if @tokenizer.pos > next_new_line_pos
             @tokenizer.pos = next_new_line_pos
           elsif gen.kind_of?(Integer) && tok.kind_of?(Tokenizer::Token) && tok == 'obj'
             xref.add_in_use_entry(token, gen, pos)
+            if linearized.nil?
+              obj = @tokenizer.next_object rescue nil
+              linearized = obj.kind_of?(Hash) && obj.key?(:Linearized)
+            end
             @tokenizer.scan_until(/(?:\n|\r\n?)endobj\b/)
           end
         elsif token.kind_of?(Tokenizer::Token) && token == 'trailer'
           obj = @tokenizer.next_object rescue nil
           # Use last trailer found in case of multiple revisions but use first trailer in case of
           # linearized file.
-          trailer = obj if obj.kind_of?(Hash) && (obj.key?(:Prev) || trailer.nil?)
+          trailer = obj if obj.kind_of?(Hash) && (!linearized || trailer.nil?)
         elsif token == Tokenizer::NO_MORE_TOKENS
           break
         else
@@ -416,16 +442,22 @@ module HexaPDF
         end
       end
-      trailer&.delete(:Prev) # no need for this and may wreak havoc
       if !trailer || trailer.empty?
-        raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
+        _, trailer = load_revision(startxref_offset) rescue nil
+        unless trailer
+          @in_reconstruct_revision = false
+          raise_malformed("Could not reconstruct malformed PDF because trailer was not found", pos: 0)
+        end
       end
+      trailer&.delete(:Prev) # no need for this and may wreak havoc
       loader = lambda do |xref_entry|
         obj, oid, gen, stream = parse_indirect_object(xref_entry.pos)
-        @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+        obj = @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+        @document.security_handler ? @document.security_handler.decrypt(obj) : obj
       end
+      @in_reconstruct_revision = false
       Revision.new(@document.wrap(trailer, type: :XXTrailer), xref_section: xref,
                    loader: loader)
     end