RubyGems - hexapdf - Versions diffs - 0.12.3 → 0.13.0 - Mend

hexapdf 0.12.3 → 0.13.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (78)

checksums.yaml +4 -4
data/CHANGELOG.md +38 -0
data/lib/hexapdf/cli/command.rb +4 -2
data/lib/hexapdf/cli/image2pdf.rb +2 -1
data/lib/hexapdf/cli/info.rb +51 -2
data/lib/hexapdf/cli/inspect.rb +30 -8
data/lib/hexapdf/cli/merge.rb +1 -1
data/lib/hexapdf/configuration.rb +15 -0
data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
data/lib/hexapdf/dictionary.rb +4 -4
data/lib/hexapdf/dictionary_fields.rb +1 -9
data/lib/hexapdf/document.rb +31 -12
data/lib/hexapdf/document/files.rb +0 -1
data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
data/lib/hexapdf/encryption/security_handler.rb +1 -0
data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
data/lib/hexapdf/font/cmap.rb +1 -4
data/lib/hexapdf/font/true_type/table/head.rb +1 -0
data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
data/lib/hexapdf/image_loader/png.rb +3 -2
data/lib/hexapdf/layout/line.rb +1 -1
data/lib/hexapdf/layout/style.rb +23 -23
data/lib/hexapdf/layout/text_shaper.rb +3 -2
data/lib/hexapdf/object.rb +30 -25
data/lib/hexapdf/parser.rb +65 -3
data/lib/hexapdf/pdf_array.rb +9 -2
data/lib/hexapdf/revisions.rb +29 -21
data/lib/hexapdf/serializer.rb +1 -1
data/lib/hexapdf/task/optimize.rb +6 -4
data/lib/hexapdf/type/acro_form/choice_field.rb +4 -4
data/lib/hexapdf/type/acro_form/field.rb +35 -5
data/lib/hexapdf/type/acro_form/form.rb +6 -4
data/lib/hexapdf/type/acro_form/text_field.rb +2 -1
data/lib/hexapdf/type/actions/uri.rb +3 -2
data/lib/hexapdf/type/annotations/widget.rb +3 -4
data/lib/hexapdf/type/catalog.rb +2 -2
data/lib/hexapdf/type/file_specification.rb +1 -1
data/lib/hexapdf/type/font_simple.rb +3 -1
data/lib/hexapdf/type/font_true_type.rb +6 -2
data/lib/hexapdf/type/font_type0.rb +1 -1
data/lib/hexapdf/type/form.rb +2 -1
data/lib/hexapdf/type/image.rb +2 -2
data/lib/hexapdf/type/page.rb +16 -7
data/lib/hexapdf/type/page_tree_node.rb +29 -5
data/lib/hexapdf/type/resources.rb +1 -0
data/lib/hexapdf/type/trailer.rb +2 -3
data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
data/lib/hexapdf/version.rb +1 -1
data/test/hexapdf/common_tokenizer_tests.rb +2 -2
data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
data/test/hexapdf/content/test_canvas.rb +3 -3
data/test/hexapdf/content/test_color_space.rb +1 -1
data/test/hexapdf/encryption/test_aes.rb +4 -4
data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
data/test/hexapdf/layout/test_text_layouter.rb +3 -4
data/test/hexapdf/test_configuration.rb +2 -2
data/test/hexapdf/test_dictionary.rb +3 -1
data/test/hexapdf/test_dictionary_fields.rb +2 -2
data/test/hexapdf/test_document.rb +4 -4
data/test/hexapdf/test_object.rb +44 -26
data/test/hexapdf/test_parser.rb +115 -55
data/test/hexapdf/test_pdf_array.rb +7 -0
data/test/hexapdf/test_revisions.rb +35 -0
data/test/hexapdf/test_writer.rb +2 -2
data/test/hexapdf/type/acro_form/test_appearance_generator.rb +1 -2
data/test/hexapdf/type/acro_form/test_field.rb +39 -0
data/test/hexapdf/type/acro_form/test_form.rb +4 -4
data/test/hexapdf/type/acro_form/test_text_field.rb +2 -0
data/test/hexapdf/type/test_font_simple.rb +2 -1
data/test/hexapdf/type/test_font_true_type.rb +6 -0
data/test/hexapdf/type/test_form.rb +1 -1
data/test/hexapdf/type/test_page.rb +8 -1
data/test/hexapdf/type/test_page_tree_node.rb +42 -0
data/test/hexapdf/utils/test_bit_field.rb +2 -0
data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
metadata +5 -12

data/lib/hexapdf/version.rb CHANGED

@@ -37,6 +37,6 @@
 module HexaPDF
   # The version of HexaPDF.
-  VERSION = '0.12.3'
+  VERSION = '0.13.0'
 end

data/test/hexapdf/common_tokenizer_tests.rb CHANGED

@@ -122,7 +122,7 @@ module CommonTokenizerTests
   end
   it "next_token: should not fail when reading super long numbers" do
-    create_tokenizer("1" + "0" * 10_000)
+    create_tokenizer("1" << "0" * 10_000)
     assert_equal(10**10_000, @tokenizer.next_token)
   end
@@ -162,7 +162,7 @@ module CommonTokenizerTests
   end
   it "returns the correct position on operations" do
-    create_tokenizer("hallo du" + " " * 50000 + "hallo du")
+    create_tokenizer("hallo du" << " " * 50000 << "hallo du")
     @tokenizer.next_token
     assert_equal(5, @tokenizer.pos)

data/test/hexapdf/content/graphic_object/test_arc.rb CHANGED

@@ -68,14 +68,14 @@ describe HexaPDF::Content::GraphicObject::Arc do
       arc.max_curves = 4
       curves = arc.curves
       assert_equal(2, curves.size)
-      assert_curve_values([0, 1, p1: [1, 0.548584], p2: [0.548584, 1]], curves[0])
-      assert_curve_values([-1, 0, p1: [-0.548584, 1], p2: [-1, 0.548584]], curves[1])
+      assert_curve_values([0, 1, {p1: [1, 0.548584], p2: [0.548584, 1]}], curves[0])
+      assert_curve_values([-1, 0, {p1: [-0.548584, 1], p2: [-1, 0.548584]}], curves[1])
       arc.configure(clockwise: true)
       curves = arc.curves
       assert_equal(2, curves.size)
-      assert_curve_values([0, -1, p1: [1, -0.548584], p2: [0.548584, -1]], curves[0])
-      assert_curve_values([-1, 0, p1: [-0.548584, -1], p2: [-1, -0.548584]], curves[1])
+      assert_curve_values([0, -1, {p1: [1, -0.548584], p2: [0.548584, -1]}], curves[0])
+      assert_curve_values([-1, 0, {p1: [-0.548584, -1], p2: [-1, -0.548584]}], curves[1])
     end
   end

data/test/hexapdf/content/test_canvas.rb CHANGED

@@ -531,7 +531,7 @@ describe HexaPDF::Content::Canvas do
     end
     it "invokes the polygon method when radius != 0" do
-      args = [0, 0, 10, 0, 10, 10, 0, 10, radius: 5]
+      args = [0, 0, 10, 0, 10, 10, 0, 10, {radius: 5}]
       assert_method_invoked(@canvas, :polygon, args) do
         @canvas.rectangle(0, 0, 10, 10, radius: 5)
       end
@@ -631,7 +631,7 @@ describe HexaPDF::Content::Canvas do
   describe "circle" do
     it "uses arc for the hard work" do
-      assert_method_invoked(@canvas, :arc, [5, 6, a: 7]) do
+      assert_method_invoked(@canvas, :arc, [5, 6, {a: 7}]) do
         @canvas.graphics_object = :path
         @canvas.circle(5, 6, 7)
       end
@@ -651,7 +651,7 @@ describe HexaPDF::Content::Canvas do
   describe "ellipse" do
     it "uses arc for the hard work" do
-      assert_method_invoked(@canvas, :ellipse, [5, 6, a: 7, b: 5, inclination: 10]) do
+      assert_method_invoked(@canvas, :ellipse, [5, 6, {a: 7, b: 5, inclination: 10}]) do
         @canvas.ellipse(5, 6, a: 7, b: 5, inclination: 10)
       end
     end

data/test/hexapdf/content/test_color_space.rb CHANGED

@@ -155,7 +155,7 @@ describe HexaPDF::Content::ColorSpace::DeviceGray do
   before do
     @color_space = HexaPDF::Content::ColorSpace::DeviceGray.new
-    @color_space_family = @color_space_definition =  :DeviceGray
+    @color_space_family = @color_space_definition = :DeviceGray
     @color = @color_space.default_color
     @other_color = @color_space.color(128)
     @colors = [128]

data/test/hexapdf/encryption/test_aes.rb CHANGED

@@ -101,13 +101,13 @@ describe HexaPDF::Encryption::AES do
       result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
       assert_equal('a' * 16, result)
-      f = Fiber.new { 'a' * 31 + "\x00" }
+      f = Fiber.new { 'a' * 31 << "\x00" }
       result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
-      assert_equal('a' * 15 + "\x00", result)
+      assert_equal('a' * 15 << "\x00", result)
-      f = Fiber.new { 'a' * 29 + "\x00\x01\x03" }
+      f = Fiber.new { 'a' * 29 << "\x00\x01\x03" }
       result = TestHelper.collector(@algorithm_class.decryption_fiber('some' * 4, f))
-      assert_equal('a' * 13 + "\x00\x01\x03", result)
+      assert_equal('a' * 13 << "\x00\x01\x03", result)
     end
     it "fails on decryption if not enough bytes are provided" do

data/test/hexapdf/encryption/test_standard_security_handler.rb CHANGED

@@ -53,24 +53,24 @@ describe HexaPDF::Encryption::StandardEncryptionDictionary do
 end
 describe HexaPDF::Encryption::StandardSecurityHandler do
-  TEST_FILES = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
-  USER_PASSWORD = 'uhexapdf'
-  OWNER_PASSWORD = 'ohexapdf'
+  test_files = Dir[File.join(TEST_DATA_DIR, 'standard-security-handler', '*.pdf')].sort
+  user_password = 'uhexapdf'
+  owner_password = 'ohexapdf'
-  MINIMAL_DOC = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
+  minimal_doc = HexaPDF::Document.new(io: StringIO.new(MINIMAL_PDF))
-  TEST_FILES.each do |file|
+  test_files.each do |file|
     basename = File.basename(file)
     it "can decrypt, encrypt and decrypt the encrypted file #{basename} with the user password" do
       begin
         doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
-                                    decryption_opts: {password: USER_PASSWORD})
-        assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
+                                    decryption_opts: {password: user_password})
+        assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
         out = StringIO.new(''.b)
         HexaPDF::Writer.new(doc, out).write
-        doc = HexaPDF::Document.new(io: out, decryption_opts: {password: USER_PASSWORD})
-        assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
+        doc = HexaPDF::Document.new(io: out, decryption_opts: {password: user_password})
+        assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
       rescue HexaPDF::EncryptionError => e
         flunk("Error processing #{basename}: #{e}")
       end
@@ -80,8 +80,8 @@ describe HexaPDF::Encryption::StandardSecurityHandler do
       it "can decrypt the encrypted file #{basename} with the owner password" do
         begin
           doc = HexaPDF::Document.new(io: StringIO.new(File.binread(file)),
-                                      decryption_opts: {password: OWNER_PASSWORD})
-          assert_equal(MINIMAL_DOC.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
+                                      decryption_opts: {password: owner_password})
+          assert_equal(minimal_doc.trailer[:Info][:ModDate], doc.trailer[:Info][:ModDate])
         rescue HexaPDF::EncryptionError => e
           flunk("Error processing #{basename}: #{e}")
         end

data/test/hexapdf/filter/test_ascii85_decode.rb CHANGED

@@ -33,7 +33,7 @@ describe HexaPDF::Filter::ASCII85Decode do
     end
     it "ignores data after the EOD marker" do
-      assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded.dup + "~>abcdefg"))))
+      assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << "~>abcdefg"))))
     end
     it "fails if the input contains invalid characters" do

data/test/hexapdf/filter/test_ascii_hex_decode.rb CHANGED

@@ -24,7 +24,7 @@ describe HexaPDF::Filter::ASCIIHexDecode do
     end
     it "ignores data after the EOD marker" do
-      assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded + '4e6f7gzz'))))
+      assert_equal(@decoded, collector(@obj.decoder(feeder(@encoded << '4e6f7gzz'))))
     end
     it "assumes the missing char is '0' if the input length is odd" do

data/test/hexapdf/layout/test_text_layouter.rb CHANGED

@@ -674,10 +674,9 @@ describe HexaPDF::Layout::TextLayouter do
       pos = [0, 0]
       result.select! {|name, _| name == :set_text_matrix || name == :move_text_next_line }.
         map! do |name, ops|
-        if name == :set_text_matrix
-          pos = ops[-2, 2]
-        elsif name == :move_text_next_line
-          pos[1] -= leading
+        case name
+        when :set_text_matrix then pos = ops[-2, 2]
+        when :move_text_next_line then pos[1] -= leading
         end
         pos.dup
       end

data/test/hexapdf/test_configuration.rb CHANGED

@@ -66,8 +66,8 @@ describe HexaPDF::Configuration do
       assert_equal(HexaPDF, @config.constantize('test', 1))
     end
-    def assert_constantize_error # :nodoc:
-      exp = assert_raises(HexaPDF::Error) { yield }
+    def assert_constantize_error(&block) # :nodoc:
+      exp = assert_raises(HexaPDF::Error, &block)
       assert_match(/Error getting constant for configuration option/, exp.message)
     end

data/test/hexapdf/test_dictionary.rb CHANGED

@@ -14,7 +14,9 @@ describe HexaPDF::Dictionary do
   end
   def add(obj)
-    HexaPDF::Object.new(obj, oid: 1)
+    klass = HexaPDF::Object
+    klass = HexaPDF::Dictionary if obj.kind_of?(HexaPDF::Dictionary) || obj.kind_of?(Hash)
+    klass.new(obj, oid: 1)
   end
   def delete(_obj)

data/test/hexapdf/test_dictionary_fields.rb CHANGED

@@ -222,7 +222,7 @@ describe HexaPDF::DictionaryFields do
     it "allows conversion to a Rectangle from an Array" do
       doc = Minitest::Mock.new
-      doc.expect(:wrap, :data, [[0, 1, 2, 3], type: HexaPDF::Rectangle])
+      doc.expect(:wrap, :data, [[0, 1, 2, 3], {type: HexaPDF::Rectangle}])
       @field.convert([0, 1, 2, 3], doc)
       doc.verify
     end
@@ -230,7 +230,7 @@ describe HexaPDF::DictionaryFields do
     it "allows conversion to a Rectangle from a HexaPDF::PDFArray" do
       data = HexaPDF::PDFArray.new([0, 1, 2, 3])
       doc = Minitest::Mock.new
-      doc.expect(:wrap, :data, [data, type: HexaPDF::Rectangle])
+      doc.expect(:wrap, :data, [data, {type: HexaPDF::Rectangle}])
       @field.convert(data, doc)
       doc.verify
     end

data/test/hexapdf/test_document.rb CHANGED

@@ -441,21 +441,21 @@ describe HexaPDF::Document do
   describe "validate" do
     before do
-      @doc.trailer.validate # to create a valid document
+      @doc.validate # to create a valid document
     end
     it "validates indirect objects" do
-      obj = @doc.add({Type: :Catalog})
+      obj = @doc.add({Type: :Page, MediaBox: [1, 1, 1, 1], Parent: @doc.pages.root})
       refute(@doc.validate(auto_correct: false))
       called = false
-      assert(@doc.validate {|o| assert_same(obj, o); called = true })
+      assert(@doc.validate {|_, _, o| assert_same(obj, o); called = true })
       assert(called)
     end
     it "validates the trailer object" do
       @doc.trailer[:ID] = :Symbol
-      refute(@doc.validate {|obj| assert_same(@doc.trailer, obj) })
+      refute(@doc.validate {|_, _, obj| assert_same(@doc.trailer, obj) })
     end
     it "validates only loaded objects" do

data/test/hexapdf/test_object.rb CHANGED

@@ -6,15 +6,6 @@ require 'hexapdf/reference'
 describe HexaPDF::Object do
   describe "class.deep_copy" do
-    it "handles not-duplicatable classes" do
-      assert_equal(5, HexaPDF::Object.deep_copy(5))
-      assert_equal(5.5, HexaPDF::Object.deep_copy(5.5))
-      assert_nil(HexaPDF::Object.deep_copy(nil))
-      assert_equal(true, HexaPDF::Object.deep_copy(true))
-      assert_equal(false, HexaPDF::Object.deep_copy(false))
-      assert_equal(:Name, HexaPDF::Object.deep_copy(:Name))
-    end
     it "handles general, duplicatable classes" do
       x = "test"
       assert_equal("test", HexaPDF::Object.deep_copy(x))
@@ -103,30 +94,57 @@ describe HexaPDF::Object do
   end
   describe "validate" do
-    it "invokes perform_validation correctly via #validate" do
-      obj = HexaPDF::Object.new(5)
-      invoked = {}
-      obj.define_singleton_method(:perform_validation) do |&block|
-        invoked[:method] = true
+    before do
+      @obj = HexaPDF::Object.new(5)
+    end
+    it "invokes perform_validation correctly" do
+      invoked = false
+      @obj.define_singleton_method(:perform_validation) { invoked = true }
+      assert(@obj.validate)
+      assert(invoked)
+    end
+    it "yields all arguments yieled by perform_validation" do
+      invoked = []
+      @obj.define_singleton_method(:perform_validation) do |&block|
+        block.call("error", true, :object)
+      end
+      assert(@obj.validate {|*a| invoked << a })
+      assert_equal([["error", true, :object]], invoked)
+    end
+    it "provides self as third argument if none is yielded by perform_validation" do
+      invoked = []
+      @obj.define_singleton_method(:perform_validation) do |&block|
         block.call("error", true)
       end
-      assert(obj.validate {|*a| invoked[:block] = a })
-      assert_equal([:method, :block], invoked.keys)
-      assert_equal(["error", true], invoked[:block])
+      assert(@obj.validate {|*a| invoked << a })
+      assert_equal([["error", true, @obj]], invoked)
+    end
-      refute(obj.validate(auto_correct: false))
+    it "yields all problems when auto_correct is true" do
+      invoked = []
+      @obj.define_singleton_method(:perform_validation) do |&block|
+        invoked << :before
+        block.call("error", false)
+        invoked << :after
+        block.call("error2", true)
+        invoked << :last
+      end
+      refute(@obj.validate)
+      assert_equal([:before, :after, :last], invoked)
     end
-    it "stops validating on an uncorrectable problem" do
-      obj = HexaPDF::Object.new(5)
-      invoked = {}
-      obj.define_singleton_method(:perform_validation) do |&block|
-        invoked[:before] = true
+    it "stops at the first uncorrectable problem if auto_correct is false" do
+      invoked = []
+      @obj.define_singleton_method(:perform_validation) do |&block|
+        invoked << :before
         block.call("error", false)
-        invoked[:after] = true
+        invoked << :after
       end
-      refute(obj.validate {|*a| invoked[:block] = a })
-      refute(invoked.key?(:after))
+      refute(@obj.validate(auto_correct: false))
+      assert_equal([:before], invoked)
     end
   end

data/test/hexapdf/test_parser.rb CHANGED

@@ -8,6 +8,7 @@ require 'stringio'
 describe HexaPDF::Parser do
   before do
     @document = HexaPDF::Document.new
+    @document.config['parser.try_xref_reconstruction'] = false
     @document.add(@document.wrap(10, oid: 1, gen: 0))
     create_parser(<<~EOF)
@@ -132,6 +133,48 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
       assert_match(/stream.*followed by.*endstream/i, exp.message)
     end
+    describe "with strict parsing" do
+      before do
+        @document.config['parser.on_correctable_error'] = proc { true }
+      end
+      it "fails if an empty indirect object is found" do
+        create_parser("1 0 obj\nendobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/no indirect object value/i, exp.message)
+      end
+      it "fails if keyword stream is followed only by CR without LF" do
+        create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/not CR alone/, exp.message)
+      end
+      it "fails if the stream length value is invalid" do
+        create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/invalid stream length/i, exp.message)
+      end
+      it "fails if the keyword endobj is mangled" do
+        create_parser("1 0 obj\n<< >>\nendobjd\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/keyword endobj/, exp.message)
+      end
+      it "fails if the keyword endobj is missing" do
+        create_parser("1 0 obj\n<< >>")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/keyword endobj/, exp.message)
+      end
+      it "fails if there is data between 'endstream' and 'endobj'" do
+        create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
+        assert_match(/keyword endobj/, exp.message)
+      end
+    end
   end
   describe "load_object" do
@@ -205,7 +248,7 @@ describe HexaPDF::Parser do
     end
     it "ignores garbage at the end of the file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
       assert_equal(5, @parser.startxref_offset)
     end
@@ -215,9 +258,9 @@ describe HexaPDF::Parser do
     end
     it "finds the startxref anywhere in file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
       assert_equal(5, @parser.startxref_offset)
-      create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
+      create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
       assert_equal(5, @parser.startxref_offset)
     end
@@ -242,6 +285,13 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
       assert_match(/missing startxref/, exp.message)
     end
+    it "fails on strict parsing if the startxref is not in the last part of the file" do
+      @document.config['parser.on_correctable_error'] = proc { true }
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
+      assert_match(/end-of-file marker not found/, exp.message)
+    end
   end
   describe "file_header_version" do
@@ -262,7 +312,7 @@ describe HexaPDF::Parser do
     end
     it "ignores junk at the beginning of the file and correctly calculates offset" do
-      create_parser("junk" * 200 + "\n%PDF-1.4\n")
+      create_parser("junk" * 200 << "\n%PDF-1.4\n")
       assert_equal('1.4', @parser.file_header_version)
       assert_equal(801, @parser.instance_variable_get(:@header_offset))
     end
@@ -318,6 +368,12 @@ describe HexaPDF::Parser do
       assert_match(/invalid cross-reference subsection/i, exp.message)
     end
+    it "fails if a sub section entry is mangled" do
+      create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+      assert_match(/invalid cross-reference entry/i, exp.message)
+    end
     it "fails if there is no trailer" do
       create_parser("xref\n0 1\n0000000000 00000 n \n")
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +385,30 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
       assert_match(/dictionary/, exp.message)
     end
+    describe "with strict parsing" do
+      before do
+        @document.config['parser.on_correctable_error'] = proc { true }
+      end
+      it "fails if xref type=n with offset=0" do
+        create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+      it " fails xref type=n with gen>65535" do
+        create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+      it "fails if trailing second whitespace is missing" do
+        create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+    end
   end
   describe "load_revision" do
@@ -348,75 +428,55 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
       assert_match(/not a cross-reference stream/, exp.message)
     end
-  end
-  describe "with strict parsing enabled" do
-    before do
+    it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
       @document.config['parser.on_correctable_error'] = proc { true }
+      create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
+                    "stream\n\x01\x0A\x00\nendstream endobj")
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
+      assert_match(/entry for itself/, exp.message)
     end
+  end
-    it "startxref_offset fails if the startxref is not in the last part of the file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
-      assert_match(/end-of-file marker not found/, exp.message)
-    end
-    it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
-      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference entry/i, exp.message)
-    end
-    it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
-      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference entry/i, exp.message)
+  describe "reconstruct_revision" do
+    before do
+      @document.config['parser.try_xref_reconstruction'] = true
+      @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
     end
-    it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
-      create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
+    it "serially parses the contents" do
+      create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if an empty indirect object is found" do
-      create_parser("1 0 obj\nendobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/no indirect object value/i, exp.message)
+    it "ignores parts where the starting line is split across lines" do
+      create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(5, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
-      create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/not CR alone/, exp.message)
+    it "ignores invalid objects" do
+      create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(4, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if the stream length value is invalid" do
-      create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/invalid stream length/i, exp.message)
+    it "ignores invalid lines" do
+      create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
-      create_parser("1 0 obj\n<< >>\nendobjd\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/keyword endobj/, exp.message)
-      create_parser("1 0 obj\n<< >>")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/keyword endobj/, exp.message)
+    it "uses the last trailer" do
+      create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
+      assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
     end
-    it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
-      create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
-      assert_match(/keyword endobj/, exp.message)
+    it "uses the first trailer in case of a linearized file" do
+      create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
+      assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
     end
-    it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
-      create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
-                    "stream\n\x01\x0A\x00\nendstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
-      assert_match(/entry for itself/, exp.message)
+    it "fails if no valid trailer is found" do
+      create_parser("1 0 obj\n5\nendobj")
+      assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
     end
   end
 end