RubyGems - hexapdf - Versions diffs - 0.12.3 → 0.14.3 - Mend

hexapdf 0.12.3 → 0.14.3

Files changed (103)

checksums.yaml +4 -4
data/CHANGELOG.md +132 -0
data/examples/019-acro_form.rb +41 -4
data/lib/hexapdf/cli/command.rb +4 -2
data/lib/hexapdf/cli/image2pdf.rb +2 -1
data/lib/hexapdf/cli/info.rb +51 -2
data/lib/hexapdf/cli/inspect.rb +30 -8
data/lib/hexapdf/cli/merge.rb +1 -1
data/lib/hexapdf/cli/split.rb +74 -14
data/lib/hexapdf/configuration.rb +15 -0
data/lib/hexapdf/content/graphic_object/arc.rb +3 -3
data/lib/hexapdf/dictionary.rb +12 -6
data/lib/hexapdf/dictionary_fields.rb +2 -10
data/lib/hexapdf/document.rb +41 -16
data/lib/hexapdf/document/files.rb +0 -1
data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
data/lib/hexapdf/encryption/security_handler.rb +1 -0
data/lib/hexapdf/encryption/standard_security_handler.rb +1 -0
data/lib/hexapdf/font/cmap.rb +1 -4
data/lib/hexapdf/font/true_type/subsetter.rb +16 -3
data/lib/hexapdf/font/true_type/table/head.rb +1 -0
data/lib/hexapdf/font/true_type/table/os2.rb +2 -0
data/lib/hexapdf/font/true_type/table/post.rb +15 -10
data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
data/lib/hexapdf/font_loader/from_file.rb +18 -8
data/lib/hexapdf/image_loader/png.rb +3 -2
data/lib/hexapdf/importer.rb +3 -2
data/lib/hexapdf/layout/line.rb +1 -1
data/lib/hexapdf/layout/style.rb +23 -23
data/lib/hexapdf/layout/text_layouter.rb +2 -2
data/lib/hexapdf/layout/text_shaper.rb +3 -2
data/lib/hexapdf/object.rb +52 -25
data/lib/hexapdf/parser.rb +107 -7
data/lib/hexapdf/pdf_array.rb +15 -5
data/lib/hexapdf/revisions.rb +29 -21
data/lib/hexapdf/serializer.rb +37 -10
data/lib/hexapdf/task/optimize.rb +6 -4
data/lib/hexapdf/tokenizer.rb +22 -0
data/lib/hexapdf/type/acro_form/appearance_generator.rb +130 -27
data/lib/hexapdf/type/acro_form/button_field.rb +5 -2
data/lib/hexapdf/type/acro_form/choice_field.rb +68 -14
data/lib/hexapdf/type/acro_form/field.rb +35 -5
data/lib/hexapdf/type/acro_form/form.rb +139 -14
data/lib/hexapdf/type/acro_form/text_field.rb +70 -4
data/lib/hexapdf/type/actions/uri.rb +3 -2
data/lib/hexapdf/type/annotations/widget.rb +3 -4
data/lib/hexapdf/type/catalog.rb +2 -2
data/lib/hexapdf/type/cid_font.rb +1 -1
data/lib/hexapdf/type/file_specification.rb +1 -1
data/lib/hexapdf/type/font.rb +1 -1
data/lib/hexapdf/type/font_simple.rb +4 -2
data/lib/hexapdf/type/font_true_type.rb +6 -2
data/lib/hexapdf/type/font_type0.rb +4 -4
data/lib/hexapdf/type/form.rb +6 -2
data/lib/hexapdf/type/image.rb +2 -2
data/lib/hexapdf/type/page.rb +21 -12
data/lib/hexapdf/type/page_tree_node.rb +29 -5
data/lib/hexapdf/type/resources.rb +5 -0
data/lib/hexapdf/type/trailer.rb +2 -3
data/lib/hexapdf/utils/object_hash.rb +0 -1
data/lib/hexapdf/utils/sorted_tree_node.rb +18 -15
data/lib/hexapdf/version.rb +1 -1
data/test/hexapdf/common_tokenizer_tests.rb +2 -2
data/test/hexapdf/content/graphic_object/test_arc.rb +4 -4
data/test/hexapdf/content/test_canvas.rb +3 -3
data/test/hexapdf/content/test_color_space.rb +1 -1
data/test/hexapdf/encryption/test_aes.rb +4 -4
data/test/hexapdf/encryption/test_standard_security_handler.rb +11 -11
data/test/hexapdf/filter/test_ascii85_decode.rb +1 -1
data/test/hexapdf/filter/test_ascii_hex_decode.rb +1 -1
data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
data/test/hexapdf/font/true_type/test_subsetter.rb +10 -0
data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
data/test/hexapdf/font_loader/test_from_file.rb +7 -0
data/test/hexapdf/layout/test_text_layouter.rb +12 -5
data/test/hexapdf/test_configuration.rb +2 -2
data/test/hexapdf/test_dictionary.rb +8 -1
data/test/hexapdf/test_dictionary_fields.rb +9 -2
data/test/hexapdf/test_document.rb +18 -10
data/test/hexapdf/test_object.rb +71 -26
data/test/hexapdf/test_parser.rb +205 -51
data/test/hexapdf/test_pdf_array.rb +8 -1
data/test/hexapdf/test_revisions.rb +35 -0
data/test/hexapdf/test_serializer.rb +7 -0
data/test/hexapdf/test_tokenizer.rb +28 -0
data/test/hexapdf/test_writer.rb +2 -2
data/test/hexapdf/type/acro_form/test_appearance_generator.rb +288 -35
data/test/hexapdf/type/acro_form/test_button_field.rb +15 -0
data/test/hexapdf/type/acro_form/test_choice_field.rb +92 -9
data/test/hexapdf/type/acro_form/test_field.rb +39 -0
data/test/hexapdf/type/acro_form/test_form.rb +87 -15
data/test/hexapdf/type/acro_form/test_text_field.rb +77 -1
data/test/hexapdf/type/test_font_simple.rb +2 -1
data/test/hexapdf/type/test_font_true_type.rb +6 -0
data/test/hexapdf/type/test_form.rb +8 -1
data/test/hexapdf/type/test_page.rb +8 -1
data/test/hexapdf/type/test_page_tree_node.rb +42 -0
data/test/hexapdf/type/test_resources.rb +6 -0
data/test/hexapdf/utils/test_bit_field.rb +2 -0
data/test/hexapdf/utils/test_object_hash.rb +5 -0
data/test/hexapdf/utils/test_sorted_tree_node.rb +10 -9
data/test/test_helper.rb +2 -0
metadata +6 -12

data/test/hexapdf/test_parser.rb CHANGED

@@ -8,6 +8,7 @@ require 'stringio'
 describe HexaPDF::Parser do
   before do
     @document = HexaPDF::Document.new
+    @document.config['parser.try_xref_reconstruction'] = false
     @document.add(@document.wrap(10, oid: 1, gen: 0))
     create_parser(<<~EOF)
@@ -87,6 +88,18 @@ describe HexaPDF::Parser do
       assert_equal('12', TestHelper.collector(stream.fiber))
     end
+    it "handles keyword stream followed by space and CR or LF" do
+      create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
+      *, stream = @parser.parse_indirect_object
+      assert_equal('12', TestHelper.collector(stream.fiber))
+    end
+    it "handles invalid indirect object value consisting of number followed by endobj without space" do
+      create_parser("1 0 obj 749endobj")
+      object, * = @parser.parse_indirect_object
+      assert_equal(749, object)
+    end
     it "recovers from an invalid stream length value" do
       create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
       obj, _, _, stream = @parser.parse_indirect_object
@@ -132,6 +145,60 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
       assert_match(/stream.*followed by.*endstream/i, exp.message)
     end
+    describe "with strict parsing" do
+      before do
+        @document.config['parser.on_correctable_error'] = proc { true }
+      end
+      it "fails if an empty indirect object is found" do
+        create_parser("1 0 obj\nendobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/no indirect object value/i, exp.message)
+      end
+      it "fails if keyword stream is followed only by CR without LF" do
+        create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/not CR alone/, exp.message)
+      end
+      it "fails if keyword stream is followed by space and CR or LF instead of LF or CR/LF" do
+        create_parser("1 0 obj<</Length 2>> stream \n12\nendstream endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/must be followed by LF or CR\/LF/, exp.message)
+      end
+      it "fails for numbers followed by endobj without space" do
+        create_parser("1 0 obj 749endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/Invalid object value after 'obj'/, exp.message)
+      end
+      it "fails if the stream length value is invalid" do
+        create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/invalid stream length/i, exp.message)
+      end
+      it "fails if the keyword endobj is mangled" do
+        create_parser("1 0 obj\n<< >>\nendobjd\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/keyword endobj/, exp.message)
+      end
+      it "fails if the keyword endobj is missing" do
+        create_parser("1 0 obj\n<< >>")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/keyword endobj/, exp.message)
+      end
+      it "fails if there is data between 'endstream' and 'endobj'" do
+        create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
+        assert_match(/keyword endobj/, exp.message)
+      end
+    end
   end
   describe "load_object" do
@@ -167,6 +234,23 @@ describe HexaPDF::Parser do
       assert_equal([1, 2], obj.value)
     end
+    it "handles an invalid indirect object offset of 0" do
+      obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
+      assert(obj.null?)
+      assert_equal(2, obj.oid)
+      assert_equal(0, obj.gen)
+    end
+    describe "with strict parsing" do
+      it "raises an error if an indirect object has an offset of 0" do
+        @document.config['parser.on_correctable_error'] = proc { true }
+        exp = assert_raises(HexaPDF::MalformedPDFError) do
+          @parser.load_object(HexaPDF::XRefSection.in_use_entry(2, 0, 0))
+        end
+        assert_match(/has offset 0/, exp.message)
+      end
+    end
     it "fails if another object is found instead of an object stream" do
       def (@document).object(_oid)
         :invalid
@@ -205,7 +289,7 @@ describe HexaPDF::Parser do
     end
     it "ignores garbage at the end of the file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 150)
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 150)
       assert_equal(5, @parser.startxref_offset)
     end
@@ -215,9 +299,9 @@ describe HexaPDF::Parser do
     end
     it "finds the startxref anywhere in file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
       assert_equal(5, @parser.startxref_offset)
-      create_parser("startxref\n5\n%%EOF\n" + "h" * 1017)
+      create_parser("startxref\n5\n%%EOF\n" << "h" * 1017)
       assert_equal(5, @parser.startxref_offset)
     end
@@ -242,6 +326,13 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
       assert_match(/missing startxref/, exp.message)
     end
+    it "fails on strict parsing if the startxref is not in the last part of the file" do
+      @document.config['parser.on_correctable_error'] = proc { true }
+      create_parser("startxref\n5\n%%EOF" << "\nhallo" * 5000)
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
+      assert_match(/end-of-file marker not found/, exp.message)
+    end
   end
   describe "file_header_version" do
@@ -262,7 +353,7 @@ describe HexaPDF::Parser do
     end
     it "ignores junk at the beginning of the file and correctly calculates offset" do
-      create_parser("junk" * 200 + "\n%PDF-1.4\n")
+      create_parser("junk" * 200 << "\n%PDF-1.4\n")
       assert_equal('1.4', @parser.file_header_version)
       assert_equal(801, @parser.instance_variable_get(:@header_offset))
     end
@@ -318,6 +409,12 @@ describe HexaPDF::Parser do
       assert_match(/invalid cross-reference subsection/i, exp.message)
     end
+    it "fails if a sub section entry is mangled" do
+      create_parser("xref\n0 2\n000a000000 00000 n\n0000000000 65535 n\ntrailer\n<<>>\n")
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+      assert_match(/invalid cross-reference entry/i, exp.message)
+    end
     it "fails if there is no trailer" do
       create_parser("xref\n0 1\n0000000000 00000 n \n")
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
@@ -329,6 +426,71 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
       assert_match(/dictionary/, exp.message)
     end
+    describe "invalid numbering of main xref section" do
+      it "handles the xref if the numbering is off by N" do
+        create_parser(" 1 0 obj 1 endobj\n" \
+          "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
+        section, _trailer = @parser.parse_xref_section_and_trailer(17)
+        assert_equal(HexaPDF::XRefSection.in_use_entry(1, 0, 1), section[1])
+      end
+      it "fails if the first entry is not the one for oid=0" do
+        create_parser(" 1 0 obj 1 endobj\n" \
+          "xref\n1 2\n0000000000 00005 f \n0000000001 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
+        assert_match(/Main.*invalid numbering/i, exp.message)
+        create_parser(" 1 0 obj 1 endobj\n" \
+          "xref\n1 2\n0000000001 00000 n \n0000000001 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
+        assert_match(/Main.*invalid numbering/i, exp.message)
+      end
+      it "fails if the tested entry position is invalid" do
+        create_parser(" 1 0 obj 1 endobj\n" \
+          "xref\n1 2\n0000000000 65535 f \n0000000005 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
+        assert_match(/Main.*invalid numbering/i, exp.message)
+      end
+      it "fails if the tested entry position's oid doesn't match the corrected entry oid" do
+        create_parser(" 2 0 obj 1 endobj\n" \
+          "xref\n1 2\n0000000000 65535 f \n0000000001 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(17) }
+        assert_match(/Main.*invalid numbering/i, exp.message)
+      end
+    end
+    describe "with strict parsing" do
+      before do
+        @document.config['parser.on_correctable_error'] = proc { true }
+      end
+      it "fails if xref type=n with offset=0" do
+        create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+      it " fails xref type=n with gen>65535" do
+        create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+      it "fails if trailing second whitespace is missing" do
+        create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/invalid.*cross-reference entry/i, exp.message)
+      end
+      it "fails if the main cross-reference section has invalid numbering" do
+        create_parser("xref\n1 1\n0000000001 00000 n \ntrailer\n<<>>\n")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
+        assert_match(/Main.*invalid numbering/i, exp.message)
+      end
+    end
   end
   describe "load_revision" do
@@ -348,75 +510,67 @@ describe HexaPDF::Parser do
       exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(10) }
       assert_match(/not a cross-reference stream/, exp.message)
     end
-  end
-  describe "with strict parsing enabled" do
-    before do
+    it "fails on strict parsing if the cross-reference stream doesn't contain an entry for itself" do
       @document.config['parser.on_correctable_error'] = proc { true }
+      create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
+                    "stream\n\x01\x0A\x00\nendstream endobj")
+      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
+      assert_match(/entry for itself/, exp.message)
     end
+  end
-    it "startxref_offset fails if the startxref is not in the last part of the file" do
-      create_parser("startxref\n5\n%%EOF" + "\nhallo" * 5000)
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.startxref_offset }
-      assert_match(/end-of-file marker not found/, exp.message)
+  describe "reconstruct_revision" do
+    before do
+      @document.config['parser.try_xref_reconstruction'] = true
+      @xref = HexaPDF::XRefSection.in_use_entry(1, 0, 100)
     end
-    it "parse_xref_section_and_trailer fails if xref type=n with offset=0" do
-      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 00000 n \ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference entry/i, exp.message)
+    it "serially parses the contents" do
+      create_parser("1 0 obj\n5\nendobj\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
     end
-    it "parse_xref_section_and_trailer fails xref type=n with gen>65535" do
-      create_parser("xref\n0 2\n0000000000 00000 n \n0000000000 65536 n \ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference entry/i, exp.message)
+    it "ignores parts where the starting line is split across lines" do
+      create_parser("1 0 obj\n5\nendobj\n1 0\nobj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(5, @parser.load_object(@xref).value)
     end
-    it "parse_xref_section_and_trailer fails if trailing second whitespace is missing" do
-      create_parser("xref\n0 1\n0000000000 00000 n\ntrailer\n<<>>\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_xref_section_and_trailer(0) }
-      assert_match(/invalid.*cross-reference subsection entry/i, exp.message)
+    it "handles cases where the line contains an invalid string that exceeds the read buffer" do
+      create_parser("(1" << "(abc" * 32188 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if an empty indirect object is found" do
-      create_parser("1 0 obj\nendobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/no indirect object value/i, exp.message)
+    it "handles pathalogical cases which contain many opened literal strings" do
+      time = Time.now
+      create_parser("(1" << "(abc\n" * 10000 << "\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
+      assert(Time.now - time < 0.5, "Xref reconstruction takes too long")
     end
-    it "parse_indirect_object fails if keyword stream is followed only by CR without LF" do
-      create_parser("1 0 obj<</Length 2>> stream\r12\nendstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/not CR alone/, exp.message)
+    it "ignores invalid objects" do
+      create_parser("1 x obj\n5\nendobj\n1 0 xobj\n6\nendobj\n1 0 obj 4\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(4, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if the stream length value is invalid" do
-      create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/invalid stream length/i, exp.message)
+    it "ignores invalid lines" do
+      create_parser("1 0 obj\n5\nendobj\nhello there\n1 0 obj\n6\nendobj\ntrailer\n<</Size 1>>")
+      assert_equal(6, @parser.load_object(@xref).value)
     end
-    it "parse_indirect_object fails if the keyword endobj is missing or mangled" do
-      create_parser("1 0 obj\n<< >>\nendobjd\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/keyword endobj/, exp.message)
-      create_parser("1 0 obj\n<< >>")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
-      assert_match(/keyword endobj/, exp.message)
+    it "uses the last trailer" do
+      create_parser("trailer <</Size 1>>\ntrailer <</Size 2/Prev 342>>")
+      assert_equal({Size: 2}, @parser.reconstructed_revision.trailer.value)
     end
-    it "parse_indirect_object fails if there is data between 'endstream' and 'endobj'" do
-      create_parser("1 0 obj\n<< >>\nstream\nendstream\ntest\nendobj\n")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object(0) }
-      assert_match(/keyword endobj/, exp.message)
+    it "uses the first trailer in case of a linearized file" do
+      create_parser("trailer <</Size 1/Prev 342>>\ntrailer <</Size 2>>")
+      assert_equal({Size: 1}, @parser.reconstructed_revision.trailer.value)
     end
-    it "load_revision fails if the cross-reference stream doesn't contain an entry for itself" do
-      create_parser("2 0 obj\n<</Type/XRef/Length 3/W [1 1 1]/Size 1>>" \
-                    "stream\n\x01\x0A\x00\nendstream endobj")
-      exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.load_revision(0) }
-      assert_match(/entry for itself/, exp.message)
+    it "fails if no valid trailer is found" do
+      create_parser("1 0 obj\n5\nendobj")
+      assert_raises(HexaPDF::MalformedPDFError) { @parser.load_object(@xref) }
     end
   end
 end

data/test/hexapdf/test_pdf_array.rb CHANGED

@@ -107,6 +107,13 @@ describe HexaPDF::PDFArray do
     assert_equal([1, :data, @array[2]], @array[0, 5])
   end
+  it "allows deleting an object" do
+    obj = @array.value[1]
+    assert_same(obj, @array.delete(obj))
+    ref = HexaPDF::Object.new(:test, oid: 1)
+    assert_equal(ref, @array.delete(ref))
+  end
   describe "slice!" do
     it "allows deleting a single element" do
       @array.slice!(2)
@@ -157,6 +164,6 @@ describe HexaPDF::PDFArray do
   end
   it "can be converted to a simple array" do
-    assert_equal(@array.value, @array.to_ary)
+    assert_equal([1, :data, "deref", @array[3]], @array.to_ary)
   end
 end

data/test/hexapdf/test_revisions.rb CHANGED

@@ -158,4 +158,39 @@ describe HexaPDF::Revisions do
     doc = HexaPDF::Document.new(io: io)
     assert_equal(2, doc.revisions.count)
   end
+  it "uses the reconstructed revision if errors are found when loading from an IO" do
+    io = StringIO.new(<<~EOF)
+      %PDF-1.7
+      1 0 obj
+      10
+      endobj
+      xref
+      0 2
+      0000000000 65535 f
+      0000000009 00000 n
+      trailer
+      << /Size 5 >>
+      startxref
+      28
+      %%EOF
+      2 0 obj
+      300
+      endobj
+      xref
+      2 1
+      0000000301 00000 n
+        trailer
+      << /Size 3 /Prev 100>>
+      startxref
+      139
+      %%EOF
+    EOF
+    doc = HexaPDF::Document.new(io: io)
+    assert_equal(2, doc.revisions.count)
+    assert_same(doc.revisions[0].trailer.value, doc.revisions[1].trailer.value)
+  end
 end

data/test/hexapdf/test_serializer.rb CHANGED

@@ -153,6 +153,13 @@ describe HexaPDF::Serializer do
       assert_equal("<</Key(value)/Length 6>>stream\nsome\nendstream", io.string)
     end
+    it "doesn't reset the internal recursion flag if the stream is serialized as part of another object" do
+      object = HexaPDF::Dictionary.new({}, oid: 5)
+      object[:Stream] = @stream
+      object[:Self] = object # needs to be the last entry so that :Stream gets serialized first!
+      assert_serialized("<</Stream 2 0 R/Self 5 0 R>>", object)
+    end
     it "fails if a stream without object identifier is serialized" do
       @stream.oid = 0
       assert_raises(HexaPDF::Error) { @serializer.serialize(@stream) }

data/test/hexapdf/test_tokenizer.rb CHANGED

@@ -27,4 +27,32 @@ describe HexaPDF::Tokenizer do
       5.times {|i| assert_equal(i, @tokenizer.next_token) }
     end
   end
+  it "has a special token scanning method for use with xref reconstruction" do
+    create_tokenizer(<<-EOF.chomp.gsub(/^ {8}/, ''))
+        % Comment
+          true
+        123 50
+        obj
+        (ignored)
+        /Ignored
+        [/Ignored]
+        <</Ignored /Values>>
+    EOF
+    scan_to_newline = proc { @tokenizer.scan_until(/(\n|\r\n?)+|\z/) }
+    assert_nil(@tokenizer.next_integer_or_keyword)
+    scan_to_newline.call
+    assert_equal(true, @tokenizer.next_integer_or_keyword)
+    assert_equal(123, @tokenizer.next_integer_or_keyword)
+    assert_equal(50, @tokenizer.next_integer_or_keyword)
+    assert_equal('obj', @tokenizer.next_integer_or_keyword)
+    4.times do
+      assert_nil(@tokenizer.next_integer_or_keyword)
+      scan_to_newline.call
+    end
+    assert_equal(HexaPDF::Tokenizer::NO_MORE_TOKENS, @tokenizer.next_integer_or_keyword)
+  end
 end