RubyGems - hexapdf - Versions diffs - 1.0.0 → 1.0.1 - Mend

hexapdf 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml +4 -4
data/CHANGELOG.md +15 -0
data/lib/hexapdf/cli.rb +14 -1
data/lib/hexapdf/parser.rb +5 -1
data/lib/hexapdf/utils/sorted_tree_node.rb +12 -2
data/lib/hexapdf/version.rb +1 -1
data/lib/hexapdf/writer.rb +1 -0
data/lib/hexapdf/xref_section.rb +20 -4
data/test/hexapdf/test_parser.rb +16 -6
data/test/hexapdf/test_writer.rb +5 -5
data/test/hexapdf/test_xref_section.rb +15 -0
data/test/hexapdf/utils/test_sorted_tree_node.rb +7 -6
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2889ba1d03e2c351efd694b1583063023fff97c0da636ff5103f88538255735c
-  data.tar.gz: 6fb4727db05900e8fccba2ad4e093d1092e17305e5b5616ded97a76cf835673c
+  metadata.gz: 61b0fb56c6522f2af82eb8ffb10570c45bb11460cf4c048c1bdfe8d9daf71afe
+  data.tar.gz: 91cb053019c367825ac0799a84e4ddad837fe283a6ab2bc6df16ee9ed9f2456d
 SHA512:
-  metadata.gz: 00be8ed2c306a88e5bfc0eada97a7e6bf802ec269e832bb21b3521d4077b18ecad11946ddc6f8a6d575820e66339059e59ba2c4cdd2b74d6c7d6defd0f2f5256
-  data.tar.gz: 94c6a8178ead2a986921b72b07ef5dc388a5fa6a67945573eec921db30c9940d241d3f47591c8fbbf9bdcf313df0dda536f0fc78e0e946e27dfa3bc13dad9a28
+  metadata.gz: 9a71ee1e9307f0ef67c9dec108c7f68db45166a62f9b6ec60915ce2c089cf0e9ec5bfcd8d74e8b31b63238a09c820a0798689a84e5ea0b1577e2492e5a1d425e
+  data.tar.gz: b20043cead03f7fc7fe527fdbcb3674ab2d1da06b546bac9c1549b6eb6d143232453132709d93ae008d78a83bff36cf85fd0dbc0938da848e7847a1830e6011e

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,18 @@
+## 1.0.1 - 2024-11-04
+### Changed
+* Informational output on errors when running CLI commands to provide more
+  details
+### Fixed
+* Parsing of indirect objects the value of which is an indirect reference
+* Writing of the initial cross-reference section to ensure a single subsection
+* [HexaPDF::Utils::SortedTreeNode] to wrap all /Kids entries with the correct
+  type class
 ## 1.0.0 - 2024-10-26
 ### Added

data/lib/hexapdf/cli.rb CHANGED Viewed

@@ -64,8 +64,21 @@ module HexaPDF
     rescue StandardError => e
       $stderr.puts "Problem encountered: #{e.message}"
       unless e.kind_of?(HexaPDF::Error)
+        $stderr.puts "Backtrace (last 10 lines):"
+        $stderr.puts e.backtrace[0, 10]
+        $stderr.puts
         $stderr.puts "--> The problem might indicate a faulty PDF or a bug in HexaPDF."
-        $stderr.puts "--> Please report this at https://github.com/gettalong/hexapdf/issues - thanks!"
+        $stderr.puts "--> Please report this at"
+        $stderr.puts "-->"
+        $stderr.puts "-->     https://github.com/gettalong/hexapdf/issues"
+        $stderr.puts "-->"
+        $stderr.puts "--> and include the information above as well as the output of running"
+        $stderr.puts "--> the following command on the input PDF:"
+        $stderr.puts "-->"
+        $stderr.puts "-->     hexapdf info --check INPUT.PDF"
+        $stderr.puts "-->"
+        $stderr.puts "--> If possible, please also provide the input PDF."
+        $stderr.puts "--> Thanks!"
       end
       exit(1)
     end

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -116,7 +116,11 @@ module HexaPDF
                         "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
       end
-      @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+      if obj.kind_of?(Reference)
+        @document.deref(obj)
+      else
+        @document.wrap(obj, oid: oid, gen: gen, stream: stream)
+      end
     rescue HexaPDF::MalformedPDFError
       reconstructed_revision.object(xref_entry) ||
         @document.wrap(nil, oid: xref_entry.oid, gen: xref_entry.gen)

data/lib/hexapdf/utils/sorted_tree_node.rb CHANGED Viewed

@@ -174,6 +174,7 @@ module HexaPDF
           elsif node.key?(:Kids)
             index = find_in_intermediate_node(node[:Kids], key)
             node = node[:Kids][index]
+            node = document.wrap(node, type: self.class) if node
             break unless node && key >= node[:Limits][0] && key <= node[:Limits][1]
           else
             break
@@ -194,7 +195,7 @@ module HexaPDF
         container_name = leaf_node_container_name
         stack = [self]
         until stack.empty?
-          node = stack.pop
+          node = document.wrap(stack.pop, type: self.class)
           if node.key?(container_name)
             data = node[container_name]
             index = 0
@@ -217,7 +218,7 @@ module HexaPDF
       def path_to_key(node, key, stack)
         return unless node.key?(:Kids)
         index = find_in_intermediate_node(node[:Kids], key)
-        stack << node[:Kids][index]
+        stack << document.wrap(node[:Kids][index], type: self.class)
         path_to_key(stack.last, key, stack)
       end
@@ -307,6 +308,15 @@ module HexaPDF
         super
         container_name = leaf_node_container_name
+        if key?(:Kids)
+          self[:Kids].each do |kid|
+            unless kid.indirect?
+              yield("Children of sorted tree nodes must be indirect", true)
+              document.add(kid)
+            end
+          end
+        end
         # All keys of the container must be lexically ordered strings and the container must be
         # correctly formatted
         if key?(container_name)

data/lib/hexapdf/version.rb CHANGED Viewed

@@ -37,6 +37,6 @@
 module HexaPDF
   # The version of HexaPDF.
-  VERSION = '1.0.0'
+  VERSION = '1.0.1'
 end

data/lib/hexapdf/writer.rb CHANGED Viewed

@@ -149,6 +149,7 @@ module HexaPDF
       obj_to_stm = object_streams.each_with_object({}) {|stm, m| m.update(stm.write_objects(rev)) }
       xref_section = XRefSection.new
+      xref_section.mark_as_initial_section! unless previous_xref_pos
       xref_section.add_free_entry(0, 65535) if previous_xref_pos.nil?
       rev.each do |obj|
         if obj.null?

data/lib/hexapdf/xref_section.rb CHANGED Viewed

@@ -111,6 +111,13 @@ module HexaPDF
     # used.
     private :'[]='
+    # Marks this XRefSection object as being the first cross-reference section in a PDF file.
+    #
+    # This has the consequence that only a single sub-section is created.
+    def mark_as_initial_section!
+      @initial_section = true
+    end
     # Adds an in-use entry to the cross-reference section.
     #
     # See: ::in_use_entry
@@ -147,15 +154,24 @@ module HexaPDF
     # If this section contains no objects, a single empty array is yielded (corresponding to a
     # subsection with zero elements).
     #
-    # The subsections are dynamically generated based on the object numbers in this section.
+    # The subsections are dynamically generated based on the object numbers in this section. In case
+    # the section was marked as the initial section (see #mark_as_initial_section!) only a single
+    # subsection is yielded.
     def each_subsection
       return to_enum(__method__) unless block_given?
       temp = []
       oids.sort.each do |oid|
-        if !temp.empty? && temp[-1].oid + 1 != oid
-          yield(temp)
-          temp = []
+        expected_next_oid = !temp.empty? && temp[-1].oid + 1
+        if expected_next_oid && expected_next_oid != oid
+          if @initial_section
+            expected_next_oid.upto(oid - 1) do |free_oid|
+              temp << self.class.free_entry(free_oid, 0)
+            end
+          else
+            yield(temp)
+            temp = []
+          end
         end
         temp << self[oid]
       end

data/test/hexapdf/test_parser.rb CHANGED Viewed

@@ -33,18 +33,23 @@ describe HexaPDF::Parser do
       endstream
       endobj
+      5 0 obj
+      1 0 R
+      endobj
       xref
       0 4
       0000000000 65535 f
       0000000010 00000 n
       0000000029 00000 n
       0000000000 65535 f
-      3 1
+      3 2
       0000000556 00000 n
+      0000000308 00000 n
       trailer
       << /Test (now) >>
       startxref
-      308
+      330
       %%EOF
     EOF
   end
@@ -305,6 +310,11 @@ describe HexaPDF::Parser do
       assert_equal(0, obj.gen)
     end
+    it "handles the case of the value of an indirect object being an indirect reference" do
+      obj = @parser.load_object(HexaPDF::XRefSection.in_use_entry(5, 0, 308))
+      assert_equal(1, obj.oid)
+    end
     describe "with strict parsing" do
       it "raises an error if an indirect object has an offset of 0" do
         @document.config['parser.on_correctable_error'] = proc { true }
@@ -343,13 +353,13 @@ describe HexaPDF::Parser do
   describe "startxref_offset" do
     it "caches the offset value" do
-      assert_equal(308, @parser.startxref_offset)
-      @parser.instance_eval { @io }.string.sub!(/308\n/, "309\n")
-      assert_equal(308, @parser.startxref_offset)
+      assert_equal(330, @parser.startxref_offset)
+      @parser.instance_eval { @io }.string.sub!(/330\n/, "309\n")
+      assert_equal(330, @parser.startxref_offset)
     end
     it "returns the correct offset" do
-      assert_equal(308, @parser.startxref_offset)
+      assert_equal(330, @parser.startxref_offset)
     end
     it "ignores garbage at the end of the file" do

data/test/hexapdf/test_writer.rb CHANGED Viewed

@@ -53,8 +53,8 @@ describe HexaPDF::Writer do
     EOF
     xref_stream = case HexaPDF::VERSION.length
-                  when 5 then "x\xDAcbdlc``b`\xB0\x04\x93\x93\x19\x18\x00\f\x0F\x01["
-                  when 6 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x18\x18\x00\f\e\x01["
+                  when 5 then "x\xDAcbdlg``b`\xB0\x04\x93\x93\x19\x18\x00\f\x1E\x01\\"
+                  when 6 then "x\xDAcbd\xEC```b`\xB0\x04\x93\x93\x18\x18\x00\f*\x01\\"
                   else fail
                   end
     @compressed_input_io = StringIO.new(<<~EOF.force_encoding(Encoding::BINARY))
@@ -69,8 +69,8 @@ describe HexaPDF::Writer do
       20
       endobj
       3 0 obj
-      <</Size 6/Type/XRef/W[1 1 2]/Index[0 4 5 1]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 31>>stream
-      x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x88he`\x00\x00B4\x04\x1E
+      <</Size 6/Type/XRef/W[1 1 2]/Index[0 6]/Filter/FlateDecode/DecodeParms<</Columns 4/Predictor 12>>/Length 36>>stream
+      x\xDAcb`\xF8\xFF\x9F\x89\x89\x95\x91\x91\xE9\x7F\x19\x03\x03\x13\x83\x10\x90\xF8_\f\x14c\x14bd\x04\x00lk\a
       endstream
       endobj
       startxref
@@ -90,7 +90,7 @@ describe HexaPDF::Writer do
       endstream
       endobj
       startxref
-      #{442 + HexaPDF::VERSION.length}
+      #{443 + HexaPDF::VERSION.length}
       %%EOF
     EOF
   end

data/test/hexapdf/test_xref_section.rb CHANGED Viewed

@@ -57,5 +57,20 @@ describe HexaPDF::XRefSection do
       @xref_section.add_in_use_entry(20, 0, 0)
       assert_subsections([[1, 2], [10, 11], [20]])
     end
+    it "yields a single subsection if the section was marked as the initial one" do
+      @xref_section.mark_as_initial_section!
+      @xref_section.add_in_use_entry(6, 0, 0)
+      @xref_section.add_in_use_entry(7, 0, 0)
+      @xref_section.add_in_use_entry(9, 0, 0)
+      @xref_section.add_in_use_entry(1, 0, 0)
+      @xref_section.add_in_use_entry(2, 0, 0)
+      result = @xref_section.each_subsection.map {|s| s.map {|e| [e.oid, e.type] }}
+      assert_equal([[[1, :in_use], [2, :in_use],
+                     [3, :free], [4, :free], [5, :free],
+                     [6, :in_use], [7, :in_use],
+                     [8, :free],
+                     [9, :in_use]]], result)
+    end
   end
 end

data/test/hexapdf/utils/test_sorted_tree_node.rb CHANGED Viewed

@@ -12,10 +12,12 @@ describe HexaPDF::Utils::SortedTreeNode do
   end
   def add_multilevel_entries
-    @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', 1, 'f', 1]}, type: HexaPDF::NameTreeNode)
+    item = @doc.add(1)
+    @item_ref = HexaPDF::Reference.new(item.oid, item.gen)
+    @kid11 = @doc.add({Limits: ['c', 'f'], Names: ['c', @item_ref, 'f', 1]}, type: HexaPDF::NameTreeNode)
     @kid12 = @doc.add({Limits: ['i', 'm'], Names: ['i', 1, 'm', 1]}, type: HexaPDF::NameTreeNode)
     ref = HexaPDF::Reference.new(@kid11.oid, @kid11.gen)
-    @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]}, type: HexaPDF::NameTreeNode)
+    @kid1 = @doc.add({Limits: ['c', 'm'], Kids: [ref, @kid12]})
     @kid21 = @doc.add({Limits: ['o', 'q'], Names: ['o', 1, 'q', 1]}, type: HexaPDF::NameTreeNode)
     @kid221 = @doc.add({Limits: ['s', 'u'], Names: ['s', 1, 'u', 1]}, type: HexaPDF::NameTreeNode)
     @kid22 = @doc.add({Limits: ['s', 'u'], Kids: [@kid221]}, type: HexaPDF::NameTreeNode)
@@ -75,7 +77,7 @@ describe HexaPDF::Utils::SortedTreeNode do
       @root.add_entry('v', 1)
       assert_equal(['a', 'm'], @kid1[:Limits].value)
       assert_equal(['a', 'f'], @kid11[:Limits].value)
-      assert_equal(['a', 1, 'c', 1, 'e', 1, 'f', 1], @kid11[:Names].value)
+      assert_equal(['a', 1, 'c', @item_ref, 'e', 1, 'f', 1], @kid11[:Names].value)
       assert_equal(['g', 'm'], @kid12[:Limits].value)
       assert_equal(['g', 1, 'i', 1, 'j', 1, 'm', 1], @kid12[:Names].value)
       assert_equal(['n', 'v'], @kid2[:Limits].value)
@@ -203,13 +205,12 @@ describe HexaPDF::Utils::SortedTreeNode do
     end
     it "checks that all kid objects are indirect objects" do
-      @root[:Kids][0] = ref = HexaPDF::Reference.new(@kid1.oid, @kid1.gen)
       assert(@root.validate)
-      @root[:Kids][0] = ref
+      @root[:Kids][0] = @kid1
       @kid1.oid = 0
       assert(@root.validate do |message, c|
-               assert_match(/must be an indirect object/, message)
+               assert_match(/children.*must be indirect/i, message)
                assert(c)
              end)
       assert(@kid1.indirect?)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hexapdf
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.1
 platform: ruby
 authors:
 - Thomas Leitner
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-26 00:00:00.000000000 Z
+date: 2024-11-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cmdparse