RubyGems - hexapdf - Versions diffs - 0.14.0 → 0.14.1 - Mend

hexapdf 0.14.0 → 0.14.1

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (20)

checksums.yaml +4 -4
data/CHANGELOG.md +17 -0
data/lib/hexapdf/dictionary.rb +5 -2
data/lib/hexapdf/font/true_type/subsetter.rb +12 -3
data/lib/hexapdf/font/true_type/table/post.rb +15 -10
data/lib/hexapdf/font_loader/from_configuration.rb +2 -2
data/lib/hexapdf/font_loader/from_file.rb +18 -8
data/lib/hexapdf/importer.rb +3 -2
data/lib/hexapdf/parser.rb +9 -1
data/lib/hexapdf/pdf_array.rb +1 -1
data/lib/hexapdf/serializer.rb +33 -7
data/lib/hexapdf/version.rb +1 -1
data/test/hexapdf/font/true_type/table/test_post.rb +1 -1
data/test/hexapdf/font/true_type/test_subsetter.rb +5 -0
data/test/hexapdf/font_loader/test_from_configuration.rb +7 -3
data/test/hexapdf/font_loader/test_from_file.rb +7 -0
data/test/hexapdf/test_dictionary.rb +5 -0
data/test/hexapdf/test_parser.rb +12 -0
data/test/hexapdf/test_writer.rb +2 -2
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0cea16b918ff9aa6e7b32759295ef4ab38c899bcbd227d76ad42e0c971360239
-  data.tar.gz: 932c5edf01114a59d0a64776f304e29f3c8865a2c2c52c340064180464aabad7
+  metadata.gz: e4010e277168cec5c8cc5d584ec324064461e63756d18b538cd335235fe04e6d
+  data.tar.gz: 2b7a71463082a32605adee682c81cdde6b0eb48d360ca66249b08884f82e571b
 SHA512:
-  metadata.gz: 5883c5788487830b0403459b38b4ed1761c1015688977e9823f3c572f1ad645b06eb0578b185ce26f7f02c560050dd8ec7c09e8524b59cd35df4fd6abd1fb4aa
-  data.tar.gz: cdda51a089c86f27319fe424c9a74dc599ed60860338ef49958cd6a820141fa87a0624f2c657565e3f1b4a2392300807b89886178da6af62d22fa03fb543e372
+  metadata.gz: 5748273dc4dc532cd365598e25c4a9cc5872011d2eb638c2986050aeed0a68d2dc5769fda075eb60cbcb76fccbfb1a5b52c3c58581cb6e969978c17d770013e6
+  data.tar.gz: 0ab3abf80967804486fa1f50f186b508fd792acfbd8c47646fa7d0c5b0245161e2833620142b2f05a1ee73b01145016dca7bf7781d579284160c9d2dd2c78d0c

data/CHANGELOG.md CHANGED

@@ -1,3 +1,20 @@
+## 0.14.1 - 2021-01-21
+### Changed
+* Validation message when checking for allowed values to include the invalid
+  object
+* [HexaPDF::FontLoader::FromFile] to allow (re)using an existing font object
+* [HexaPDF::Importer] internals to avoid problems with retained memory
+### Fixed
+* Parsing of invalid PDF files where whitespace is missing after the integer
+  value of an indirect object
+* [HexaPDF::Dictionary] so that adding new key-value pairs during validation is
+  possible
 ## 0.14.0 - 2020-12-30
 ### Added

data/lib/hexapdf/dictionary.rb CHANGED

@@ -155,6 +155,9 @@ module HexaPDF
     #   available (see ::define_field).
     #
     # * Returns the default value if one is specified and no value is available.
+    #
+    # Note: This method may throw a "can't add a new key into hash during iteration" error in
+    # certain cases because it potentially modifies the underlying hash!
     def [](name)
       field = self.class.field(name)
       data = if key?(name)
@@ -255,7 +258,7 @@ module HexaPDF
     # Iterates over all currently set fields and those that are required.
     def each_set_key_or_required_field #:yields: name, field
-      value.each_key {|name| yield(name, self.class.field(name)) }
+      value.keys.each {|name| yield(name, self.class.field(name)) }
       self.class.each_field do |name, field|
         yield(name, field) if field.required? && !value.key?(name)
       end
@@ -301,7 +304,7 @@ module HexaPDF
         # Check the value of the field against the allowed values.
         if field.allowed_values && !field.allowed_values.include?(obj)
-          yield("Field #{name} does not contain an allowed value")
+          yield("Field #{name} does not contain an allowed value: #{obj.inspect}")
         end
         # Check if field value needs to be (in)direct

data/lib/hexapdf/font/true_type/subsetter.rb CHANGED

@@ -63,6 +63,12 @@ module HexaPDF
         def use_glyph(glyph_id)
           return @glyph_map[glyph_id] if @glyph_map.key?(glyph_id)
           @last_id += 1
+          # Handle codes for ASCII characters \r, (, ) and \ specially so that they never appear in
+          # the output (PDF serialization would need to escape them)
+          if @last_id == 13 || @last_id == 40 || @last_id == 41 || @last_id == 92
+            @glyph_map[:"s#{@last_id}"] = @last_id
+            @last_id += 1
+          end
           @glyph_map[glyph_id] = @last_id
         end
@@ -107,7 +113,7 @@ module HexaPDF
           locations = []
           @glyph_map.each_key do |old_gid|
-            glyph = orig_glyf[old_gid]
+            glyph = orig_glyf[old_gid.kind_of?(Symbol) ? 0 : old_gid]
             locations << table.size
             data = glyph.raw_data
             if glyph.compound?
@@ -134,7 +140,7 @@ module HexaPDF
           hmtx = @font[:hmtx]
           data = ''.b
           @glyph_map.each_key do |old_gid|
-            metric = hmtx[old_gid]
+            metric = hmtx[old_gid.kind_of?(Symbol) ? 0 : old_gid]
             data << [metric.advance_width, metric.left_side_bearing].pack('n2')
           end
           data
@@ -166,7 +172,10 @@ module HexaPDF
         # Adds the components of compound glyphs to the subset.
         def add_glyph_components
           glyf = @font[:glyf]
-          @glyph_map.keys.each {|gid| glyf[gid].components&.each {|cgid| use_glyph(cgid) } }
+          @glyph_map.keys.each do |gid|
+            next if gid.kind_of?(Symbol)
+            glyf[gid].components&.each {|cgid| use_glyph(cgid) }
+          end
         end
       end

data/lib/hexapdf/font/true_type/table/post.rb CHANGED

@@ -99,18 +99,23 @@ module HexaPDF
               @max_mem_type42, @min_mem_type1, @max_mem_type1 = read_formatted(24, 's>2N5')
             sub_table_length = directory_entry.length - 32
-            @glyph_names = case @format
-                           when 1 then Format1.parse(io, sub_table_length)
-                           when 2 then Format2.parse(io, sub_table_length)
-                           when 3 then Format3.parse(io, sub_table_length)
-                           when 4 then Format4.parse(io, sub_table_length)
-                           else
-                             if font.config['font.true_type.unknown_format'] == :raise
-                               raise HexaPDF::Error, "Unsupported post table format: #{@format}"
+            cur_pos = io.pos
+            @glyph_names = lambda do |glyph_id|
+              io.pos = cur_pos
+              @glyph_names = case @format
+                             when 1 then Format1.parse(io, sub_table_length)
+                             when 2 then Format2.parse(io, sub_table_length)
+                             when 3 then Format3.parse(io, sub_table_length)
+                             when 4 then Format4.parse(io, sub_table_length)
                              else
-                               []
+                               if font.config['font.true_type.unknown_format'] == :raise
+                                 raise HexaPDF::Error, "Unsupported post table format: #{@format}"
+                               else
+                                 []
+                               end
                              end
-                           end
+              @glyph_names[glyph_id]
+            end
           end
           # 'post' table format 1

data/lib/hexapdf/font_loader/from_configuration.rb CHANGED

@@ -63,8 +63,8 @@ module HexaPDF
         file = document.config['font.map'].dig(name, variant)
         return nil if file.nil?
-        unless File.file?(file)
-          raise HexaPDF::Error, "The configured font file #{file} does not exist"
+        unless file.kind_of?(HexaPDF::Font::TrueType::Font) || File.file?(file)
+          raise HexaPDF::Error, "The configured font file #{file} is not a valid value"
         end
         FromFile.call(document, file, subset: subset)
       end

data/lib/hexapdf/font_loader/from_file.rb CHANGED

@@ -39,26 +39,36 @@ require 'hexapdf/font/true_type_wrapper'
 module HexaPDF
   module FontLoader
-    # This module interprets the font name as file name and tries to load it.
+    # This module interprets the font name either as file name and tries to load it, or as font
+    # object to be wrapped directly.
     module FromFile
-      # Loads the given font by interpreting the font name as file name.
+      # :call-seq:
+      #   FromFile.call(document, file_name, subset: true, **)           -> wrapped_font
+      #   FromFile.call(document, font_object, subset: true, **)    -> wrapped_font
       #
-      # The file object representing the font file is *not* closed and if needed must be closed by
-      # the caller once the font is not needed anymore.
+      # Returns an appropriate font wrapper for the given file name or font object.
+      #
+      # If a file name is given, the file object representing the font file is *not* closed and if
+      # needed must be closed by the caller once the font is not needed anymore.
+      #
+      # The first form using a file name is easier to use in one-off cases. However, if multiple
+      # documents always refer to the same font, the second form is better to avoid re-parsing the
+      # font file.
       #
       # +document+::
       #     The PDF document to associate the font object with.
       #
-      # +name+::
-      #     The file name.
+      # +file_name+/+font_object+::
+      #     The file name or TrueType font object.
       #
       # +subset+::
       #     Specifies whether the font should be subset if possible.
       def self.call(document, name, subset: true, **)
-        return nil unless File.file?(name)
+        is_font = name.kind_of?(HexaPDF::Font::TrueType::Font)
+        return nil unless is_font || File.file?(name)
-        font = HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
+        font = is_font ? name : HexaPDF::Font::TrueType::Font.new(File.open(name, 'rb'))
         HexaPDF::Font::TrueTypeWrapper.new(document, font, subset: subset)
       end

data/lib/hexapdf/importer.rb CHANGED

@@ -90,7 +90,7 @@ module HexaPDF
     #
     # An error is raised if the object doesn't belong to the +source+ document.
     def import(object)
-      mapped_object = @mapper[object.data] if object.kind_of?(HexaPDF::Object)
+      mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
       if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
         raise HexaPDF::Error, "Import error: Incorrect document object for importer"
       elsif mapped_object && mapped_object == @destination.object(mapped_object)
@@ -118,7 +118,8 @@ module HexaPDF
         if object.type == :Catalog || object.type == :Pages
           @mapper[object.data] = nil
         else
-          obj = @mapper[object.data] = object.dup
+          obj = object.dup
+          @mapper[object.data] = NullableWeakRef.new(obj)
           obj.document = @destination.__getobj__
           obj.instance_variable_set(:@data, obj.data.dup)
           obj.data.oid = 0

data/lib/hexapdf/parser.rb CHANGED

@@ -113,7 +113,15 @@ module HexaPDF
         maybe_raise("No indirect object value between 'obj' and 'endobj'", pos: @tokenizer.pos)
         object = nil
       else
-        object = @tokenizer.next_object
+        begin
+          object = @tokenizer.next_object
+        rescue MalformedPDFError
+          # Handle often found invalid indirect object with missing whitespace after number
+          maybe_raise("Invalid object value after 'obj'", pos: @tokenizer.pos,
+                      force: !(tok.kind_of?(Tokenizer::Token) && tok =~ /\A\d+endobj\z/))
+          object = tok.to_i
+          @tokenizer.pos -= 6
+        end
       end
       tok = @tokenizer.next_token

data/lib/hexapdf/pdf_array.rb CHANGED

@@ -66,7 +66,7 @@ module HexaPDF
     #   subclasses of HexaPDF::Object are returned as is (it makes no sense, for example, to return
     #   the hash that describes the Catalog instead of the Catalog object).
     def [](arg1, arg2 = nil)
-      data = value[arg1, *arg2]
+      data = arg2 ? value[arg1, arg2] : value[arg1]
       return if data.nil?
       if arg2 || arg1.kind_of?(Range)

data/lib/hexapdf/serializer.rb CHANGED

@@ -88,13 +88,39 @@ module HexaPDF
     # Creates a new Serializer object.
     def initialize
-      @dispatcher = Hash.new do |h, klass|
-        method = nil
-        klass.ancestors.each do |ancestor_klass|
-          method = "serialize_#{ancestor_klass.name.to_s.downcase.gsub(/::/, '_')}"
-          (h[klass] = method; break) if respond_to?(method, true)
-        end
-        method
+      @dispatcher = {
+        Hash => 'serialize_hash',
+        Array => 'serialize_array',
+        Symbol => 'serialize_symbol',
+        String => 'serialize_string',
+        Integer => 'serialize_integer',
+        Float => 'serialize_float',
+        Time => 'serialize_time',
+        TrueClass => 'serialize_trueclass',
+        FalseClass => 'serialize_falseclass',
+        NilClass => 'serialize_nilclass',
+        HexaPDF::Reference => 'serialize_hexapdf_reference',
+        HexaPDF::Object => 'serialize_hexapdf_object',
+        HexaPDF::Stream => 'serialize_hexapdf_stream',
+        HexaPDF::Dictionary => 'serialize_hexapdf_object',
+        HexaPDF::PDFArray => 'serialize_hexapdf_object',
+        HexaPDF::Rectangle => 'serialize_hexapdf_object',
+      }
+      @dispatcher.default_proc = lambda do |h, klass|
+        h[klass] = if klass <= HexaPDF::Stream
+                     "serialize_hexapdf_stream"
+                   elsif klass <= HexaPDF::Object
+                     "serialize_hexapdf_object"
+                   else
+                     method = nil
+                     klass.ancestors.each do |ancestor_klass|
+                       name = ancestor_klass.name.to_s.downcase
+                       name.gsub!(/::/, '_')
+                       method = "serialize_#{name}"
+                       break if respond_to?(method, true)
+                     end
+                     method
+                   end
       end
       @encrypter = false
       @io = nil

data/lib/hexapdf/version.rb CHANGED

@@ -37,6 +37,6 @@
 module HexaPDF
   # The version of HexaPDF.
-  VERSION = '0.14.0'
+  VERSION = '0.14.1'
 end

data/test/hexapdf/font/true_type/table/test_post.rb CHANGED

@@ -72,7 +72,7 @@ describe HexaPDF::Font::TrueType::Table::Post do
       assert_equal('.notdef', table[0])
       @font.config['font.true_type.unknown_format'] = :raise
-      assert_raises(HexaPDF::Error) { create_table(:Post) }
+      assert_raises(HexaPDF::Error) { create_table(:Post)[0] }
     end
   end
 end

data/test/hexapdf/font/true_type/test_subsetter.rb CHANGED

@@ -27,6 +27,11 @@ describe HexaPDF::Font::TrueType::Subsetter do
     assert_equal(value, @subsetter.subset_glyph_id(5))
   end
+  it "doesn't use certain subset glyph IDs for performance reasons" do
+    1.upto(13) {|i| @subsetter.use_glyph(i) }
+    assert_equal(14, @subsetter.subset_glyph_id(13))
+  end
   it "creates the subset font file" do
     gid = @font[:cmap].preferred_table[0x41]
     @subsetter.use_glyph(gid)

data/test/hexapdf/font_loader/test_from_configuration.rb CHANGED

@@ -8,13 +8,17 @@ describe HexaPDF::FontLoader::FromConfiguration do
   before do
     @doc = HexaPDF::Document.new
     font_file = File.join(TEST_DATA_DIR, "fonts", "Ubuntu-Title.ttf")
-    @doc.config['font.map'] = {'font' => {none: font_file}}
+    @font_obj = HexaPDF::Font::TrueType::Font.new(File.open(font_file, 'rb'))
+    @doc.config['font.map'] = {'font' => {none: font_file}, 'font1' => {none: @font_obj}}
     @klass = HexaPDF::FontLoader::FromConfiguration
   end
   it "loads the configured font" do
     wrapper = @klass.call(@doc, "font")
     assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
+    wrapper = @klass.call(@doc, "font1")
+    assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
+    assert_same(@font_obj, wrapper.wrapped_font)
   end
   it "passes the subset value to the wrapper" do
@@ -24,7 +28,7 @@ describe HexaPDF::FontLoader::FromConfiguration do
     refute(wrapper.subset?)
   end
-  it "fails if the font file cannot be read" do
+  it "fails if the provided font is invalid" do
     @doc.config['font.map']['font'][:none] << "unknown"
     assert_raises(HexaPDF::Error) { @klass.call(@doc, "font") }
   end
@@ -34,6 +38,6 @@ describe HexaPDF::FontLoader::FromConfiguration do
   end
   it "returns a hash with all configured fonts" do
-    assert_equal({'font' => [:none]}, @klass.available_fonts(@doc))
+    assert_equal({'font' => [:none], 'font1' => [:none]}, @klass.available_fonts(@doc))
   end
 end

data/test/hexapdf/font_loader/test_from_file.rb CHANGED

@@ -16,6 +16,13 @@ describe HexaPDF::FontLoader::FromFile do
     assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
   end
+  it "loads the specified font object" do
+    font = HexaPDF::Font::TrueType::Font.new(File.open(@font_file, 'rb'))
+    wrapper = @klass.call(@doc, font)
+    assert_equal("Ubuntu-Title", wrapper.wrapped_font.font_name)
+    assert_same(font, wrapper.wrapped_font)
+  end
   it "passes the subset value to the wrapper" do
     wrapper = @klass.call(@doc, @font_file)
     assert(wrapper.subset?)

data/test/hexapdf/test_dictionary.rb CHANGED

@@ -283,6 +283,11 @@ describe HexaPDF::Dictionary do
       @obj[:TestClass][:Nested][:Nested][:TestClass][:Inherited] = :symbol
       assert(@obj.validate)
     end
+    it "makes sure validation works in special case where the dictionary is modified" do
+      @dict[:Array] = 5
+      refute(@dict.validate {|_, _, object| object[:Boolean] })
+    end
   end
   describe "delete" do

data/test/hexapdf/test_parser.rb CHANGED

@@ -88,6 +88,12 @@ describe HexaPDF::Parser do
       assert_equal('12', TestHelper.collector(stream.fiber))
     end
+    it "handles invalid indirect object value consisting of number followed by endobj without space" do
+      create_parser("1 0 obj 749endobj")
+      object, * = @parser.parse_indirect_object
+      assert_equal(749, object)
+    end
     it "recovers from an invalid stream length value" do
       create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
       obj, _, _, stream = @parser.parse_indirect_object
@@ -151,6 +157,12 @@ describe HexaPDF::Parser do
         assert_match(/not CR alone/, exp.message)
       end
+      it "fails for numbers followed by endobj without space" do
+        create_parser("1 0 obj 749endobj")
+        exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }
+        assert_match(/Invalid object value after 'obj'/, exp.message)
+      end
       it "fails if the stream length value is invalid" do
         create_parser("1 0 obj<</Length 4>> stream\n12endstream endobj")
         exp = assert_raises(HexaPDF::MalformedPDFError) { @parser.parse_indirect_object }

data/test/hexapdf/test_writer.rb CHANGED

@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
       219
       %%EOF
       3 0 obj
-      <</Producer(HexaPDF version 0.14.0)>>
+      <</Producer(HexaPDF version 0.14.1)>>
       endobj
       xref
       3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
       141
       %%EOF
       6 0 obj
-      <</Producer(HexaPDF version 0.14.0)>>
+      <</Producer(HexaPDF version 0.14.1)>>
       endobj
       2 0 obj
       <</Length 10>>stream

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hexapdf
 version: !ruby/object:Gem::Version
-  version: 0.14.0
+  version: 0.14.1
 platform: ruby
 authors:
 - Thomas Leitner
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-12-30 00:00:00.000000000 Z
+date: 2021-01-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cmdparse
@@ -631,7 +631,7 @@ files:
 homepage: https://hexapdf.gettalong.org
 licenses:
 - AGPL-3.0
-- Commercial License
+- Nonstandard
 metadata: {}
 post_install_message:
 rdoc_options: []