RubyGems - hexapdf - Versions diffs - 1.5.0 → 1.7.0 - Mend

hexapdf 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +54 -0
data/README.md +8 -7
data/examples/022-outline.rb +5 -1
data/examples/032-acro_form_list_and_fill.rb +47 -0
data/examples/033-text_extraction.rb +34 -0
data/lib/hexapdf/cli/debug_info.rb +98 -0
data/lib/hexapdf/cli/images.rb +2 -2
data/lib/hexapdf/cli/info.rb +2 -0
data/lib/hexapdf/cli/inspect.rb +5 -1
data/lib/hexapdf/cli.rb +2 -0
data/lib/hexapdf/configuration.rb +8 -0
data/lib/hexapdf/content/canvas.rb +1 -1
data/lib/hexapdf/content/smart_text_extractor.rb +305 -0
data/lib/hexapdf/content.rb +2 -0
data/lib/hexapdf/digital_signature/signing/default_handler.rb +1 -15
data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +21 -8
data/lib/hexapdf/document.rb +7 -3
data/lib/hexapdf/encryption/security_handler.rb +3 -1
data/lib/hexapdf/filter/brotli_decode.rb +88 -0
data/lib/hexapdf/filter.rb +1 -0
data/lib/hexapdf/font/cmap.rb +10 -6
data/lib/hexapdf/font/true_type/builder.rb +1 -1
data/lib/hexapdf/font/true_type/font.rb +13 -0
data/lib/hexapdf/font/true_type/subsetter.rb +7 -2
data/lib/hexapdf/font/true_type/table/directory.rb +5 -0
data/lib/hexapdf/font/true_type.rb +1 -0
data/lib/hexapdf/layout/style.rb +6 -2
data/lib/hexapdf/parser.rb +29 -4
data/lib/hexapdf/revision.rb +6 -2
data/lib/hexapdf/task/pdfa.rb +108 -1
data/lib/hexapdf/type/acro_form/field.rb +4 -1
data/lib/hexapdf/type/acro_form/form.rb +4 -0
data/lib/hexapdf/type/acro_form/text_field.rb +4 -2
data/lib/hexapdf/type/annotations/widget.rb +9 -0
data/lib/hexapdf/type/document_security_store.rb +80 -0
data/lib/hexapdf/type/page.rb +11 -0
data/lib/hexapdf/type.rb +1 -0
data/lib/hexapdf/version.rb +1 -1
data/test/data/pdfa/mismatching_glyph_widths_cidfont_type2.pdf +0 -0
data/test/hexapdf/content/test_smart_text_extractor.rb +129 -0
data/test/hexapdf/digital_signature/common.rb +19 -5
data/test/hexapdf/digital_signature/signing/test_signed_data_creator.rb +29 -4
data/test/hexapdf/digital_signature/test_signatures.rb +3 -3
data/test/hexapdf/encryption/test_security_handler.rb +7 -5
data/test/hexapdf/filter/test_brotli_decode.rb +34 -0
data/test/hexapdf/font/true_type/table/test_directory.rb +5 -3
data/test/hexapdf/font/true_type/test_builder.rb +9 -0
data/test/hexapdf/font/true_type/test_font.rb +17 -3
data/test/hexapdf/font/true_type/test_subsetter.rb +4 -3
data/test/hexapdf/task/test_pdfa.rb +72 -0
data/test/hexapdf/test_document.rb +13 -0
data/test/hexapdf/test_parser.rb +55 -3
data/test/hexapdf/test_revision.rb +27 -6
data/test/hexapdf/type/acro_form/test_field.rb +5 -0
data/test/hexapdf/type/acro_form/test_form.rb +6 -0
data/test/hexapdf/type/acro_form/test_text_field.rb +7 -1
data/test/hexapdf/type/annotations/test_widget.rb +11 -0
data/test/hexapdf/type/test_page.rb +8 -0
data/test/test_helper.rb +6 -0
metadata +41 -4

data/lib/hexapdf/layout/style.rb CHANGED Viewed

@@ -211,6 +211,8 @@ module HexaPDF
         attr_reader :width
         # The colors of each edge. See Quad.
+        #
+        # See: HexaPDF::Content::ColorSpace.device_color_from_specification
         attr_reader :color
         # The styles of each edge. See Quad.
@@ -897,7 +899,7 @@ module HexaPDF
       #
       # The color used for filling (e.g. text), defaults to black.
       #
-      # See: HexaPDF::Content::Canvas#fill_color
+      # See: HexaPDF::Content::ColorSpace.device_color_from_specification
       #
       # Examples:
       #
@@ -926,7 +928,7 @@ module HexaPDF
       #
       # The color used for stroking (e.g. text outlines), defaults to black.
       #
-      # See: HexaPDF::Content::Canvas#stroke_color
+      # See: HexaPDF::Content::ColorSpace.device_color_from_specification
       #
       # Examples:
       #
@@ -1175,6 +1177,8 @@ module HexaPDF
       #
       # The color used for backgrounds, defaults to +nil+ (i.e. no background).
       #
+      # See: HexaPDF::Content::ColorSpace.device_color_from_specification
+      #
       # Examples:
       #
       #   #>pdf-composer100

data/lib/hexapdf/parser.rb CHANGED Viewed

@@ -112,8 +112,18 @@ module HexaPDF
         end
       if xref_entry.oid != 0 && (oid != xref_entry.oid || gen != xref_entry.gen)
-        raise_malformed("The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
-                        "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref")
+        msg = "The oid,gen (#{oid},#{gen}) values of the indirect object don't match " \
+              "the values (#{xref_entry.oid},#{xref_entry.gen}) from the xref"
+        # Some invalid PDFs contain entries where the generation number in the xref is different
+        # from the one found in the indirect object. If the file were reconstructed the generation
+        # number from the indirect object itself would be used.
+        # To gracefully handle such invalid PDFs they need to have a single revision.
+        # The other code part that handles this is in Revision#object.
+        if oid == xref_entry.oid && @document.revisions.count == 1
+          maybe_raise(msg, pos: xref_entry.pos)
+        else
+          raise_malformed(msg)
+        end
       end
       if obj.kind_of?(Reference)
@@ -209,9 +219,24 @@ module HexaPDF
         tok = @tokenizer.next_token
         object[:Length] = length
+        if object.key?(:Filter)
+          begin
+            object[:Filter] = @document.unwrap(object[:Filter])
+          rescue HexaPDF::Error
+            maybe_raise("Invalid /Filter entry for stream", pos: @tokenizer.pos)
+            object.delete(:Filter)
+          end
+        end
+        if object.key?(:DecodeParms)
+          begin
+            object[:DecodeParms] = @document.unwrap(object[:DecodeParms])
+          rescue HexaPDF::Error
+            maybe_raise("Invalid /DecodeParms entry for stream", pos: @tokenizer.pos)
+            object.delete(:DecodeParms)
+          end
+        end
         stream = StreamData.new(@tokenizer.io, offset: pos, length: length,
-                                filter: @document.unwrap(object[:Filter]),
-                                decode_parms: @document.unwrap(object[:DecodeParms]))
+                                filter: object[:Filter], decode_parms: object[:DecodeParms])
       end
       unless tok.kind_of?(Tokenizer::Token) && tok == 'endobj'

data/lib/hexapdf/revision.rb CHANGED Viewed

@@ -128,6 +128,11 @@ module HexaPDF
         @objects[oid, gen]
       elsif (xref_entry = @xref_section[oid, gen])
         load_object(xref_entry)
+      elsif (xref_entry = @xref_section[oid]) && (obj = load_object(xref_entry))&.gen == gen
+        # This branch handles invalid PDFs with a single revision containing xref entries where the
+        # gen doesn't match the gen of the indirect object. Also see the special handling in
+        # Parser#load_object.
+        obj
       else
         nil
       end
@@ -219,8 +224,7 @@ module HexaPDF
         seen = {}
         @objects.each {|oid, _gen, data| seen[oid] = true; yield(data) }
         @xref_section.each do |oid, _gen, data|
-          next if seen.key?(oid)
-          yield(@objects[oid] || load_object(data))
+          yield(@objects[oid] || load_object(data)) unless seen.key?(oid)
         end
         @all_objects_loaded = true
       end

data/lib/hexapdf/task/pdfa.rb CHANGED Viewed

@@ -40,6 +40,7 @@ require 'hexapdf/content/parser'
 require 'hexapdf/content/operator'
 require 'hexapdf/type/xref_stream'
 require 'hexapdf/type/object_stream'
+require 'hexapdf/font/true_type'
 module HexaPDF
   module Task
@@ -51,6 +52,13 @@ module HexaPDF
     # * prevents the Standard 14 PDF fonts to be used.
     # * adds an appropriate output intent if none is set.
     # * adds the necessary PDF/A metadata properties.
+    #
+    # Additionally, it applies fixes to the document so that the structures and content of
+    # non-conforming PDFs are corrected. See ::call for more information on the available fixes.
+    #
+    # Note that you should use a PDF/A validation tool like veraPDF (https://verapdf.org/) to ensure
+    # that the resulting files conform to the PDF/A specification because not all documents can be
+    # fixed at the moment.
     module PDFA
       # Performs the necessary tasks to make the document PDF/A compatible.
@@ -58,7 +66,22 @@ module HexaPDF
       # +level+::
       #     Specifies the PDF/A conformance level that should be used. Can be one of the following
       #     strings: 2b, 2u, 3b, 3u.
-      def self.call(doc, level: '3u')
+      #
+      # +fixes+::
+      #     Specifies the fixes that should be applied when converting a non-conforming PDF. If a
+      #     document is created with HexaPDF but also includes parts of loaded documents, this
+      #     argument has to be set to +:all+.
+      #
+      #     Can be +:default+ (which is also the default value), +:all+ or an array with one or more
+      #     fix names.
+      #
+      #     +:default+:: Applies all fixes if the document was loaded from a file. Otherwise applies
+      #         only those fixes necessary for files created with HexaPDF.
+      #
+      #     +:all+:: Applies all available fixes.
+      #
+      #     +:glyph_widths+:: Corrects mismatching width information in fonts.
+      def self.call(doc, level: '3u', fixes: :default)
         unless level.match?(/\A[23][bu]\z/)
           raise ArgumentError, "The given PDF/A conformance level '#{level}' is not supported"
         end
@@ -68,6 +91,15 @@ module HexaPDF
           doc.metadata.property('pdfaid', 'part', part)
           doc.metadata.property('pdfaid', 'conformance', conformance.upcase)
           add_srgb_icc_output_intent(doc) unless doc.catalog.key?(:OutputIntents)
+          fixes = if fixes == :all || (fixes == :default && doc.revisions.parser)
+                    ALL_FIXES
+                  elsif fixes == :default
+                    ALL_FIXES - FIXES_FOR_LOADED_DOCUMENTS
+                  else
+                    fixes
+                  end
+          fixes.each {|fix| send(fix, doc) }
         end
       end
@@ -81,6 +113,81 @@ module HexaPDF
         ]
       end
+      ALL_FIXES = [:fix_glyph_widths] # :nodoc:
+      FIXES_FOR_LOADED_DOCUMENTS = [:fix_glyph_widths] # :nodoc:
+      # Makes the glyph widths stored in the embedded fonts the same as the ones specified in the
+      # PDF font data structures.
+      #
+      # Note: Currently only handles Type 2 CIDFonts.
+      def self.fix_glyph_widths(doc) # :nodoc:
+        # Step 1: Collect all CIDs together with their respective fonts
+        processor = CIDCollector.new
+        doc.pages.each do |page|
+          page.process_contents(processor)
+          page.each_annotation do |annotation|
+            next unless (appearance = annotation.appearance)
+            appearance.process_contents(processor, original_resources: page.resources)
+          end
+        end
+        # Step 2: Process all found fonts
+        processor.map.each do |font_object, all_cids|
+          next if all_cids.empty?
+          font = HexaPDF::Font::TrueType::Font.new(StringIO.new(font_object.font_file.stream))
+          cid_to_gid = cid_to_gid_mapping(font_object)
+          # Process all found CIDs by comparing their width with the ones defined in the font and
+          # correcting the font if necessary.
+          raw_hmtx = font[:hmtx].raw_data
+          width_conversion_factor = 1000.0 / font[:head].units_per_em
+          all_cids.each do |cid|
+            cid_width = font_object.width(cid)
+            gid = cid_to_gid[cid]
+            gid_width = font[:hmtx][gid].advance_width * width_conversion_factor
+            next if (cid_width - gid_width).abs.round <= 1
+            raw_hmtx[4 * gid, 2] = [(cid_width / width_conversion_factor).round].pack('n')
+          end
+          font_object.font_file.stream = font.build('hmtx' => raw_hmtx)
+        end
+      end
+      # Processes the contents of a stream and collects the CIDs for each composite font.
+      class CIDCollector < HexaPDF::Content::Processor
+        # The mapping from the composite font's descendant font to the set of used CIDs.
+        attr_reader :map
+        def initialize(*) # :nodoc:
+          super
+          @map = Hash.new {|h, k| h[k] = Set.new }
+        end
+        def show_text(data) # :nodoc:
+          font = graphics_state.font
+          return unless font[:Subtype] == :Type0 && font.descendant_font[:Subtype] == :CIDFontType2
+          Array(data).each do |item|
+            next if item.kind_of?(Numeric)
+            @map[font.descendant_font].merge(font.decode(item))
+          end
+        end
+        alias show_text_with_positioning show_text
+      end
+      # Returns an object responding to #[] that maps CIDs to GIDs for Type 2 CIDFonts.
+      def self.cid_to_gid_mapping(font)
+        if font[:CIDToGIDMap] == :Identity
+          proc {|cid| cid }
+        else
+          font[:CIDToGIDMap].stream.unpack('n*')
+        end
+      end
+      private_class_method :cid_to_gid_mapping
     end
   end

data/lib/hexapdf/type/acro_form/field.rb CHANGED Viewed

@@ -291,7 +291,10 @@ module HexaPDF
           if embedded_widget?
             yield(document.wrap(self))
           elsif terminal_field?
-            self[:Kids]&.each {|kid| yield(document.wrap(kid)) }
+            self[:Kids]&.each do |kid|
+              kid = document.wrap(kid)
+              yield(kid) if kid.type == :Annot && kid[:Subtype] == :Widget
+            end
           end
           unless direct_only

data/lib/hexapdf/type/acro_form/form.rb CHANGED Viewed

@@ -412,6 +412,8 @@ module HexaPDF
         #
         # * For radio buttons the value needs to be a String or a Symbol representing the name of
         #   the radio button widget to select.
+        #
+        # * Values for password fields are ignored as they should not be stored in the PDF.
         def fill(data)
           data.each do |field_name, value|
             field = field_by_name(field_name)
@@ -427,6 +429,8 @@ module HexaPDF
                                   when /\A(?:n(o)?|f(alse)?)\z/ then false
                                   else value
                                   end
+            when :password_field
+              # Ignore the value
             else
               raise HexaPDF::Error, "AcroForm field type #{field.concrete_field_type} not yet supported"
             end

data/lib/hexapdf/type/acro_form/text_field.rb CHANGED Viewed

@@ -344,8 +344,10 @@ module HexaPDF
           super
           if self[:V] && !(self[:V].kind_of?(String) || self[:V].kind_of?(HexaPDF::Stream))
-            yield("Text field doesn't contain text but #{self[:V].class} object")
-            return
+            correctable = self[:V].kind_of?(Symbol)
+            yield("Text field doesn't contain text but an object of type #{self[:V].class}", correctable)
+            return unless correctable
+            self[:V] = self[:V].to_s
           end
           if (max_len = self[:MaxLen]) && field_value && field_value.length > max_len
             correctable = true

data/lib/hexapdf/type/annotations/widget.rb CHANGED Viewed

@@ -250,6 +250,15 @@ module HexaPDF
           end
         end
+        private
+        def perform_validation(&block) #:nodoc:
+          super
+          if !key?(:Parent) && (field = form_field) == self
+            field.validate(&block)
+          end
+        end
       end
     end

data/lib/hexapdf/type/document_security_store.rb ADDED Viewed

@@ -0,0 +1,80 @@
+# -*- encoding: utf-8; frozen_string_literal: true -*-
+#
+#--
+# This file is part of HexaPDF.
+#
+# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
+# Copyright (C) 2014-2025 Thomas Leitner
+#
+# HexaPDF is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License version 3 as
+# published by the Free Software Foundation with the addition of the
+# following permission added to Section 15 as permitted in Section 7(a):
+# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
+# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
+# INFRINGEMENT OF THIRD PARTY RIGHTS.
+#
+# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
+#
+# The interactive user interfaces in modified source and object code
+# versions of HexaPDF must display Appropriate Legal Notices, as required
+# under Section 5 of the GNU Affero General Public License version 3.
+#
+# In accordance with Section 7(b) of the GNU Affero General Public
+# License, a covered work must retain the producer line in every PDF that
+# is created or manipulated using HexaPDF.
+#
+# If the GNU Affero General Public License doesn't fit your need,
+# commercial licenses are available at <https://gettalong.at/hexapdf/>.
+#++
+require 'hexapdf/dictionary'
+module HexaPDF
+  module Type
+    # The document security store (DSS) dictionary contains data needed for verifying digital
+    # signatures.
+    #
+    # See: PDF2.0 s12.8.4.3
+    class DocumentSecurityStore < Dictionary
+      # The validation-related information (VRI) dictionary contains validation information for one
+      # signature. It signifies that the signature has been validated using this information.
+      #
+      # See: PDF2.0 s12.8.4.4
+      class ValidationRelatedInformation < Dictionary
+        define_type :VRI
+        define_field :Type, type: Symbol, default: type
+        define_field :Cert, type: PDFArray
+        define_field :CRL,  type: PDFArray
+        define_field :OCSP, type: PDFArray
+        define_field :TU,   type: PDFDate
+        define_field :TS,   type: Stream
+      end
+      define_type :DSS
+      define_field :Type,  type: Symbol, default: type
+      define_field :VRI,   type: Dictionary
+      define_field :Certs, type: PDFArray
+      define_field :OCSPs, type: PDFArray
+      define_field :CRLs,  type: PDFArray
+      define_field :SW, type: Symbol,   default: :A, allowed_values: [:A, :B, :S, :N]
+      define_field :S,  type: Symbol,   default: :P, allowed_values: [:A, :P]
+      define_field :A,  type: PDFArray, default: [0.5, 0.5]
+      define_field :FB, type: Boolean,  default: false, version: '1.5'
+    end
+  end
+end

data/lib/hexapdf/type/page.rb CHANGED Viewed

@@ -395,6 +395,17 @@ module HexaPDF
         Content::Parser.parse(contents, processor)
       end
+      # Extracts the layouted text from the page.
+      #
+      # See HexaPDF::Content::SmartTextExtractor.layout_text_runs for the available +options+.
+      def extract_text(**options)
+        processor = Content::SmartTextExtractor::TextRunProcessor.new
+        process_contents(processor)
+        box = box(:media)
+        Content::SmartTextExtractor.layout_text_runs(processor.text_runs, box.width, box.height,
+                                                     **options)
+      end
       # Returns the index of the page in the page tree.
       def index
         idx = 0

data/lib/hexapdf/type.rb CHANGED Viewed

@@ -89,6 +89,7 @@ module HexaPDF
     autoload(:MarkedContentReference, 'hexapdf/type/marked_content_reference')
     autoload(:ObjectReference, 'hexapdf/type/object_reference')
     autoload(:Measure, 'hexapdf/type/measure')
+    autoload(:DocumentSecurityStore, 'hexapdf/type/document_security_store')
   end

data/lib/hexapdf/version.rb CHANGED Viewed

@@ -37,6 +37,6 @@
 module HexaPDF
   # The version of HexaPDF.
-  VERSION = '1.5.0'
+  VERSION = '1.7.0'
 end

data/test/data/pdfa/mismatching_glyph_widths_cidfont_type2.pdf ADDED Viewed

Binary file

data/test/hexapdf/content/test_smart_text_extractor.rb ADDED Viewed

@@ -0,0 +1,129 @@
+# -*- encoding: utf-8 -*-
+require 'test_helper'
+require 'hexapdf/content/smart_text_extractor'
+require 'hexapdf/document'
+describe HexaPDF::Content::SmartTextExtractor::TextRunCollector::TextRun do
+  it "has various accessors" do
+    text_run = HexaPDF::Content::SmartTextExtractor::TextRunCollector::TextRun.new('s', 1, 2, 3, 5)
+    assert_equal('s', text_run.string)
+    assert_equal(2, text_run.width)
+    assert_equal(3, text_run.height)
+  end
+end
+describe HexaPDF::Content::SmartTextExtractor::TextRunProcessor do
+  it "turns glyphs into TextRun objects" do
+    processor = HexaPDF::Content::SmartTextExtractor::TextRunProcessor.new
+    doc = HexaPDF::Document.new
+    page = doc.pages.add
+    page.canvas.font('Helvetica', size: 10).
+      text('Te', at: [10, 500]).
+      text_matrix(0.866, -0.5, 0.5, 0.866, 0, 0).
+      text('Te')
+    page.process_contents(processor)
+    assert_equal([['T', 10, 497.75, 16.11, 509.31], ['e', 16.11, 497.75, 21.67, 509.31],
+                  ["T", -1.125, -5.0035, 9.94626, 8.06246],
+                  ["e", 4.16626, -7.7835, 14.761220000000002, 5.00746]],
+                  processor.text_runs.map(&:to_a))
+  end
+end
+describe HexaPDF::Content::SmartTextExtractor do
+  def text_run(str, left, bottom, right, top)
+    HexaPDF::Content::SmartTextExtractor::TextRunCollector::TextRun.new(str, left, bottom, right, top)
+  end
+  def layout_runs(runs, width = 595, height = 842, **options)
+    runs = runs.map {|args| text_run(*args) }
+    HexaPDF::Content::SmartTextExtractor.layout_text_runs(runs, width, height, **options)
+  end
+  it "works for a page with no text" do
+    assert_equal('', layout_runs([]))
+  end
+  it "works for a single run on the left side of the page" do
+    assert_equal('test', layout_runs([['test', 0, 100, 20, 110]]))
+  end
+  it "works for a single run not on the left side of the page" do
+    assert_equal('test', layout_runs([['test', 50, 100, 70, 110]]))
+  end
+  it "preserves the relative indent" do
+    assert_equal("Hello\n     World", layout_runs([['Hello', 50, 100, 70, 110],
+                                                   ['World', 70, 80, 90, 100]]))
+  end
+  it "combines text runs if they have the same top/bottom and there is less than 1pt between them" do
+    x = +'Hello'
+    assert_equal('HelloWorld', layout_runs([[x, 50, 100, 60, 110],
+                                            ['World', 60, 100, 70, 110]]))
+    assert_equal('HelloWorld', x)
+  end
+  it "preserves the space between two runs" do
+    assert_equal('Hello World', layout_runs([['Hello', 50, 100, 70, 110],
+                                             ['World', 72, 100, 92, 110]]))
+    assert_equal('Hello   World', layout_runs([['Hello', 50, 100, 70, 110],
+                                               ['World', 80, 100, 100, 110]]))
+ end
+  it "inserts a space after very narrow text parts if necessary" do
+    assert_equal('Hello World!', layout_runs([['Hello', 50, 100, 60, 110],
+                                              ['World!', 63, 100, 87, 110]]))
+ end
+  it "preserves the visual horizontal ordering of two runs" do
+    assert_equal('Hello World', layout_runs([['World', 72, 100, 92, 110],
+                                             ['Hello', 50, 100, 70, 110]]))
+  end
+  it "preserves the visual vertical ordering of two runs" do
+    assert_equal("Hello\nWorld", layout_runs([['World', 50, 80, 70, 100],
+                                              ['Hello', 50, 100, 70, 110]]))
+  end
+  it "inserts a single blank line between paragraphs" do
+    assert_equal("Hello\nWorld\n\nHere",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100],
+                              ['Here', 50, 65, 66, 75]]))
+  end
+  it "inserts multiply lines for large gaps between paragraphs" do
+    assert_equal("Hello\nWorld\nHere\n\n\n\n\n\n\nFoot",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100],
+                              ['Here', 50, 80, 70, 90],
+                              ['Foot', 50, 10, 66, 20]]))
+  end
+  it "ignores outliers when calculating the normal line spacing" do
+    assert_equal("Hello\nWorld\n\n\n\nHere",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100],
+                              ['Here', 50, 50, 70, 60]]))
+  end
+  it "can use a different line_tolerance_factor" do
+    assert_equal("HelloWorld",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100]], line_tolerance_factor: 1))
+  end
+  it "can use a different paragraph_distance_threshold" do
+    assert_equal("Hello\n\nWorld",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100]], paragraph_distance_threshold: 1))
+  end
+  it "can use a different large_distance_threshold" do
+    assert_equal("Hello\nWorld\n\nHere",
+                 layout_runs([['Hello', 50, 100, 70, 110],
+                              ['World', 50, 90, 70, 100],
+                              ['Here', 50, 50, 66, 60]], large_distance_threshold: 8))
+  end
+end

data/test/hexapdf/digital_signature/common.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module HexaPDF
         @ca_certificate ||=
           begin
             cert = create_cert(name: '/C=AT/O=HexaPDF/CN=HexaPDF Test Root CA', serial: 0,
-                               public_key: ca_key.public_key)
+                               public_key: ca_key)
             add_extensions(cert, cert, ca_key, is_ca: true, key_usage: 'cRLSign,keyCertSign')
             cert
           end
@@ -27,7 +27,7 @@ module HexaPDF
         @signer_certificate ||=
           begin
             cert = create_cert(name: '/CN=RSA signer/DC=gettalong', serial: 2,
-                               public_key: signer_key.public_key, issuer: ca_certificate)
+                               public_key: signer_key, issuer: ca_certificate)
             add_extensions(cert, ca_certificate, ca_key, key_usage: 'digitalSignature')
             cert
           end
@@ -37,7 +37,7 @@ module HexaPDF
         @non_repudiation_signer_certificate ||=
           begin
             cert = create_cert(name: '/CN=Non repudiation signer/DC=gettalong', serial: 2,
-                               public_key: signer_key.public_key, issuer: ca_certificate)
+                               public_key: signer_key, issuer: ca_certificate)
             add_extensions(cert, ca_certificate, ca_key, key_usage: 'nonRepudiation')
             cert
           end
@@ -51,7 +51,21 @@ module HexaPDF
         @dsa_signer_certificate ||=
           begin
             cert = create_cert(name: '/CN=DSA signer/DC=gettalong', serial: 3,
-                               public_key: dsa_signer_key.public_key, issuer: ca_certificate)
+                               public_key: dsa_signer_key, issuer: ca_certificate)
+            add_extensions(cert, ca_certificate, ca_key, key_usage: 'digitalSignature')
+            cert
+          end
+      end
+      def ecdsa_signer_key
+        @ecdsa_signer_key ||= OpenSSL::PKey::EC.generate('sect163k1')
+      end
+      def ecdsa_signer_certificate
+        @ecdsa_signer_certificate ||=
+          begin
+            cert = create_cert(name: '/CN=ECDSA signer/DC=gettalong', serial: 4,
+                               public_key: ecdsa_signer_key, issuer: ca_certificate)
             add_extensions(cert, ca_certificate, ca_key, key_usage: 'digitalSignature')
             cert
           end
@@ -61,7 +75,7 @@ module HexaPDF
         @timestamp_certificate ||=
           begin
             cert = create_cert(name: '/CN=timestamp/DC=gettalong', serial: 3,
-                               public_key: signer_key.public_key, issuer: ca_certificate)
+                               public_key: signer_key, issuer: ca_certificate)
             add_extensions(cert, ca_certificate, ca_key, key_usage: 'digitalSignature',
                            extended_key_usage: 'timeStamping')
             cert

data/test/hexapdf/digital_signature/signing/test_signed_data_creator.rb CHANGED Viewed

@@ -154,10 +154,35 @@ describe HexaPDF::DigitalSignature::Signing::SignedDataCreator do
       assert_equal(CERTIFICATES.signer_key.sign('SHA256', to_sign), @structure.value[5].value)
     end
-    it "fails if the signature algorithm is not supported" do
-      @signed_data.certificate = CERTIFICATES.dsa_signer_certificate
-      @signed_data.key = CERTIFICATES.dsa_signer_key
-      assert_raises(HexaPDF::Error) { @signed_data.create("data") }
+    describe "DSA key pair" do
+      before do
+        @signed_data.certificate = CERTIFICATES.dsa_signer_certificate
+        @signed_data.key = CERTIFICATES.dsa_signer_key
+      end
+      it "works with a DSA key pair" do
+        @structure = @signed_data.create("data").value[1].value[4].value[0]
+        assert_equal('2.16.840.1.101.3.4.3.2', @structure.value[4].value[0].value)
+        assert_nil(@structure.value[4].value[1].value)
+      end
+      it "fails if the digest algorithm is not SHA256" do
+        @signed_data.digest_algorithm = 'sha512'
+        assert_raises { @signed_data.create("data") }
+      end
+    end
+    describe "ECDSA key pair" do
+      before do
+        @signed_data.certificate = CERTIFICATES.ecdsa_signer_certificate
+        @signed_data.key = CERTIFICATES.ecdsa_signer_key
+      end
+      it "works with an ECDSA key pair" do
+        structure = @signed_data.create("data").value[1].value[4].value[0]
+        assert_equal('1.2.840.10045.4.3.2', structure.value[4].value[0].value)
+        assert_nil(structure.value[4].value[1].value)
+      end
     end
     it "can use a different digest algorithm" do